Inspect Modelling Performance

suppressPackageStartupMessages(library(tidyverse))
library(targets)
library(DT)
# Evaluate chunks with the directory two levels up as knitr's root directory
# (presumably the project root that holds the targets store; confirm).
knitr::opts_knit$set(root.dir = "../../")

VIF

Maximum variance inflation factor (VIF) per category for the final model.

# Load the stored `df_cv_preds_and_coefs` target.
df_cv_preds_and_coefs <- tar_read(df_cv_preds_and_coefs)

# Pull the "coefs" element out of every `pred` entry, drop `pred`, and flatten
# the result into one long data frame keyed by year and category.
df_coefs <- df_cv_preds_and_coefs %>%
  select(year_start_act, category_id, pred) %>%
  mutate(coefs = map(pred, "coefs"), pred = NULL) %>%
  # `coefs` is unnested twice; presumably it is a list wrapping a data frame
  # (TODO confirm against the target's structure).
  unnest(coefs) %>%
  unnest(coefs)

# Largest `vif` value per `category_id`, restricted to rows with
# `year_start_act == 2019`; missing VIF values are ignored.
knitr::kable(
  df_coefs %>%
    filter(year_start_act == 2019) %>%
    group_by(category_id) %>%
    summarise(max_vif = max(vif, na.rm = TRUE))
)
category_id max_vif
cnsn 1.253876
dtin 1.105846
ptpe 1.048755
sfty 1.966747
spno 1.045658

Performance Metrics

# Load the stored `df_perf` target; the tables below filter it by `.metric`.
df_perf <- tar_read(df_perf)

The AUC and Brier score were calculated for each test set, and then the mean and standard error were calculated across the test sets.

AUC

# Show only the ROC AUC rows of the performance summary, rounded to 2 digits.
knitr::kable(
  dplyr::filter(df_perf, .metric == "roc_auc"),
  digits = 2
)
category_id .metric mean sd n
cnsn roc_auc 0.61 0.15 8
dtin roc_auc 0.60 0.10 8
ptpe roc_auc 0.59 0.06 8
sfty roc_auc 0.63 0.07 8
spno roc_auc 0.53 0.06 8

Brier

# Show only the Brier score rows of the performance summary, rounded to 2 digits.
knitr::kable(
  dplyr::filter(df_perf, .metric == "brier"),
  digits = 2
)
category_id .metric mean sd n
cnsn brier 0.24 0.03 8
dtin brier 0.19 0.04 8
ptpe brier 0.23 0.04 8
sfty brier 0.25 0.03 8
spno brier 0.24 0.03 8

Calibration

  • Predictions from all test sets are pooled.
  • Predicted test-set probabilities are split into 4 bins with a minimum of 100 observations per bin.
  • Observed frequencies are calculated for each bin, and prop.test() is used to calculate confidence intervals.

Linear Calibration

# Tabulate the linear calibration summary per category, rounded to 3 digits.
# `delta` is the difference between the `upper` and `lower` columns.
knitr::kable(
  tar_read(df_calib) %>%
    select(-plot_data) %>%  # drop the plotting data before tabulating
    mutate(delta = upper - lower) %>%
    select(category_id, lower, base_rate, upper, delta, intercept, slope),
  digits = 3
)
category_id lower base_rate upper delta intercept slope
cnsn 0.338 0.455 0.605 0.267 0.171 0.603
dtin 0.489 0.726 0.854 0.365 0.374 0.506
ptpe 0.537 0.691 0.786 0.249 0.482 0.304
sfty 0.261 0.474 0.694 0.434 0.193 0.577
spno 0.628 0.637 0.654 0.026 0.654 -0.026

Plot

# Read the stored `p_calib` target; as a top-level expression its value is
# printed into the report (presumably the calibration plot; confirm).
tar_read(p_calib)