Performance (Statistical)
suppressPackageStartupMessages(library(furrr))
suppressPackageStartupMessages(library(future))
plan(multisession, workers = 6)
Load Portfolio
df_config <- readr::read_csv("ae_conf_20240220.csv")
df_ae_rates <- readr::read_csv("ae_rates_20240220.csv")
df_portf_flex <- sim_test_data_portfolio(df_config, df_ae_rates = df_ae_rates, parallel = TRUE, progress = TRUE)
df_portf_flex %>%
readr::write_csv("portf.csv")
df_portf_flex <- readr::read_csv("portf.csv")
Simulating Under Reporting
Here we reanalyze performance of {simaerep} integrating the lessons learned from previous versions. We will apply the following.
- generate portfolio using flexible AE rates
- remove AEs directly from the data set and not from an aggregated
metric using
sim_ur()
- set threshold for confusion matrix so that all methods have similar fpr
- test the following {simaerep} parameters with and without
correcting for multiplicity:
- default algorithm
- default algorithm with active over-reporting scoring
- inframe algorithm with visit_med75
- inframe algorithm
- test the following outlier detection methods
- box-plot
- funnel-plot
Functions
funnel <- function(df) {
df %>%
filter(visit == max(visit), .by = patnum) %>%
summarise(
Metric = sum(.data$n_ae) / sum(.data$visit),
n_ae = sum(n_ae),
visit = sum(visit),
.by = "site_number"
) %>%
mutate(
vMu = sum(.data$n_ae) / sum(.data$visit),
z_0 = ifelse(.data$vMu == 0,
0,
(.data$Metric - .data$vMu) /
sqrt(.data$vMu / .data$visit)
),
phi = mean(.data$z_0^2),
z_i = ifelse(.data$vMu == 0 | .data$phi == 0,
0,
(.data$Metric - .data$vMu) /
sqrt(.data$phi * .data$vMu / .data$visit)
)
)
}
box <- function(df) {
df <- df %>%
filter(visit == max(visit), .by = patnum) %>%
summarise(
event_per_visit = sum(.data$n_ae) / sum(.data$visit),
.by = "site_number"
)
bx <- boxplot.stats(df$event_per_visit)
df <- df %>%
mutate(
box_out = event_per_visit < bx$stats[1]
)
}
perf <- function(df_visit, study_id, site_number, ur_rate) {
df_vs_study <- df_visit %>%
sim_ur(study_id, site_number, ur_rate)
df_visit_med75 <- df_vs_study %>%
simaerep(under_only = TRUE, progress = FALSE, check = FALSE) %>%
.$df_eval %>%
filter(.data$site_number == .env$site_number)
df_visit_med75_over <- df_vs_study %>%
simaerep(under_only = FALSE, progress = FALSE, check = FALSE) %>%
.$df_eval %>%
filter(.data$site_number == .env$site_number)
df_inframe <- df_vs_study %>%
simaerep(inframe = TRUE, under_only = TRUE, check = FALSE, visit_med75 = FALSE) %>%
.$df_eval %>%
filter(.data$site_number == .env$site_number)
df_inframe_visit_med75 <- df_vs_study %>%
simaerep(inframe = TRUE, under_only = TRUE, check = FALSE, visit_med75 = TRUE) %>%
.$df_eval %>%
filter(.data$site_number == .env$site_number)
funnel_zi <- funnel(df_vs_study) %>%
filter(.data$site_number == .env$site_number) %>%
pull(z_i)
box_out <- box(df_vs_study) %>%
filter(.data$site_number == .env$site_number) %>%
pull(box_out)
tibble(
score_visit_med75 = df_visit_med75$prob_low_prob_ur,
score_visit_med75_no_mult = 1 - df_visit_med75$prob_low,
score_visit_med75_over = df_visit_med75_over$prob_low_prob_ur,
score_visit_med75_over_no_mult = 1 - df_visit_med75_over$prob_low,
score_inframe = df_inframe$prob_low_prob_ur,
score_inframe_no_mult = 1 - df_inframe$prob_low,
score_inframe_visit_med75 = df_inframe_visit_med75$prob_low_prob_ur,
score_inframe_visit_med75_no_mult = 1 - df_inframe_visit_med75$prob_low,
score_funnel_zi = funnel_zi,
score_box_out = as.integer(box_out),
stat_visit_med75_visit_med75 = df_visit_med75$visit_med75,
stat_visit_med75_n_pat_with_med75 = df_visit_med75$n_pat_with_med75,
stat_visit_med75_mean_ae_site_med75 = df_visit_med75$mean_ae_site_med75,
stat_visit_med75_mean_ae_study_med75 = df_visit_med75$mean_ae_study_med75,
stat_inframe_visit_med75 = df_inframe_visit_med75$visit_med75,
stat_inframe_visit_med75_n_pat_with_med75 = df_inframe_visit_med75$n_pat_with_med75,
stat_inframe_visit_med75_events_per_visit_site = df_inframe_visit_med75$events_per_visit_site,
stat_inframe_visit_med75_events_per_visit_study = df_inframe_visit_med75$events_per_visit_study,
stat_inframe_n_pat = df_inframe$n_pat,
stat_inframe_events_per_visit_site = df_inframe$events_per_visit_site,
stat_inframe_events_per_visit_study = df_inframe$events_per_visit_study,
)
}
# perf(df_portf_flex, study_id = "0010", site_number = "15153", ur_rate = 0) %>% unlist()
# perf(df_portf_flex, study_id = "0010", site_number = "15153", ur_rate = 0.5) %>% unlist()
# perf(df_portf_flex, study_id = "0010", site_number = "15153", ur_rate = 0.75) %>% unlist()
# perf(df_portf_flex, study_id = "0010", site_number = "15153", ur_rate = 1) %>% unlist()
Grid
df_grid <- df_portf_flex %>%
distinct(study_id, site_number) %>%
# to reduce calculation time we only take every xth study
filter(dense_rank(study_id)%%5 == 0) %>%
mutate(ur_rate = list(c(0, 0.1, 0.25, 0.5, 0.75, 1))) %>%
unnest(ur_rate)
df_grid
## # A tibble: 27,120 × 3
## study_id site_number ur_rate
## <chr> <chr> <dbl>
## 1 0005 4480 0
## 2 0005 4480 0.1
## 3 0005 4480 0.25
## 4 0005 4480 0.5
## 5 0005 4480 0.75
## 6 0005 4480 1
## 7 0005 4481 0
## 8 0005 4481 0.1
## 9 0005 4481 0.25
## 10 0005 4481 0.5
## # ℹ 27,110 more rows
Apply
with_progress_cnd(
df_perf <- df_grid %>%
mutate(
perf = purrr_bar(
list(study_id, site_number, ur_rate),
.purrr = furrr::future_pmap,
.f = function(x, y, z) perf(df_portf_flex, x, y, z),
.purrr_args = list(.options = furrr_options(seed = TRUE)),
.steps = nrow(.)
)
)
)
df_perf %>%
unnest(perf) %>%
readr::write_csv("perf.csv")
df_perf <- readr::read_csv("perf.csv", show_col_types = FALSE)
df_perf_long <- df_perf %>%
pivot_longer(cols = - c(study_id, site_number, ur_rate), names_to = "type", values_to = "score") %>%
filter(startsWith(type, "score_")) %>%
mutate(type = stringr::str_replace(type, "score_", ""))
Evaluation
Thresholds
We set the thresholds so that we get a fpr of 0.01.
Note that this results in probability thresholds ~ 0.99 for scores w/o multiplicity correction and in the recommended funnel plot score threshold of -2.
target_fpr <- 0.01
df_thresh <- df_perf_long %>%
group_by(type) %>%
nest() %>%
ungroup() %>%
mutate(
data = map(data, ~ filter(., ur_rate == 0)),
thresh1 = map_dbl(data, ~ quantile(pull(., score), 1 - target_fpr)),
thresh2 = map_dbl(data, ~ quantile(pull(., score), target_fpr)),
thresh = ifelse(type == "funnel_zi", thresh2, thresh1)
) %>%
select(type, thresh)
df_thresh
## # A tibble: 10 × 2
## type thresh
## <chr> <dbl>
## 1 visit_med75 0.455
## 2 visit_med75_no_mult 0.989
## 3 visit_med75_over 0.455
## 4 visit_med75_over_no_mult 0.989
## 5 inframe 0.384
## 6 inframe_no_mult 0.982
## 7 inframe_visit_med75 0.412
## 8 inframe_visit_med75_no_mult 0.987
## 9 funnel_zi -2.03
## 10 box_out 1
Aggregate
get_prop_test_ci95 <- function(..., ix) {
stopifnot(ix %in% c(1, 2))
tryCatch(
prop.test(...)$conf.int[ix],
error = function(cnd) c(NA, NA)[ix]
)
}
df_aggr <- df_perf_long %>%
left_join(df_thresh, by = "type") %>%
mutate(
is_ur = ifelse(type == "funnel_zi", score <= thresh, score >= thresh),
is_ur = ifelse(type == "box_out", score == 1, is_ur)
) %>%
summarise(
n = n(),
.by = c(type, ur_rate, is_ur)
) %>%
pivot_wider(
names_from = is_ur,
values_from = n,
names_prefix = "is_ur_",
values_fill = 0
) %>%
mutate(
n_sites = is_ur_TRUE + is_ur_FALSE,
ratio = is_ur_TRUE / n_sites,
ratio_type = ifelse(ur_rate == 0, "fpr", "tpr"),
ci95_low = map2_dbl(is_ur_TRUE, n_sites, ~ get_prop_test_ci95(.x, .y, ix = 1)),
ci95_high = map2_dbl(is_ur_TRUE, n_sites, ~ get_prop_test_ci95(.x, .y, ix = 2)),
type_strip = str_replace(type, "_no_mult", ""),
has_mult = ! str_detect(type, "no_mult") & ! type %in% c("funnel_zi", "box_out")
)
Table
Methods:
- visit_med75: default algorithm
- visit_med75_over: default algorithm including over-reporting score
- inframe: new algorithm using table operations and no visit_med75
- inframe visit_med75: new algorithm using table operations and visit_med75
- funnel_zi: funnel plot derived outlier detection
- box_out: box plot derived outlier detection
FN: false negatives TP: true positives
df_aggr %>%
select(method = type_strip, has_mult, ur_rate, FN = is_ur_FALSE, TP = is_ur_TRUE, n_sites, ratio_type, ratio, ci95_low, ci95_high) %>%
knitr::kable(digits = 4)
method | has_mult | ur_rate | FN | TP | n_sites | ratio_type | ratio | ci95_low | ci95_high |
---|---|---|---|---|---|---|---|---|---|
visit_med75 | TRUE | 0.00 | 4474 | 46 | 4520 | fpr | 0.0102 | 0.0075 | 0.0137 |
visit_med75 | FALSE | 0.00 | 4474 | 46 | 4520 | fpr | 0.0102 | 0.0075 | 0.0137 |
visit_med75_over | TRUE | 0.00 | 4474 | 46 | 4520 | fpr | 0.0102 | 0.0075 | 0.0137 |
visit_med75_over | FALSE | 0.00 | 4474 | 46 | 4520 | fpr | 0.0102 | 0.0075 | 0.0137 |
inframe | TRUE | 0.00 | 4470 | 50 | 4520 | fpr | 0.0111 | 0.0083 | 0.0147 |
inframe | FALSE | 0.00 | 4473 | 47 | 4520 | fpr | 0.0104 | 0.0077 | 0.0139 |
inframe_visit_med75 | TRUE | 0.00 | 4468 | 52 | 4520 | fpr | 0.0115 | 0.0087 | 0.0152 |
inframe_visit_med75 | FALSE | 0.00 | 4471 | 49 | 4520 | fpr | 0.0108 | 0.0081 | 0.0144 |
funnel_zi | FALSE | 0.00 | 4474 | 46 | 4520 | fpr | 0.0102 | 0.0075 | 0.0137 |
box_out | FALSE | 0.00 | 4463 | 57 | 4520 | fpr | 0.0126 | 0.0097 | 0.0164 |
visit_med75 | TRUE | 0.10 | 4264 | 256 | 4520 | tpr | 0.0566 | 0.0502 | 0.0639 |
visit_med75 | FALSE | 0.10 | 4281 | 239 | 4520 | tpr | 0.0529 | 0.0466 | 0.0599 |
visit_med75_over | TRUE | 0.10 | 4264 | 256 | 4520 | tpr | 0.0566 | 0.0502 | 0.0639 |
visit_med75_over | FALSE | 0.10 | 4281 | 239 | 4520 | tpr | 0.0529 | 0.0466 | 0.0599 |
inframe | TRUE | 0.10 | 4261 | 259 | 4520 | tpr | 0.0573 | 0.0508 | 0.0646 |
inframe | FALSE | 0.10 | 4203 | 317 | 4520 | tpr | 0.0701 | 0.0629 | 0.0781 |
inframe_visit_med75 | TRUE | 0.10 | 4228 | 292 | 4520 | tpr | 0.0646 | 0.0577 | 0.0723 |
inframe_visit_med75 | FALSE | 0.10 | 4253 | 267 | 4520 | tpr | 0.0591 | 0.0525 | 0.0664 |
funnel_zi | FALSE | 0.10 | 4359 | 161 | 4520 | tpr | 0.0356 | 0.0305 | 0.0415 |
box_out | FALSE | 0.10 | 4445 | 75 | 4520 | tpr | 0.0166 | 0.0132 | 0.0209 |
visit_med75 | TRUE | 0.25 | 3425 | 1095 | 4520 | tpr | 0.2423 | 0.2299 | 0.2551 |
visit_med75 | FALSE | 0.25 | 3378 | 1142 | 4520 | tpr | 0.2527 | 0.2401 | 0.2656 |
visit_med75_over | TRUE | 0.25 | 3425 | 1095 | 4520 | tpr | 0.2423 | 0.2299 | 0.2551 |
visit_med75_over | FALSE | 0.25 | 3378 | 1142 | 4520 | tpr | 0.2527 | 0.2401 | 0.2656 |
inframe | TRUE | 0.25 | 3458 | 1062 | 4520 | tpr | 0.2350 | 0.2227 | 0.2477 |
inframe | FALSE | 0.25 | 3233 | 1287 | 4520 | tpr | 0.2847 | 0.2717 | 0.2982 |
inframe_visit_med75 | TRUE | 0.25 | 3344 | 1176 | 4520 | tpr | 0.2602 | 0.2475 | 0.2733 |
inframe_visit_med75 | FALSE | 0.25 | 3304 | 1216 | 4520 | tpr | 0.2690 | 0.2562 | 0.2823 |
funnel_zi | FALSE | 0.25 | 3877 | 643 | 4520 | tpr | 0.1423 | 0.1323 | 0.1529 |
box_out | FALSE | 0.25 | 4357 | 163 | 4520 | tpr | 0.0361 | 0.0309 | 0.0420 |
visit_med75 | TRUE | 0.50 | 2161 | 2359 | 4520 | tpr | 0.5219 | 0.5072 | 0.5366 |
visit_med75 | FALSE | 0.50 | 1977 | 2543 | 4520 | tpr | 0.5626 | 0.5480 | 0.5771 |
visit_med75_over | TRUE | 0.50 | 2161 | 2359 | 4520 | tpr | 0.5219 | 0.5072 | 0.5366 |
visit_med75_over | FALSE | 0.50 | 1977 | 2543 | 4520 | tpr | 0.5626 | 0.5480 | 0.5771 |
inframe | TRUE | 0.50 | 2146 | 2374 | 4520 | tpr | 0.5252 | 0.5105 | 0.5399 |
inframe | FALSE | 0.50 | 1836 | 2684 | 4520 | tpr | 0.5938 | 0.5793 | 0.6081 |
inframe_visit_med75 | TRUE | 0.50 | 2081 | 2439 | 4520 | tpr | 0.5396 | 0.5249 | 0.5542 |
inframe_visit_med75 | FALSE | 0.50 | 1927 | 2593 | 4520 | tpr | 0.5737 | 0.5591 | 0.5881 |
funnel_zi | FALSE | 0.50 | 2607 | 1913 | 4520 | tpr | 0.4232 | 0.4088 | 0.4378 |
box_out | FALSE | 0.50 | 3481 | 1039 | 4520 | tpr | 0.2299 | 0.2177 | 0.2425 |
visit_med75 | TRUE | 0.75 | 1269 | 3251 | 4520 | tpr | 0.7192 | 0.7059 | 0.7323 |
visit_med75 | FALSE | 0.75 | 1095 | 3425 | 4520 | tpr | 0.7577 | 0.7449 | 0.7701 |
visit_med75_over | TRUE | 0.75 | 1269 | 3251 | 4520 | tpr | 0.7192 | 0.7059 | 0.7323 |
visit_med75_over | FALSE | 0.75 | 1095 | 3425 | 4520 | tpr | 0.7577 | 0.7449 | 0.7701 |
inframe | TRUE | 0.75 | 1200 | 3320 | 4520 | tpr | 0.7345 | 0.7213 | 0.7473 |
inframe | FALSE | 0.75 | 932 | 3588 | 4520 | tpr | 0.7938 | 0.7817 | 0.8055 |
inframe_visit_med75 | TRUE | 0.75 | 1249 | 3271 | 4520 | tpr | 0.7237 | 0.7103 | 0.7366 |
inframe_visit_med75 | FALSE | 0.75 | 1055 | 3465 | 4520 | tpr | 0.7666 | 0.7539 | 0.7788 |
funnel_zi | FALSE | 0.75 | 1627 | 2893 | 4520 | tpr | 0.6400 | 0.6258 | 0.6540 |
box_out | FALSE | 0.75 | 2242 | 2278 | 4520 | tpr | 0.5040 | 0.4893 | 0.5187 |
visit_med75 | TRUE | 1.00 | 969 | 3551 | 4520 | tpr | 0.7856 | 0.7733 | 0.7974 |
visit_med75 | FALSE | 1.00 | 791 | 3729 | 4520 | tpr | 0.8250 | 0.8135 | 0.8359 |
visit_med75_over | TRUE | 1.00 | 969 | 3551 | 4520 | tpr | 0.7856 | 0.7733 | 0.7974 |
visit_med75_over | FALSE | 1.00 | 791 | 3729 | 4520 | tpr | 0.8250 | 0.8135 | 0.8359 |
inframe | TRUE | 1.00 | 901 | 3619 | 4520 | tpr | 0.8007 | 0.7887 | 0.8122 |
inframe | FALSE | 1.00 | 623 | 3897 | 4520 | tpr | 0.8622 | 0.8517 | 0.8720 |
inframe_visit_med75 | TRUE | 1.00 | 1002 | 3518 | 4520 | tpr | 0.7783 | 0.7659 | 0.7903 |
inframe_visit_med75 | FALSE | 1.00 | 774 | 3746 | 4520 | tpr | 0.8288 | 0.8174 | 0.8396 |
funnel_zi | FALSE | 1.00 | 916 | 3604 | 4520 | tpr | 0.7973 | 0.7853 | 0.8089 |
box_out | FALSE | 1.00 | 776 | 3744 | 4520 | tpr | 0.8283 | 0.8169 | 0.8391 |
Plot
df_aggr %>%
mutate(ur_rate = paste0("under-reporting rate: ", ur_rate, " - ", ratio_type)
) %>%
group_by(ur_rate) %>%
ggplot(aes(type, ratio)) +
geom_errorbar(aes(ymin = ci95_low, ymax = ci95_high, color = type_strip, alpha = has_mult), linewidth = 1) +
facet_wrap(~ ur_rate, ncol = 1) +
coord_flip() +
theme(
legend.position = "right",
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
) +
labs(
x = "",
y = "Ratio (CI95)",
title = "{simaerep} Performance",
color = "Method",
alpha = "Multiplicity Correction"
) +
scale_color_manual(values = rev(RColorBrewer::brewer.pal(n = 6, name = "Dark2"))) +
scale_alpha_manual(values = c(1, 0.5))
Summary
- new inframe method has slightly better performance than original algorithm
The new inframe methods compares event per visit rates without dropping any patients or AEs from the analysis. This is likely to explain the performance increase. We observed a similar increase when we optimized visit_med75 in past experiments.
- multiplicity correction imposes a penalty on the true positive rate
This observation was already made by the Boeringer Ingelheim Team during the evaluation of {simaerep}. We can now reproducibly confirm this. The unaltered probability score as returned by the bootstrap algorithm already provides very realistic under-reporting probabilities.
- {simaerep} outperforms simpler methods such as funnel plot and box plot outlier detection.
These controls confirm previous observations that were made during the {simaerep} validation.