Statistical Performance • simaerep

Load

suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(simaerep))
suppressPackageStartupMessages(library(purrr))
suppressPackageStartupMessages(library(stringr))

Introduction

Performance (Statistical)

suppressPackageStartupMessages(library(furrr))
suppressPackageStartupMessages(library(future))

plan(multisession, workers = 6)

Load Portfolio

df_config <- readr::read_csv("ae_conf_20240220.csv")
df_ae_rates <- readr::read_csv("ae_rates_20240220.csv")

df_portf_flex <- sim_test_data_portfolio(df_config, df_ae_rates = df_ae_rates, parallel = TRUE, progress = TRUE)

df_portf_flex %>%
  readr::write_csv("portf.csv")

df_portf_flex <- readr::read_csv("portf.csv")

Simulating Under Reporting

Here we reanalyze performance of {simaerep} integrating the lessons learned from previous versions. We will apply the following.

generate portfolio using flexible AE rates
remove AEs directly from the data set and not from an aggregated metric using sim_ur()
set threshold for confusion matrix so that all methods have similar fpr
test the following {simaerep} parameters with and without correcting for multiplicity:
- default algorithm
- default algorithm with active over-reporting scoring
- inframe algorithm with visit_med75
- inframe algorithm
test the following outlier detection methods
- box-plot
- funnel-plot

Functions

funnel <- function(df) {
  df %>%
  filter(visit == max(visit), .by = patnum) %>%
  summarise(
    Metric = sum(.data$n_ae) / sum(.data$visit),
    n_ae = sum(n_ae),
    visit = sum(visit),
    .by = "site_number"
  ) %>%
  mutate(
        vMu = sum(.data$n_ae) / sum(.data$visit),
        z_0 = ifelse(.data$vMu == 0,
          0,
          (.data$Metric - .data$vMu) /
            sqrt(.data$vMu / .data$visit)
        ),
        phi = mean(.data$z_0^2),
        z_i = ifelse(.data$vMu == 0 | .data$phi == 0,
          0,
          (.data$Metric - .data$vMu) /
            sqrt(.data$phi * .data$vMu / .data$visit)
        )
  )
}

box <- function(df) {
  df <- df %>%
    filter(visit == max(visit), .by = patnum) %>%
    summarise(
      event_per_visit = sum(.data$n_ae) / sum(.data$visit),
      .by = "site_number"
    )
  
  bx <- boxplot.stats(df$event_per_visit)
  
  df <- df %>%
    mutate(
      box_out = event_per_visit < bx$stats[1]
    )
  
}


perf <- function(df_visit, study_id, site_number, ur_rate) {
  df_vs_study <- df_visit %>%
    sim_ur(study_id, site_number, ur_rate)
  
  df_visit_med75 <- df_vs_study %>%
    simaerep(under_only = TRUE, progress = FALSE, check = FALSE) %>%
    .$df_eval %>%
    filter(.data$site_number == .env$site_number)
  
  df_visit_med75_over <- df_vs_study %>%
    simaerep(under_only = FALSE, progress = FALSE, check = FALSE) %>%
    .$df_eval %>%
    filter(.data$site_number == .env$site_number)

  df_inframe <- df_vs_study %>%
    simaerep(inframe = TRUE, under_only = TRUE, check = FALSE, visit_med75 = FALSE) %>%
    .$df_eval %>%
    filter(.data$site_number == .env$site_number)
  
  df_inframe_visit_med75 <- df_vs_study %>%
    simaerep(inframe = TRUE, under_only = TRUE, check = FALSE, visit_med75 = TRUE) %>%
    .$df_eval %>%
    filter(.data$site_number == .env$site_number)
  
  funnel_zi <- funnel(df_vs_study) %>%
    filter(.data$site_number == .env$site_number) %>%
    pull(z_i)
  
  box_out <- box(df_vs_study) %>%
    filter(.data$site_number == .env$site_number) %>%
    pull(box_out)

  tibble(
    score_visit_med75 = df_visit_med75$prob_low_prob_ur,
    score_visit_med75_no_mult = 1 - df_visit_med75$prob_low,
    score_visit_med75_over = df_visit_med75_over$prob_low_prob_ur,
    score_visit_med75_over_no_mult = 1 - df_visit_med75_over$prob_low,
    score_inframe = df_inframe$prob_low_prob_ur,
    score_inframe_no_mult = 1 - df_inframe$prob_low,
    score_inframe_visit_med75 = df_inframe_visit_med75$prob_low_prob_ur,
    score_inframe_visit_med75_no_mult = 1 - df_inframe_visit_med75$prob_low,
    score_funnel_zi = funnel_zi,
    score_box_out = as.integer(box_out),
    stat_visit_med75_visit_med75 = df_visit_med75$visit_med75,
    stat_visit_med75_n_pat_with_med75 = df_visit_med75$n_pat_with_med75,
    stat_visit_med75_mean_ae_site_med75 = df_visit_med75$mean_ae_site_med75,
    stat_visit_med75_mean_ae_study_med75 = df_visit_med75$mean_ae_study_med75,
    stat_inframe_visit_med75 = df_inframe_visit_med75$visit_med75,
    stat_inframe_visit_med75_n_pat_with_med75 = df_inframe_visit_med75$n_pat_with_med75,
    stat_inframe_visit_med75_events_per_visit_site = df_inframe_visit_med75$events_per_visit_site,
    stat_inframe_visit_med75_events_per_visit_study = df_inframe_visit_med75$events_per_visit_study,
    stat_inframe_n_pat = df_inframe$n_pat,
    stat_inframe_events_per_visit_site = df_inframe$events_per_visit_site,
    stat_inframe_events_per_visit_study = df_inframe$events_per_visit_study,
  )
}

# perf(df_portf_flex, study_id = "0010", site_number = "15153", ur_rate = 0) %>% unlist()
# perf(df_portf_flex, study_id = "0010", site_number = "15153", ur_rate = 0.5) %>% unlist()
# perf(df_portf_flex, study_id = "0010", site_number = "15153", ur_rate = 0.75) %>% unlist()
# perf(df_portf_flex, study_id = "0010", site_number = "15153", ur_rate = 1) %>% unlist()

Grid

df_grid <- df_portf_flex %>%
  distinct(study_id, site_number) %>%
  # to reduce calculation time we only take every xth study
  filter(dense_rank(study_id)%%5 == 0) %>%
  mutate(ur_rate = list(c(0, 0.1, 0.25, 0.5, 0.75, 1))) %>%
  unnest(ur_rate)

df_grid

## # A tibble: 27,120 × 3
##    study_id site_number ur_rate
##    <chr>    <chr>         <dbl>
##  1 0005     4480           0   
##  2 0005     4480           0.1 
##  3 0005     4480           0.25
##  4 0005     4480           0.5 
##  5 0005     4480           0.75
##  6 0005     4480           1   
##  7 0005     4481           0   
##  8 0005     4481           0.1 
##  9 0005     4481           0.25
## 10 0005     4481           0.5 
## # ℹ 27,110 more rows

Apply

with_progress_cnd(
  df_perf <- df_grid %>%
    mutate(
      perf = purrr_bar(
        list(study_id, site_number, ur_rate),
        .purrr = furrr::future_pmap,
        .f = function(x, y, z) perf(df_portf_flex, x, y, z),
        .purrr_args = list(.options = furrr_options(seed = TRUE)),
        .steps = nrow(.)
      )
    )
)

df_perf %>%
  unnest(perf) %>%
  readr::write_csv("perf.csv")

df_perf <- readr::read_csv("perf.csv", show_col_types = FALSE)

df_perf_long <- df_perf %>%
  pivot_longer(cols = - c(study_id, site_number, ur_rate), names_to = "type", values_to = "score") %>%
  filter(startsWith(type, "score_")) %>%
  mutate(type = stringr::str_replace(type, "score_", ""))

Evaluation

Thresholds

We set the thresholds so that we get a fpr of 0.01.

Note that this results in probability thresholds ~ 0.99 for scores w/o multiplicity correction and in the recommended funnel plot score threshold of -2.

target_fpr <- 0.01

df_thresh <- df_perf_long %>%
  group_by(type) %>%
  nest() %>%
  ungroup() %>%
  mutate(
    data = map(data, ~ filter(., ur_rate == 0)),
    thresh1 = map_dbl(data, ~ quantile(pull(., score), 1 - target_fpr)),
    thresh2 = map_dbl(data, ~ quantile(pull(., score), target_fpr)),
    thresh = ifelse(type == "funnel_zi", thresh2, thresh1)
  ) %>%
  select(type, thresh)
  
df_thresh

## # A tibble: 10 × 2
##    type                        thresh
##    <chr>                        <dbl>
##  1 visit_med75                  0.455
##  2 visit_med75_no_mult          0.989
##  3 visit_med75_over             0.455
##  4 visit_med75_over_no_mult     0.989
##  5 inframe                      0.384
##  6 inframe_no_mult              0.982
##  7 inframe_visit_med75          0.412
##  8 inframe_visit_med75_no_mult  0.987
##  9 funnel_zi                   -2.03 
## 10 box_out                      1

Aggregate

get_prop_test_ci95 <- function(..., ix) {
  
  stopifnot(ix %in% c(1, 2))
  
  tryCatch(
    prop.test(...)$conf.int[ix],
    error = function(cnd) c(NA, NA)[ix]
  )
}

df_aggr <- df_perf_long %>%
  left_join(df_thresh, by = "type") %>%
  mutate(
    is_ur = ifelse(type == "funnel_zi", score <= thresh, score >= thresh),
    is_ur = ifelse(type == "box_out", score == 1, is_ur)
  ) %>%
  summarise(
    n = n(),
    .by = c(type, ur_rate, is_ur)
  ) %>%
  pivot_wider(
    names_from = is_ur,
    values_from = n,
    names_prefix = "is_ur_",
    values_fill = 0
  ) %>%
  mutate(
    n_sites = is_ur_TRUE + is_ur_FALSE,
    ratio = is_ur_TRUE / n_sites,
    ratio_type = ifelse(ur_rate == 0, "fpr", "tpr"),
    ci95_low = map2_dbl(is_ur_TRUE, n_sites, ~ get_prop_test_ci95(.x, .y, ix = 1)),
    ci95_high = map2_dbl(is_ur_TRUE, n_sites, ~ get_prop_test_ci95(.x, .y, ix = 2)),
    type_strip = str_replace(type, "_no_mult", ""),
    has_mult = ! str_detect(type, "no_mult") & ! type %in% c("funnel_zi", "box_out")
  )

Table

Methods:

visit_med75: default algorithm
visit_med75_over: default algorithm including over-reporting score
inframe: new algorithm using table operations and no visit_med75
inframe visit_med75: new algorithm using table operations and visit_med75
funnel_zi: funnel plot derived outlier detection
box_out: box plot derived outlier detection

FN: false negatives TP: true positives

df_aggr %>%
  select(method = type_strip, has_mult, ur_rate, FN = is_ur_FALSE, TP = is_ur_TRUE, n_sites, ratio_type, ratio, ci95_low, ci95_high) %>%
  knitr::kable(digits = 4)

method	has_mult	ur_rate	FN	TP	n_sites	ratio_type	ratio	ci95_low	ci95_high
visit_med75	TRUE	0.00	4474	46	4520	fpr	0.0102	0.0075	0.0137
visit_med75	FALSE	0.00	4474	46	4520	fpr	0.0102	0.0075	0.0137
visit_med75_over	TRUE	0.00	4474	46	4520	fpr	0.0102	0.0075	0.0137
visit_med75_over	FALSE	0.00	4474	46	4520	fpr	0.0102	0.0075	0.0137
inframe	TRUE	0.00	4470	50	4520	fpr	0.0111	0.0083	0.0147
inframe	FALSE	0.00	4473	47	4520	fpr	0.0104	0.0077	0.0139
inframe_visit_med75	TRUE	0.00	4468	52	4520	fpr	0.0115	0.0087	0.0152
inframe_visit_med75	FALSE	0.00	4471	49	4520	fpr	0.0108	0.0081	0.0144
funnel_zi	FALSE	0.00	4474	46	4520	fpr	0.0102	0.0075	0.0137
box_out	FALSE	0.00	4463	57	4520	fpr	0.0126	0.0097	0.0164
visit_med75	TRUE	0.10	4264	256	4520	tpr	0.0566	0.0502	0.0639
visit_med75	FALSE	0.10	4281	239	4520	tpr	0.0529	0.0466	0.0599
visit_med75_over	TRUE	0.10	4264	256	4520	tpr	0.0566	0.0502	0.0639
visit_med75_over	FALSE	0.10	4281	239	4520	tpr	0.0529	0.0466	0.0599
inframe	TRUE	0.10	4261	259	4520	tpr	0.0573	0.0508	0.0646
inframe	FALSE	0.10	4203	317	4520	tpr	0.0701	0.0629	0.0781
inframe_visit_med75	TRUE	0.10	4228	292	4520	tpr	0.0646	0.0577	0.0723
inframe_visit_med75	FALSE	0.10	4253	267	4520	tpr	0.0591	0.0525	0.0664
funnel_zi	FALSE	0.10	4359	161	4520	tpr	0.0356	0.0305	0.0415
box_out	FALSE	0.10	4445	75	4520	tpr	0.0166	0.0132	0.0209
visit_med75	TRUE	0.25	3425	1095	4520	tpr	0.2423	0.2299	0.2551
visit_med75	FALSE	0.25	3378	1142	4520	tpr	0.2527	0.2401	0.2656
visit_med75_over	TRUE	0.25	3425	1095	4520	tpr	0.2423	0.2299	0.2551
visit_med75_over	FALSE	0.25	3378	1142	4520	tpr	0.2527	0.2401	0.2656
inframe	TRUE	0.25	3458	1062	4520	tpr	0.2350	0.2227	0.2477
inframe	FALSE	0.25	3233	1287	4520	tpr	0.2847	0.2717	0.2982
inframe_visit_med75	TRUE	0.25	3344	1176	4520	tpr	0.2602	0.2475	0.2733
inframe_visit_med75	FALSE	0.25	3304	1216	4520	tpr	0.2690	0.2562	0.2823
funnel_zi	FALSE	0.25	3877	643	4520	tpr	0.1423	0.1323	0.1529
box_out	FALSE	0.25	4357	163	4520	tpr	0.0361	0.0309	0.0420
visit_med75	TRUE	0.50	2161	2359	4520	tpr	0.5219	0.5072	0.5366
visit_med75	FALSE	0.50	1977	2543	4520	tpr	0.5626	0.5480	0.5771
visit_med75_over	TRUE	0.50	2161	2359	4520	tpr	0.5219	0.5072	0.5366
visit_med75_over	FALSE	0.50	1977	2543	4520	tpr	0.5626	0.5480	0.5771
inframe	TRUE	0.50	2146	2374	4520	tpr	0.5252	0.5105	0.5399
inframe	FALSE	0.50	1836	2684	4520	tpr	0.5938	0.5793	0.6081
inframe_visit_med75	TRUE	0.50	2081	2439	4520	tpr	0.5396	0.5249	0.5542
inframe_visit_med75	FALSE	0.50	1927	2593	4520	tpr	0.5737	0.5591	0.5881
funnel_zi	FALSE	0.50	2607	1913	4520	tpr	0.4232	0.4088	0.4378
box_out	FALSE	0.50	3481	1039	4520	tpr	0.2299	0.2177	0.2425
visit_med75	TRUE	0.75	1269	3251	4520	tpr	0.7192	0.7059	0.7323
visit_med75	FALSE	0.75	1095	3425	4520	tpr	0.7577	0.7449	0.7701
visit_med75_over	TRUE	0.75	1269	3251	4520	tpr	0.7192	0.7059	0.7323
visit_med75_over	FALSE	0.75	1095	3425	4520	tpr	0.7577	0.7449	0.7701
inframe	TRUE	0.75	1200	3320	4520	tpr	0.7345	0.7213	0.7473
inframe	FALSE	0.75	932	3588	4520	tpr	0.7938	0.7817	0.8055
inframe_visit_med75	TRUE	0.75	1249	3271	4520	tpr	0.7237	0.7103	0.7366
inframe_visit_med75	FALSE	0.75	1055	3465	4520	tpr	0.7666	0.7539	0.7788
funnel_zi	FALSE	0.75	1627	2893	4520	tpr	0.6400	0.6258	0.6540
box_out	FALSE	0.75	2242	2278	4520	tpr	0.5040	0.4893	0.5187
visit_med75	TRUE	1.00	969	3551	4520	tpr	0.7856	0.7733	0.7974
visit_med75	FALSE	1.00	791	3729	4520	tpr	0.8250	0.8135	0.8359
visit_med75_over	TRUE	1.00	969	3551	4520	tpr	0.7856	0.7733	0.7974
visit_med75_over	FALSE	1.00	791	3729	4520	tpr	0.8250	0.8135	0.8359
inframe	TRUE	1.00	901	3619	4520	tpr	0.8007	0.7887	0.8122
inframe	FALSE	1.00	623	3897	4520	tpr	0.8622	0.8517	0.8720
inframe_visit_med75	TRUE	1.00	1002	3518	4520	tpr	0.7783	0.7659	0.7903
inframe_visit_med75	FALSE	1.00	774	3746	4520	tpr	0.8288	0.8174	0.8396
funnel_zi	FALSE	1.00	916	3604	4520	tpr	0.7973	0.7853	0.8089
box_out	FALSE	1.00	776	3744	4520	tpr	0.8283	0.8169	0.8391

Plot

df_aggr %>%
  mutate(ur_rate = paste0("under-reporting rate: ",  ur_rate, " - ", ratio_type)
  ) %>%
  group_by(ur_rate) %>%
  ggplot(aes(type, ratio)) +
    geom_errorbar(aes(ymin = ci95_low, ymax = ci95_high, color = type_strip, alpha = has_mult), linewidth = 1) +
    facet_wrap(~ ur_rate, ncol = 1) +
    coord_flip() +
    theme(
      legend.position = "right",
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank()
    ) +
    labs(
      x = "",
      y = "Ratio (CI95)", 
      title = "{simaerep} Performance",
      color = "Method",
      alpha = "Multiplicity Correction"
    ) +
    scale_color_manual(values = rev(RColorBrewer::brewer.pal(n = 6, name = "Dark2"))) +
    scale_alpha_manual(values = c(1, 0.5))

Summary

new inframe method has slightly better performance than original algorithm

The new inframe methods compares event per visit rates without dropping any patients or AEs from the analysis. This is likely to explain the performance increase. We observed a similar increase when we optimized visit_med75 in past experiments.

multiplicity correction imposes a penalty on the true positive rate

This observation was already made by the Boeringer Ingelheim Team during the evaluation of {simaerep}. We can now reproducibly confirm this. The unaltered probability score as returned by the bootstrap algorithm already provides very realistic under-reporting probabilities.

{simaerep} outperforms simpler methods such as funnel plot and box plot outlier detection.

These controls confirm previous observations that were made during the {simaerep} validation.