Skip to contents

Introduction

Performance (Statistical)

Load Portfolio

df_config <- readr::read_csv("ae_conf_20240220.csv")
df_ae_rates <- readr::read_csv("ae_rates_20240220.csv")

df_portf_flex <- sim_test_data_portfolio(df_config, df_ae_rates = df_ae_rates, parallel = TRUE, progress = TRUE)

df_portf_flex %>%
  readr::write_csv("portf.csv")
df_portf_flex <- readr::read_csv("portf.csv")

Simulating Under Reporting

Here we reanalyze performance of {simaerep} integrating the lessons learned from previous versions. We will apply the following.

  • generate portfolio using flexible AE rates
  • remove AEs directly from the data set and not from an aggregated metric using sim_ur()
  • set threshold for confusion matrix so that all methods have similar fpr
  • test the following {simaerep} parameters with and without correcting for multiplicity:
    • default algorithm
    • default algorithm with active over-reporting scoring
    • inframe algorithm with visit_med75
    • inframe algorithm
  • test the following outlier detection methods
    • box-plot
    • funnel-plot

Functions

funnel <- function(df) {
  df %>%
  filter(visit == max(visit), .by = patnum) %>%
  summarise(
    Metric = sum(.data$n_ae) / sum(.data$visit),
    n_ae = sum(n_ae),
    visit = sum(visit),
    .by = "site_number"
  ) %>%
  mutate(
        vMu = sum(.data$n_ae) / sum(.data$visit),
        z_0 = ifelse(.data$vMu == 0,
          0,
          (.data$Metric - .data$vMu) /
            sqrt(.data$vMu / .data$visit)
        ),
        phi = mean(.data$z_0^2),
        z_i = ifelse(.data$vMu == 0 | .data$phi == 0,
          0,
          (.data$Metric - .data$vMu) /
            sqrt(.data$phi * .data$vMu / .data$visit)
        )
  )
}

box <- function(df) {
  df <- df %>%
    filter(visit == max(visit), .by = patnum) %>%
    summarise(
      event_per_visit = sum(.data$n_ae) / sum(.data$visit),
      .by = "site_number"
    )
  
  bx <- boxplot.stats(df$event_per_visit)
  
  df <- df %>%
    mutate(
      box_out = event_per_visit < bx$stats[1]
    )
  
}


perf <- function(df_visit, study_id, site_number, ur_rate) {
  df_vs_study <- df_visit %>%
    sim_ur(study_id, site_number, ur_rate)
  
  df_visit_med75 <- df_vs_study %>%
    simaerep(under_only = TRUE, progress = FALSE, check = FALSE) %>%
    .$df_eval %>%
    filter(.data$site_number == .env$site_number)
  
  df_visit_med75_over <- df_vs_study %>%
    simaerep(under_only = FALSE, progress = FALSE, check = FALSE) %>%
    .$df_eval %>%
    filter(.data$site_number == .env$site_number)

  df_inframe <- df_vs_study %>%
    simaerep(inframe = TRUE, under_only = TRUE, check = FALSE, visit_med75 = FALSE) %>%
    .$df_eval %>%
    filter(.data$site_number == .env$site_number)
  
  df_inframe_visit_med75 <- df_vs_study %>%
    simaerep(inframe = TRUE, under_only = TRUE, check = FALSE, visit_med75 = TRUE) %>%
    .$df_eval %>%
    filter(.data$site_number == .env$site_number)
  
  funnel_zi <- funnel(df_vs_study) %>%
    filter(.data$site_number == .env$site_number) %>%
    pull(z_i)
  
  box_out <- box(df_vs_study) %>%
    filter(.data$site_number == .env$site_number) %>%
    pull(box_out)

  tibble(
    score_visit_med75 = df_visit_med75$prob_low_prob_ur,
    score_visit_med75_no_mult = 1 - df_visit_med75$prob_low,
    score_visit_med75_over = df_visit_med75_over$prob_low_prob_ur,
    score_visit_med75_over_no_mult = 1 - df_visit_med75_over$prob_low,
    score_inframe = df_inframe$prob_low_prob_ur,
    score_inframe_no_mult = 1 - df_inframe$prob_low,
    score_inframe_visit_med75 = df_inframe_visit_med75$prob_low_prob_ur,
    score_inframe_visit_med75_no_mult = 1 - df_inframe_visit_med75$prob_low,
    score_funnel_zi = funnel_zi,
    score_box_out = as.integer(box_out),
    stat_visit_med75_visit_med75 = df_visit_med75$visit_med75,
    stat_visit_med75_n_pat_with_med75 = df_visit_med75$n_pat_with_med75,
    stat_visit_med75_mean_ae_site_med75 = df_visit_med75$mean_ae_site_med75,
    stat_visit_med75_mean_ae_study_med75 = df_visit_med75$mean_ae_study_med75,
    stat_inframe_visit_med75 = df_inframe_visit_med75$visit_med75,
    stat_inframe_visit_med75_n_pat_with_med75 = df_inframe_visit_med75$n_pat_with_med75,
    stat_inframe_visit_med75_events_per_visit_site = df_inframe_visit_med75$events_per_visit_site,
    stat_inframe_visit_med75_events_per_visit_study = df_inframe_visit_med75$events_per_visit_study,
    stat_inframe_n_pat = df_inframe$n_pat,
    stat_inframe_events_per_visit_site = df_inframe$events_per_visit_site,
    stat_inframe_events_per_visit_study = df_inframe$events_per_visit_study,
  )
}

# perf(df_portf_flex, study_id = "0010", site_number = "15153", ur_rate = 0) %>% unlist()
# perf(df_portf_flex, study_id = "0010", site_number = "15153", ur_rate = 0.5) %>% unlist()
# perf(df_portf_flex, study_id = "0010", site_number = "15153", ur_rate = 0.75) %>% unlist()
# perf(df_portf_flex, study_id = "0010", site_number = "15153", ur_rate = 1) %>% unlist()

Grid

df_grid <- df_portf_flex %>%
  distinct(study_id, site_number) %>%
  # to reduce calculation time we only take every xth study
  filter(dense_rank(study_id)%%5 == 0) %>%
  mutate(ur_rate = list(c(0, 0.1, 0.25, 0.5, 0.75, 1))) %>%
  unnest(ur_rate)

df_grid
## # A tibble: 27,120 × 3
##    study_id site_number ur_rate
##    <chr>    <chr>         <dbl>
##  1 0005     4480           0   
##  2 0005     4480           0.1 
##  3 0005     4480           0.25
##  4 0005     4480           0.5 
##  5 0005     4480           0.75
##  6 0005     4480           1   
##  7 0005     4481           0   
##  8 0005     4481           0.1 
##  9 0005     4481           0.25
## 10 0005     4481           0.5 
## # ℹ 27,110 more rows

Apply

with_progress_cnd(
  df_perf <- df_grid %>%
    mutate(
      perf = purrr_bar(
        list(study_id, site_number, ur_rate),
        .purrr = furrr::future_pmap,
        .f = function(x, y, z) perf(df_portf_flex, x, y, z),
        .purrr_args = list(.options = furrr_options(seed = TRUE)),
        .steps = nrow(.)
      )
    )
)

df_perf %>%
  unnest(perf) %>%
  readr::write_csv("perf.csv")
df_perf <- readr::read_csv("perf.csv", show_col_types = FALSE)
df_perf_long <- df_perf %>%
  pivot_longer(cols = - c(study_id, site_number, ur_rate), names_to = "type", values_to = "score") %>%
  filter(startsWith(type, "score_")) %>%
  mutate(type = stringr::str_replace(type, "score_", ""))

Evaluation

Thresholds

We set the thresholds so that we get a fpr of 0.01.

Note that this results in probability thresholds ~ 0.99 for scores w/o multiplicity correction and in the recommended funnel plot score threshold of -2.

target_fpr <- 0.01

df_thresh <- df_perf_long %>%
  group_by(type) %>%
  nest() %>%
  ungroup() %>%
  mutate(
    data = map(data, ~ filter(., ur_rate == 0)),
    thresh1 = map_dbl(data, ~ quantile(pull(., score), 1 - target_fpr)),
    thresh2 = map_dbl(data, ~ quantile(pull(., score), target_fpr)),
    thresh = ifelse(type == "funnel_zi", thresh2, thresh1)
  ) %>%
  select(type, thresh)
  
df_thresh
## # A tibble: 10 × 2
##    type                        thresh
##    <chr>                        <dbl>
##  1 visit_med75                  0.455
##  2 visit_med75_no_mult          0.989
##  3 visit_med75_over             0.455
##  4 visit_med75_over_no_mult     0.989
##  5 inframe                      0.384
##  6 inframe_no_mult              0.982
##  7 inframe_visit_med75          0.412
##  8 inframe_visit_med75_no_mult  0.987
##  9 funnel_zi                   -2.03 
## 10 box_out                      1

Aggregate

get_prop_test_ci95 <- function(..., ix) {
  
  stopifnot(ix %in% c(1, 2))
  
  tryCatch(
    prop.test(...)$conf.int[ix],
    error = function(cnd) c(NA, NA)[ix]
  )
}

df_aggr <- df_perf_long %>%
  left_join(df_thresh, by = "type") %>%
  mutate(
    is_ur = ifelse(type == "funnel_zi", score <= thresh, score >= thresh),
    is_ur = ifelse(type == "box_out", score == 1, is_ur)
  ) %>%
  summarise(
    n = n(),
    .by = c(type, ur_rate, is_ur)
  ) %>%
  pivot_wider(
    names_from = is_ur,
    values_from = n,
    names_prefix = "is_ur_",
    values_fill = 0
  ) %>%
  mutate(
    n_sites = is_ur_TRUE + is_ur_FALSE,
    ratio = is_ur_TRUE / n_sites,
    ratio_type = ifelse(ur_rate == 0, "fpr", "tpr"),
    ci95_low = map2_dbl(is_ur_TRUE, n_sites, ~ get_prop_test_ci95(.x, .y, ix = 1)),
    ci95_high = map2_dbl(is_ur_TRUE, n_sites, ~ get_prop_test_ci95(.x, .y, ix = 2)),
    type_strip = str_replace(type, "_no_mult", ""),
    has_mult = ! str_detect(type, "no_mult") & ! type %in% c("funnel_zi", "box_out")
  )

Table

Methods:

  • visit_med75: default algorithm
  • visit_med75_over: default algorithm including over-reporting score
  • inframe: new algorithm using table operations and no visit_med75
  • inframe visit_med75: new algorithm using table operations and visit_med75
  • funnel_zi: funnel plot derived outlier detection
  • box_out: box plot derived outlier detection

FN: false negatives TP: true positives

df_aggr %>%
  select(method = type_strip, has_mult, ur_rate, FN = is_ur_FALSE, TP = is_ur_TRUE, n_sites, ratio_type, ratio, ci95_low, ci95_high) %>%
  knitr::kable(digits = 4)
method has_mult ur_rate FN TP n_sites ratio_type ratio ci95_low ci95_high
visit_med75 TRUE 0.00 4474 46 4520 fpr 0.0102 0.0075 0.0137
visit_med75 FALSE 0.00 4474 46 4520 fpr 0.0102 0.0075 0.0137
visit_med75_over TRUE 0.00 4474 46 4520 fpr 0.0102 0.0075 0.0137
visit_med75_over FALSE 0.00 4474 46 4520 fpr 0.0102 0.0075 0.0137
inframe TRUE 0.00 4470 50 4520 fpr 0.0111 0.0083 0.0147
inframe FALSE 0.00 4473 47 4520 fpr 0.0104 0.0077 0.0139
inframe_visit_med75 TRUE 0.00 4468 52 4520 fpr 0.0115 0.0087 0.0152
inframe_visit_med75 FALSE 0.00 4471 49 4520 fpr 0.0108 0.0081 0.0144
funnel_zi FALSE 0.00 4474 46 4520 fpr 0.0102 0.0075 0.0137
box_out FALSE 0.00 4463 57 4520 fpr 0.0126 0.0097 0.0164
visit_med75 TRUE 0.10 4264 256 4520 tpr 0.0566 0.0502 0.0639
visit_med75 FALSE 0.10 4281 239 4520 tpr 0.0529 0.0466 0.0599
visit_med75_over TRUE 0.10 4264 256 4520 tpr 0.0566 0.0502 0.0639
visit_med75_over FALSE 0.10 4281 239 4520 tpr 0.0529 0.0466 0.0599
inframe TRUE 0.10 4261 259 4520 tpr 0.0573 0.0508 0.0646
inframe FALSE 0.10 4203 317 4520 tpr 0.0701 0.0629 0.0781
inframe_visit_med75 TRUE 0.10 4228 292 4520 tpr 0.0646 0.0577 0.0723
inframe_visit_med75 FALSE 0.10 4253 267 4520 tpr 0.0591 0.0525 0.0664
funnel_zi FALSE 0.10 4359 161 4520 tpr 0.0356 0.0305 0.0415
box_out FALSE 0.10 4445 75 4520 tpr 0.0166 0.0132 0.0209
visit_med75 TRUE 0.25 3425 1095 4520 tpr 0.2423 0.2299 0.2551
visit_med75 FALSE 0.25 3378 1142 4520 tpr 0.2527 0.2401 0.2656
visit_med75_over TRUE 0.25 3425 1095 4520 tpr 0.2423 0.2299 0.2551
visit_med75_over FALSE 0.25 3378 1142 4520 tpr 0.2527 0.2401 0.2656
inframe TRUE 0.25 3458 1062 4520 tpr 0.2350 0.2227 0.2477
inframe FALSE 0.25 3233 1287 4520 tpr 0.2847 0.2717 0.2982
inframe_visit_med75 TRUE 0.25 3344 1176 4520 tpr 0.2602 0.2475 0.2733
inframe_visit_med75 FALSE 0.25 3304 1216 4520 tpr 0.2690 0.2562 0.2823
funnel_zi FALSE 0.25 3877 643 4520 tpr 0.1423 0.1323 0.1529
box_out FALSE 0.25 4357 163 4520 tpr 0.0361 0.0309 0.0420
visit_med75 TRUE 0.50 2161 2359 4520 tpr 0.5219 0.5072 0.5366
visit_med75 FALSE 0.50 1977 2543 4520 tpr 0.5626 0.5480 0.5771
visit_med75_over TRUE 0.50 2161 2359 4520 tpr 0.5219 0.5072 0.5366
visit_med75_over FALSE 0.50 1977 2543 4520 tpr 0.5626 0.5480 0.5771
inframe TRUE 0.50 2146 2374 4520 tpr 0.5252 0.5105 0.5399
inframe FALSE 0.50 1836 2684 4520 tpr 0.5938 0.5793 0.6081
inframe_visit_med75 TRUE 0.50 2081 2439 4520 tpr 0.5396 0.5249 0.5542
inframe_visit_med75 FALSE 0.50 1927 2593 4520 tpr 0.5737 0.5591 0.5881
funnel_zi FALSE 0.50 2607 1913 4520 tpr 0.4232 0.4088 0.4378
box_out FALSE 0.50 3481 1039 4520 tpr 0.2299 0.2177 0.2425
visit_med75 TRUE 0.75 1269 3251 4520 tpr 0.7192 0.7059 0.7323
visit_med75 FALSE 0.75 1095 3425 4520 tpr 0.7577 0.7449 0.7701
visit_med75_over TRUE 0.75 1269 3251 4520 tpr 0.7192 0.7059 0.7323
visit_med75_over FALSE 0.75 1095 3425 4520 tpr 0.7577 0.7449 0.7701
inframe TRUE 0.75 1200 3320 4520 tpr 0.7345 0.7213 0.7473
inframe FALSE 0.75 932 3588 4520 tpr 0.7938 0.7817 0.8055
inframe_visit_med75 TRUE 0.75 1249 3271 4520 tpr 0.7237 0.7103 0.7366
inframe_visit_med75 FALSE 0.75 1055 3465 4520 tpr 0.7666 0.7539 0.7788
funnel_zi FALSE 0.75 1627 2893 4520 tpr 0.6400 0.6258 0.6540
box_out FALSE 0.75 2242 2278 4520 tpr 0.5040 0.4893 0.5187
visit_med75 TRUE 1.00 969 3551 4520 tpr 0.7856 0.7733 0.7974
visit_med75 FALSE 1.00 791 3729 4520 tpr 0.8250 0.8135 0.8359
visit_med75_over TRUE 1.00 969 3551 4520 tpr 0.7856 0.7733 0.7974
visit_med75_over FALSE 1.00 791 3729 4520 tpr 0.8250 0.8135 0.8359
inframe TRUE 1.00 901 3619 4520 tpr 0.8007 0.7887 0.8122
inframe FALSE 1.00 623 3897 4520 tpr 0.8622 0.8517 0.8720
inframe_visit_med75 TRUE 1.00 1002 3518 4520 tpr 0.7783 0.7659 0.7903
inframe_visit_med75 FALSE 1.00 774 3746 4520 tpr 0.8288 0.8174 0.8396
funnel_zi FALSE 1.00 916 3604 4520 tpr 0.7973 0.7853 0.8089
box_out FALSE 1.00 776 3744 4520 tpr 0.8283 0.8169 0.8391

Plot

df_aggr %>%
  mutate(ur_rate = paste0("under-reporting rate: ",  ur_rate, " - ", ratio_type)
  ) %>%
  group_by(ur_rate) %>%
  ggplot(aes(type, ratio)) +
    geom_errorbar(aes(ymin = ci95_low, ymax = ci95_high, color = type_strip, alpha = has_mult), linewidth = 1) +
    facet_wrap(~ ur_rate, ncol = 1) +
    coord_flip() +
    theme(
      legend.position = "right",
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank()
    ) +
    labs(
      x = "",
      y = "Ratio (CI95)", 
      title = "{simaerep} Performance",
      color = "Method",
      alpha = "Multiplicity Correction"
    ) +
    scale_color_manual(values = rev(RColorBrewer::brewer.pal(n = 6, name = "Dark2"))) +
    scale_alpha_manual(values = c(1, 0.5))

Summary

  • new inframe method has slightly better performance than original algorithm

The new inframe methods compares event per visit rates without dropping any patients or AEs from the analysis. This is likely to explain the performance increase. We observed a similar increase when we optimized visit_med75 in past experiments.

  • multiplicity correction imposes a penalty on the true positive rate

This observation was already made by the Boeringer Ingelheim Team during the evaluation of {simaerep}. We can now reproducibly confirm this. The unaltered probability score as returned by the bootstrap algorithm already provides very realistic under-reporting probabilities.

  • {simaerep} outperforms simpler methods such as funnel plot and box plot outlier detection.

These controls confirm previous observations that were made during the {simaerep} validation.