Skip to contents

Install {gsm}

devtools::install_github("Gilead-BioStats/gsm@v1.9.2", ref = "main")

Introduction

The {gsm} R package provides a standardized Risk Based Quality Monitoring (RBQM) framework for clinical trials that pairs a flexible data pipeline with robust reports. It also uses Funnel Plots to flag outliers which provide broader tolerance limits for sites with low exposure and narrower limits for sites with higher exposure. This method is different to the event rate based limits we have used in previous heuristics to measure {simaerep} performance. Funnel plots are discussed in greater detail by Zink et al. 2018

One of the draw backs of using funnel plots for flagging is that they assume that the AE rate remains constant over the course of the study.

Prepare Data

Load Portfolio Configurations

We have prepared a snapshot of the AE reporting configuration of our current portfolio. For each study we have also measured a visit-specific AE rate which allows us to generate a synthetic portfolio with flexible AE rates across a study.

df_config <- readr::read_csv("ae_conf_20240220.csv")
df_ae_rates <- readr::read_csv("ae_rates_20240220.csv")

df_config %>%
  head(25) %>%
  knitr::kable()
study_id ae_per_visit_mean site_number max_visit_sd max_visit_mean n_pat
0001 0.2973806 4746 3.5355339 16.50000 2
0001 0.2973806 4747 0.7071068 27.50000 2
0001 0.2973806 4750 0.5000000 19.25000 4
0001 0.2973806 4815 8.3765546 22.16667 6
0001 0.2973806 4816 0.0000000 27.00000 1
0001 0.2973806 4817 0.0000000 31.00000 1
0001 0.2973806 4818 0.0000000 30.00000 1
0001 0.2973806 4891 2.3804761 22.00000 7
0001 0.2973806 4893 0.0000000 20.00000 2
0001 0.2973806 4932 1.0954451 26.80000 5
0001 0.2973806 4941 1.7320508 27.00000 3
0001 0.2973806 4942 1.4142136 25.00000 2
0001 0.2973806 4943 0.0000000 25.00000 1
0001 0.2973806 4966 0.0000000 18.00000 1
0001 0.2973806 4967 0.8164966 18.00000 4
0001 0.2973806 4968 0.0000000 24.00000 1
0001 0.2973806 4969 1.5275252 21.33333 3
0001 0.2973806 4984 1.7078251 30.75000 4
0001 0.2973806 4985 8.9087972 21.16667 6
0001 0.2973806 4986 0.5773503 20.33333 3
0001 0.2973806 4988 1.4142136 19.00000 2
0001 0.2973806 5079 2.1602469 23.00000 4
0001 0.2973806 5080 0.0000000 23.00000 1
0001 0.2973806 5081 0.0000000 23.00000 1
0001 0.2973806 5082 1.4142136 22.00000 2
df_ae_rates %>%
  head(25) %>%
  knitr::kable()
study_id cum_visit ae_rate n_pat
0001 1 0.017699 113
0001 2 0.088496 113
0001 3 0.230088 113
0001 4 0.223214 112
0001 5 0.187500 112
0001 6 0.196429 112
0001 7 0.396396 111
0001 8 0.207207 111
0001 9 0.216216 111
0001 10 0.315315 111
0001 11 0.263636 110
0001 12 0.318182 110
0001 13 0.345455 110
0001 14 0.379630 108
0001 15 0.401869 107
0001 16 0.485981 107
0001 17 0.383178 107
0001 18 0.396226 106
0001 19 0.370000 100
0001 20 0.439560 91
0001 21 0.369863 73
0001 22 0.370968 62
0001 23 0.345455 55
0001 24 0.446809 47
0001 25 0.292683 41

Simulate Portfolio

We generate two synthetic portfolios with no AE under-reporting sites. One portfolio with a fixed AE rate for all visits and another one with a flexible visit-specific AE rate.

df_portf_fix <- sim_test_data_portfolio(df_config, parallel = TRUE, progress = TRUE)

df_portf_fix %>%
  head(25) %>%
  knitr::kable()
study_id ae_per_visit_mean site_number max_visit_sd max_visit_mean patnum visit n_ae
0001 0.2973806 4746 3.535534 16.5 0001 1 1
0001 0.2973806 4746 3.535534 16.5 0001 2 1
0001 0.2973806 4746 3.535534 16.5 0001 3 2
0001 0.2973806 4746 3.535534 16.5 0001 4 3
0001 0.2973806 4746 3.535534 16.5 0001 5 4
0001 0.2973806 4746 3.535534 16.5 0001 6 4
0001 0.2973806 4746 3.535534 16.5 0001 7 5
0001 0.2973806 4746 3.535534 16.5 0001 8 5
0001 0.2973806 4746 3.535534 16.5 0001 9 5
0001 0.2973806 4746 3.535534 16.5 0001 10 5
0001 0.2973806 4746 3.535534 16.5 0001 11 5
0001 0.2973806 4746 3.535534 16.5 0001 12 5
0001 0.2973806 4746 3.535534 16.5 0001 13 5
0001 0.2973806 4746 3.535534 16.5 0001 14 5
0001 0.2973806 4746 3.535534 16.5 0001 15 5
0001 0.2973806 4746 3.535534 16.5 0001 16 5
0001 0.2973806 4746 3.535534 16.5 0002 1 1
0001 0.2973806 4746 3.535534 16.5 0002 2 1
0001 0.2973806 4746 3.535534 16.5 0002 3 1
0001 0.2973806 4746 3.535534 16.5 0002 4 1
0001 0.2973806 4746 3.535534 16.5 0002 5 1
0001 0.2973806 4746 3.535534 16.5 0002 6 1
0001 0.2973806 4746 3.535534 16.5 0002 7 2
0001 0.2973806 4746 3.535534 16.5 0002 8 2
0001 0.2973806 4746 3.535534 16.5 0002 9 3
df_portf_flex <- sim_test_data_portfolio(df_config, df_ae_rates = df_ae_rates, parallel = TRUE, progress = TRUE)

Compare AE rates

Next we confirm the different AE rates in our two synthetic portfolios.

df_rate_fix <- df_portf_fix %>%
  mutate(ae_rate = coalesce(n_ae - lag(n_ae), n_ae), .by = c("study_id", "patnum")) %>%
  summarise(ae_rate = mean(ae_rate), .by = c("study_id", "visit")) %>%
  mutate(rate = "fix")


df_rate_flex <- df_portf_flex %>%
  mutate(ae_rate = coalesce(n_ae - lag(n_ae), n_ae), .by = c("study_id", "patnum")) %>%
  summarise(ae_rate = mean(ae_rate), .by = c("study_id", "visit")) %>%
  mutate(rate = "flex")
bind_rows(df_rate_flex, df_rate_fix) %>%
  ggplot(aes(visit, ae_rate)) +
    geom_line(aes(group = study_id), alpha = 0.2) +
    geom_smooth() +
    facet_wrap(~ rate) +
    labs(title = "Average AE rates per Study")

bind_rows(df_rate_flex, df_rate_fix) %>%
  filter(dense_rank(study_id) <= 16) %>%
  ggplot(aes(visit, ae_rate)) +
    geom_line(aes(group = rate, color = rate)) +
    facet_wrap(~ study_id, scales = "free") +
    labs(title = "Average AE rates for Selected Studies")

We can confirm that the AE rates in the “flexible” portfolio are not constant. Moreover we see that the AE rate profile is very unique for each study.

Funnel Plots {gsm}

Example

Here we demonstrate how to use the {gsm} package on our simulated portfolios so that we can get a good visualization of the funnel plot.

get_SUBJ <- function(df_portf) {
  df_portf %>%
    select(study_id, siteid = site_number, subjid = patnum, timeonstudy = visit) %>%
    summarise(timeonstudy = max(timeonstudy), .by = c(study_id, siteid, subjid)) %>%
    group_by(study_id) %>%
    nest()
}


get_AE <- function(df_portf) {
  df_portf %>%
    select(study_id, subjid = patnum, n_ae) %>%
    summarise(n_ae = max(n_ae), .by = c(study_id, subjid)) %>%
    filter(n_ae > 0) %>%
    mutate(n_ae = map(n_ae, ~ tibble(n = seq(1, .)), .progress = TRUE)) %>%
    unnest(n_ae) %>%
    select(- n) %>%
    group_by(study_id) %>%
    nest()
}

dfSUBJ_fix <- get_SUBJ(df_portf_fix)
dfAE_fix <- get_AE(df_portf_fix)
dfInput <- gsm::AE_Map_Raw(list(dfSUBJ = dfSUBJ_fix$data[[1]], dfAE = dfAE_fix$data[[1]]))
dfInput
## # A tibble: 113 × 5
##    SubjectID SiteID Exposure Count  Rate
##    <chr>     <chr>     <int> <int> <dbl>
##  1 0001      4746         16     5 0.312
##  2 0002      4746         14     3 0.214
##  3 0003      4747         26     6 0.231
##  4 0004      4747         26    11 0.423
##  5 0005      4750         19     5 0.263
##  6 0006      4750         19     5 0.263
##  7 0007      4750         20     6 0.3  
##  8 0008      4750         18     3 0.167
##  9 0009      4815         25     3 0.12 
## 10 0010      4815         28     8 0.286
## # ℹ 103 more rows
dfTransformed <- gsm::Transform_Rate(
  dfInput,
  strNumeratorCol = "Count",
  strDenominatorCol = "Exposure"
  )
dfTransformed
## # A tibble: 44 × 4
##    GroupID Numerator Denominator Metric
##    <chr>       <int>       <int>  <dbl>
##  1 4746            8          30  0.267
##  2 4747           17          52  0.327
##  3 4750           19          76  0.25 
##  4 4815           40         153  0.261
##  5 4816            9          27  0.333
##  6 4817            5          31  0.161
##  7 4818            9          30  0.3  
##  8 4891           48         151  0.318
##  9 4893           15          40  0.375
## 10 4932           33         137  0.241
## # ℹ 34 more rows
dfAnalyzed <- gsm::Analyze_NormalApprox(dfTransformed)
dfAnalyzed
## # A tibble: 44 × 7
##    GroupID Numerator Denominator Metric OverallMetric Factor  Score
##    <chr>       <int>       <int>  <dbl>         <dbl>  <dbl>  <dbl>
##  1 4942            6          47  0.128         0.288   1.08 -2.34 
##  2 5166           17          95  0.179         0.288   1.08 -2.27 
##  3 4817            5          31  0.161         0.288   1.08 -1.51 
##  4 5229           12          58  0.207         0.288   1.08 -1.32 
##  5 4932           33         137  0.241         0.288   1.08 -1.18 
##  6 4986           13          58  0.224         0.288   1.08 -1.04 
##  7 4985           28         114  0.246         0.288   1.08 -0.972
##  8 5084            4          21  0.190         0.288   1.08 -0.954
##  9 5082            9          41  0.220         0.288   1.08 -0.938
## 10 4969           14          60  0.233         0.288   1.08 -0.907
## # ℹ 34 more rows
dfFlagged <- gsm::Flag_NormalApprox(dfAnalyzed, vThreshold = c(-3, -2, 2, 3))
dfFlagged
## # A tibble: 44 × 8
##    GroupID Numerator Denominator Metric OverallMetric Factor  Score  Flag
##    <chr>       <int>       <int>  <dbl>         <dbl>  <dbl>  <dbl> <dbl>
##  1 5194           66         182  0.363         0.288   1.08  2.13      1
##  2 4942            6          47  0.128         0.288   1.08 -2.34     -1
##  3 5166           17          95  0.179         0.288   1.08 -2.27     -1
##  4 4817            5          31  0.161         0.288   1.08 -1.51      0
##  5 5229           12          58  0.207         0.288   1.08 -1.32      0
##  6 4932           33         137  0.241         0.288   1.08 -1.18      0
##  7 4986           13          58  0.224         0.288   1.08 -1.04      0
##  8 4985           28         114  0.246         0.288   1.08 -0.972     0
##  9 5084            4          21  0.190         0.288   1.08 -0.954     0
## 10 5082            9          41  0.220         0.288   1.08 -0.938     0
## # ℹ 34 more rows
dfSummary <- gsm::Summarize(dfFlagged)
dfSummary
## # A tibble: 44 × 6
##    GroupID Numerator Denominator Metric Score  Flag
##    <chr>       <int>       <int>  <dbl> <dbl> <dbl>
##  1 5194           66         182  0.363  2.13     1
##  2 5166           17          95  0.179 -2.27    -1
##  3 4942            6          47  0.128 -2.34    -1
##  4 4968           10          24  0.417  1.34     0
##  5 4988           16          39  0.410  1.62     0
##  6 5168           11          27  0.407  1.31     0
##  7 5311            8          20  0.4    1.06     0
##  8 5273           16          40  0.4    1.50     0
##  9 4893           15          40  0.375  1.16     0
## 10 5083           16          44  0.364  1.06     0
## # ℹ 34 more rows
dfBounds <- gsm::Analyze_NormalApprox_PredictBounds(dfTransformed, vThreshold = c(-3, -2, 2, 3))
dfBounds
## # A tibble: 1,254 × 5
##    Threshold Denominator LogDenominator Numerator  Metric
##        <dbl>       <dbl>          <dbl>     <dbl>   <dbl>
##  1        -3        24.6           3.20    0.0927 0.00377
##  2        -3        25.2           3.23    0.189  0.00750
##  3        -3        25.9           3.25    0.287  0.0111 
##  4        -3        26.5           3.28    0.386  0.0145 
##  5        -3        27.2           3.30    0.485  0.0179 
##  6        -3        27.8           3.33    0.586  0.0211 
##  7        -3        28.5           3.35    0.688  0.0242 
##  8        -3        29.2           3.37    0.792  0.0272 
##  9        -3        29.8           3.39    0.895  0.0300 
## 10        -3        30.5           3.42    1.00   0.0328 
## # ℹ 1,244 more rows
chart <- gsm::Visualize_Scatter(dfFlagged, dfBounds)
chart

Simulate UR

UR Funnel

We write out own funnel function as adapted from {gsm}

funnel_ur <- function(df, site, ur_rate) {
  df %>%
    filter(visit == max(visit), .by = patnum) %>%
    summarise(
      Metric = sum(.data$n_ae) / sum(.data$visit),
      n_ae = sum(n_ae),
      visit = sum(visit),
      .by = "site_number"
    ) %>%
    mutate(
      n_ae = ifelse(site_number == site, n_ae * (1 - ur_rate), n_ae),
      Metric = n_ae / visit
    ) %>%
    mutate(
          vMu = sum(.data$n_ae) / sum(.data$visit),
          z_0 = ifelse(.data$vMu == 0,
            0,
            (.data$Metric - .data$vMu) /
              sqrt(.data$vMu / .data$visit)
          ),
          phi = mean(.data$z_0^2),
          z_i = ifelse(.data$vMu == 0 | .data$phi == 0,
            0,
            (.data$Metric - .data$vMu) /
              sqrt(.data$phi * .data$vMu / .data$visit)
          )
    ) %>%
    filter(site_number == site) %>%
    pull(z_i)
}

sim_ur_funnel <- function(df) {
  df %>%
    group_by(study_id) %>%
    nest() %>%
    ungroup() %>%
    mutate(
      sites = map(data, ~ distinct(., site_number))
    ) %>%
    unnest(sites) %>%
    mutate(ur = list(tibble(ur_rate = c(0, 0.1, 0.25, 0.5, 0.75, 1)))) %>%
    unnest(ur) %>%
    mutate(
      score = pmap_dbl(list(data, site_number, ur_rate), funnel_ur, .progress = TRUE)
    )
}

df_sim_ur_funnel_flex <- sim_ur_funnel(df_portf_flex)
df_sim_ur_funnel_fix <- sim_ur_funnel(df_portf_fix)

UR {simaerep}

We simulate under-reporting for both portfolios using {simaerep} using sim_ur_scenarios().

df_sim_simaerep_fix <- sim_ur_scenarios(
   df_portf_fix,
   extra_ur_sites = 0,
   ur_rate = c(0, 0.1, 0.25, 0.5, 0.75, 1),
   parallel = TRUE,
   poisson = TRUE,
   prob_lower = TRUE,
   progress = TRUE
)
df_sim_simaerep_flex <- sim_ur_scenarios(
   df_portf_flex,
   extra_ur_sites = 0,
   ur_rate = c(0, 0.1, 0.25, 0.5, 0.75, 1),
   parallel = TRUE,
   poisson = TRUE,
   prob_lower = TRUE,
   progress = TRUE
)

Evaluate

Combine Results

As the funnel plot score does not use multiplicity correction, we also compare the funnel plot score against the {simaerep} score w/o multiplicity correction.

df_sim_simaerep_fix$ae_rate <- "AE rate: fix"
df_sim_simaerep_flex$ae_rate <- "AE rate: flexible"
df_sim_ur_funnel_fix$ae_rate <- "AE rate: fix"
df_sim_ur_funnel_flex$ae_rate <- "AE rate: flexible"



df_sim_fun_thresh2 <- bind_rows(df_sim_ur_funnel_fix, df_sim_ur_funnel_flex) %>%
  mutate(
    type = "funnel",
  ) %>%
  select(type, ae_rate, study_id, site_number, ur_rate, score)


df_sim_simaerep_threshp95 <-  bind_rows(df_sim_simaerep_fix, df_sim_simaerep_flex) %>%
  mutate(
    type = "{simaerep}"
  ) %>%
  select(type, ae_rate, study_id, site_number, ur_rate, score = prob_low_prob_ur)

df_sim_simaerep_threshp95_no_mult <-  bind_rows(df_sim_simaerep_fix, df_sim_simaerep_flex) %>%
  mutate(
    type = "{simaerep} no mult"
  ) %>%
  mutate(
    score = 1 - prob_low
  ) %>%
  select(type, ae_rate, study_id, site_number, ur_rate, score)


df_eval <- bind_rows(
  df_sim_simaerep_threshp95,
  df_sim_simaerep_threshp95_no_mult,
  df_sim_fun_thresh2
)

AUC

We continue by comparing the ROC-AUC.

get_roc <- function(df_ur, df_nr) {
  df <- bind_rows(df_ur, df_nr) 
  pROC::roc(df, response = "is_ur", predictor = "score", quiet = TRUE)
}

# use 0 scenario to mix with ur scenario and calculate auc from scores
df_nr <- df_eval %>%
  filter(ur_rate == 0) %>%
  mutate(is_ur = "no") %>%
  select(- ur_rate) %>%
  group_by(type, study_id, ae_rate) %>%
  nest() %>%
  ungroup() %>%
  rename(data_nr = data)
  

df_ur <- df_eval %>%
  filter(ur_rate > 0) %>%
  mutate(is_ur = "yes") %>%
  group_by(type, study_id, ur_rate, ae_rate) %>%
  nest() %>%
  ungroup() %>%
  rename(data_ur = data)


df_auc <- df_ur %>%
  left_join(
    df_nr,
    by = c("type", "study_id", "ae_rate")
  ) %>%
  mutate(
    roc = map2(data_ur, data_nr, get_roc, .progress = TRUE),
    auc = map_dbl(roc, pROC::auc, .progress = TRUE)
  ) %>%
  select(type, study_id, ur_rate, ae_rate, roc, auc)

Table

df_auc %>%
  summarise(
    sd_auc = sd(.data$auc),
    auc = mean(.data$auc),
    .by = c(type, ur_rate, ae_rate)
  ) %>%
  knitr::kable(digit = 3)
type ur_rate ae_rate sd_auc auc
{simaerep} 0.10 AE rate: fix 0.108 0.630
{simaerep} 0.25 AE rate: fix 0.163 0.754
{simaerep} 0.50 AE rate: fix 0.161 0.863
{simaerep} 0.75 AE rate: fix 0.130 0.920
{simaerep} 1.00 AE rate: fix 0.111 0.938
{simaerep} 0.10 AE rate: flexible 0.114 0.627
{simaerep} 0.25 AE rate: flexible 0.167 0.752
{simaerep} 0.50 AE rate: flexible 0.161 0.860
{simaerep} 0.75 AE rate: flexible 0.127 0.923
{simaerep} 1.00 AE rate: flexible 0.108 0.943
{simaerep} no mult 0.10 AE rate: fix 0.062 0.662
{simaerep} no mult 0.25 AE rate: fix 0.090 0.814
{simaerep} no mult 0.50 AE rate: fix 0.071 0.930
{simaerep} no mult 0.75 AE rate: fix 0.043 0.973
{simaerep} no mult 1.00 AE rate: fix 0.040 0.980
{simaerep} no mult 0.10 AE rate: flexible 0.068 0.666
{simaerep} no mult 0.25 AE rate: flexible 0.096 0.819
{simaerep} no mult 0.50 AE rate: flexible 0.074 0.932
{simaerep} no mult 0.75 AE rate: flexible 0.046 0.972
{simaerep} no mult 1.00 AE rate: flexible 0.042 0.978
funnel 0.10 AE rate: fix 0.082 0.672
funnel 0.25 AE rate: fix 0.097 0.827
funnel 0.50 AE rate: fix 0.069 0.937
funnel 0.75 AE rate: fix 0.042 0.975
funnel 1.00 AE rate: fix 0.028 0.988
funnel 0.10 AE rate: flexible 0.058 0.631
funnel 0.25 AE rate: flexible 0.084 0.775
funnel 0.50 AE rate: flexible 0.072 0.908
funnel 0.75 AE rate: flexible 0.044 0.962
funnel 1.00 AE rate: flexible 0.029 0.983

Plot

df_auc %>%
  ggplot(aes(type, auc)) +
    geom_boxplot(aes(fill = type)) +
    facet_grid(ae_rate ~ ur_rate)  +
    scale_fill_brewer(palette = "Dark2") +
    theme(axis.text.x = element_blank())

Metrics

Thresholds

We set the thresholds for funnel and simaerep w/o multiplicity correction so that we get the same fpr as for simaerep with multiplicity correction and the established threshold of 0.95

thresh_default <- 0.95

target_fpr <- df_eval %>%
  filter(ur_rate == 0, type == "{simaerep}") %>%
  summarise(fpr = sum(score >= thresh_default) / n()) %>%
  pull(fpr)

thresh_no_mult <-  df_eval %>%
  filter(ur_rate == 0, type == "{simaerep} no mult") %>%
  pull(score) %>%
  quantile(1 - target_fpr)

thresh_funnel <- df_eval %>%
  filter(ur_rate == 0, type == "funnel") %>%
  pull(score) %>%
  quantile(target_fpr)

target_fpr
## [1] 0.0005309081
thresh_no_mult
## 99.94691% 
##     0.999
thresh_funnel
## 0.05309081% 
##   -2.979981

Aggregate

get_prop_test_ci95 <- function(..., ix) {
  
  stopifnot(ix %in% c(1, 2))
  
  tryCatch(
    prop.test(...)$conf.int[ix],
    error = function(cnd) c(NA, NA)[ix]
  )
}

aggr_results <- function(df_eval) {

  df_perf <- df_eval %>%
    mutate(
      is_ur = case_when(
        type == "{simaerep}" ~ score >= thresh_default,
        type == "{simaerep} no mult" ~ score >= thresh_no_mult,
        type == "funnel" ~ score <= thresh_funnel
      )
    ) %>%
    summarise(
      n = n(),
      .by = c(type, ae_rate, ur_rate, is_ur)
    ) %>%
    pivot_wider(
      names_from = is_ur,
      values_from = n,
      names_prefix = "is_ur_",
      values_fill = 0
    ) %>%
    mutate(
      n_sites = is_ur_TRUE + is_ur_FALSE,
      ratio = is_ur_TRUE / n_sites,
      ratio_type = ifelse(ur_rate == 0, "fpr", "tpr"),
      ci95_low = map2_dbl(is_ur_TRUE, n_sites, ~ get_prop_test_ci95(.x, .y, ix = 1)),
      ci95_high = map2_dbl(is_ur_TRUE, n_sites, ~ get_prop_test_ci95(.x, .y, ix = 2))
    )
  }

df_perf <- aggr_results(df_eval)

Table

df_perf %>%
  knitr::kable(digits = 4)
type ae_rate ur_rate is_ur_FALSE is_ur_TRUE n_sites ratio ratio_type ci95_low ci95_high
{simaerep} AE rate: fix 0.00 21651 10 21661 0.0005 fpr 0.0002 0.0009
{simaerep} AE rate: fix 0.10 21584 77 21661 0.0036 tpr 0.0028 0.0045
{simaerep} AE rate: fix 0.25 20581 1080 21661 0.0499 tpr 0.0470 0.0529
{simaerep} AE rate: fix 0.50 15956 5705 21661 0.2634 tpr 0.2575 0.2693
{simaerep} AE rate: fix 0.75 11018 10643 21661 0.4913 tpr 0.4847 0.4980
{simaerep} AE rate: fix 1.00 8492 13169 21661 0.6080 tpr 0.6014 0.6145
{simaerep} AE rate: flexible 0.00 21648 13 21661 0.0006 fpr 0.0003 0.0011
{simaerep} AE rate: flexible 0.10 21535 126 21661 0.0058 tpr 0.0049 0.0069
{simaerep} AE rate: flexible 0.25 20348 1313 21661 0.0606 tpr 0.0575 0.0639
{simaerep} AE rate: flexible 0.50 15435 6226 21661 0.2874 tpr 0.2814 0.2935
{simaerep} AE rate: flexible 0.75 10511 11150 21661 0.5148 tpr 0.5081 0.5214
{simaerep} AE rate: flexible 1.00 8085 13576 21661 0.6267 tpr 0.6203 0.6332
{simaerep} no mult AE rate: fix 0.00 21641 20 21661 0.0009 fpr 0.0006 0.0015
{simaerep} no mult AE rate: fix 0.10 21531 130 21661 0.0060 tpr 0.0050 0.0071
{simaerep} no mult AE rate: fix 0.25 20255 1406 21661 0.0649 tpr 0.0617 0.0683
{simaerep} no mult AE rate: fix 0.50 15130 6531 21661 0.3015 tpr 0.2954 0.3077
{simaerep} no mult AE rate: fix 0.75 9923 11738 21661 0.5419 tpr 0.5352 0.5485
{simaerep} no mult AE rate: fix 1.00 7471 14190 21661 0.6551 tpr 0.6487 0.6614
{simaerep} no mult AE rate: flexible 0.00 21635 26 21661 0.0012 fpr 0.0008 0.0018
{simaerep} no mult AE rate: flexible 0.10 21479 182 21661 0.0084 tpr 0.0072 0.0097
{simaerep} no mult AE rate: flexible 0.25 20015 1646 21661 0.0760 tpr 0.0725 0.0796
{simaerep} no mult AE rate: flexible 0.50 14490 7171 21661 0.3311 tpr 0.3248 0.3374
{simaerep} no mult AE rate: flexible 0.75 9488 12173 21661 0.5620 tpr 0.5553 0.5686
{simaerep} no mult AE rate: flexible 1.00 7123 14538 21661 0.6712 tpr 0.6649 0.6774
funnel AE rate: fix 0.00 21654 7 21661 0.0003 fpr 0.0001 0.0007
funnel AE rate: fix 0.10 21561 100 21661 0.0046 tpr 0.0038 0.0056
funnel AE rate: fix 0.25 19908 1753 21661 0.0809 tpr 0.0773 0.0847
funnel AE rate: fix 0.50 14432 7229 21661 0.3337 tpr 0.3275 0.3401
funnel AE rate: fix 0.75 9742 11919 21661 0.5503 tpr 0.5436 0.5569
funnel AE rate: fix 1.00 6598 15063 21661 0.6954 tpr 0.6892 0.7015
funnel AE rate: flexible 0.00 21645 16 21661 0.0007 fpr 0.0004 0.0012
funnel AE rate: flexible 0.10 21592 69 21661 0.0032 tpr 0.0025 0.0041
funnel AE rate: flexible 0.25 21076 585 21661 0.0270 tpr 0.0249 0.0293
funnel AE rate: flexible 0.50 17908 3753 21661 0.1733 tpr 0.1683 0.1784
funnel AE rate: flexible 0.75 13506 8155 21661 0.3765 tpr 0.3700 0.3830
funnel AE rate: flexible 1.00 9665 11996 21661 0.5538 tpr 0.5472 0.5604

Plot

plot_perf <- function(df_perf) {

  df_perf %>%
    mutate(ur_rate = paste0("under-reporting rate: ",  ur_rate, " - ", ratio_type),
           ur_rate = ifelse(str_detect(ur_rate, "fpr"), "fpr", ur_rate),
           ae_rate = forcats::fct_rev(factor(ae_rate))) %>%
    group_by(ur_rate) %>%
    ggplot(aes(type, ratio)) +
      geom_errorbar(aes(ymin = ci95_low, ymax = ci95_high, color = type, linetype = ae_rate), linewidth = 1) +
      facet_wrap(~ ur_rate, ncol = 1) +
      coord_flip() +
      theme(legend.position = "bottom") +
      labs(
        x = "",
        y = "CI95 Performance Ratio", 
        title = "{simaerep} vs Funnel-Plot Performance"
      ) +
      scale_color_brewer(palette = "Dark2")
}

plot_perf(df_perf)

Summary

  • Funnel plot expects constant event rates over time. Performance decrease when event rates are flexible.
  • {simaerep} performance is unaffected by flexible event rates.
  • {simaerep} detection rates with comparable false positive rates are greater than funnel plot detection rates when event rates are flexible.
plan(sequential)