Use with simulated portfolio data to generate under-reporting stats for specified scenarios.

sim_ur_scenarios(
  df_portf,
  extra_ur_sites = 3,
  ur_rate = c(0.25, 0.5),
  r = 1000,
  poisson_test = FALSE,
  prob_lower = TRUE,
  parallel = FALSE,
  progress = TRUE,
  site_aggr_args = list(),
  eval_sites_args = list()
)

Arguments

df_portf

dataframe as returned by sim_test_data_portfolio

extra_ur_sites

numeric, set maximum number of additional under-reporting sites, see details. Default: 3

ur_rate

numeric vector, set under-reporting rates for scenarios. Default: c(0.25, 0.5)

r

integer, denotes number of simulations. Default: 1000

poisson_test

logical, calculates poisson.test p-value. Default: FALSE

prob_lower

logical, calculates probability for getting a lower value. Default: TRUE

parallel

logical, use parallel processing, see details. Default: FALSE

progress

logical, show progress bar, Default: TRUE

site_aggr_args

named list of parameters passed to site_aggr, Default: list()

eval_sites_args

named list of parameters passed to eval_sites, Default: list()

Value

dataframe with the following columns:

study_id

study identification

site_number

site identification

n_pat

number of patients at site

n_pat_with_med75

number of patients at site with visit_med75

visit_med75

median(max(visit)) * 0.75

mean_ae_site_med75

mean AE at visit_med75 site level

mean_ae_study_med75

mean AE at visit_med75 study level

n_pat_with_med75_study

number of patients in the study with visit_med75, excluding the patients at the site

extra_ur_sites

additional sites with under-reporting patients

frac_pat_with_ur

ratio of patients in study that are under-reporting

ur_rate

under-reporting rate

pval

p-value as returned by poisson.test

prob_low

bootstrapped probability for having mean_ae_site_med75 or lower

pval_adj

adjusted p-values

prob_low_adj

adjusted bootstrapped probability for having mean_ae_site_med75 or lower

pval_prob_ur

probability of under-reporting as 1 - pval_adj, poisson.test (use as benchmark)

prob_low_prob_ur

probability of under-reporting as 1 - prob_low_adj, bootstrapped (use)

Details

The function applies under-reporting scenarios to each site: it reduces the number of AEs by a given under-reporting rate (ur_rate) for all patients at the site and adds the corresponding under-reporting statistics. Since the under-reporting probability is also affected by the number of other sites that are under-reporting, we additionally calculate under-reporting statistics for scenarios in which additional under-reporting sites are present. For this we use the median number of patients per site in the study to calculate the final number of patients for which we lower the AEs in a given under-reporting scenario. We use the furrr package to implement parallel processing, as these simulations can take a long time to run. For this to work we need to specify the plan for how the code should run, e.g. plan(multisession, workers = 18).

Examples

# \donttest{
df_visit1 <- sim_test_data_study(n_pat = 100, n_sites = 10,
                                 frac_site_with_ur = 0.4, ur_rate = 0.6)

df_visit1$study_id <- "A"

df_visit2 <- sim_test_data_study(n_pat = 100, n_sites = 10,
                                 frac_site_with_ur = 0.2, ur_rate = 0.1)

df_visit2$study_id <- "B"

df_visit <- dplyr::bind_rows(df_visit1, df_visit2)

df_site_max <- df_visit %>%
  dplyr::group_by(study_id, site_number, patnum) %>%
  dplyr::summarise(max_visit = max(visit),
            max_ae = max(n_ae),
            .groups = "drop")

df_config <- get_config(df_site_max)

df_config
#> # A tibble: 20 × 6
#>    study_id ae_per_visit_mean site_number max_visit_sd max_visit_mean n_pat
#>    <chr>                <dbl> <chr>              <dbl>          <dbl> <int>
#>  1 0001                 0.366 0001                4.42           20      10
#>  2 0001                 0.366 0002                4.03           21.4    10
#>  3 0001                 0.366 0003                4.13           20.2    10
#>  4 0001                 0.366 0004                2.58           18.3    10
#>  5 0001                 0.366 0005                4.64           17.8    10
#>  6 0001                 0.366 0006                2.37           17.6    10
#>  7 0001                 0.366 0007                4.80           19.8    10
#>  8 0001                 0.366 0008                2              20      10
#>  9 0001                 0.366 0009                3.17           19.5    10
#> 10 0001                 0.366 0010                6.57           19.9    10
#> 11 0002                 0.489 0001                2.85           19.9    10
#> 12 0002                 0.489 0002                3.31           18.1    10
#> 13 0002                 0.489 0003                3.14           18.1    10
#> 14 0002                 0.489 0004                4.74           20.7    10
#> 15 0002                 0.489 0005                5.20           19.2    10
#> 16 0002                 0.489 0006                3.30           21      10
#> 17 0002                 0.489 0007                4.07           19.9    10
#> 18 0002                 0.489 0008                4.53           18.5    10
#> 19 0002                 0.489 0009                2.95           21.7    10
#> 20 0002                 0.489 0010                3.36           20.2    10

df_portf <- sim_test_data_portfolio(df_config)

df_portf
#> # A tibble: 3,834 × 8
#>    study_id ae_per_visit_mean site_number max_visit_sd max_visit_mean patnum
#>    <chr>                <dbl> <chr>              <dbl>          <dbl> <chr> 
#>  1 0001                 0.366 0001                4.42             20 0001  
#>  2 0001                 0.366 0001                4.42             20 0001  
#>  3 0001                 0.366 0001                4.42             20 0001  
#>  4 0001                 0.366 0001                4.42             20 0001  
#>  5 0001                 0.366 0001                4.42             20 0001  
#>  6 0001                 0.366 0001                4.42             20 0001  
#>  7 0001                 0.366 0001                4.42             20 0001  
#>  8 0001                 0.366 0001                4.42             20 0001  
#>  9 0001                 0.366 0001                4.42             20 0001  
#> 10 0001                 0.366 0001                4.42             20 0001  
#> # ℹ 3,824 more rows
#> # ℹ 2 more variables: visit <int>, n_ae <int>

df_scen <- sim_ur_scenarios(df_portf,
                            extra_ur_sites = 2,
                            ur_rate = c(0.5, 1))
#> aggregating site level
#> prepping for simulation
#> generating scenarios
#> getting under-reporting stats
#> evaluating stats


df_scen
#> # A tibble: 140 × 14
#>    study_id site_number n_pat n_pat_with_med75 visit_med75 mean_ae_site_med75
#>    <chr>    <chr>       <int>            <int>       <dbl>              <dbl>
#>  1 0001     0001           10                9          16               5.44
#>  2 0001     0001           10                9          16               2.72
#>  3 0001     0001           10                9          16               0   
#>  4 0001     0001           10                9          16               2.72
#>  5 0001     0001           10                9          16               0   
#>  6 0001     0001           10                9          16               2.72
#>  7 0001     0001           10                9          16               0   
#>  8 0001     0002           10                9          16               4.89
#>  9 0001     0002           10                9          16               2.44
#> 10 0001     0002           10                9          16               0   
#> # ℹ 130 more rows
#> # ℹ 8 more variables: mean_ae_study_med75 <dbl>, n_pat_with_med75_study <int>,
#> #   extra_ur_sites <dbl>, frac_pat_with_ur <dbl>, ur_rate <dbl>,
#> #   prob_low <dbl>, prob_low_adj <dbl>, prob_low_prob_ur <dbl>

df_perf <- get_portf_perf(df_scen)

df_perf
#> # A tibble: 27 × 5
#>      fpr thresh extra_ur_sites ur_rate   tpr
#>    <dbl>  <dbl>          <dbl>   <dbl> <dbl>
#>  1 0.001  0.858              0     0    0.05
#>  2 0.001  0.858              1     0    0.05
#>  3 0.001  0.858              2     0    0.05
#>  4 0.001  0.858              0     0.5  1   
#>  5 0.001  0.858              1     0.5  1   
#>  6 0.001  0.858              2     0.5  1   
#>  7 0.001  0.858              0     1    1   
#>  8 0.001  0.858              1     1    1   
#>  9 0.001  0.858              2     1    1   
#> 10 0.01   0.857              0     0    0.05
#> # ℹ 17 more rows
# }