Get the portfolio configuration from a dataframe aggregated on
patient level with max_ae and max_visit. Studies with only a few
sites and patients are filtered out (see min_pat_per_study and
min_sites_per_study) and, by default, IDs are anonymized (see anonymize).
The resulting portfolio configuration can be used by sim_test_data_portfolio
to generate data for an artificial portfolio.
get_config(
df_site,
min_pat_per_study = 100,
min_sites_per_study = 10,
anonymize = TRUE,
pad_width = 4
)
df_site: dataframe aggregated on patient level with max_ae and max_visit
min_pat_per_study: minimum number of patients per study, Default: 100
min_sites_per_study: minimum number of sites per study, Default: 10
anonymize: logical, whether to anonymize IDs, Default: TRUE
pad_width: padding width for newly created IDs, Default: 4
dataframe with the following columns:
study_id: study identification
ae_per_visit_mean: mean AE per visit per study
site_number: site
max_visit_sd: standard deviation of maximum patient visits per site
max_visit_mean: mean of maximum patient visits per site
n_pat: number of patients
# \donttest{
df_visit1 <- sim_test_data_study(n_pat = 100, n_sites = 10,
frac_site_with_ur = 0.4, ur_rate = 0.6)
df_visit1$study_id <- "A"
df_visit2 <- sim_test_data_study(n_pat = 100, n_sites = 10,
frac_site_with_ur = 0.2, ur_rate = 0.1)
df_visit2$study_id <- "B"
df_visit <- dplyr::bind_rows(df_visit1, df_visit2)
df_site_max <- df_visit %>%
dplyr::group_by(study_id, site_number, patnum) %>%
dplyr::summarise(max_visit = max(visit),
max_ae = max(n_ae),
.groups = "drop")
df_config <- get_config(df_site_max)
df_config
#> # A tibble: 20 × 6
#> study_id ae_per_visit_mean site_number max_visit_sd max_visit_mean n_pat
#> <chr> <dbl> <chr> <dbl> <dbl> <int>
#> 1 0001 0.400 0001 4.61 18.8 10
#> 2 0001 0.400 0002 3.78 19.4 10
#> 3 0001 0.400 0003 3.28 20.1 10
#> 4 0001 0.400 0004 4.84 19.4 10
#> 5 0001 0.400 0005 4.30 21.6 10
#> 6 0001 0.400 0006 4.18 19.1 10
#> 7 0001 0.400 0007 4.06 17.5 10
#> 8 0001 0.400 0008 4.10 20.8 10
#> 9 0001 0.400 0009 4.74 18.5 10
#> 10 0001 0.400 0010 4.30 18.6 10
#> 11 0002 0.487 0001 3.10 20.6 10
#> 12 0002 0.487 0002 2.75 20 10
#> 13 0002 0.487 0003 3.71 19.2 10
#> 14 0002 0.487 0004 5.34 20.4 10
#> 15 0002 0.487 0005 3.84 18.5 10
#> 16 0002 0.487 0006 4.35 19.4 10
#> 17 0002 0.487 0007 5.48 19.4 10
#> 18 0002 0.487 0008 3.62 22 10
#> 19 0002 0.487 0009 4.00 17.7 10
#> 20 0002 0.487 0010 2.60 19.1 10
df_portf <- sim_test_data_portfolio(df_config)
df_portf
#> # A tibble: 3,792 × 8
#> study_id ae_per_visit_mean site_number max_visit_sd max_visit_mean patnum
#> <chr> <dbl> <chr> <dbl> <dbl> <chr>
#> 1 0001 0.400 0001 4.61 18.8 0001
#> 2 0001 0.400 0001 4.61 18.8 0001
#> 3 0001 0.400 0001 4.61 18.8 0001
#> 4 0001 0.400 0001 4.61 18.8 0001
#> 5 0001 0.400 0001 4.61 18.8 0001
#> 6 0001 0.400 0001 4.61 18.8 0001
#> 7 0001 0.400 0001 4.61 18.8 0001
#> 8 0001 0.400 0001 4.61 18.8 0001
#> 9 0001 0.400 0001 4.61 18.8 0001
#> 10 0001 0.400 0001 4.61 18.8 0001
#> # ℹ 3,782 more rows
#> # ℹ 2 more variables: visit <int>, n_ae <int>
df_scen <- sim_ur_scenarios(df_portf,
extra_ur_sites = 2,
ur_rate = c(0.5, 1))
#> aggregating site level
#> prepping for simulation
#> generating scenarios
#> getting under-reporting stats
#> evaluating stats
df_scen
#> # A tibble: 140 × 14
#> study_id site_number n_pat n_pat_with_med75 visit_med75 mean_ae_site_med75
#> <chr> <chr> <int> <int> <dbl> <dbl>
#> 1 0001 0001 10 9 14 5.78
#> 2 0001 0001 10 9 14 2.89
#> 3 0001 0001 10 9 14 0
#> 4 0001 0001 10 9 14 2.89
#> 5 0001 0001 10 9 14 0
#> 6 0001 0001 10 9 14 2.89
#> 7 0001 0001 10 9 14 0
#> 8 0001 0002 10 9 15 5.78
#> 9 0001 0002 10 9 15 2.89
#> 10 0001 0002 10 9 15 0
#> # ℹ 130 more rows
#> # ℹ 8 more variables: mean_ae_study_med75 <dbl>, n_pat_with_med75_study <int>,
#> # extra_ur_sites <dbl>, frac_pat_with_ur <dbl>, ur_rate <dbl>,
#> # prob_low <dbl>, prob_low_adj <dbl>, prob_low_prob_ur <dbl>
df_perf <- get_portf_perf(df_scen)
df_perf
#> # A tibble: 27 × 5
#> fpr thresh extra_ur_sites ur_rate tpr
#> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0.001 0.925 0 0 0.05
#> 2 0.001 0.925 1 0 0.05
#> 3 0.001 0.925 2 0 0.05
#> 4 0.001 0.925 0 0.5 1
#> 5 0.001 0.925 1 0.5 1
#> 6 0.001 0.925 2 0.5 1
#> 7 0.001 0.925 0 1 1
#> 8 0.001 0.925 1 1 1
#> 9 0.001 0.925 2 1 1
#> 10 0.01 0.920 0 0 0.05
#> # ℹ 17 more rows
# }