Get the portfolio configuration from a dataframe aggregated on patient level
with max_ae and max_visit. Studies with fewer patients or sites than the
configured minimums are filtered out, and IDs are anonymized. The portfolio
configuration can be used by sim_test_data_portfolio to generate data for an
artificial portfolio.
get_config(
df_site,
min_pat_per_study = 100,
min_sites_per_study = 10,
anonymize = TRUE,
pad_width = 4
)
df_site: dataframe aggregated on patient level with max_ae and max_visit
min_pat_per_study: minimum number of patients per study, Default: 100
min_sites_per_study: minimum number of sites per study, Default: 10
anonymize: logical, whether to anonymize IDs, Default: TRUE
pad_width: padding width for newly created IDs, Default: 4
dataframe with the following columns:
study_id: study identification
ae_per_visit_mean: mean AE per visit per study
site_number: site
max_visit_sd: standard deviation of maximum patient visits per site
max_visit_mean: mean of maximum patient visits per site
n_pat: number of patients
# \donttest{
# Simulate two studies of 100 patients across 10 sites each, using different
# frac_site_with_ur / ur_rate settings, and label them "A" and "B".
df_visit1 <- sim_test_data_study(n_pat = 100, n_sites = 10,
frac_site_with_ur = 0.4, ur_rate = 0.6)
df_visit1$study_id <- "A"
df_visit2 <- sim_test_data_study(n_pat = 100, n_sites = 10,
frac_site_with_ur = 0.2, ur_rate = 0.1)
df_visit2$study_id <- "B"
df_visit <- dplyr::bind_rows(df_visit1, df_visit2)
# Aggregate to one row per patient holding max_visit and max_ae, which is the
# patient-level input that get_config() expects.
df_site_max <- df_visit %>%
dplyr::group_by(study_id, site_number, patnum) %>%
dplyr::summarise(max_visit = max(visit),
max_ae = max(n_ae),
.groups = "drop")
# Build the configuration with default settings. Both studies appear in the
# result, with study and site IDs replaced by zero-padded numbers (width 4).
df_config <- get_config(df_site_max)
df_config
#> # A tibble: 20 × 6
#> study_id ae_per_visit_mean site_number max_visit_sd max_visit_mean n_pat
#> <chr> <dbl> <chr> <dbl> <dbl> <int>
#> 1 0001 0.401 0001 3.41 20.1 10
#> 2 0001 0.401 0002 2.33 21.1 10
#> 3 0001 0.401 0003 3.97 20.2 10
#> 4 0001 0.401 0004 3.30 21.3 10
#> 5 0001 0.401 0005 4.25 20.9 10
#> 6 0001 0.401 0006 4.50 19.5 10
#> 7 0001 0.401 0007 3.92 17.3 10
#> 8 0001 0.401 0008 4.76 19.7 10
#> 9 0001 0.401 0009 4.40 19.3 10
#> 10 0001 0.401 0010 4.27 19.3 10
#> 11 0002 0.487 0001 2.99 20.4 10
#> 12 0002 0.487 0002 4.30 19.7 10
#> 13 0002 0.487 0003 2.49 19.8 10
#> 14 0002 0.487 0004 5.32 19.4 10
#> 15 0002 0.487 0005 3.77 19.3 10
#> 16 0002 0.487 0006 5.44 19.7 10
#> 17 0002 0.487 0007 4.72 19.5 10
#> 18 0002 0.487 0008 4.10 20.8 10
#> 19 0002 0.487 0009 3.88 17.8 10
#> 20 0002 0.487 0010 3.89 18.3 10
# Generate data for an artificial portfolio from the configuration. The printed
# result contains patnum, visit and n_ae columns alongside the site parameters.
df_portf <- sim_test_data_portfolio(df_config)
df_portf
#> # A tibble: 3,829 × 8
#> study_id ae_per_visit_mean site_number max_visit…¹ max_v…² patnum visit n_ae
#> <chr> <dbl> <chr> <dbl> <dbl> <chr> <int> <int>
#> 1 0001 0.401 0001 3.41 20.1 0001 1 0
#> 2 0001 0.401 0001 3.41 20.1 0001 2 0
#> 3 0001 0.401 0001 3.41 20.1 0001 3 2
#> 4 0001 0.401 0001 3.41 20.1 0001 4 2
#> 5 0001 0.401 0001 3.41 20.1 0001 5 2
#> 6 0001 0.401 0001 3.41 20.1 0001 6 2
#> 7 0001 0.401 0001 3.41 20.1 0001 7 2
#> 8 0001 0.401 0001 3.41 20.1 0001 8 3
#> 9 0001 0.401 0001 3.41 20.1 0001 9 3
#> 10 0001 0.401 0001 3.41 20.1 0001 10 3
#> # … with 3,819 more rows, and abbreviated variable names ¹max_visit_sd,
#> # ²max_visit_mean
# Run under-reporting scenarios on the portfolio for ur_rate values 0.5 and 1.
# With extra_ur_sites = 2 the printed output shows rows for 0, 1 and 2 extra
# under-reporting sites; see the sim_ur_scenarios() documentation for details.
df_scen <- sim_ur_scenarios(df_portf,
extra_ur_sites = 2,
ur_rate = c(0.5, 1))
#> aggregating site level
#> prepping for simulation
#> generating scenarios
#> getting under-reporting stats
#> evaluating stats
df_scen
#> # A tibble: 140 × 14
#> study…¹ site_…² n_pat n_pat…³ visit…⁴ mean_…⁵ mean_…⁶ n_pat…⁷ extra…⁸ frac_…⁹
#> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <int> <dbl> <dbl>
#> 1 0001 0001 10 10 15 5.8 5.87 78 0 0
#> 2 0001 0001 10 10 15 2.9 5.87 78 0 0.114
#> 3 0001 0001 10 10 15 0 5.87 78 0 0.114
#> 4 0001 0001 10 10 15 2.9 5.53 78 1 0.214
#> 5 0001 0001 10 10 15 0 5.18 78 1 0.214
#> 6 0001 0001 10 10 15 2.9 5.25 78 2 0.314
#> 7 0001 0001 10 10 15 0 4.63 78 2 0.314
#> 8 0001 0002 10 10 17 7.1 6.46 65 0 0
#> 9 0001 0002 10 10 17 3.55 6.46 65 0 0.133
#> 10 0001 0002 10 10 17 0 6.46 65 0 0.133
#> # … with 130 more rows, 4 more variables: ur_rate <dbl>, prob_low <dbl>,
#> # prob_low_adj <dbl>, prob_low_prob_ur <dbl>, and abbreviated variable names
#> # ¹study_id, ²site_number, ³n_pat_with_med75, ⁴visit_med75,
#> # ⁵mean_ae_site_med75, ⁶mean_ae_study_med75, ⁷n_pat_with_med75_study,
#> # ⁸extra_ur_sites, ⁹frac_pat_with_ur
# Summarise the scenarios with get_portf_perf(). The printed result reports tpr
# for each combination of fpr, thresh, extra_ur_sites and ur_rate.
# NOTE(review): fpr/tpr presumably mean false/true positive rate - confirm.
df_perf <- get_portf_perf(df_scen)
df_perf
#> # A tibble: 27 × 5
#> fpr thresh extra_ur_sites ur_rate tpr
#> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0.001 0.922 0 0 0.05
#> 2 0.001 0.922 1 0 0.05
#> 3 0.001 0.922 2 0 0.05
#> 4 0.001 0.922 0 0.5 1
#> 5 0.001 0.922 1 0.5 1
#> 6 0.001 0.922 2 0.5 1
#> 7 0.001 0.922 0 1 1
#> 8 0.001 0.922 1 1 1
#> 9 0.001 0.922 2 1 1
#> 10 0.01 0.909 0 0 0.05
#> # … with 17 more rows
# }