Get the portfolio configuration from a dataframe aggregated on
patient level with max_ae and max_visit. Studies with only a few
sites and patients (see min_sites_per_study and min_pat_per_study) are
filtered out, and IDs are anonymized by default (see anonymize). The
portfolio configuration can be used by
sim_test_data_portfolio
to generate data for an
artificial portfolio.
Usage
get_config(
df_site,
min_pat_per_study = 100,
min_sites_per_study = 10,
anonymize = TRUE,
pad_width = 4
)
Value
dataframe with the following columns:
- study_id
study identification
- ae_per_visit_mean
mean AE per visit per study
- site_number
site identification
- max_visit_sd
standard deviation of maximum patient visits per site
- max_visit_mean
mean of maximum patient visits per site
- n_pat
number of patients per site
Examples
# \donttest{
df_visit1 <- sim_test_data_study(n_pat = 100, n_sites = 10,
frac_site_with_ur = 0.4, ur_rate = 0.6)
df_visit1$study_id <- "A"
df_visit2 <- sim_test_data_study(n_pat = 100, n_sites = 10,
frac_site_with_ur = 0.2, ur_rate = 0.1)
df_visit2$study_id <- "B"
df_visit <- dplyr::bind_rows(df_visit1, df_visit2)
df_site_max <- df_visit %>%
dplyr::group_by(study_id, site_number, patnum) %>%
dplyr::summarise(max_visit = max(visit),
max_ae = max(n_ae),
.groups = "drop")
df_config <- get_config(df_site_max)
df_config
#> # A tibble: 20 × 6
#> study_id ae_per_visit_mean site_number max_visit_sd max_visit_mean n_pat
#> <chr> <dbl> <chr> <dbl> <dbl> <int>
#> 1 0001 0.402 0001 4.98 18.2 10
#> 2 0001 0.402 0002 2.92 20.5 10
#> 3 0001 0.402 0003 3.77 21.2 10
#> 4 0001 0.402 0004 3.81 20.9 10
#> 5 0001 0.402 0005 4.19 21 10
#> 6 0001 0.402 0006 4.23 18.9 10
#> 7 0001 0.402 0007 4.41 18.1 10
#> 8 0001 0.402 0008 4.53 19.9 10
#> 9 0001 0.402 0009 4.52 19.2 10
#> 10 0001 0.402 0010 4.47 18.8 10
#> 11 0002 0.485 0001 2.98 20.3 10
#> 12 0002 0.485 0002 3.34 20.6 10
#> 13 0002 0.485 0003 3.51 19.1 10
#> 14 0002 0.485 0004 5.32 19.4 10
#> 15 0002 0.485 0005 3.77 19.2 10
#> 16 0002 0.485 0006 4.37 18.8 10
#> 17 0002 0.485 0007 5.60 20.3 10
#> 18 0002 0.485 0008 3.57 21.4 10
#> 19 0002 0.485 0009 3.98 17.6 10
#> 20 0002 0.485 0010 2.67 19 10
df_portf <- sim_test_data_portfolio(df_config)
df_portf
#> # A tibble: 3,844 × 8
#> study_id ae_per_visit_mean site_number max_visit_sd max_visit_mean patnum
#> <chr> <dbl> <chr> <dbl> <dbl> <chr>
#> 1 0001 0.402 0001 4.98 18.2 0001
#> 2 0001 0.402 0001 4.98 18.2 0001
#> 3 0001 0.402 0001 4.98 18.2 0001
#> 4 0001 0.402 0001 4.98 18.2 0001
#> 5 0001 0.402 0001 4.98 18.2 0001
#> 6 0001 0.402 0001 4.98 18.2 0001
#> 7 0001 0.402 0001 4.98 18.2 0002
#> 8 0001 0.402 0001 4.98 18.2 0002
#> 9 0001 0.402 0001 4.98 18.2 0002
#> 10 0001 0.402 0001 4.98 18.2 0002
#> # ℹ 3,834 more rows
#> # ℹ 2 more variables: visit <int>, n_ae <int>
df_scen <- sim_ur_scenarios(df_portf,
extra_ur_sites = 2,
ur_rate = c(0.5, 1))
#> aggregating site level
#> prepping for simulation
#> generating scenarios
#> getting under-reporting stats
#> evaluating stats
df_scen
#> # A tibble: 140 × 14
#> study_id site_number extra_ur_sites frac_pat_with_ur ur_rate n_pat
#> <chr> <chr> <dbl> <dbl> <dbl> <int>
#> 1 0001 0001 0 0 0 10
#> 2 0001 0001 0 0.108 0.5 10
#> 3 0001 0001 0 0.108 1 10
#> 4 0001 0001 1 0.208 0.5 10
#> 5 0001 0001 1 0.208 1 10
#> 6 0001 0001 2 0.308 0.5 10
#> 7 0001 0001 2 0.308 1 10
#> 8 0001 0002 0 0 0 10
#> 9 0001 0002 0 0.107 0.5 10
#> 10 0001 0002 0 0.107 1 10
#> # ℹ 130 more rows
#> # ℹ 8 more variables: n_pat_with_med75 <dbl>, visit_med75 <dbl>,
#> # mean_ae_site_med75 <dbl>, mean_ae_study_med75 <dbl>,
#> # n_pat_with_med75_study <int>, prob_low <dbl>, prob_low_adj <dbl>,
#> # prob_low_prob_ur <dbl>
df_perf <- get_portf_perf(df_scen)
df_perf
#> # A tibble: 27 × 5
#> fpr thresh extra_ur_sites ur_rate tpr
#> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0.001 0.382 0 0 0.3
#> 2 0.001 0.382 1 0 0.3
#> 3 0.001 0.382 2 0 0.3
#> 4 0.001 0.382 0 0.5 1
#> 5 0.001 0.382 1 0.5 1
#> 6 0.001 0.382 2 0.5 1
#> 7 0.001 0.382 0 1 1
#> 8 0.001 0.382 1 1 1
#> 9 0.001 0.382 2 1 1
#> 10 0.01 0.382 0 0 0.3
#> # ℹ 17 more rows
# }