Simulate visit level data from a portfolio configuration.

sim_test_data_portfolio(df_config, parallel = FALSE, progress = TRUE)

Arguments

df_config

dataframe as returned by get_config

parallel

logical, activate parallel processing, see details, Default: FALSE

progress

logical, Default: TRUE

Value

dataframe with the following columns:

study_id

study identification

ae_per_visit_mean

mean AE per visit per study

site_number

site

max_visit_sd

standard deviation of maximum patient visits per site

max_visit_mean

mean of maximum patient visits per site

patnum

patient number (patient identifier)

visit

visit number

n_ae

cumulative sum of AEs

Details

Uses sim_test_data_study. We use the furrr package to implement parallel processing, as these simulations can take a long time to run. For this to work, we need to specify the plan for how the code should run, e.g. `plan(multisession, workers = 3)`.

Examples

# \donttest{
df_visit1 <- sim_test_data_study(n_pat = 100, n_sites = 10,
                                 frac_site_with_ur = 0.4, ur_rate = 0.6)

df_visit1$study_id <- "A"

df_visit2 <- sim_test_data_study(n_pat = 100, n_sites = 10,
                                 frac_site_with_ur = 0.2, ur_rate = 0.1)

df_visit2$study_id <- "B"

df_visit <- dplyr::bind_rows(df_visit1, df_visit2)

df_site_max <- df_visit %>%
  dplyr::group_by(study_id, site_number, patnum) %>%
  dplyr::summarise(max_visit = max(visit),
            max_ae = max(n_ae),
            .groups = "drop")

df_config <- get_config(df_site_max)

df_config
#> # A tibble: 20 × 6
#>    study_id ae_per_visit_mean site_number max_visit_sd max_visit_mean n_pat
#>    <chr>                <dbl> <chr>              <dbl>          <dbl> <int>
#>  1 0001                 0.355 0001                4.23           19.9    10
#>  2 0001                 0.355 0002                4.13           19.8    10
#>  3 0001                 0.355 0003                5.03           19.3    10
#>  4 0001                 0.355 0004                4.40           20      10
#>  5 0001                 0.355 0005                3.77           20.3    10
#>  6 0001                 0.355 0006                3.58           17.2    10
#>  7 0001                 0.355 0007                5.10           19.6    10
#>  8 0001                 0.355 0008                3.74           19.2    10
#>  9 0001                 0.355 0009                4.40           19.6    10
#> 10 0001                 0.355 0010                4.50           19.4    10
#> 11 0002                 0.505 0001                1.51           17.6    10
#> 12 0002                 0.505 0002                2.99           20.5    10
#> 13 0002                 0.505 0003                3.59           19.7    10
#> 14 0002                 0.505 0004                2.70           17.8    10
#> 15 0002                 0.505 0005                4.72           21.1    10
#> 16 0002                 0.505 0006                5.70           18.9    10
#> 17 0002                 0.505 0007                2.68           20.5    10
#> 18 0002                 0.505 0008                4.28           19.1    10
#> 19 0002                 0.505 0009                3.44           18.5    10
#> 20 0002                 0.505 0010                4.86           20.1    10

df_portf <- sim_test_data_portfolio(df_config)

df_portf
#> # A tibble: 3,729 × 8
#>    study_id ae_per_visit_mean site_number max_visit…¹ max_v…² patnum visit  n_ae
#>    <chr>                <dbl> <chr>             <dbl>   <dbl> <chr>  <int> <int>
#>  1 0001                 0.355 0001               4.23    19.9 0001       1     1
#>  2 0001                 0.355 0001               4.23    19.9 0001       2     2
#>  3 0001                 0.355 0001               4.23    19.9 0001       3     2
#>  4 0001                 0.355 0001               4.23    19.9 0001       4     2
#>  5 0001                 0.355 0001               4.23    19.9 0001       5     2
#>  6 0001                 0.355 0001               4.23    19.9 0001       6     2
#>  7 0001                 0.355 0001               4.23    19.9 0001       7     2
#>  8 0001                 0.355 0001               4.23    19.9 0001       8     3
#>  9 0001                 0.355 0001               4.23    19.9 0001       9     3
#> 10 0001                 0.355 0001               4.23    19.9 0001      10     3
#> # … with 3,719 more rows, and abbreviated variable names ¹​max_visit_sd,
#> #   ²​max_visit_mean

df_scen <- sim_ur_scenarios(df_portf,
                            extra_ur_sites = 2,
                            ur_rate = c(0.5, 1))
#> aggregating site level
#> prepping for simulation
#> generating scenarios
#> getting under-reporting stats
#> evaluating stats


df_scen
#> # A tibble: 140 × 14
#>    study…¹ site_…² n_pat n_pat…³ visit…⁴ mean_…⁵ mean_…⁶ n_pat…⁷ extra…⁸ frac_…⁹
#>    <chr>   <chr>   <int>   <int>   <dbl>   <dbl>   <dbl>   <int>   <dbl>   <dbl>
#>  1 0001    0001       10       8      21    6.25    7.75      28       0   0    
#>  2 0001    0001       10       8      21    3.12    7.75      28       0   0.222
#>  3 0001    0001       10       8      21    0       7.75      28       0   0.222
#>  4 0001    0001       10       8      21    3.12    7.30      28       1   0.333
#>  5 0001    0001       10       8      21    0       6.86      28       1   0.333
#>  6 0001    0001       10       8      21    3.12    6.79      28       2   0.444
#>  7 0001    0001       10       8      21    0       5.82      28       2   0.444
#>  8 0001    0002       10      10      14    3.8     4.74      74       0   0    
#>  9 0001    0002       10      10      14    1.9     4.74      74       0   0.119
#> 10 0001    0002       10      10      14    0       4.74      74       0   0.119
#> # … with 130 more rows, 4 more variables: ur_rate <dbl>, prob_low <dbl>,
#> #   prob_low_adj <dbl>, prob_low_prob_ur <dbl>, and abbreviated variable names
#> #   ¹​study_id, ²​site_number, ³​n_pat_with_med75, ⁴​visit_med75,
#> #   ⁵​mean_ae_site_med75, ⁶​mean_ae_study_med75, ⁷​n_pat_with_med75_study,
#> #   ⁸​extra_ur_sites, ⁹​frac_pat_with_ur

df_perf <- get_portf_perf(df_scen)

df_perf
#> # A tibble: 27 × 5
#>      fpr thresh extra_ur_sites ur_rate   tpr
#>    <dbl>  <dbl>          <dbl>   <dbl> <dbl>
#>  1 0.001  0.916              0     0    0.05
#>  2 0.001  0.916              1     0    0.05
#>  3 0.001  0.916              2     0    0.05
#>  4 0.001  0.916              0     0.5  1   
#>  5 0.001  0.916              1     0.5  1   
#>  6 0.001  0.916              2     0.5  1   
#>  7 0.001  0.916              0     1    1   
#>  8 0.001  0.916              1     1    1   
#>  9 0.001  0.916              2     1    1   
#> 10 0.01   0.913              0     0    0.05
#> # … with 17 more rows
# }