Skip to contents

Simulate AE under-reporting probabilities.

Usage

simaerep(
  df_visit,
  r = 1000,
  check = TRUE,
  under_only = TRUE,
  visit_med75 = TRUE,
  inframe = FALSE,
  progress = TRUE,
  mult_corr = TRUE,
  param_site_aggr = list(method = "med75_adj", min_pat_pool = 0.2),
  param_sim_sites = list(r = 1000, poisson_test = FALSE, prob_lower = TRUE),
  param_eval_sites = list(method = "BH"),
  env = parent.frame()
)

Arguments

df_visit

Data frame with columns: study_id, site_number, patnum, visit, n_ae.

r

Integer or tbl object, number of repetitions for bootstrap simulation. When passing a tbl object, it must refer to a table with one column and as many rows as the desired number of repetitions. Default: 1000.

check

Logical, perform data check and attempt repair with check_df_visit(). Computationally expensive on large data sets. Default: TRUE.

under_only

Logical, compute under-reporting probabilities only. Supersedes under_only parameter passed to eval_sites() and sim_sites(). Default: TRUE.

visit_med75

Logical, whether the evaluation point visit_med75 should be used. Default: TRUE.

inframe

Logical, use only table operations; does not require visit_med75. Compatible with dbplyr-supported database backends. Default: FALSE.

progress

Logical, display progress bar. Default: TRUE.

mult_corr

Logical, apply multiplicity correction. Default: TRUE.

param_site_aggr

List of parameters passed to site_aggr(). Default: list(method = "med75_adj", min_pat_pool = 0.2).

param_sim_sites

List of parameters passed to sim_sites(). Default: list(r = 1000, poisson_test = FALSE, prob_lower = TRUE).

param_eval_sites

List of parameters passed to eval_sites(). Default: list(method = "BH").

env

Optional, provide environment of original visit data. Default: parent.frame().

Value

A simaerep object.

Details

Executes site_aggr(), sim_sites(), and eval_sites() on the original visit data and stores all intermediate results. Also stores a lazy reference to the original visit data to facilitate plotting with the generic plot(x).

Examples

df_visit <- sim_test_data_study(
  n_pat = 100,
  n_sites = 5,
  frac_site_with_ur = 0.4,
  ur_rate = 0.6
)
df_visit$study_id <- "A"
aerep <- simaerep(df_visit)
aerep
#> simaerep object:
#> Check aerep$df_eval prob_low_prob_ur column for under-reporting probabililty.
#> Plot results using plot() generic.
str(aerep)
#> List of 11
#>  $ visit           :List of 3
#>   ..$ dim       : int [1:2] 1972 9
#>   ..$ df_summary: tibble [1 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ n_studies : int 1
#>   .. ..$ n_sites   : int 5
#>   .. ..$ n_patients: int 100
#>   .. ..$ n_visits  : int 1972
#>   .. ..$ n_aes     : int 705
#>   ..$ str_call  : chr "df_visit"
#>   ..- attr(*, "class")= chr "orivisit"
#>  $ df_site         : tibble [5 × 6] (S3: tbl_df/tbl/data.frame)
#>   ..$ study_id          : chr [1:5] "A" "A" "A" "A" ...
#>   ..$ site_number       : chr [1:5] "S0001" "S0002" "S0003" "S0004" ...
#>   ..$ n_pat             : int [1:5] 20 20 20 20 20
#>   ..$ n_pat_with_med75  : num [1:5] 18 18 17 20 16
#>   ..$ visit_med75       : Named num [1:5] 17 15 16 15 16
#>   .. ..- attr(*, "names")= chr [1:5] "80%" "80%" "80%" "80%" ...
#>   ..$ mean_ae_site_med75: num [1:5] 3.11 2.56 8.53 6.4 8.38
#>  $ df_sim_sites    : tibble [5 × 9] (S3: tbl_df/tbl/data.frame)
#>   ..$ study_id              : chr [1:5] "A" "A" "A" "A" ...
#>   ..$ site_number           : chr [1:5] "S0001" "S0002" "S0003" "S0004" ...
#>   ..$ n_pat                 : int [1:5] 20 20 20 20 20
#>   ..$ n_pat_with_med75      : num [1:5] 18 18 17 20 16
#>   ..$ visit_med75           : Named num [1:5] 17 15 16 15 16
#>   .. ..- attr(*, "names")= chr [1:5] "80%" "80%" "80%" "80%" ...
#>   ..$ mean_ae_site_med75    : num [1:5] 3.11 2.56 8.53 6.4 8.38
#>   ..$ mean_ae_study_med75   : num [1:5] 7.03 6.21 5.26 5.21 5.34
#>   ..$ n_pat_with_med75_study: int [1:5] 59 72 66 70 67
#>   ..$ prob_low              : num [1:5] 0 0 1 1 1
#>  $ df_eval         : tibble [5 × 11] (S3: tbl_df/tbl/data.frame)
#>   ..$ study_id              : chr [1:5] "A" "A" "A" "A" ...
#>   ..$ site_number           : chr [1:5] "S0001" "S0002" "S0003" "S0004" ...
#>   ..$ n_pat                 : int [1:5] 20 20 20 20 20
#>   ..$ n_pat_with_med75      : num [1:5] 18 18 17 20 16
#>   ..$ visit_med75           : Named num [1:5] 17 15 16 15 16
#>   .. ..- attr(*, "names")= chr [1:5] "80%" "80%" "80%" "80%" ...
#>   ..$ mean_ae_site_med75    : num [1:5] 3.11 2.56 8.53 6.4 8.38
#>   ..$ mean_ae_study_med75   : num [1:5] 7.03 6.21 5.26 5.21 5.34
#>   ..$ n_pat_with_med75_study: int [1:5] 59 72 66 70 67
#>   ..$ prob_low              : num [1:5] 0 0 1 1 1
#>   ..$ prob_low_adj          : num [1:5] 0 0 1 1 1
#>   ..$ prob_low_prob_ur      : num [1:5] 1 1 0 0 0
#>  $ r               : num 1000
#>  $ visit_med75     : logi TRUE
#>  $ inframe         : logi FALSE
#>  $ under_only      : logi TRUE
#>  $ param_site_aggr :List of 2
#>   ..$ method      : chr "med75_adj"
#>   ..$ min_pat_pool: num 0.2
#>  $ param_sim_sites :List of 4
#>   ..$ r           : num 1000
#>   ..$ poisson_test: logi FALSE
#>   ..$ prob_lower  : logi TRUE
#>   ..$ under_only  : logi TRUE
#>  $ param_eval_sites:List of 2
#>   ..$ method    : chr "BH"
#>   ..$ under_only: logi TRUE
#>  - attr(*, "class")= chr "simaerep"
# \donttest{
  # In-frame table operations
  simaerep(df_visit, inframe = TRUE, visit_med75 = FALSE, under_only = FALSE)$df_eval
#> # A tibble: 5 × 13
#>   study_id site_number events_per_visit_site events visits n_pat prob_low
#>   <chr>    <chr>                       <dbl>  <dbl>  <dbl> <int>    <dbl>
#> 1 A        S0001                       0.183     76    415    20    0    
#> 2 A        S0002                       0.186     75    403    20    0    
#> 3 A        S0003                       0.496    187    377    20    0.999
#> 4 A        S0004                       0.453    180    397    20    0.987
#> 5 A        S0005                       0.492    187    380    20    0.996
#> # ℹ 6 more variables: events_per_visit_study <dbl>, prob_low_adj <dbl>,
#> #   prob_low_prob_ur <dbl>, prob_high <dbl>, prob_high_adj <dbl>,
#> #   prob_high_prob_or <dbl>
  simaerep(df_visit, inframe = TRUE, visit_med75 = TRUE, under_only = FALSE)$df_eval
#> # A tibble: 5 × 15
#>   study_id site_number events_per_visit_site events visits n_pat prob_low
#>   <chr>    <chr>                       <dbl>  <dbl>  <dbl> <int>    <dbl>
#> 1 A        S0001                       0.183     56    306    18    0    
#> 2 A        S0002                       0.170     46    270    18    0    
#> 3 A        S0003                       0.533    145    272    17    1    
#> 4 A        S0004                       0.427    128    300    20    0.892
#> 5 A        S0005                       0.523    134    256    16    0.996
#> # ℹ 8 more variables: events_per_visit_study <dbl>, prob_low_adj <dbl>,
#> #   prob_low_prob_ur <dbl>, prob_high <dbl>, prob_high_adj <dbl>,
#> #   prob_high_prob_or <dbl>, n_pat_with_med75 <dbl>, visit_med75 <dbl>
  # Database example
  con <- DBI::dbConnect(duckdb::duckdb(), dbdir = ":memory:")
  df_r <- tibble::tibble(rep = seq(1, 1000))
  dplyr::copy_to(con, df_visit, "visit")
  dplyr::copy_to(con, df_r, "r")
  tbl_visit <- dplyr::tbl(con, "visit")
  tbl_r <- dplyr::tbl(con, "r")
  simaerep(tbl_visit, r = tbl_r, inframe = TRUE, visit_med75 = FALSE, under_only = FALSE)$df_eval
#> # Source:     SQL [5 x 13]
#> # Database:   DuckDB v1.0.0 [koneswab@Darwin 23.6.0:R 4.4.1/:memory:]
#> # Ordered by: study_id, site_number
#>   study_id site_number events_per_visit_site events visits n_pat prob_low
#>   <chr>    <chr>                       <dbl>  <dbl>  <dbl> <dbl>    <dbl>
#> 1 A        S0003                       0.496    187    377    20    1    
#> 2 A        S0005                       0.492    187    380    20    0.999
#> 3 A        S0001                       0.183     76    415    20    0.001
#> 4 A        S0004                       0.453    180    397    20    0.982
#> 5 A        S0002                       0.186     75    403    20    0    
#> # ℹ 6 more variables: events_per_visit_study <dbl>, prob_low_adj <dbl>,
#> #   prob_low_prob_ur <dbl>, prob_high <dbl>, prob_high_adj <dbl>,
#> #   prob_high_prob_or <dbl>
  simaerep(tbl_visit, r = tbl_r, inframe = TRUE, visit_med75 = TRUE, under_only = FALSE)$df_eval
#> # Source:     SQL [5 x 15]
#> # Database:   DuckDB v1.0.0 [koneswab@Darwin 23.6.0:R 4.4.1/:memory:]
#> # Ordered by: study_id, site_number
#>   study_id site_number events_per_visit_site events visits n_pat prob_low
#>   <chr>    <chr>                       <dbl>  <dbl>  <dbl> <dbl>    <dbl>
#> 1 A        S0003                       0.533    145    272    17    1    
#> 2 A        S0005                       0.523    134    256    16    0.999
#> 3 A        S0004                       0.427    128    300    20    0.908
#> 4 A        S0002                       0.170     46    270    18    0    
#> 5 A        S0001                       0.183     56    306    18    0    
#> # ℹ 8 more variables: events_per_visit_study <dbl>, prob_low_adj <dbl>,
#> #   prob_low_prob_ur <dbl>, prob_high <dbl>, prob_high_adj <dbl>,
#> #   prob_high_prob_or <dbl>, n_pat_with_med75 <dbl>, visit_med75 <int>
  DBI::dbDisconnect(con)
# }