Skip to contents

Simulate AE under-reporting probabilities.

Usage

simaerep(
  df_visit,
  r = 1000,
  check = TRUE,
  under_only = TRUE,
  visit_med75 = TRUE,
  inframe = FALSE,
  progress = TRUE,
  mult_corr = TRUE,
  param_site_aggr = list(method = "med75_adj", min_pat_pool = 0.2),
  param_sim_sites = list(r = 1000, poisson_test = FALSE, prob_lower = TRUE),
  param_eval_sites = list(method = "BH"),
  env = parent.frame(),
  event_names = c("ae")
)

Arguments

df_visit

Data frame with columns: study_id, site_number, patnum, visit, n_ae.

r

Integer or tbl object, number of repetitions for the bootstrap simulation. To use a tbl object, pass a reference to a table with one column and as many rows as the desired number of repetitions. Default: 1000.

check

Logical, perform data check and attempt repair with check_df_visit(). Computationally expensive on large data sets. Default: TRUE.

under_only

Logical, compute under-reporting probabilities only. Supersedes under_only parameter passed to eval_sites() and sim_sites(). Default: TRUE.

visit_med75

Logical, whether the evaluation point visit_med75 should be used. Default: TRUE.

inframe

Logical, use only table operations; does not require visit_med75. Compatible with database backends supported by dbplyr. Default: FALSE.

progress

Logical, display progress bar. Default: TRUE.

mult_corr

Logical, apply multiplicity correction. Default: TRUE.

param_site_aggr

List of parameters passed to site_aggr(). Default: list(method = "med75_adj", min_pat_pool = 0.2).

param_sim_sites

List of parameters passed to sim_sites(). Default: list(r = 1000, poisson_test = FALSE, prob_lower = TRUE).

param_eval_sites

List of parameters passed to eval_sites(). Default: list(method = "BH").

env

Optional, provide environment of original visit data. Default: parent.frame().

event_names

Character vector containing the event names. Default: c("ae").

Value

A simaerep object.

Details

Executes site_aggr(), sim_sites(), and eval_sites() on the original visit data and stores all intermediate results. Also stores a lazy reference to the original visit data to facilitate plotting with the generic plot(x).

Examples

df_visit <- sim_test_data_study(
  n_pat = 100,
  n_sites = 5,
  frac_site_with_ur = 0.4,
  ur_rate = 0.6
)
df_visit$study_id <- "A"
aerep <- simaerep(df_visit)
aerep
#> simaerep object:
#> Check aerep$df_eval prob_low_prob_ur column for under-reporting probabililty.
#> Plot results using plot() generic.
str(aerep)
#> List of 12
#>  $ visit           :List of 4
#>   ..$ dim        : int [1:2] 1972 9
#>   ..$ df_summary : tibble [1 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ n_studies : int 1
#>   .. ..$ n_sites   : int 5
#>   .. ..$ n_patients: int 100
#>   .. ..$ n_visits  : int 1972
#>   .. ..$ n_aes     : int 705
#>   ..$ str_call   : chr "df_visit"
#>   ..$ event_names: chr "ae"
#>   ..- attr(*, "class")= chr "orivisit"
#>  $ df_site         : tibble [5 × 6] (S3: tbl_df/tbl/data.frame)
#>   ..$ study_id          : chr [1:5] "A" "A" "A" "A" ...
#>   ..$ site_number       : chr [1:5] "S0001" "S0002" "S0003" "S0004" ...
#>   ..$ n_pat             : int [1:5] 20 20 20 20 20
#>   ..$ n_pat_with_med75  : num [1:5] 18 18 17 20 16
#>   ..$ visit_med75       : Named num [1:5] 17 15 16 15 16
#>   .. ..- attr(*, "names")= chr [1:5] "80%" "80%" "80%" "80%" ...
#>   ..$ mean_ae_site_med75: num [1:5] 3.11 2.56 8.53 6.4 8.38
#>  $ df_sim_sites    : tibble [5 × 9] (S3: tbl_df/tbl/data.frame)
#>   ..$ study_id              : chr [1:5] "A" "A" "A" "A" ...
#>   ..$ site_number           : chr [1:5] "S0001" "S0002" "S0003" "S0004" ...
#>   ..$ n_pat                 : int [1:5] 20 20 20 20 20
#>   ..$ n_pat_with_med75      : num [1:5] 18 18 17 20 16
#>   ..$ visit_med75           : Named num [1:5] 17 15 16 15 16
#>   .. ..- attr(*, "names")= chr [1:5] "80%" "80%" "80%" "80%" ...
#>   ..$ mean_ae_site_med75    : num [1:5] 3.11 2.56 8.53 6.4 8.38
#>   ..$ mean_ae_study_med75   : num [1:5] 7.03 6.21 5.26 5.21 5.34
#>   ..$ n_pat_with_med75_study: int [1:5] 59 72 66 70 67
#>   ..$ prob_low              : num [1:5] 0 0 1 1 1
#>  $ df_eval         : tibble [5 × 11] (S3: tbl_df/tbl/data.frame)
#>   ..$ study_id              : chr [1:5] "A" "A" "A" "A" ...
#>   ..$ site_number           : chr [1:5] "S0001" "S0002" "S0003" "S0004" ...
#>   ..$ n_pat                 : int [1:5] 20 20 20 20 20
#>   ..$ n_pat_with_med75      : num [1:5] 18 18 17 20 16
#>   ..$ visit_med75           : Named num [1:5] 17 15 16 15 16
#>   .. ..- attr(*, "names")= chr [1:5] "80%" "80%" "80%" "80%" ...
#>   ..$ mean_ae_site_med75    : num [1:5] 3.11 2.56 8.53 6.4 8.38
#>   ..$ mean_ae_study_med75   : num [1:5] 7.03 6.21 5.26 5.21 5.34
#>   ..$ n_pat_with_med75_study: int [1:5] 59 72 66 70 67
#>   ..$ prob_low              : num [1:5] 0 0 1 1 1
#>   ..$ prob_low_adj          : num [1:5] 0 0 1 1 1
#>   ..$ prob_low_prob_ur      : num [1:5] 1 1 0 0 0
#>  $ r               : num 1000
#>  $ visit_med75     : logi TRUE
#>  $ inframe         : logi FALSE
#>  $ under_only      : logi TRUE
#>  $ param_site_aggr :List of 2
#>   ..$ method      : chr "med75_adj"
#>   ..$ min_pat_pool: num 0.2
#>  $ param_sim_sites :List of 4
#>   ..$ r           : num 1000
#>   ..$ poisson_test: logi FALSE
#>   ..$ prob_lower  : logi TRUE
#>   ..$ under_only  : logi TRUE
#>  $ param_eval_sites:List of 2
#>   ..$ method    : chr "BH"
#>   ..$ under_only: logi TRUE
#>  $ event_names     : chr "ae"
#>  - attr(*, "class")= chr "simaerep"


df_visit_events_test <- sim_test_data_events(n_pat = 100, n_sites = 5,
                                 ae_per_visit_mean = c(0.4, 0.5), event_names = c("ae", "pd"))
aerep_events <- simaerep(df_visit_events_test, inframe = TRUE,event_names = c("ae", "pd"))
aerep_events
#> simaerep object:
#> Check aerep$df_eval prob_low_prob_ur column for under-reporting probabililty.
#> Plot results using plot() generic.
# \donttest{
  # In-frame table operations
  simaerep(df_visit, inframe = TRUE, visit_med75 = FALSE, under_only = FALSE)$df_eval
#> # A tibble: 5 × 13
#>   study_id site_number events events_per_visit_site visits n_pat
#>   <chr>    <chr>        <dbl>                 <dbl>  <dbl> <int>
#> 1 A        S0001           76                 0.183    415    20
#> 2 A        S0002           75                 0.186    403    20
#> 3 A        S0003          187                 0.496    377    20
#> 4 A        S0004          180                 0.453    397    20
#> 5 A        S0005          187                 0.492    380    20
#> # ℹ 7 more variables: events_per_visit_study <dbl>, prob_low <dbl>,
#> #   prob_low_adj <dbl>, prob_low_prob_ur <dbl>, prob_high <dbl>,
#> #   prob_high_adj <dbl>, prob_high_prob_or <dbl>
  simaerep(df_visit, inframe = TRUE, visit_med75 = TRUE, under_only = FALSE)$df_eval
#> # A tibble: 5 × 15
#>   study_id site_number events events_per_visit_site visits n_pat
#>   <chr>    <chr>        <dbl>                 <dbl>  <dbl> <int>
#> 1 A        S0001           56                 0.183    306    18
#> 2 A        S0002           46                 0.170    270    18
#> 3 A        S0003          145                 0.533    272    17
#> 4 A        S0004          128                 0.427    300    20
#> 5 A        S0005          134                 0.523    256    16
#> # ℹ 9 more variables: events_per_visit_study <dbl>, prob_low <dbl>,
#> #   prob_low_adj <dbl>, prob_low_prob_ur <dbl>, prob_high <dbl>,
#> #   prob_high_adj <dbl>, prob_high_prob_or <dbl>, n_pat_with_med75 <dbl>,
#> #   visit_med75 <dbl>
  # Database example
  con <- DBI::dbConnect(duckdb::duckdb(), dbdir = ":memory:")
  df_r <- tibble::tibble(rep = seq(1, 1000))
  dplyr::copy_to(con, df_visit, "visit")
  dplyr::copy_to(con, df_r, "r")
  tbl_visit <- dplyr::tbl(con, "visit")
  tbl_r <- dplyr::tbl(con, "r")
  simaerep(tbl_visit, r = tbl_r, inframe = TRUE, visit_med75 = FALSE, under_only = FALSE)$df_eval
#> # Source:     SQL [?? x 13]
#> # Database:   DuckDB v1.2.1 [koneswab@Darwin 23.6.0:R 4.4.1/:memory:]
#> # Ordered by: study_id, site_number
#>   study_id site_number events events_per_visit_site visits n_pat
#>   <chr>    <chr>        <dbl>                 <dbl>  <dbl> <dbl>
#> 1 A        S0002           75                 0.186    403    20
#> 2 A        S0001           76                 0.183    415    20
#> 3 A        S0003          187                 0.496    377    20
#> 4 A        S0005          187                 0.492    380    20
#> 5 A        S0004          180                 0.453    397    20
#> # ℹ 7 more variables: events_per_visit_study <dbl>, prob_low <dbl>,
#> #   prob_low_adj <dbl>, prob_low_prob_ur <dbl>, prob_high <dbl>,
#> #   prob_high_adj <dbl>, prob_high_prob_or <dbl>
  simaerep(tbl_visit, r = tbl_r, inframe = TRUE, visit_med75 = TRUE, under_only = FALSE)$df_eval
#> # Source:     SQL [?? x 15]
#> # Database:   DuckDB v1.2.1 [koneswab@Darwin 23.6.0:R 4.4.1/:memory:]
#> # Ordered by: study_id, site_number
#>   study_id site_number events events_per_visit_site visits n_pat
#>   <chr>    <chr>        <dbl>                 <dbl>  <dbl> <dbl>
#> 1 A        S0004          128                 0.427    300    20
#> 2 A        S0001           56                 0.183    306    18
#> 3 A        S0002           46                 0.170    270    18
#> 4 A        S0005          134                 0.523    256    16
#> 5 A        S0003          145                 0.533    272    17
#> # ℹ 9 more variables: events_per_visit_study <dbl>, prob_low <dbl>,
#> #   prob_low_adj <dbl>, prob_low_prob_ur <dbl>, prob_high <dbl>,
#> #   prob_high_adj <dbl>, prob_high_prob_or <dbl>, n_pat_with_med75 <dbl>,
#> #   visit_med75 <int>
  DBI::dbDisconnect(con)
# }