Skip to contents

Simulate AE under-reporting probabilities.

Usage

simaerep(
  df_visit,
  r = 1000,
  check = TRUE,
  under_only = FALSE,
  visit_med75 = FALSE,
  inframe = TRUE,
  progress = TRUE,
  mult_corr = TRUE,
  poisson_test = FALSE,
  env = parent.frame(),
  event_names = c("event"),
  col_names = list(study_id = "study_id", site_id = "site_id", patient_id = "patient_id",
    visit = "visit")
)

simaerep_inframe(
  df_visit,
  r = 1000,
  under_only = FALSE,
  visit_med75 = FALSE,
  check = TRUE,
  env = parent.frame(),
  event_names = c("event"),
  mult_corr = FALSE,
  col_names = list(study_id = "study_id", site_id = "site_id", patient_id = "patient_id",
    visit = "visit")
)

simaerep_classic(
  df_visit,
  check = TRUE,
  progress = TRUE,
  env = parent.frame(),
  under_only = TRUE,
  r = 1000,
  mult_corr = FALSE,
  poisson_test = FALSE,
  event_names = "event",
  col_names = list(study_id = "study_id", site_id = "site_id", patient_id = "patient_id",
    visit = "visit")
)

Arguments

df_visit

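Data frame with columns: study_id, site_id, patient_id, visit (column names can be remapped via col_names), plus the event count column(s) for the events listed in event_names.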

r

Integer or tbl object, number of repetitions for bootstrap simulation. Pass a tbl object referring to a table with one column and as many rows as desired repetitions. Default: 1000.

check

Logical, perform data check and attempt repair with check_df_visit(). Computationally expensive on large data sets. Default: TRUE.

under_only

Logical, compute under-reporting probabilities only. Only applies to the classic algorithm, in which a one-sided evaluation can save computation time. Default: FALSE (TRUE for simaerep_classic()).

visit_med75

Logical, should evaluation point visit_med75 be used. Compatible with inframe and classic version of the algorithm. Default: FALSE

inframe

Logical, when FALSE classic simaerep algorithm will be used. The default inframe method uses only table operations and is compatible with dbplyr supported database backends. Default: TRUE

progress

Logical, display progress bar. Default: TRUE.

mult_corr

Logical, apply multiplicity correction. Default: TRUE for simaerep(), FALSE for simaerep_inframe() and simaerep_classic().

poisson_test

Logical, compute p-value with Poisson test. Only supported by the classic algorithm using visit_med75. Default: FALSE

env

Optional, provide environment of original visit data. Default: parent.frame().

event_names

Character vector containing the event names. Default: "event".

col_names

Named list indicating the names of the study_id, site_id, patient_id and visit columns in the df_visit input data frame. Default: list( study_id = "study_id", site_id = "site_id", patient_id = "patient_id", visit = "visit" )

Value

A simaerep object. Results are contained in the attached df_eval dataframe.

| Column Name | Description | Type |
|---|---|---|
| study_id | The study ID | Character |
| site_id | The site ID | Character |
| (event)_count | Site event count | Numeric |
| (event)_per_visit_site | Site ratio of event count divided by visits | Numeric |
| visits | Site visit count | Numeric |
| n_pat | Site patient count | Numeric |
| (event)_per_visit_study | Simulated study ratio | Numeric |
| (event)_prob | Site event ratio probability from -1 to 1 | Numeric |
| (event)_delta | Difference expected vs reported events | Numeric |

Details

Executes site_aggr(), sim_sites(), and eval_sites() on the original visit data and stores all intermediate results. Also stores a lazy reference to the original visit data to facilitate plotting with the generic plot(x).

Examples

df_visit <- sim_test_data_study(
  n_pat = 100,
  n_sites = 5,
  ratio_out = 0.4,
  factor_event_rate = - 0.6
)

evrep <- simaerep(df_visit)
evrep
#> simaerep object:
#> ----------------
#> Plot results using plot() generic.
#> Full results available in "df_eval".
#> 
#> Summary:
#> Number of sites: 5
#> Number of studies: 1
#> 
#> Multiplicity correction applied to '*_prob' columns.
#> 
#> First 10 rows of df_eval:
#> # A tibble: 5 × 10
#>   study_id site_id event_count event_per_visit_site visits n_pat
#>   <chr>    <chr>         <dbl>                <dbl>  <dbl> <int>
#> 1 A        S0001           103                0.266    387    20
#> 2 A        S0002            93                0.221    421    20
#> 3 A        S0003           262                0.657    399    20
#> 4 A        S0004           256                0.674    380    20
#> 5 A        S0005           232                0.611    380    20
#> # ℹ 4 more variables: event_per_visit_study <dbl>, event_prob_no_mult <dbl>,
#> #   event_prob <dbl>, event_delta <dbl>
str(evrep)
#> List of 12
#>  $ visit       :List of 5
#>   ..$ dim        : int [1:2] 1967 9
#>   ..$ df_summary : tibble [1 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ n_studies : int 1
#>   .. ..$ n_sites   : int 5
#>   .. ..$ n_patients: int 100
#>   .. ..$ n_visits  : int 1967
#>   .. ..$ n_events  : num 946
#>   ..$ str_call   : chr "df_visit"
#>   ..$ event_names: chr "event"
#>   ..$ col_names  :List of 4
#>   .. ..$ study_id  : chr "study_id"
#>   .. ..$ site_id   : chr "site_id"
#>   .. ..$ patient_id: chr "patient_id"
#>   .. ..$ visit     : chr "visit"
#>   ..- attr(*, "class")= chr "orivisit"
#>  $ df_site     : tibble [5 × 5] (S3: tbl_df/tbl/data.frame)
#>   ..$ study_id            : chr [1:5] "A" "A" "A" "A" ...
#>   ..$ site_id             : chr [1:5] "S0001" "S0002" "S0003" "S0004" ...
#>   ..$ max_visit           : num [1:5] 27 27 28 27 25
#>   ..$ n_pat               : int [1:5] 20 20 20 20 20
#>   ..$ n_pat_with_max_visit: int [1:5] 2 2 3 3 3
#>  $ df_sim_sites: tibble [5 × 8] (S3: tbl_df/tbl/data.frame)
#>   ..$ study_id             : chr [1:5] "A" "A" "A" "A" ...
#>   ..$ site_id              : chr [1:5] "S0001" "S0002" "S0003" "S0004" ...
#>   ..$ event_count          : num [1:5] 103 93 262 256 232
#>   ..$ event_per_visit_site : num [1:5] 0.266 0.221 0.657 0.674 0.611
#>   ..$ visits               : num [1:5] 387 421 399 380 380
#>   ..$ n_pat                : int [1:5] 20 20 20 20 20
#>   ..$ event_per_visit_study: num [1:5] 0.466 0.428 0.464 0.476 0.473
#>   ..$ event_prob_low       : num [1:5] 0 0 1 1 0.99
#>  $ df_eval     : tibble [5 × 10] (S3: tbl_df/tbl/data.frame)
#>   ..$ study_id             : chr [1:5] "A" "A" "A" "A" ...
#>   ..$ site_id              : chr [1:5] "S0001" "S0002" "S0003" "S0004" ...
#>   ..$ event_count          : num [1:5] 103 93 262 256 232
#>   ..$ event_per_visit_site : num [1:5] 0.266 0.221 0.657 0.674 0.611
#>   ..$ visits               : num [1:5] 387 421 399 380 380
#>   ..$ n_pat                : int [1:5] 20 20 20 20 20
#>   ..$ event_per_visit_study: num [1:5] 0.466 0.428 0.464 0.476 0.473
#>   ..$ event_prob_no_mult   : num [1:5] -1 -1 1 1 0.99
#>   ..$ event_prob           : num [1:5] -1 -1 1 1 0.983
#>   ..$ event_delta          : num [1:5] -77.2 -87.3 76.7 75 52.3
#>  $ r           : num 1000
#>  $ visit_med75 : logi FALSE
#>  $ inframe     : logi TRUE
#>  $ under_only  : logi FALSE
#>  $ event_names : chr "event"
#>  $ mult_corr   : logi TRUE
#>  $ poisson_test: logi FALSE
#>  $ col_names   :List of 4
#>   ..$ study_id  : chr "study_id"
#>   ..$ site_id   : chr "site_id"
#>   ..$ patient_id: chr "patient_id"
#>   ..$ visit     : chr "visit"
#>  - attr(*, "class")= chr "simaerep"

# simaerep classic algorithm

evrep <- simaerep(df_visit, inframe = FALSE, under_only = TRUE, mult_corr = TRUE)
evrep
#> simaerep object:
#> ----------------
#> Plot results using plot() generic.
#> Full results available in "df_eval".
#> 
#> Summary:
#> Number of sites: 5
#> Number of studies: 1
#> 
#> Classic algorithm used to calculate probabilities!!
#> 
#> Only under-reporting probability calculated !!!
#> 
#> Multiplicity correction applied to prob column.
#> 
#> First 10 rows of df_eval:
#> # A tibble: 5 × 9
#>   study_id site_id n_pat n_pat_with_med75 visit_med75 mean_event_site_med75
#>   <chr>    <chr>   <int>            <dbl>       <dbl>                 <dbl>
#> 1 A        S0001      20               18          16                  5.11
#> 2 A        S0002      20               17          18                  4.18
#> 3 A        S0003      20               18          17                 12.7 
#> 4 A        S0004      20               18          14                 12.3 
#> 5 A        S0005      20               17          15                 11.7 
#> # ℹ 3 more variables: mean_event_study_med75 <dbl>,
#> #   n_pat_with_med75_study <int>, prob <dbl>

# multiple events

df_visit_events_test <- sim_test_data_study(
  n_pat = 100,
  n_sites = 5,
  ratio_out = 0.4,
  factor_event_rate = - 0.6,
  event_rates = list(0.5, 0.3),
  event_names = c("ae", "pd")
)

evsrep <- simaerep(df_visit_events_test, inframe = TRUE, event_names = c("ae", "pd"))

evsrep
#> simaerep object:
#> ----------------
#> Plot results using plot() generic.
#> Full results available in "df_eval".
#> 
#> Summary:
#> Number of sites: 5
#> Number of studies: 1
#> 
#> Multiplicity correction applied to '*_prob' columns.
#> 
#> Reporting probabilities calculated for: ae, pd 
#> 
#> First 10 rows of df_eval:
#> # A tibble: 5 × 16
#>   study_id site_id ae_count pd_count ae_per_visit_site pd_per_visit_site visits
#>   <chr>    <chr>      <dbl>    <dbl>             <dbl>             <dbl>  <dbl>
#> 1 A        S0001         76       43             0.203            0.115     374
#> 2 A        S0002         67       29             0.189            0.0817    355
#> 3 A        S0003        185      107             0.465            0.269     398
#> 4 A        S0004        182      111             0.483            0.294     377
#> 5 A        S0005        199      125             0.504            0.316     395
#> # ℹ 9 more variables: n_pat <int>, ae_per_visit_study <dbl>,
#> #   pd_per_visit_study <dbl>, ae_prob_no_mult <dbl>, ae_prob <dbl>,
#> #   ae_delta <dbl>, pd_prob_no_mult <dbl>, pd_prob <dbl>, pd_delta <dbl>

# \donttest{
# Database example
con <- DBI::dbConnect(duckdb::duckdb(), dbdir = ":memory:")
df_r <- tibble::tibble(rep = seq(1, 1000))
dplyr::copy_to(con, df_visit, "visit")
dplyr::copy_to(con, df_r, "r")
tbl_visit <- dplyr::tbl(con, "visit")
tbl_r <- dplyr::tbl(con, "r")
simaerep(tbl_visit, r = tbl_r)
#> simaerep object:
#> ----------------
#> Plot results using plot() generic.
#> Full results available in "df_eval".
#> 
#> Summary:
#> Multiplicity correction applied to '*_prob' columns.
#> 
#> First 10 rows of df_eval:
#> # Source:     SQL [?? x 10]
#> # Database:   DuckDB v1.3.0 [koneswab@Darwin 23.6.0:R 4.4.1/:memory:]
#> # Ordered by: study_id, site_id
#>   study_id site_id event_count
#>   <chr>    <chr>         <dbl>
#> 1 A        S0004           256
#> 2 A        S0001           103
#> 3 A        S0002            93
#> 4 A        S0003           262
#> 5 A        S0005           232
#> # ℹ 7 more variables:
#> #   event_per_visit_site <dbl>, visits <dbl>,
#> #   n_pat <dbl>, event_per_visit_study <dbl>,
#> #   event_prob_no_mult <dbl>, event_prob <dbl>,
#> #   event_delta <dbl>
DBI::dbDisconnect(con)
# }