Simulate AE under-reporting probabilities.
Usage
simaerep(
df_visit,
r = 1000,
check = TRUE,
under_only = FALSE,
visit_med75 = FALSE,
inframe = TRUE,
progress = TRUE,
mult_corr = TRUE,
poisson_test = FALSE,
env = parent.frame(),
event_names = c("event"),
col_names = list(study_id = "study_id", site_id = "site_id", patient_id = "patient_id",
visit = "visit")
)
simaerep_inframe(
df_visit,
r = 1000,
under_only = FALSE,
visit_med75 = FALSE,
check = TRUE,
env = parent.frame(),
event_names = c("event"),
mult_corr = FALSE,
col_names = list(study_id = "study_id", site_id = "site_id", patient_id = "patient_id",
visit = "visit")
)
simaerep_classic(
df_visit,
check = TRUE,
progress = TRUE,
env = parent.frame(),
under_only = TRUE,
r = 1000,
mult_corr = FALSE,
poisson_test = FALSE,
event_names = "event",
col_names = list(study_id = "study_id", site_id = "site_id", patient_id = "patient_id",
visit = "visit")
)
Arguments
- df_visit
Data frame with the columns named in col_names (by default study_id, site_id, patient_id, visit) plus the event count column(s) for the events listed in event_names.
- r
Integer or tbl_object, number of repetitions for bootstrap simulation. Pass a tbl object referring to a table with one column and as many rows as desired repetitions. Default: 1000.
- check
Logical, perform data check and attempt repair with check_df_visit(). Computationally expensive on large data sets. Default: TRUE.
- under_only
Logical, compute under-reporting probabilities only. Only applies to the classic algorithm, in which a one-sided evaluation can save computation time. Default: FALSE (TRUE for simaerep_classic()).
- visit_med75
Logical, should evaluation point visit_med75 be used. Compatible with inframe and classic version of the algorithm. Default: FALSE
- inframe
Logical, when FALSE classic simaerep algorithm will be used. The default inframe method uses only table operations and is compatible with dbplyr supported database backends. Default: TRUE
- progress
Logical, display progress bar. Default: TRUE.
- mult_corr
Logical, apply multiplicity correction. Default: TRUE (FALSE for simaerep_inframe() and simaerep_classic()).
- poisson_test
Logical, compute p-value with Poisson test. Only supported by the classic algorithm using visit_med75. Default: FALSE
- env
Optional, provide environment of original visit data. Default: parent.frame().
- event_names
Character vector containing the event names. Default: "event"
- col_names
Named list indicating the study_id, site_id, patient_id and visit columns in the df_visit input data frame. Default: list( study_id = "study_id", site_id = "site_id", patient_id = "patient_id", visit = "visit" )
Value
A simaerep object. Results are contained in the attached df_eval dataframe.
Column Name | Description | Type |
study_id | The study ID | Character |
site_id | The site ID | Character |
(event)_count | Site event count | Numeric |
(event)_per_visit_site | Site Ratio of event count divided by visits | Numeric |
visits | Site visit count | Numeric |
n_pat | Site patient count | Numeric |
(event)_per_visit_study | Simulated study ratio | Numeric |
(event)_prob | Site event ratio probability from -1 to 1 | Numeric |
(event)_delta | Difference expected vs reported events | Numeric |
Details
Executes site_aggr(), sim_sites(), and eval_sites() on the original visit data and stores all intermediate results. Stores a lazy reference to the original visit data to facilitate plotting with the generic plot(x).
See also
site_aggr, sim_sites, eval_sites, orivisit, plot.simaerep, print.simaerep, simaerep_inframe
Examples
df_visit <- sim_test_data_study(
n_pat = 100,
n_sites = 5,
ratio_out = 0.4,
factor_event_rate = - 0.6
)
evrep <- simaerep(df_visit)
evrep
#> simaerep object:
#> ----------------
#> Plot results using plot() generic.
#> Full results available in "df_eval".
#>
#> Summary:
#> Number of sites: 5
#> Number of studies: 1
#>
#> Multiplicity correction applied to '*_prob' columns.
#>
#> First 10 rows of df_eval:
#> # A tibble: 5 × 10
#> study_id site_id event_count event_per_visit_site visits n_pat
#> <chr> <chr> <dbl> <dbl> <dbl> <int>
#> 1 A S0001 103 0.266 387 20
#> 2 A S0002 93 0.221 421 20
#> 3 A S0003 262 0.657 399 20
#> 4 A S0004 256 0.674 380 20
#> 5 A S0005 232 0.611 380 20
#> # ℹ 4 more variables: event_per_visit_study <dbl>, event_prob_no_mult <dbl>,
#> # event_prob <dbl>, event_delta <dbl>
str(evrep)
#> List of 12
#> $ visit :List of 5
#> ..$ dim : int [1:2] 1967 9
#> ..$ df_summary : tibble [1 × 5] (S3: tbl_df/tbl/data.frame)
#> .. ..$ n_studies : int 1
#> .. ..$ n_sites : int 5
#> .. ..$ n_patients: int 100
#> .. ..$ n_visits : int 1967
#> .. ..$ n_events : num 946
#> ..$ str_call : chr "df_visit"
#> ..$ event_names: chr "event"
#> ..$ col_names :List of 4
#> .. ..$ study_id : chr "study_id"
#> .. ..$ site_id : chr "site_id"
#> .. ..$ patient_id: chr "patient_id"
#> .. ..$ visit : chr "visit"
#> ..- attr(*, "class")= chr "orivisit"
#> $ df_site : tibble [5 × 5] (S3: tbl_df/tbl/data.frame)
#> ..$ study_id : chr [1:5] "A" "A" "A" "A" ...
#> ..$ site_id : chr [1:5] "S0001" "S0002" "S0003" "S0004" ...
#> ..$ max_visit : num [1:5] 27 27 28 27 25
#> ..$ n_pat : int [1:5] 20 20 20 20 20
#> ..$ n_pat_with_max_visit: int [1:5] 2 2 3 3 3
#> $ df_sim_sites: tibble [5 × 8] (S3: tbl_df/tbl/data.frame)
#> ..$ study_id : chr [1:5] "A" "A" "A" "A" ...
#> ..$ site_id : chr [1:5] "S0001" "S0002" "S0003" "S0004" ...
#> ..$ event_count : num [1:5] 103 93 262 256 232
#> ..$ event_per_visit_site : num [1:5] 0.266 0.221 0.657 0.674 0.611
#> ..$ visits : num [1:5] 387 421 399 380 380
#> ..$ n_pat : int [1:5] 20 20 20 20 20
#> ..$ event_per_visit_study: num [1:5] 0.466 0.428 0.464 0.476 0.473
#> ..$ event_prob_low : num [1:5] 0 0 1 1 0.99
#> $ df_eval : tibble [5 × 10] (S3: tbl_df/tbl/data.frame)
#> ..$ study_id : chr [1:5] "A" "A" "A" "A" ...
#> ..$ site_id : chr [1:5] "S0001" "S0002" "S0003" "S0004" ...
#> ..$ event_count : num [1:5] 103 93 262 256 232
#> ..$ event_per_visit_site : num [1:5] 0.266 0.221 0.657 0.674 0.611
#> ..$ visits : num [1:5] 387 421 399 380 380
#> ..$ n_pat : int [1:5] 20 20 20 20 20
#> ..$ event_per_visit_study: num [1:5] 0.466 0.428 0.464 0.476 0.473
#> ..$ event_prob_no_mult : num [1:5] -1 -1 1 1 0.99
#> ..$ event_prob : num [1:5] -1 -1 1 1 0.983
#> ..$ event_delta : num [1:5] -77.2 -87.3 76.7 75 52.3
#> $ r : num 1000
#> $ visit_med75 : logi FALSE
#> $ inframe : logi TRUE
#> $ under_only : logi FALSE
#> $ event_names : chr "event"
#> $ mult_corr : logi TRUE
#> $ poisson_test: logi FALSE
#> $ col_names :List of 4
#> ..$ study_id : chr "study_id"
#> ..$ site_id : chr "site_id"
#> ..$ patient_id: chr "patient_id"
#> ..$ visit : chr "visit"
#> - attr(*, "class")= chr "simaerep"
# simaerep classic algorithm
evrep <- simaerep(df_visit, inframe = FALSE, under_only = TRUE, mult_corr = TRUE)
evrep
#> simaerep object:
#> ----------------
#> Plot results using plot() generic.
#> Full results available in "df_eval".
#>
#> Summary:
#> Number of sites: 5
#> Number of studies: 1
#>
#> Classic algorithm used to calculate probabilities!!
#>
#> Only under-reporting probability calculated !!!
#>
#> Multiplicity correction applied to prob column.
#>
#> First 10 rows of df_eval:
#> # A tibble: 5 × 9
#> study_id site_id n_pat n_pat_with_med75 visit_med75 mean_event_site_med75
#> <chr> <chr> <int> <dbl> <dbl> <dbl>
#> 1 A S0001 20 18 16 5.11
#> 2 A S0002 20 17 18 4.18
#> 3 A S0003 20 18 17 12.7
#> 4 A S0004 20 18 14 12.3
#> 5 A S0005 20 17 15 11.7
#> # ℹ 3 more variables: mean_event_study_med75 <dbl>,
#> # n_pat_with_med75_study <int>, prob <dbl>
# multiple events
df_visit_events_test <- sim_test_data_study(
n_pat = 100,
n_sites = 5,
ratio_out = 0.4,
factor_event_rate = - 0.6,
event_rates = list(0.5, 0.3),
event_names = c("ae", "pd")
)
evsrep <- simaerep(df_visit_events_test, inframe = TRUE, event_names = c("ae", "pd"))
evsrep
#> simaerep object:
#> ----------------
#> Plot results using plot() generic.
#> Full results available in "df_eval".
#>
#> Summary:
#> Number of sites: 5
#> Number of studies: 1
#>
#> Multiplicity correction applied to '*_prob' columns.
#>
#> Reporting probabilities calculated for: ae, pd
#>
#> First 10 rows of df_eval:
#> # A tibble: 5 × 16
#> study_id site_id ae_count pd_count ae_per_visit_site pd_per_visit_site visits
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 A S0001 76 43 0.203 0.115 374
#> 2 A S0002 67 29 0.189 0.0817 355
#> 3 A S0003 185 107 0.465 0.269 398
#> 4 A S0004 182 111 0.483 0.294 377
#> 5 A S0005 199 125 0.504 0.316 395
#> # ℹ 9 more variables: n_pat <int>, ae_per_visit_study <dbl>,
#> # pd_per_visit_study <dbl>, ae_prob_no_mult <dbl>, ae_prob <dbl>,
#> # ae_delta <dbl>, pd_prob_no_mult <dbl>, pd_prob <dbl>, pd_delta <dbl>
# \donttest{
# Database example
con <- DBI::dbConnect(duckdb::duckdb(), dbdir = ":memory:")
df_r <- tibble::tibble(rep = seq(1, 1000))
dplyr::copy_to(con, df_visit, "visit")
dplyr::copy_to(con, df_r, "r")
tbl_visit <- dplyr::tbl(con, "visit")
tbl_r <- dplyr::tbl(con, "r")
simaerep(tbl_visit, r = tbl_r)
#> simaerep object:
#> ----------------
#> Plot results using plot() generic.
#> Full results available in "df_eval".
#>
#> Summary:
#> Multiplicity correction applied to '*_prob' columns.
#>
#> First 10 rows of df_eval:
#> # Source: SQL [?? x 10]
#> # Database: DuckDB v1.3.0 [koneswab@Darwin 23.6.0:R 4.4.1/:memory:]
#> # Ordered by: study_id, site_id
#> study_id site_id event_count
#> <chr> <chr> <dbl>
#> 1 A S0004 256
#> 2 A S0001 103
#> 3 A S0002 93
#> 4 A S0003 262
#> 5 A S0005 232
#> # ℹ 7 more variables:
#> # event_per_visit_site <dbl>, visits <dbl>,
#> # n_pat <dbl>, event_per_visit_study <dbl>,
#> # event_prob_no_mult <dbl>, event_prob <dbl>,
#> # event_delta <dbl>
DBI::dbDisconnect(con)
# }