suppressPackageStartupMessages(library(tidyverse))
library(targets)
library(tarchetypes)
library(DT)
# Evaluate all chunks with the working directory set two levels above this
# document. NOTE(review): presumably the project root that holds the targets
# store read by the `tar_read()` calls in this report -- confirm.
knitr::opts_knit$set(root.dir = "../../")
- `category_id` contains the ID of the clinical impact factor.
- `activity_id_new` contains the ID of the audit/inspection.
- `has_finding` specifies whether there was a finding of the specific impact factor at the specific audit/inspection.

df_mm <- tar_read(df_mm)
# Print `df_mm` with the three key columns moved to the front; every other
# column follows in its existing order.
select(df_mm, category_id, activity_id_new, has_finding, everything())
## # A tibble: 4,055 × 185
## category_id activity_id_new has_finding start_date n_visit n_unsch_visit
## <chr> <chr> <chr> <date> <dbl> <dbl>
## 1 cnsn 00001 yes 2015-01-01 732 26
## 2 cnsn 00003 yes 2014-01-01 NA NA
## 3 cnsn 00004 yes 2015-01-01 181 33
## 4 cnsn 00006 yes 2014-01-01 0 0
## 5 cnsn 00008 yes 2015-01-01 114 0
## 6 cnsn 00009 yes 2015-01-01 297 0
## 7 cnsn 00012 yes 2015-01-01 446 7
## 8 cnsn 00013 yes 2015-01-01 688 65
## 9 cnsn 00015 yes 2015-01-01 141 17
## 10 cnsn 00016 yes 2015-01-01 0 0
## # … with 4,045 more rows, and 179 more variables: n_sched_visit <dbl>,
## # ratio_unsch_visit <dbl>, ratio_unsch_visit_rnk <dbl>, n_ae <dbl>,
## # n_sae <dbl>, ae_per_visit <dbl>, sae_per_visit <dbl>,
## # ae_per_visit_rnk <dbl>, sae_per_visit_rnk <dbl>,
## # median_ae_reporting_delay <dbl>, mean_ae_reporting_delay <dbl>,
## # max_ae_reporting_delay <dbl>, median_sae_reporting_delay <dbl>,
## # mean_sae_reporting_delay <dbl>, max_sae_reporting_delay <dbl>, …
# Count the distinct audits/inspections (`activity_id_new`) per clinical impact
# factor (`category_id`), keeping only rows whose start year is 2019 or
# earlier, and render the result as a table.
knitr::kable(
  df_mm %>%
    filter(lubridate::year(start_date) <= 2019) %>%
    group_by(category_id) %>%
    summarise(n_activities = n_distinct(activity_id_new))
)
| category_id | n_activities |
|---|---|
| cnsn | 808 |
| dtin | 808 |
| ptpe | 808 |
| sfty | 808 |
| spno | 808 |
# List every column name of `df_mm` in an interactive table.
DT::datatable(tibble(columns = colnames(df_mm)))
# Load the `df_mm_bin` target from the targets store.
# NOTE(review): presumably the binned/indicator-encoded version of `df_mm`
# (column names such as `nvisitLL` ... `nvisitHH` suggest this) -- confirm.
df_mm_bin <- tar_read(df_mm_bin)

# Print it with the three key columns moved to the front; every other column
# follows in its existing order.
select(df_mm_bin, category_id, activity_id_new, has_finding, everything())
## # A tibble: 4,055 × 861
## category_id activity_id_new has_finding nvisitLL nvisitML nvisitM nvisitMH
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 cnsn 00001 yes 0 0 0 1
## 2 cnsn 00003 yes 0 0 0 0
## 3 cnsn 00004 yes 0 0 1 0
## 4 cnsn 00006 yes 1 0 0 0
## 5 cnsn 00008 yes 0 1 0 0
## 6 cnsn 00009 yes 0 0 1 0
## 7 cnsn 00012 yes 0 0 0 1
## 8 cnsn 00013 yes 0 0 0 1
## 9 cnsn 00015 yes 0 1 0 0
## 10 cnsn 00016 yes 1 0 0 0
## # … with 4,045 more rows, and 854 more variables: nvisitHH <dbl>,
## # nvisitNA <dbl>, nunschvisitLL <dbl>, nunschvisitML <dbl>,
## # nunschvisitM <dbl>, nunschvisitMH <dbl>, nunschvisitHH <dbl>,
## # nunschvisitNA <dbl>, nschedvisitLL <dbl>, nschedvisitML <dbl>,
## # nschedvisitM <dbl>, nschedvisitMH <dbl>, nschedvisitHH <dbl>,
## # nschedvisitNA <dbl>, ratiounschvisitLL <dbl>, ratiounschvisitML <dbl>,
## # ratiounschvisitM <dbl>, ratiounschvisitMH <dbl>, ratiounschvisitHH <dbl>, …
# List every column name of `df_mm_bin` in an interactive table.
DT::datatable(tibble(columns = colnames(df_mm_bin)))
Modelling coefficients have been preselected by a combination of EDA and SME input.
# Read the `df_form` target from the targets store and show it as an
# interactive table.
DT::datatable(tar_read(df_form))
Indices of the modelling matrix that define the time series cross-validation strategy.
# Read the `df_cv` target from the targets store and print it.
tar_read(df_cv)
## # A tibble: 45 × 4
## year_start_act category_id index_past index_next_year
## <dbl> <chr> <chr> <chr>
## 1 2011 cnsn 70,71,72,84,85,86,87,8… 155,170,171,172,173,174,1…
## 2 2012 cnsn 70,71,72,84,85,86,87,8… 211,229,231,234,241,242,2…
## 3 2013 cnsn 70,71,72,84,85,86,87,8… 2,4,306,307,308,309,310,3…
## 4 2014 cnsn 2,4,70,71,72,84,85,86,… 1,3,5,6,7,8,9,10,11,12,13…
## 5 2015 cnsn 1,2,3,4,5,6,7,8,9,10,1… 30,34,45,46,47,48,49,50,5…
## 6 2016 cnsn 1,2,3,4,5,6,7,8,9,10,1… 98,99,100,101,102,103,106…
## 7 2017 cnsn 1,2,3,4,5,6,7,8,9,10,1… 228,238,239,240,248,251,2…
## 8 2018 cnsn 1,2,3,4,5,6,7,8,9,10,1… 358,359,360,361,362,363,3…
## 9 2019 cnsn 1,2,3,4,5,6,7,8,9,10,1… 368,805,808
## 10 2015 dtin 5678,5679,5680,5681,56… 5698,5714,5715,5724,5725,…
## # … with 35 more rows
All names of all features and their variations.
# Read the `df_feat_lookup` target from the targets store and show it as an
# interactive table.
DT::datatable(tar_read(df_feat_lookup))
All finding statements mapped to clinical impact factors.
# Read the `df_cat_lookup` target from the targets store and show it as an
# interactive table.
DT::datatable(tar_read(df_cat_lookup))