suppressPackageStartupMessages(library(tidyverse))
library(targets)
library(tarchetypes)
library(visNetwork)
# Package-level knit option: resolve relative paths in the chunks against the
# directory two levels above this document.
knitr::opts_knit$set(root.dir = "../../")
# `tar_interactive`, `collapse` and `comment` are *chunk* options. knitr (and
# the {targets} knitr engine) only read them from `opts_chunk`; when passed to
# `opts_knit$set()` they are stored but never used.
knitr::opts_chunk$set(tar_interactive = FALSE, collapse = TRUE, comment = "#>")
# Start from a clean slate: drop the helper scripts that Target Markdown wrote
# during a previous run of this document.
tar_unscript()

1 {targets} Workflow

1.1 Globals

# Silence the tidyverse startup banner.
options(tidyverse.quiet = TRUE)

# Packages declared for all targets of the pipeline.
tar_option_set(
  packages = c(
    "gt",
    "yardstick",
    "broom",
    "cowplot",
    "glue",
    "tidyverse",
    "arrow",
    "tarchetypes"
  )
)

# Source the project's helper functions from `src/R`.
# * Only `.R` scripts are listed, so a stray non-R file in that directory
#   (notes, data, editor backups) cannot break the pipeline.
# * `walk()` replaces `map()` because `source()` is called purely for its side
#   effect; the list of return values was never used.
purrr::walk(
  list.files("./src/R", pattern = "\\.[Rr]$", full.names = TRUE),
  source
)
## Established _targets.R and _targets_r/globals/globals.R.

1.2 Targets

1.2.1 Config

# Pipeline-wide configuration, exposed as the `config` target.
tar_target(
  name = config,
  command = list(
    # columns that are not features
    id_vars = c(
      "category_id",
      "modelling_category",
      "classification",
      "activity_id_new",
      "start_date",
      "index",
      "date_first_study_activity",
      "date_last_study_activity",
      "protocol",
      "file_name",
      "source_row_index",
      "audit_or_inspection",
      "site_platinum_id",
      "pi_platinum_id",
      "site_num",
      "country",
      "site_activation_date",
      "site_closed_date",
      "site_closed_date_corr",
      "study_start_date",
      "study_end_date"
    ),
    # Year value forwarded to downstream targets as `config$max_year`.
    max_year = 2020
  )
)
## Established _targets.R and _targets_r/targets/config.R.

1.2.2 Data

1.2.2.1 Files

# Input files of the pipeline. Each target returns a path and is declared with
# `format = "file"`, so {targets} tracks the file on disk.
list(
  tar_target(
    name = file_mm,
    command = "data/in/modelling_matrix.feather",
    format = "file"
  ),
  tar_target(
    name = file_mm_bin,
    command = "data/in/lasso_prep.feather",
    format = "file"
  ),
  tar_target(
    name = file_form,
    command = "data/in/glm_coefs.feather",
    format = "file"
  ),
  tar_target(
    name = file_cv,
    command = "data/in/indeces_annual_splits.feather",
    format = "file"
  ),
  tar_target(
    name = file_cat_lookup,
    command = "data/in/category_lookup.feather",
    format = "file"
  ),
  tar_target(
    name = file_feat_lookup,
    command = "data/in/feature_lookup.csv",
    format = "file"
  )
)
## Established _targets.R and _targets_r/targets/files.R.

1.2.2.2 Load

# One data target per input file. Every file goes through
# `qract_read_and_anonymize()` together with the non-feature columns from
# `config$id_vars` and the reader function matching its format
# (feather via {arrow}, csv via {readr}).
list(
  tar_target(
    name = df_mm,
    command = qract_read_and_anonymize(
      file_mm,
      config$id_vars,
      arrow::read_feather
    )
  ),
  tar_target(
    name = df_mm_bin,
    command = qract_read_and_anonymize(
      file_mm_bin,
      config$id_vars,
      arrow::read_feather
    )
  ),
  tar_target(
    name = df_form,
    command = qract_read_and_anonymize(
      file_form,
      config$id_vars,
      arrow::read_feather
    )
  ),
  tar_target(
    name = df_cv,
    command = qract_read_and_anonymize(
      file_cv,
      config$id_vars,
      arrow::read_feather
    )
  ),
  tar_target(
    name = df_cat_lookup,
    command = qract_read_and_anonymize(
      file_cat_lookup,
      config$id_vars,
      arrow::read_feather
    )
  ),
  tar_target(
    name = df_feat_lookup,
    command = qract_read_and_anonymize(
      file_feat_lookup,
      config$id_vars,
      readr::read_csv
    )
  )
)
## Established _targets.R and _targets_r/targets/load.R.

1.2.2.3 Report

Link: rendered report `01_data.html` (written to `src/Rmd/`)

# Render the data report as a pipeline target; the output_* arguments place
# the HTML file next to its R Markdown source.
tarchetypes::tar_render(
  name = report_data,
  path = "src/Rmd/_01_data.Rmd",
  output_file = "01_data.html",
  output_dir = "src/Rmd/",
  output_yaml = "src/Rmd/_site.yml"
)
## Established _targets.R and _targets_r/targets/report_data.R.

1.2.3 Time Series Cross Validation

# Time series cross validation: a plot target and the target holding the
# cross-validated predictions and coefficients used by later sections.
list(
  tar_target(
    name = p_tscv,
    command = qract_plot_tscv(df_mm, df_cv, config$max_year)
  ),
  tar_target(
    name = df_cv_preds_and_coefs,
    command = qract_pred_cv(df_mm_bin, df_cv, df_form, config$id_vars)
  )
)
## Established _targets.R and _targets_r/targets/cv.R.

1.2.3.1 Report

Link: rendered report `02_cv.html` (written to `src/Rmd/`)

# Render the cross validation report as a pipeline target; the output_*
# arguments place the HTML file next to its R Markdown source.
tarchetypes::tar_render(
  name = report_cv,
  path = "src/Rmd/_02_cv.Rmd",
  output_file = "02_cv.html",
  output_dir = "src/Rmd/",
  output_yaml = "src/Rmd/_site.yml"
)
## Established _targets.R and _targets_r/targets/report_cv.R.

1.2.4 Performance

# Performance targets, all derived from the cross-validated predictions in
# `df_cv_preds_and_coefs`.
list(
  tar_target(
    name = df_perf,
    command = qract_perf(df_cv_preds_and_coefs, config$max_year)
  ),
  tar_target(
    name = df_calib,
    command = qract_lin_calib(df_cv_preds_and_coefs, min_sample_size = 200)
  ),
  tar_target(
    name = df_bin,
    command = qract_bin_preds(
      df_cv_preds_and_coefs,
      n_bins = 4,
      confidence_level = 0.75
    )
  ),
  # Calibration plot covering every distinct category id found in the
  # predictions.
  tar_target(
    name = p_calib,
    command = qract_plot_calibration_pub(
      category_id_str = df_cv_preds_and_coefs$category_id %>% unique(),
      df_bin,
      df_calib,
      df_cat_lookup,
      uniform_color = "black",
      color_calib = "grey"
    )
  )
)
## Established _targets.R and _targets_r/targets/perf.R.

1.2.4.1 Report

Link: rendered report `03_perf.html` (written to `src/Rmd/`)

# Render the performance report as a pipeline target; the output_* arguments
# place the HTML file next to its R Markdown source.
tarchetypes::tar_render(
  name = report_perf,
  path = "src/Rmd/_03_perf.Rmd",
  output_file = "03_perf.html",
  output_dir = "src/Rmd/",
  output_yaml = "src/Rmd/_site.yml"
)
## Established _targets.R and _targets_r/targets/report_perf.R.

1.2.5 Forest Plots

# Forest plots for four selected categories, plus a file target that tracks
# what `qract_save_forest_plots()` returns for the `./png` output path.
list(
  tar_target(
    name = df_forest,
    command = qract_forest_plots(
      df_cv_preds_and_coefs,
      df_feat_lookup,
      df_mm,
      df_cat_lookup,
      category_ids = c("cnsn", "dtin", "sfty", "ptpe")
    )
  ),
  tar_target(
    name = forest_files,
    command = qract_save_forest_plots(df_forest, path = "./png"),
    format = "file"
  )
)
## Established _targets.R and _targets_r/targets/forest.R.

1.2.5.1 Report

Link: rendered report `04_forest.html` (written to `src/Rmd/`)

# Render the forest plot report as a pipeline target; the output_* arguments
# place the HTML file next to its R Markdown source.
tarchetypes::tar_render(
  name = report_forest,
  path = "src/Rmd/_04_forest.Rmd",
  output_file = "04_forest.html",
  output_dir = "src/Rmd/",
  output_yaml = "src/Rmd/_site.yml"
)
## Established _targets.R and _targets_r/targets/report_forest.R.

1.3 Run Workflow

# Run the pipeline; targets that are already up to date are skipped.
targets::tar_make()
## ✔ skip target file_mm
## ✔ skip target file_form
## ✔ skip target config
## ✔ skip target file_cat_lookup
## ✔ skip target file_cv
## ✔ skip target file_feat_lookup
## ✔ skip target file_mm_bin
## ✔ skip target df_form
## ✔ skip target df_mm
## ✔ skip target df_cat_lookup
## ✔ skip target df_cv
## ✔ skip target df_feat_lookup
## ✔ skip target df_mm_bin
## ✔ skip target p_tscv
## ✔ skip target df_cv_preds_and_coefs
## ✔ skip target report_data
## ✔ skip target df_perf
## ✔ skip target report_cv
## ✔ skip target df_bin
## ✔ skip target df_calib
## ✔ skip target df_forest
## ✔ skip target p_calib
## ✔ skip target report_forest
## ✔ skip target forest_files
## ✔ skip target report_perf
## ✔ skip pipeline

1.4 Visualise Workflow

1.4.1 Without Functions

# Dependency graph restricted to targets (functions and globals hidden).
targets::tar_visnetwork(
  targets_only = TRUE,
  reporter = "silent"
)

1.4.2 With Functions

# Full dependency graph, including the functions the targets depend on.
targets::tar_visnetwork(
  reporter = "silent"
)