WIP: Delta Analysis of Imputed Data • rbmiUtils

Introduction

This vignette demonstrates the process of analyzing imputed data using the rbmi package, specifically focusing on applying a delta adjustment to missing values and running a tipping point analysis.

Prerequisites

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(rbmi)
library(rbmiUtils)
library(ggplot2)
library(purrr)

set.seed(122)
data("ADEFF")

# Constants
N_IMPUTATIONS <- 100  # Set to 1000 for final analysis

# Prepare the data
ADEFF <- ADEFF %>%
  mutate(
    TRT = factor(TRT01P, levels = c("Placebo", "Drug A")),
    USUBJID = factor(USUBJID),
    AVISIT = factor(AVISIT)
  )

Step 1: Impute Missing Data

In this step, we set up the necessary variables and impute missing data using Bayesian multiple imputation.

# Define key variables for rbmi
vars <- rbmi::set_vars(
    subjid = "USUBJID",
    visit = "AVISIT",
    group = "TRT",
    outcome = "CHG",
    covariates = c("BASE", "STRATA", "REGION")
)

# Define the imputation method (Bayesian)
method <- rbmi::method_bayes(
    n_samples = N_IMPUTATIONS,
    burn_in = 200,
    burn_between = 5
)

# Subset relevant columns
dat <- ADEFF %>%
    select(USUBJID, STRATA, REGION, REGIONC, TRT, BASE, CHG, AVISIT)

# Fit the imputation model and perform imputation
draws_obj <- rbmi::draws(data = dat, vars = vars, method = method, quiet = TRUE)
impute_obj <- rbmi::impute(draws_obj, references = c("Placebo" = "Placebo", "Drug A" = "Placebo"))

# Get the imputed data as a data frame
ADMI <- get_imputed_data(impute_obj)

Step 2: Delta Adjustment

We now apply a delta adjustment to imputed outcomes, setting a delta value of 0.5 for all missing outcomes.

# Create delta adjustment template
dat_delta <- delta_template(imputations = impute_obj) %>%
  mutate(delta = is_missing * 0.5)

glimpse(dat_delta)
#> Rows: 1,000
#> Columns: 8
#> $ USUBJID     <fct> ID001, ID001, ID002, ID002, ID003, ID003, ID004, ID004, ID…
#> $ AVISIT      <fct> Week 24, Week 48, Week 24, Week 48, Week 24, Week 48, Week…
#> $ TRT         <fct> Drug A, Drug A, Drug A, Drug A, Drug A, Drug A, Placebo, P…
#> $ is_mar      <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE…
#> $ is_missing  <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FA…
#> $ is_post_ice <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FA…
#> $ strategy    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ delta       <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0…

Step 3: Analyze Each Imputed Dataset

Here, we perform an ANCOVA analysis on the imputed datasets after applying the delta adjustment.

ana_obj_ancova <- analyse_mi_data(
  data = ADMI,
  vars = vars,
  method = method,
  fun = ancova,
  delta = dat_delta
)

Step 4: Pool the Results

We pool the results of the ANCOVA analyses across all imputed datasets using Rubin’s Rules.

pool_obj_ancova <- pool(ana_obj_ancova)
pool_obj_ancova
#> 
#> Pool Object
#> -----------
#> Number of Results Combined: 100
#> Method: rubin
#> Confidence Level: 0.95
#> Alternative: two.sided
#> 
#> Results:
#> 
#>   ========================================================
#>       parameter      est     se     lci     uci     pval  
#>   --------------------------------------------------------
#>      trt_Week 24    -2.162  0.182  -2.52   -1.804  <0.001 
#>    lsm_ref_Week 24  0.088   0.131  -0.169  0.345    0.5   
#>    lsm_alt_Week 24  -2.074  0.126  -2.321  -1.826  <0.001 
#>      trt_Week 48    -3.828  0.256  -4.33   -3.325  <0.001 
#>    lsm_ref_Week 48  0.082   0.184  -0.28   0.444   0.657  
#>    lsm_alt_Week 48  -3.746  0.176  -4.091  -3.401  <0.001 
#>   --------------------------------------------------------

Tipping Point Analysis

Next, we conduct a tipping point analysis to assess the sensitivity of the results to various delta offsets for the control and intervention groups.

WIP: the values do not make scientific sense and are to illustrate a difference.

perform_tipp_analysis <- function(delta_control, delta_intervention) {
  delta_df <- delta_df_init %>%
    mutate(
      delta_ctl = (TRT == "Placebo") * is_missing * delta_control,
      delta_int = (TRT == "Drug A") * is_missing * delta_intervention,
      delta = delta_ctl + delta_int
    )

  ana_obj_delta <- analyse_mi_data(
    data = ADMI %>% filter(AVISIT == "Week 48"),
    vars = vars,
    method = method,
    fun = ancova,
    delta = delta_df
  )
  
  pool_delta <- as.data.frame(pool(ana_obj_delta)) %>%
    filter(parameter == "trt_Week 48")

  list(
    trt_effect_6 = pool_delta[["est"]],
    pval_6 = pool_delta[["pval"]]
  )
}

# Initial delta template
delta_df_init <- delta_template(impute_obj) %>%
  filter(AVISIT == "Week 48")

# # Define delta ranges for control and intervention groups
# tipp_frame_grid <- expand.grid(
#   delta_control = seq(-10, 10, by = 1),
#   delta_intervention = seq(-10, 10, by = 1)
# ) %>%
#   as_tibble()
# 
# # Apply tipping point analysis across the grid
# tipp_frame <- tipp_frame_grid %>%
#   mutate(
#     results_list = map2(delta_control, delta_intervention, perform_tipp_analysis),
#     trt_effect_6 = map_dbl(results_list, "trt_effect_6"),
#     pval_6 = map_dbl(results_list, "pval_6")
#   ) %>%
#   select(-results_list) %>%
#   mutate(
#     pval = cut(
#       pval_6,
#       c(0, 0.001, 0.01, 0.05, 0.2, 1),
#       right = FALSE,
#       labels = c("<0.001", "0.001 - <0.01", "0.01- <0.05", "0.05 - <0.20", ">= 0.20")
#     )
#   )
# 
# # Display delta values leading to non-significant results
# tipp_frame %>%
#   filter(pval_6 >= 0.05)
# 
# # Plot the tipping point results
# ggplot(tipp_frame, aes(delta_control, delta_intervention, fill = pval)) +
#   geom_raster() +
#   scale_fill_manual(values = c("darkgreen", "lightgreen", "lightyellow", "orange", "red"))