Analysis sketch - this version uses analyse and pool functions in RBMI based on the example in the vignette https://insightsengineering.github.io/rbmi/main/articles/advanced.html

The analysis is as follows: Rubin’s Rules after standardization: Following multiple imputation, we would fit 1000 logistic regression models and for each, obtain the marginal estimates using beeca. We would then pool these 1000 beeca outputs using Rubin’s Rules.

# Load libraries
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)
library(readr)
library(purrr)
library(rbmi)
#> 
#> Attaching package: 'rbmi'
#> The following object is masked from 'package:tidyr':
#> 
#>     expand
library(beeca)
library(rbmiUtils)

# require rstan
if (!requireNamespace("rstan", quietly = TRUE)) {
  install.packages("rstan", repos = "https://cloud.r-project.org/")
  message("rstan has been successfully installed.")
} else {
  message("rstan is already installed.")
}
#> Installing package into '/home/runner/work/_temp/Library'
#> (as 'lib' is unspecified)
#> also installing the dependencies 'numDeriv', 'abind', 'tensorA', 'distributional', 'matrixStats', 'posterior', 'StanHeaders', 'inline', 'gridExtra', 'RcppParallel', 'loo', 'QuickJSR', 'RcppEigen', 'BH'
#> rstan has been successfully installed.


set.seed(1974)
data("ADEFF")

## Global constants
N_IMPUTATIONS <- 100 ## set to 1,000 for final analysis

# Prepare the data
ADEFF$TRT <- factor(ADEFF$TRT01P, levels = c("Placebo", "Drug A"))
ADEFF$USUBJID <- factor(ADEFF$USUBJID)
ADEFF$AVISIT <- factor(ADEFF$AVISIT)

Define function to compare last visit using a responder analysis

## function to compare last visit using a responder analysis
gcomp_responder <- function(data, ...) {

    ## fit the GLM model
    model <- glm(CRIT1FLN ~ TRT + BASE + STRATA + REGION, data = data, family = binomial)

    ## Calculate the marginal treatment effect estimate and associated variance
    ## for a difference contrast using the Ge et al. method
    ## with robust HC0 sandwich variance
    marginal_fit <- get_marginal_effect(model,
                                        trt = "TRT",
                                        method = "Ge",
                                        type = "HC0",
                                        contrast = "diff",
                                        reference = "Placebo")

    ## Extract results
    marginal_results <- marginal_fit$marginal_results
    diff_est <- marginal_results[marginal_results$STAT == "diff", "STATVAL"][[1]]
    diff_se <- marginal_results[marginal_results$STAT == "diff_se", "STATVAL"][[1]]

    # Return a named list with the estimate and standard error
    res <- list(
        trt = list(
            est = diff_est,
            se = diff_se,
            df = NA
        )
    )
    return(res)
}

STEP 1: Impute missing data

# Define key variables for rbmi
vars <- rbmi::set_vars(
    subjid = "USUBJID",
    visit = "AVISIT",
    group = "TRT",
    outcome = "CHG",
    covariates = c("BASE", "STRATA", "REGION")
)

# Define which imputation method to use (here: Bayesian multiple imputation with 150 imputed datsets)
method <- rbmi::method_bayes(
    n_samples = N_IMPUTATIONS,
    burn_in = 200,
    burn_between = 5
)

# Create a dataset with the necessary columns
dat <- ADEFF |>
    select(USUBJID, STRATA, REGION, REGIONC, TRT, BASE, CHG, AVISIT)

# Fit the imputation model
draws_obj <- rbmi::draws(
    data = dat,
    vars = vars,
    method = method,
    quiet = TRUE
)
#> Compiling Stan model please wait...

# Impute the datasets
impute_obj <- rbmi::impute(
    draws_obj,
    references = c("Placebo" = "Placebo", "Drug A" = "Placebo")
)


## get the imputed datq as a data frame
ADMI <- get_imputed_data(impute_obj)

Transform the imputed data to include additional derived variables.

For example, add responder criteria based on the outcome: We convert the continuous version of the endpoint into binary version of the endpoint (e.g. if the endpoint>3, then it is a responder; if it is <=3, then it is a non-responder).

ADMI <- ADMI |>
    dplyr::mutate(
        CRIT1FLN = ifelse(CHG > 3, 1, 0), 
        CRIT1FL = ifelse(CRIT1FLN == 1, "Y", "N"),
        CRIT = "CHG > 3"
    )

# save the imputed data
#write_csv(ADMI, "ADMI.csv")

STEP 2: Analyze each imputed dataset

ANCOVA analysis

ana_obj_ancova <- analyse_mi_data(data = ADMI,
                         vars = vars,
                         method = method,
                         fun = ancova,
                         delta = NULL)

STEP 3: Pool analyzes and apply Rubin’s Rules

pool_obj_ancova <- pool(ana_obj_ancova)

STEP 4: Report analysis

pool_obj_ancova
#> 
#> Pool Object
#> -----------
#> Number of Results Combined: 100
#> Method: rubin
#> Confidence Level: 0.95
#> Alternative: two.sided
#> 
#> Results:
#> 
#>   ========================================================
#>       parameter      est     se     lci     uci     pval  
#>   --------------------------------------------------------
#>      trt_Week 24    -2.178  0.182  -2.535  -1.821  <0.001 
#>    lsm_ref_Week 24   0.08   0.131  -0.176  0.337   0.539  
#>    lsm_alt_Week 24  -2.098  0.126  -2.344  -1.851  <0.001 
#>      trt_Week 48    -3.802  0.256  -4.305  -3.298  <0.001 
#>    lsm_ref_Week 48  0.037   0.186  -0.328  0.402   0.843  
#>    lsm_alt_Week 48  -3.765  0.175  -4.11   -3.42   <0.001 
#>   --------------------------------------------------------

Responder analysis

Note the vars may change from imputation to analysis

Here we focus on the new derived variable CRIT1, CRIT1FL, CRIT1FLN


## note the vars may change from imputation to analysis
vars <- set_vars(
    subjid = "USUBJID",
    visit = "AVISIT",
    group = "TRT",
    outcome = "CRIT1FLN",
    covariates = c("BASE", "STRATA", "REGION")
)

Analyse the imputed datasets

Need to do the data transform in the fun

ana_obj_prop2 <- analyse_mi_data(data = ADMI,
                         vars = vars,
                         method = method,
                         fun = gcomp_responder,
                         delta = NULL)

STEP 3: Pool analyzes and apply Rubin’s Rules

pool_obj_prop <- pool(ana_obj_prop2)

STEP 4: Report analysis

pool_obj_prop
#> 
#> Pool Object
#> -----------
#> Number of Results Combined: 100
#> Method: rubin
#> Confidence Level: 0.95
#> Alternative: two.sided
#> 
#> Results:
#> 
#>   ==================================================
#>    parameter   est     se     lci     uci     pval  
#>   --------------------------------------------------
#>       trt     -0.063  0.012  -0.087  -0.039  <0.001 
#>   --------------------------------------------------