ISE/ISS pooling

Designed and executed an Integrated Summary of Efficacy and Safety (ISE/ISS) simulation to evaluate pooled treatment effects across multiple clinical trials.

Generated realistic patient-level datasets (treatment, study, demographic, efficacy, and adverse event data) to mimic multi-study integration scenarios.

Built R-based simulation pipelines using dplyr, tidyr, purrr, and metafor to automate data creation, merging, and summary analyses.

Validated simulated outcomes through model-based verification and visualization of efficacy and safety profiles.

# ====================================================
# R CODE: Simulate ISE / ISS data + Meta-Analysis demo
# ====================================================

# Load required packages
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(metafor)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loading required package: metadat
## Loading required package: numDeriv
## 
## Loading the 'metafor' package (version 4.8-0). For an
## introduction to the package please type: help(metafor)
library(ggplot2)
library(purrr)

set.seed(12345)

# --------------------------
# Step 1. Define study design
# --------------------------
# Design table: one row per simulated trial. Each column name matches an
# argument of simulate_study(), which receives one row at a time.
studies <- data.frame(
  # Study labels "S1" to "S5"
  study_id = sprintf("S%d", 1:5),
  # Number of patients simulated in each arm
  n_per_arm = c(120, 80, 150, 60, 100),
  # Mean efficacy shift applied to the treatment arm
  treat_effect = c(2.0, 1.5, 2.5, 0.8, 1.8),
  # Standard deviation of the efficacy outcome
  sd = c(4.0, 3.5, 4.5, 3.0, 4.0),
  # Probability of a safety event in the control arm
  p_safety_ctrl = c(0.03, 0.04, 0.02, 0.05, 0.025),
  # Amount added to the control probability for the treatment arm
  rd_safety = c(0.01, 0.02, 0.015, 0.0, 0.005)
)

# Simulate patient-level efficacy and safety data for one two-arm study.
#
# Arguments:
#   study_id       Single study label copied onto every row.
#   n_per_arm      Number of patients simulated in each arm (non-negative).
#   treat_effect   Mean efficacy in the Treatment arm; the Control mean is 0.
#   sd             Standard deviation of efficacy in both arms (non-negative).
#   p_safety_ctrl  Probability of a safety event in the Control arm.
#   rd_safety      Added to p_safety_ctrl to give the Treatment-arm probability.
#
# Returns a data.frame with columns study_id, arm, efficacy and safety_event
# (0/1), Control rows first, then Treatment rows. Row names carry the arm as a
# prefix ("Control.1", ..., "Treatment.1", ...).
#
# Stops with an error when an argument is not a single finite value of the
# expected type, or when either arm's event probability falls outside [0, 1]
# (rbinom() would otherwise return NA with only a warning).
simulate_study <- function(study_id, n_per_arm, treat_effect, sd,
                           p_safety_ctrl, rd_safety) {
  stopifnot(
    length(study_id) == 1L,
    is.numeric(n_per_arm), length(n_per_arm) == 1L,
    is.finite(n_per_arm), n_per_arm >= 0,
    is.numeric(treat_effect), length(treat_effect) == 1L,
    is.finite(treat_effect),
    is.numeric(sd), length(sd) == 1L, is.finite(sd), sd >= 0,
    is.numeric(p_safety_ctrl), length(p_safety_ctrl) == 1L,
    is.numeric(rd_safety), length(rd_safety) == 1L
  )

  p_safety_trt <- p_safety_ctrl + rd_safety
  p_both <- c(p_safety_ctrl, p_safety_trt)
  if (anyNA(p_both) || any(p_both < 0 | p_both > 1)) {
    stop(
      "safety event probabilities must lie in [0, 1] in both arms",
      call. = FALSE
    )
  }

  simulate_arm <- function(arm) {
    # Scalar condition, so plain if/else rather than ifelse()
    is_treated <- arm == "Treatment"
    arm_mean <- if (is_treated) treat_effect else 0
    arm_risk <- if (is_treated) p_safety_trt else p_safety_ctrl

    # Efficacy is drawn before safety within each arm; keeping this order
    # means a given seed reproduces the same simulated values.
    efficacy <- rnorm(n_per_arm, mean = arm_mean, sd = sd)
    safety_event <- rbinom(n_per_arm, size = 1, prob = arm_risk)

    n_rows <- length(efficacy)
    data.frame(
      study_id = rep(study_id, n_rows),
      arm = rep(arm, n_rows),
      efficacy = efficacy,
      safety_event = safety_event
    )
  }

  arms <- c("Control", "Treatment")
  # lapply() visits the arms in order: Control is simulated first
  arm_data <- lapply(arms, simulate_arm)
  # Naming the list by arm is what makes rbind() prefix the row names
  names(arm_data) <- arms
  do.call(rbind, arm_data)
}

# Run simulate_study() once per row of `studies` (columns are matched to the
# function's arguments by name) and row-bind the results into one
# patient-level data frame.
patient_df <- studies %>%
  purrr::pmap_dfr(simulate_study)
# Preview the first rows
head(patient_df)
##               study_id     arm   efficacy safety_event
## Control.1...1       S1 Control  2.3421153            0
## Control.2...2       S1 Control  2.8378641            0
## Control.3...3       S1 Control -0.4372133            0
## Control.4...4       S1 Control -1.8139887            0
## Control.5...5       S1 Control  2.4235498            0
## Control.6...6       S1 Control -7.2718239            0
# Note: the patient-level data stays in memory as `patient_df`; no file is written at this step.
 

# --------------------------
# Step 2. Study-level summary
# --------------------------
# Collapse the patient-level rows to one row per study and arm: sample size,
# mean and SD of efficacy, and the count of safety events.
study_summary <- patient_df %>%
  dplyr::group_by(study_id, arm) %>%
  dplyr::summarise(
    n = dplyr::n(),
    mean_eff = mean(efficacy),
    sd_eff = stats::sd(efficacy),
    safety_events = sum(safety_event),
    # Drop the grouping so the result is a plain, ungrouped tibble
    .groups = "drop"
  )

study_summary
## # A tibble: 10 × 6
##    study_id arm           n mean_eff sd_eff safety_events
##    <chr>    <chr>     <int>    <dbl>  <dbl>         <int>
##  1 S1       Control     120    0.851   4.37             3
##  2 S1       Treatment   120    2.01    3.60             6
##  3 S2       Control      80    0.542   3.15             0
##  4 S2       Treatment    80    1.62    3.84             4
##  5 S3       Control     150    0.130   4.73             6
##  6 S3       Treatment   150    1.65    4.25             6
##  7 S4       Control      60   -0.166   3.23             1
##  8 S4       Treatment    60    1.13    2.86             3
##  9 S5       Control     100   -0.348   4.24             2
## 10 S5       Treatment   100    1.90    3.94             4
# --------------------------
# Step 3. Compute effect estimates
# --------------------------
# Reshape to one row per study (columns suffixed _Control / _Treatment), then
# derive the per-study effect estimates and their sampling variances.
study_est <- study_summary %>%
  tidyr::pivot_wider(
    names_from = arm,
    values_from = c(n, mean_eff, sd_eff, safety_events)
  ) %>%
  # Efficacy: difference in means and the sum of the two squared standard errors
  dplyr::mutate(
    yi = mean_eff_Treatment - mean_eff_Control,
    vi = (sd_eff_Treatment^2 / n_Treatment) + (sd_eff_Control^2 / n_Control)
  ) %>%
  # Safety: event proportions, their difference, and the binomial variance of
  # that difference.
  # NOTE(review): v_rd is exactly 0 when both arms have an event proportion of
  # 0 or 1; confirm the downstream model tolerates such a row.
  dplyr::mutate(
    pT = safety_events_Treatment / n_Treatment,
    pC = safety_events_Control / n_Control,
    rd = pT - pC,
    v_rd = (pT * (1 - pT) / n_Treatment) + (pC * (1 - pC) / n_Control)
  )

study_est
## # A tibble: 5 × 15
##   study_id n_Control n_Treatment mean_eff_Control mean_eff_Treatment
##   <chr>        <int>       <int>            <dbl>              <dbl>
## 1 S1             120         120            0.851               2.01
## 2 S2              80          80            0.542               1.62
## 3 S3             150         150            0.130               1.65
## 4 S4              60          60           -0.166               1.13
## 5 S5             100         100           -0.348               1.90
## # ℹ 10 more variables: sd_eff_Control <dbl>, sd_eff_Treatment <dbl>,
## #   safety_events_Control <int>, safety_events_Treatment <int>, yi <dbl>,
## #   vi <dbl>, pT <dbl>, pC <dbl>, rd <dbl>, v_rd <dbl>
# --------------------------
# Step 4. Meta-analysis (Efficacy)
# --------------------------
# Pool the per-study mean differences (yi) and their variances (vi) from
# study_est in a random-effects model, with tau^2 estimated by REML.
res_eff <- rma(
  data = study_est,
  yi = yi,
  vi = vi,
  method = "REML"
)
print(res_eff)
## 
## Random-Effects Model (k = 5; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of total heterogeneity): 0 (SE = 0.2091)
## tau (square root of estimated tau^2 value):      0
## I^2 (total heterogeneity / total variability):   0.00%
## H^2 (total variability / sampling variability):  1.00
## 
## Test for Heterogeneity:
## Q(df = 4) = 2.7308, p-val = 0.6038
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub      
##   1.4402  0.2433  5.9192  <.0001  0.9634  1.9171  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Forest plot of the fitted efficacy model, with each row labelled by its
# study ID.
forest(
  res_eff,
  slab = study_est[["study_id"]],
  main = "ISE / ISS Pooled Efficacy Effect",
  xlab = "Mean Difference (Treatment - Control)"
)

# --------------------------
# Step 5. Meta-analysis (Safety Risk Difference)
# --------------------------
# Pool the per-study risk differences (rd) and their variances (v_rd) from
# study_est in a random-effects model, with tau^2 estimated by REML.
res_rd <- rma(
  data = study_est,
  yi = rd,
  vi = v_rd,
  method = "REML"
)
print(res_rd)
## 
## Random-Effects Model (k = 5; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of total heterogeneity): 0 (SE = 0.0004)
## tau (square root of estimated tau^2 value):      0
## I^2 (total heterogeneity / total variability):   0.00%
## H^2 (total variability / sampling variability):  1.00
## 
## Test for Heterogeneity:
## Q(df = 4) = 2.3746, p-val = 0.6672
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub    
##   0.0241  0.0112  2.1477  0.0317  0.0021  0.0460  * 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Forest plot of the fitted safety model, with each row labelled by its
# study ID.
forest(
  res_rd,
  slab = study_est[["study_id"]],
  main = "ISE / ISS Pooled Safety Risk Difference",
  xlab = "Risk Difference (Treatment - Control)"
)

# --------------------------
# Step 6. Optional: Simple ggplot summary
# --------------------------
# Per-study mean differences with yi +/- 1.96 * sqrt(vi) error bars; the
# dashed red line marks the pooled estimate taken from res_eff.
ggplot(data = study_est, mapping = aes(x = study_id, y = yi)) +
  geom_point(colour = "steelblue", size = 3) +
  geom_errorbar(
    mapping = aes(
      ymin = yi - 1.96 * sqrt(vi),
      ymax = yi + 1.96 * sqrt(vi)
    ),
    width = 0.2
  ) +
  geom_hline(
    yintercept = res_eff$b[[1]],
    linetype = "dashed",
    colour = "red"
  ) +
  labs(
    x = "Study ID",
    y = "Mean Difference (Treatment - Control)",
    title = "Study-wise and Pooled Efficacy Estimates"
  ) +
  theme_minimal()