Designed and executed an Integrated Summary of Efficacy and Safety (ISE/ISS) simulation to evaluate pooled treatment effects across multiple clinical trials.
Generated realistic patient-level datasets (treatment, study, demographic, efficacy, and adverse event data) to mimic multi-study integration scenarios.
Built R-based simulation pipelines using dplyr, tidyr, purrr, and metafor to automate data creation, merging, and summary analyses.
Validated simulated outcomes through model-based verification and visualization of efficacy and safety profiles.
# ====================================================
# R CODE: Simulate ISE / ISS data + Meta-Analysis demo
# ====================================================
# Load required packages
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(metafor)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loading required package: metadat
## Loading required package: numDeriv
##
## Loading the 'metafor' package (version 4.8-0). For an
## introduction to the package please type: help(metafor)
library(ggplot2)
library(purrr)
set.seed(12345)
# --------------------------
# Step 1. Define study design
# --------------------------
# Design table: one row per study. The column names match the arguments of
# simulate_study(), which is called once per row further below.
studies <- data.frame(
  study_id      = sprintf("S%d", 1:5),
  # Patients simulated in each arm
  n_per_arm     = c(120, 80, 150, 60, 100),
  # Mean efficacy shift applied to the Treatment arm
  treat_effect  = c(2.0, 1.5, 2.5, 0.8, 1.8),
  # Standard deviation of the efficacy outcome
  sd            = c(4.0, 3.5, 4.5, 3.0, 4.0),
  # Safety event probability in the Control arm
  p_safety_ctrl = c(0.03, 0.04, 0.02, 0.05, 0.025),
  # Amount added to the Control event probability for the Treatment arm
  rd_safety     = c(0.01, 0.02, 0.015, 0.0, 0.005)
)
#' Simulate patient-level efficacy and safety data for one two-arm study.
#'
#' Random draws are made arm by arm (Control first, then Treatment) and,
#' within an arm, efficacy before safety, so the output for a given seed is
#' reproducible.
#'
#' @param study_id Study label copied to every row.
#' @param n_per_arm Number of patients to simulate in each arm (>= 1).
#' @param treat_effect Mean of the efficacy outcome in the Treatment arm
#'   (the Control arm has mean 0).
#' @param sd Standard deviation of the efficacy outcome (both arms).
#' @param p_safety_ctrl Probability of a safety event in the Control arm.
#' @param rd_safety Amount added to `p_safety_ctrl` to obtain the Treatment
#'   arm event probability.
#' @return A data.frame with columns `study_id`, `arm`, `efficacy` and
#'   `safety_event` (0/1); Control rows come first.
simulate_study <- function(study_id, n_per_arm, treat_effect, sd,
                           p_safety_ctrl, rd_safety) {
  # Fail early with a clear message: rnorm()/rbinom() would otherwise only
  # warn and fill the output with NaN/NA for invalid parameters.
  if (!is.numeric(n_per_arm) || length(n_per_arm) != 1L ||
        is.na(n_per_arm) || n_per_arm < 1) {
    stop("`n_per_arm` must be a single number >= 1.", call. = FALSE)
  }
  if (!is.numeric(sd) || anyNA(sd) || any(sd < 0)) {
    stop("`sd` must be non-negative.", call. = FALSE)
  }
  event_probs <- c(p_safety_ctrl, p_safety_ctrl + rd_safety)
  if (!is.numeric(event_probs) || anyNA(event_probs) ||
        any(event_probs < 0 | event_probs > 1)) {
    stop(
      "`p_safety_ctrl` and `p_safety_ctrl + rd_safety` must lie in [0, 1].",
      call. = FALSE
    )
  }

  arms <- c("Control", "Treatment")
  per_arm <- lapply(arms, function(arm) {
    # Logical flag acts as a 0/1 multiplier: effects apply to Treatment only.
    is_treated <- arm == "Treatment"
    efficacy <- rnorm(n_per_arm, mean = treat_effect * is_treated, sd = sd)
    p_event <- p_safety_ctrl + rd_safety * is_treated
    safety_event <- rbinom(n_per_arm, size = 1, prob = p_event)
    data.frame(
      study_id = study_id,
      arm = arm,
      efficacy = efficacy,
      safety_event = safety_event
    )
  })
  # Keep the list named so rbind() produces the same arm-prefixed row names
  # ("Control.1", ...) as before.
  names(per_arm) <- arms
  do.call(rbind, per_arm)
}
# Simulate all studies: call simulate_study() once per row of `studies`
# (columns are passed as the same-named arguments), then stack the
# per-study data frames into one patient-level table.
patient_df <- dplyr::bind_rows(purrr::pmap(studies, simulate_study))
head(patient_df)
## study_id arm efficacy safety_event
## Control.1...1 S1 Control 2.3421153 0
## Control.2...2 S1 Control 2.8378641 0
## Control.3...3 S1 Control -0.4372133 0
## Control.4...4 S1 Control -1.8139887 0
## Control.5...5 S1 Control 2.4235498 0
## Control.6...6 S1 Control -7.2718239 0
# Note: the patient-level data stay in memory as `patient_df`; nothing is written to disk here.
# --------------------------
# Step 2. Study-level summary
# --------------------------
# Collapse patient rows to one row per study and arm: sample size, efficacy
# mean and SD, and the number of safety events. `.groups = "drop"` returns an
# ungrouped tibble.
study_summary <- summarise(
  group_by(patient_df, study_id, arm),
  n = n(),
  mean_eff = mean(efficacy),
  sd_eff = sd(efficacy),
  safety_events = sum(safety_event),
  .groups = "drop"
)
study_summary
## # A tibble: 10 × 6
## study_id arm n mean_eff sd_eff safety_events
## <chr> <chr> <int> <dbl> <dbl> <int>
## 1 S1 Control 120 0.851 4.37 3
## 2 S1 Treatment 120 2.01 3.60 6
## 3 S2 Control 80 0.542 3.15 0
## 4 S2 Treatment 80 1.62 3.84 4
## 5 S3 Control 150 0.130 4.73 6
## 6 S3 Treatment 150 1.65 4.25 6
## 7 S4 Control 60 -0.166 3.23 1
## 8 S4 Treatment 60 1.13 2.86 3
## 9 S5 Control 100 -0.348 4.24 2
## 10 S5 Treatment 100 1.90 3.94 4
# --------------------------
# Step 3. Compute effect estimates
# --------------------------
# Reshape to one row per study (columns suffixed _Control / _Treatment), then
# derive the per-study effect estimates and their sampling variances.
study_est <- study_summary %>%
  pivot_wider(
    values_from = c(n, mean_eff, sd_eff, safety_events),
    names_from = arm
  ) %>%
  mutate(
    # Efficacy: mean difference and its variance from the per-arm SDs
    yi = mean_eff_Treatment - mean_eff_Control,
    vi = sd_eff_Treatment^2 / n_Treatment + sd_eff_Control^2 / n_Control,
    # Safety: observed event proportions and their risk difference
    pT = safety_events_Treatment / n_Treatment,
    pC = safety_events_Control / n_Control,
    rd = pT - pC,
    # NOTE(review): an arm with zero events contributes 0 to this variance;
    # confirm that is acceptable for the downstream pooling.
    v_rd = pT * (1 - pT) / n_Treatment + pC * (1 - pC) / n_Control
  )
study_est
## # A tibble: 5 × 15
## study_id n_Control n_Treatment mean_eff_Control mean_eff_Treatment
## <chr> <int> <int> <dbl> <dbl>
## 1 S1 120 120 0.851 2.01
## 2 S2 80 80 0.542 1.62
## 3 S3 150 150 0.130 1.65
## 4 S4 60 60 -0.166 1.13
## 5 S5 100 100 -0.348 1.90
## # ℹ 10 more variables: sd_eff_Control <dbl>, sd_eff_Treatment <dbl>,
## # safety_events_Control <int>, safety_events_Treatment <int>, yi <dbl>,
## # vi <dbl>, pT <dbl>, pC <dbl>, rd <dbl>, v_rd <dbl>
# --------------------------
# Step 4. Meta-analysis (Efficacy)
# --------------------------
# Pool the per-study mean differences (yi, with variances vi) in a
# random-effects model; tau^2 is estimated by REML.
res_eff <- rma(
  yi = yi,
  vi = vi,
  data = study_est,
  method = "REML"
)
print(res_eff)
##
## Random-Effects Model (k = 5; tau^2 estimator: REML)
##
## tau^2 (estimated amount of total heterogeneity): 0 (SE = 0.2091)
## tau (square root of estimated tau^2 value): 0
## I^2 (total heterogeneity / total variability): 0.00%
## H^2 (total variability / sampling variability): 1.00
##
## Test for Heterogeneity:
## Q(df = 4) = 2.7308, p-val = 0.6038
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 1.4402 0.2433 5.9192 <.0001 0.9634 1.9171 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Forest plot for efficacy
# Forest plot of the efficacy model, with rows labelled by study ID.
forest(
  res_eff,
  slab = study_est[["study_id"]],
  xlab = "Mean Difference (Treatment - Control)",
  main = "ISE / ISS Pooled Efficacy Effect"
)
# --------------------------
# Step 5. Meta-analysis (Safety Risk Difference)
# --------------------------
# Pool the per-study risk differences (rd, with variances v_rd) in a
# random-effects model; tau^2 is estimated by REML.
res_rd <- rma(
  yi = rd,
  vi = v_rd,
  data = study_est,
  method = "REML"
)
print(res_rd)
##
## Random-Effects Model (k = 5; tau^2 estimator: REML)
##
## tau^2 (estimated amount of total heterogeneity): 0 (SE = 0.0004)
## tau (square root of estimated tau^2 value): 0
## I^2 (total heterogeneity / total variability): 0.00%
## H^2 (total variability / sampling variability): 1.00
##
## Test for Heterogeneity:
## Q(df = 4) = 2.3746, p-val = 0.6672
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.0241 0.0112 2.1477 0.0317 0.0021 0.0460 *
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Forest plot for safety
# Forest plot of the safety model, with rows labelled by study ID.
forest(
  res_rd,
  slab = study_est[["study_id"]],
  xlab = "Risk Difference (Treatment - Control)",
  main = "ISE / ISS Pooled Safety Risk Difference"
)
# --------------------------
# Step 6. Optional: Simple ggplot summary
# --------------------------
# Per-study mean differences with +/- 1.96 * SE bars; the dashed red line
# marks the pooled estimate taken from the efficacy model.
ggplot(data = study_est, mapping = aes(x = study_id, y = yi)) +
  geom_point(color = "steelblue", size = 3) +
  geom_errorbar(
    aes(ymin = yi - 1.96 * sqrt(vi), ymax = yi + 1.96 * sqrt(vi)),
    width = 0.2
  ) +
  geom_hline(
    yintercept = res_eff$b[1],
    linetype = "dashed",
    color = "red"
  ) +
  labs(
    title = "Study-wise and Pooled Efficacy Estimates",
    x = "Study ID",
    y = "Mean Difference (Treatment - Control)"
  ) +
  theme_minimal()