library(mgcv)
library(ggplot2)
library(gridExtra)
library(tidyr)
library(patchwork)
TBCHAMP Code: Quarto file
TBCHAMP Code
#Dataframe TBCHAMP
dataTBCHAMP <- data.frame(
time = c(0, 6, 12, 18, 24, 30, 36, 42, 48, 54),
control = c(0.00,0.00, 0.006732487, 0.018114981, 0.02280548, 0.022702036, 0.022847634, 0.022830717, 0.022888679, 0.028322148),
intervention = c(0.00, 0.00, 0.002269399, 0.002327916,0.002392257, 0.002286316, 0.002268567, 0.002249154, 0.005036885, 0.014024627))
#Reshape TBCHAMP data to long format
data_longTBCHAMP <- pivot_longer(dataTBCHAMP, cols = c(control, intervention), names_to = "group", values_to = "incidence")
data_longTBCHAMP$group <- factor(data_longTBCHAMP$group)
Fitting the GAM and GLM models - 4 curves x 3 models
#Fit GLM LINEAR TBCHAMP
glm_controlTBCHAMP <- glm(control ~ time, data = dataTBCHAMP, family = gaussian())
glm_interventionTBCHAMP <- glm(intervention ~ time, data = dataTBCHAMP, family = gaussian())
#Fit GLM LOGISTIC TBCHAMP (proportion response; R warns about non-integer successes, but the fitted curve is usable)
glm_control_logTBCHAMP <- glm(control ~ time, data = dataTBCHAMP, family = binomial(link = "logit"))
glm_intervention_logTBCHAMP <- glm(intervention ~ time, data = dataTBCHAMP, family = binomial(link = "logit"))
#Fit GAM TBCHAMP
gam_modelTBCHAMP <- gam(incidence ~ s(time, by = group) + group, data = data_longTBCHAMP, method = "REML")
Generating predictions - TBCHAMP
#Generating predictions TBCHAMP
pred_dataTBCHAMP <- expand.grid(time = seq(0, max(dataTBCHAMP$time), length.out = 100),
group = levels(data_longTBCHAMP$group))
pred_dataTBCHAMP$glm_predTBCHAMP <- ifelse(pred_dataTBCHAMP$group == "control",
predict(glm_controlTBCHAMP, newdata = pred_dataTBCHAMP, type = "response"),
predict(glm_interventionTBCHAMP, newdata = pred_dataTBCHAMP, type = "response"))
pred_dataTBCHAMP$glm_pred_logTBCHAMP <- ifelse(pred_dataTBCHAMP$group == "control",
predict(glm_control_logTBCHAMP, newdata = pred_dataTBCHAMP, type = "response"),
predict(glm_intervention_logTBCHAMP, newdata = pred_dataTBCHAMP, type = "response"))
pred_dataTBCHAMP$gam_predTBCHAMP <- predict(gam_modelTBCHAMP, newdata = pred_dataTBCHAMP, type = "response")
#Calculate RMSE for each model
calculate_rmse <- function(actual, predicted) {
sqrt(mean((actual - predicted)^2))
}
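# Quick sanity check of the helper on toy numbers (not study data): a perfect
# fit gives RMSE 0, and a constant offset of 0.5 gives RMSE exactly 0.5
stopifnot(calculate_rmse(c(1, 2, 3), c(1, 2, 3)) == 0,
          isTRUE(all.equal(calculate_rmse(c(1, 2, 3), c(1.5, 2.5, 3.5)), 0.5)))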
#RMSE for GLM Linear
rmse_glm_controlTBCHAMP <- calculate_rmse(dataTBCHAMP$control, predict(glm_controlTBCHAMP, newdata = dataTBCHAMP))
rmse_glm_interventionTBCHAMP <- calculate_rmse(dataTBCHAMP$intervention, predict(glm_interventionTBCHAMP, newdata = dataTBCHAMP))
rmse_glm_linearTBCHAMP <- mean(c(rmse_glm_controlTBCHAMP, rmse_glm_interventionTBCHAMP))
#RMSE for GLM Logistic
rmse_glm_control_logTBCHAMP <- calculate_rmse(dataTBCHAMP$control, predict(glm_control_logTBCHAMP, newdata = dataTBCHAMP, type = "response"))
rmse_glm_intervention_logTBCHAMP <- calculate_rmse(dataTBCHAMP$intervention, predict(glm_intervention_logTBCHAMP, newdata = dataTBCHAMP, type = "response"))
rmse_glm_logisticTBCHAMP <- mean(c(rmse_glm_control_logTBCHAMP, rmse_glm_intervention_logTBCHAMP))
#RMSE for GAM
gam_predictionsTBCHAMP <- predict(gam_modelTBCHAMP, newdata = data_longTBCHAMP, type = "response")
rmse_gamTBCHAMP <- calculate_rmse(data_longTBCHAMP$incidence, gam_predictionsTBCHAMP)
#Print RMSE values
cat("RMSE for GLM Linear Model (Control + Intervention):", rmse_glm_linearTBCHAMP, "\n") #0.0035
cat("RMSE for GLM Logistic Model (Control + Intervention):", rmse_glm_logisticTBCHAMP, "\n") #0.004
cat("RMSE for GAM Model:", rmse_gamTBCHAMP, "\n") #0.000000013 TBCHAMP predictions plots (Figure 3)
# Create GLM plot (left - shows y-axis only)
glm_plotTBCHAMP <- ggplot(data_longTBCHAMP, aes(x = time, y = incidence * 100, color = group)) +
geom_point() +
geom_line(data = pred_dataTBCHAMP, aes(y = glm_predTBCHAMP * 100)) +
labs(title = "GLM Regression - Linear",
x = "", y = "Cumulative Incidence (%)") +
scale_color_manual(values = c("control" = "red", "intervention" = "blue")) +
theme_minimal() +
ylim(0, 6) +
theme(legend.position = "none")
# GAM plot (middle - no axes)
gam_plotTBCHAMP <- ggplot(data_longTBCHAMP, aes(x = time, y = incidence * 100, color = group)) +
geom_point() +
geom_line(data = pred_dataTBCHAMP, aes(y = gam_predTBCHAMP * 100)) +
labs(title = "GAM Regression",
x = "", y = "") +
scale_color_manual(values = c("control" = "red", "intervention" = "blue")) +
theme_minimal() +
ylim(0, 6) +
theme(legend.position = "none",
axis.text.y = element_blank(),
axis.ticks.y = element_blank())
# GLM plot - logistic (right - no axes)
glm_plot_logTBCHAMP <- ggplot(data_longTBCHAMP, aes(x = time, y = incidence * 100, color = group)) +
geom_point() +
geom_line(data = pred_dataTBCHAMP, aes(y = glm_pred_logTBCHAMP * 100)) +
labs(title = "GLM Regression - Logistic",
x = "", y = "") +
scale_color_manual(values = c("control" = "red", "intervention" = "blue")) +
theme_minimal() +
ylim(0, 6) +
theme(legend.position = "none",
axis.text.y = element_blank(),
axis.ticks.y = element_blank())
# Combine plots with shared x-axis label
combined_plotTBCHAMP <- glm_plotTBCHAMP + gam_plotTBCHAMP + glm_plot_logTBCHAMP +
plot_layout(guides = "collect") &
theme(legend.position = "bottom",
legend.text = element_text(size = 14))
# Alias that appears to be left over from the VQUIN version of this script
# (no overall x-axis label is actually added here)
combined_plotVQUIN <- combined_plotTBCHAMP
# Display the combined plot
print(combined_plotTBCHAMP)
#Then, print model summaries
Goodness of fit of the models
TBCHAMP GOF
#Deviance-based pseudo R-squared for the linear GLMs (for a Gaussian GLM this is the ordinary R-squared rather than McFadden's)
pseudo_r2_controlTBCHAMP <- 1 - (glm_controlTBCHAMP$deviance / glm_controlTBCHAMP$null.deviance)
pseudo_r2_interventionTBCHAMP <- 1 - (glm_interventionTBCHAMP$deviance / glm_interventionTBCHAMP$null.deviance)
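# For a Gaussian GLM the deviance is the residual sum of squares and the null
# deviance the total sum of squares, so the quantity above is the ordinary
# R-squared rather than McFadden's (which uses log-likelihoods). Quick check
# against lm() (assumes dataTBCHAMP is in scope):
stopifnot(isTRUE(all.equal(pseudo_r2_controlTBCHAMP,
                           summary(lm(control ~ time, data = dataTBCHAMP))$r.squared)))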
# Print McFadden's Pseudo R-squared for Linear GLMs
cat("McFadden's Pseudo R-squared for Linear GLM (Control):", pseudo_r2_controlTBCHAMP, "\n")
cat("McFadden's Pseudo R-squared for Linear GLM (Intervention):", pseudo_r2_interventionTBCHAMP, "\n")
# Pseudo R-squared for Logistic GLMs
pseudo_r2_control_logTBCHAMP <- 1 - (glm_control_logTBCHAMP$deviance / glm_control_logTBCHAMP$null.deviance)
pseudo_r2_intervention_logTBCHAMP <- 1 - (glm_intervention_logTBCHAMP$deviance / glm_intervention_logTBCHAMP$null.deviance)
# Print Pseudo R-squared for Logistic GLMs
cat("Pseudo R-squared for Logistic GLM (Control):", pseudo_r2_control_logTBCHAMP, "\n")
cat("Pseudo R-squared for Logistic GLM (Intervention):", pseudo_r2_intervention_logTBCHAMP, "\n")
#AIC for the GAM model
aic_valueTBCHAMP <- AIC(gam_modelTBCHAMP)
print(aic_valueTBCHAMP)
#McFadden's Pseudo R-squared for Linear GLM (Control): 0.8
#McFadden's Pseudo R-squared for Linear GLM (Intervention): 0.54
#Pseudo R-squared for Logistic GLM (Control): 0.53
#Pseudo R-squared for Logistic GLM (Intervention): 0.74
#aic_valueTBCHAMP: -628.7966
GAM Cumulative Incidence Curves with 95% UI (Figure 4)
Using the final model choice: k = 5 with GCV smoothing
library(mgcv)
library(ggplot2)
library(dplyr)
# Fixed function to add (0,0) point with matching column structure
add_zero_point <- function(data) {
# Create a zero point with the same structure as the input data
zero_point <- data[1, ] # Take first row as template
zero_point[1, ] <- NA # Clear all values
# Set the specific values we want
zero_point$time <- 0
zero_point$incidence <- 0
zero_point$group <- unique(data$group)[1] # Take first unique group value
# Add weight column if it doesn't exist
if(!"weight" %in% names(data)) {
data$weight <- 1 # Default weight for existing data
zero_point$weight <- 100 # High weight for zero point
} else {
zero_point$weight <- 100
}
# Fill any remaining NA values with appropriate defaults
for(col in names(zero_point)) {
if(is.na(zero_point[[col]]) && col != "time" && col != "incidence" && col != "group" && col != "weight") {
if(is.numeric(data[[col]])) {
zero_point[[col]] <- 0
} else if(is.character(data[[col]]) || is.factor(data[[col]])) {
zero_point[[col]] <- data[[col]][1]
}
}
}
# Combine the data
rbind(data, zero_point)
}
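# Minimal demonstration of add_zero_point() on a toy frame (hypothetical
# values, shown only to illustrate the appended row and the weight column it
# creates):
toy <- data.frame(time = c(6, 12), incidence = c(0.01, 0.02), group = "control")
print(add_zero_point(toy))
# -> three rows: the originals with weight 1, plus (time = 0, incidence = 0,
# group = "control") carrying weight 100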
# Prepare data with zero points (KEEP THIS DATA!)
control_data_TBCHAMP <- data_longTBCHAMP %>%
filter(group == "control") %>%
add_zero_point()
intervention_data_TBCHAMP <- data_longTBCHAMP %>%
filter(group == "intervention") %>%
add_zero_point()
# Fit GAM models with weights to the data that includes zero points
control_gam_k5_GCV_TBCHAMP <- gam(
incidence ~ s(time, k = 5),
data = control_data_TBCHAMP,
weights = control_data_TBCHAMP$weight, # Use the weight column
method = "GCV.Cp"
)
intervention_gam_k5_GCV_TBCHAMP <- gam(
incidence ~ s(time, k = 5),
data = intervention_data_TBCHAMP,
weights = intervention_data_TBCHAMP$weight, # Use the weight column
method = "GCV.Cp"
)
# Generate predictions for all time points
original_times <- sort(unique(data_longTBCHAMP$time))
new_data <- data.frame(time = original_times)
# Function to get predictions with CIs (MODIFIED TO CONVERT TO PERCENTAGES)
get_predictions <- function(model, group_name) {
pred <- predict(model, newdata = new_data, se.fit = TRUE)
data.frame(
time = original_times,
group = group_name,
incidence = as.numeric(pred$fit) * 100, # Convert to percentage
se = as.numeric(pred$se.fit) * 100, # Convert SE to percentage
stringsAsFactors = FALSE
) %>%
mutate(
ci_lower = pmax(0, incidence - 1.96 * se),
ci_upper = incidence + 1.96 * se,
ci_width = ci_upper - ci_lower
)
}
# Get predictions for both groups
control_predictions <- get_predictions(control_gam_k5_GCV_TBCHAMP, "control")
intervention_predictions <- get_predictions(intervention_gam_k5_GCV_TBCHAMP, "intervention")
# Combine predictions
gam_predictions_k5_TBCHAMP <- rbind(control_predictions, intervention_predictions)
# Verify zero constraint
print("=== ZERO CONSTRAINT CHECK ===")
print("Zero time predictions:")
print(gam_predictions_k5_TBCHAMP %>% filter(time == 0))
# Display CI width comparison
control_ci_width <- mean(control_predictions$ci_width)
intervention_ci_width <- mean(intervention_predictions$ci_width)
print("=== CI WIDTH COMPARISON ===")
print(paste("Control mean CI width:", format(control_ci_width, scientific = TRUE)))
print(paste("Intervention mean CI width:", format(intervention_ci_width, scientific = TRUE)))
print(paste("Ratio (intervention/control):", round(intervention_ci_width / control_ci_width, 1)))
# Create plot (MODIFIED FOR PERCENTAGES)
p_tbchamp <- ggplot(gam_predictions_k5_TBCHAMP, aes(x = time, y = incidence, color = group, fill = group)) +
geom_ribbon(aes(ymin = ci_lower, ymax = ci_upper), alpha = 0.4, color = NA) +
geom_line(size = 1.2) +
scale_color_manual(values = c("control" = "red", "intervention" = "blue"),
labels = c("Control group", "Intervention group")) +
scale_fill_manual(values = c("control" = "red", "intervention" = "blue"),
labels = c("Control group", "Intervention group")) +
labs(
x = "Time since enrolment (weeks)",
y = "Cumulative Incidence (%)", # Updated label
title = "TBCHAMP GAM Model (k=5, GCV) - Starting at 0"
) +
theme_minimal() +
theme(legend.title = element_blank()) +
coord_cartesian(ylim = c(0, 5)) # Updated to percentage scale (0-5%)
print(p_tbchamp)
# Week 26 predictions
week_26_data <- data.frame(time = 26)
# Get week 26 predictions for both groups
control_pred_26 <- predict(control_gam_k5_GCV_TBCHAMP, newdata = week_26_data, se.fit = TRUE)
intervention_pred_26 <- predict(intervention_gam_k5_GCV_TBCHAMP, newdata = week_26_data, se.fit = TRUE)
# Create week 26 results table
week_26_results <- data.frame(
Group = c("Control", "Intervention"),
Week = 26,
Predicted_Incidence = c(as.numeric(control_pred_26$fit), as.numeric(intervention_pred_26$fit)),
Standard_Error = c(as.numeric(control_pred_26$se.fit), as.numeric(intervention_pred_26$se.fit))
) %>%
mutate(
CI_Lower_95 = pmax(0, Predicted_Incidence - 1.96 * Standard_Error),
CI_Upper_95 = Predicted_Incidence + 1.96 * Standard_Error,
CI_Width = CI_Upper_95 - CI_Lower_95
)
print("=== WEEK 26 GAM PREDICTIONS ===")
print(week_26_results)
Curves with delay scenarios (Figure 5)
library(mgcv)
# —————————————————————————————————————————————————————
# A) Fit‐based cumulative‐incidence at t = 0…54, with p0‐spike
time0 <- 0:54
p0 <- 32/1120
back_wk <- 26
# Use the k=5 GCV GAM models
Fc0 <- predict(control_gam_k5_GCV_TBCHAMP,
newdata = data.frame(time = time0))
Fi0 <- predict(intervention_gam_k5_GCV_TBCHAMP,
newdata = data.frame(time = time0))
Fc0 <- p0 + (1 - p0)*Fc0
Fi0 <- p0 + (1 - p0)*Fi0
# discrete hazards: h_t = (F(t) - F(t-1)) / (1 - F(t-1)), with F taken as 0
# before baseline; head(..., -1) keeps the denominators aligned with F(t-1)
h_ctrl <- diff(c(0, Fc0)) / (1 - head(c(0, Fc0), -1))
h_int <- diff(c(0, Fi0)) / (1 - head(c(0, Fi0), -1))
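# Sanity check of the discrete-hazard identity: rebuilding the cumulative
# curve as F(t) = 1 - prod_{s <= t} (1 - h_s) should recover Fc0 to
# floating-point tolerance
stopifnot(isTRUE(all.equal(unname(1 - cumprod(1 - h_ctrl)), as.numeric(Fc0))))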
# —————————————————————————————————————————————————————
# B) "Pre‐screen" ramp length and nominal post‐screen delays
back_wk <- 26 # show 6 months before screen
nominal_delays <- c(-26, -20, -10, -5, 5, 10, 20) # weeks of *additional* delay after screen
# —————————————————————————————————————————————————————
# C) build a time axis from –back_wk … +54
time_ext <- seq(-back_wk, 54, by = 1)
# —————————————————————————————————————————————————————
# D) single‐curve builder: √ ramp on [–back_wk,0], then hazard‐switch at d
build_curve <- function(d) {
n <- length(time_ext)
Fcum <- numeric(n)
# find index of screen‐time t0 = 0
i0 <- which(time_ext == 0)
# 1) PRE‐SCREEN: √ ramp from (–back_wk → 0) up to p0
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0 * sqrt( s_up / back_wk )
# 2) AT SCREEN
Fcum[i0] <- p0
Sprev <- 1 - p0
# 3) POST‐SCREEN: control hazard until t0 ≤ d, then intervention hazard
for(i in (i0+1):n) {
t0 <- time_ext[i]
if (d > 0 && t0 <= d) {
h_t <- h_ctrl[t0 + 1]
} else {
idx <- if (d > 0) (t0 - d + 1) else (t0 + 1)
h_t <- h_int[ min(idx, length(h_int)) ]
}
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
Fcum
}
# —————————————————————————————————————————————————————
# E) build all the curves
# 1) Control & Instant curves
ctrl_ext <- build_curve( Inf) # pure control
inst_ext <- build_curve(-Inf) # pure intervention
# and the √ ramp function for pre‐screen:
ramp <- p0 * sqrt( (time_ext + back_wk) / back_wk )
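# Endpoint check of the pre-screen ramp: it rises from 0 at t = -back_wk to
# exactly p0 at the screen (t = 0)
stopifnot(isTRUE(all.equal(unname(ramp[time_ext == -back_wk]), 0)),
          isTRUE(all.equal(unname(ramp[time_ext == 0]), p0)))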
# 2) separate positive and negative delays
pos_delays <- nominal_delays[nominal_delays >= 0]
neg_delays <- nominal_delays[nominal_delays < 0]
# 3) positive delays:
pos_mat <- if(length(pos_delays) > 0) sapply(pos_delays, function(d) build_curve(d)) else NULL
# 4) negative delays: ramp until delay point, then copy instant TPT curve
neg_mat <- if(length(neg_delays) > 0) {
sapply(neg_delays, function(d) {
n <- length(time_ext)
Fcum <- numeric(n)
# find index of delay point d
id_match <- which(time_ext == d)
if(length(id_match) == 0) {
# If exact match not found, find closest index
id <- which.min(abs(time_ext - d))
} else {
id <- id_match[1]
}
i0 <- which(time_ext == 0) # screen time index
# Before delay point: follow the ramp
if(id > 1) {
s_up <- time_ext[1:(id-1)] + back_wk
s_up[s_up < 0] <- 0 # ensure non-negative
Fcum[1:(id-1)] <- p0 * sqrt( s_up / back_wk )
}
# At delay point: set starting value from ramp
s_delay <- max(0, d + back_wk)
start_value <- p0 * sqrt( s_delay / back_wk )
Fcum[id] <- start_value
# After delay point: copy the instant TPT curve pattern
if(id < n) {
# Find the baseline instant TPT value at time 0 (our reference point)
inst_at_zero <- inst_ext[i0]
for(i in (id+1):n) {
# Calculate how many steps from delay point
steps_from_delay <- i - id
# Find corresponding point in instant TPT curve (relative to time 0)
inst_index <- i0 + steps_from_delay
if(inst_index <= length(inst_ext) && inst_index > 0) {
# Get the instant TPT value at this point
inst_value <- inst_ext[inst_index]
# Calculate the change from instant TPT baseline
inst_change <- inst_value - inst_at_zero
# Apply the same change to our starting value
Fcum[i] <- start_value + inst_change
} else if(inst_index > length(inst_ext)) {
# If we've gone beyond available instant TPT data,
# use the last available change and keep it constant
last_inst_value <- inst_ext[length(inst_ext)]
last_change <- last_inst_value - inst_at_zero
Fcum[i] <- start_value + last_change
} else {
# If we somehow get a negative index, keep the start value
Fcum[i] <- start_value
}
}
}
Fcum
})
} else NULL
# Combine all delay matrices
if(!is.null(neg_mat) && !is.null(pos_mat)) {
delay_mat <- cbind(neg_mat, pos_mat)
colnames(delay_mat) <- c(
paste0(neg_delays, " wk delay"),
paste0(pos_delays, " wk delay"))
} else if(!is.null(neg_mat)) {
delay_mat <- neg_mat
colnames(delay_mat) <- paste0(neg_delays, " wk delay")
} else if(!is.null(pos_mat)) {
delay_mat <- pos_mat
colnames(delay_mat) <- paste0(pos_delays, " wk delay")
} else {
delay_mat <- NULL
}
# —————————————————————————————————————————————————————
# F') compressed x‐axis plot, up to +30 wk, now including the negative delay curves
# how many "plot units" to devote to the negative side?
neg_width <- 30
# remap time_ext → x_plot:
x_plot <- ifelse(
time_ext < 0,
neg_width * (time_ext + back_wk) / back_wk, # −back_wk…0 → 0…30
neg_width + pmin(time_ext, 54) # 0…54 → 30…84 (capped at t = 54)
)
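# Endpoint check of the remap: t = -back_wk maps to 0, t = 0 maps to
# neg_width, and t = 26 maps to neg_width + 26
stopifnot(isTRUE(all.equal(x_plot[time_ext == -back_wk], 0)),
          isTRUE(all.equal(x_plot[time_ext == 0], neg_width)),
          isTRUE(all.equal(x_plot[time_ext == 26], neg_width + 26)))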
# bind together Control, Instant, and only your chosen delays:
if(!is.null(delay_mat)) {
all_mat <- cbind(
Control = ctrl_ext,
`Instant TPT` = inst_ext,
delay_mat
)
} else {
all_mat <- cbind(
Control = ctrl_ext,
`Instant TPT` = inst_ext
)
}
# subset to t ≤ 26 wk
keep <- which(time_ext <= 26)
x_sub <- x_plot[keep]
mat_sub <- all_mat[keep, , drop=FALSE]
# CORRECTED: Convert to percentages in the data, not during plotting
mat_sub_pct <- mat_sub * 100
# MODIFIED: Create gradient colors from better (green) to worse (red) delays
n_delay_cols <- if(!is.null(delay_mat)) ncol(delay_mat) else 0
if(n_delay_cols > 0) {
# Create a gradient from green (better/earlier) to red (worse/later)
delay_colors <- colorRampPalette(c("#00AA00", "#FFAA00", "#FF0000"))(n_delay_cols)
cols <- c("red", "blue", delay_colors)
} else {
cols <- c("red", "blue")
}
ltys <- c(1, 1, rep(2, n_delay_cols))
yr <- range(mat_sub_pct)
# CORRECTED: Use original matplot approach with percentage data
matplot(
x_sub, mat_sub_pct,
type="l", col=cols, lty=ltys, lwd=2,
xaxt="n", ylim=yr,
xlab="Weeks since randomisation",
ylab="Cumulative incidence of TB (%)",
main="TBCHAMP Delayed-TPT scenarios",
cex.axis = 1.0, # Size 12 for axis numbers
cex.lab = 1.0 # Size 12 for axis labels
)
# custom x-axis ticks:
neg_ticks <- seq(-back_wk, 0, by=2)
neg_at <- neg_width * (neg_ticks + back_wk) / back_wk
pos_ticks <- seq(0, 26, by=2)
pos_at <- neg_width + pos_ticks
axis(1,
at = c(neg_at, pos_at),
labels = c(neg_ticks, pos_ticks),
cex.axis = 1.0) # Size 12 for axis tick labels
# vertical line at t=0
abline(v=neg_width, lty=3, col="gray50")
# legend with larger text
legend("topleft",
legend = colnames(all_mat),
col = cols,
lty = ltys,
lwd = 2,
bty = "n",
ncol = 1,
cex = 1.2, # Larger legend text
y.intersp = 0.8,
x.intersp = 0.5,
inset = c(0.02, 0.02))
# --------------------------------------------------------
# G) Print cumulative incidence at 26 weeks for all curves
# --------------------------------------------------------
t_target <- 26
idx_26 <- which(time_ext == t_target)
if (length(idx_26) > 0) {
ci_26 <- all_mat[idx_26, ] * 100 # convert to %
cat("Cumulative incidence at", t_target, "weeks (%):\n")
print(round(ci_26, 2))
} else {
warning("No exact match for 26 weeks in time_ext")
}
Appendix A1 Table of all 2-week delay scenario statistics with UIs
library(mgcv)
library(ggplot2)
library(dplyr)
library(parallel)
# ——————————————————————————————————————————————————————
# 0) Input Parameters - MODIFY THESE WITH YOUR ACTUAL DATA
# ——————————————————————————————————————————————————————
# Sample sizes
n_control <- 465 # REPLACE with your actual control sample size
n_intervention <- 451 # REPLACE with your actual intervention sample size
# Person-years of follow-up
py_control <- 418 # REPLACE with your actual control person-years
py_intervention <- 406 # REPLACE with your actual intervention person-years
# Monte Carlo parameters
n_simulations <- 5000 # Number of bootstrap simulations
set.seed(123) # For reproducibility
cat("Analysis Parameters:\n")
cat("Control sample size:", n_control, "\n")
cat("Intervention sample size:", n_intervention, "\n")
cat("Control person-years:", py_control, "\n")
cat("Intervention person-years:", py_intervention, "\n")
cat("Number of simulations:", n_simulations, "\n\n")
# ——————————————————————————————————————————————————————
# 1) Monte Carlo Simulation Function
# ——————————————————————————————————————————————————————
simulate_gam_predictions <- function(gam_model, newdata, n_sim = n_simulations) {
# Get mean predictions and covariance matrix
pred_mean <- predict(gam_model, newdata = newdata, type = "response")
# Get the covariance matrix of the GAM coefficients
Vp <- vcov(gam_model, unconditional = TRUE)
# Get the linear predictor matrix
Xp <- predict(gam_model, newdata = newdata, type = "lpmatrix")
# Simulate from multivariate normal distribution
# Generate random coefficients
coef_sims <- MASS::mvrnorm(n_sim, coef(gam_model), Vp)
# Calculate predictions for each simulation
pred_sims <- tcrossprod(Xp, coef_sims)
return(pred_sims)
}
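# Quick unbiasedness check (a sketch assuming the k = 5 GCV GAMs fitted above
# are in scope; with an identity link the lpmatrix draws are already on the
# response scale, so the per-timepoint mean of the draws should sit close to
# the point prediction, up to Monte Carlo error):
check_sims <- simulate_gam_predictions(control_gam_k5_GCV_TBCHAMP,
                                       data.frame(time = 0:26), n_sim = 200)
cat("Max |mean(draws) - point prediction|:",
    max(abs(rowMeans(check_sims) -
              predict(control_gam_k5_GCV_TBCHAMP, newdata = data.frame(time = 0:26)))),
    "\n")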
# ——————————————————————————————————————————————————————
# 2) Bootstrap Scenario Builder (FIXED p0 inconsistency)
# ——————————————————————————————————————————————————————
bootstrap_scenario <- function(sim_idx, control_sims, intervention_sims, delay_weeks) {
# Extract simulated predictions for this iteration
Fc0_sim <- control_sims[, sim_idx]
Fi0_sim <- intervention_sims[, sim_idx]
# Apply p0 transformation with a beta draw for p0 to consider uncertainty in p0
p0_draw <- rbeta(1, 32 + 0.5, 1120 - 32 + 0.5) # TBCHAMP p0 values 32 and 1120 (Jeffreys posterior draw)
Fc0_sim <- p0_draw + (1 - p0_draw) * Fc0_sim
Fi0_sim <- p0_draw + (1 - p0_draw) * Fi0_sim
#enforce monotonicity (Simulated cumulative curves can wiggle; discrete hazards can become negative - preventing this)
Fc0_sim <- pmin(pmax(Fc0_sim, 0), 0.999)
Fi0_sim <- pmin(pmax(Fi0_sim, 0), 0.999)
Fc0_sim <- cummax(Fc0_sim); Fi0_sim <- cummax(Fi0_sim)
# Calculate hazards for this simulation
h_ctrl_sim <- diff(c(0, Fc0_sim)) / (1 - head(c(0, Fc0_sim), -1))
h_int_sim <- diff(c(0, Fi0_sim)) / (1 - head(c(0, Fi0_sim), -1))
# Build curve for this delay scenario
n <- length(time_ext)
Fcum <- numeric(n)
# Find indices
i0 <- which(time_ext == 0)
back_wk <- 26
# Special handling for pure scenarios
if(delay_weeks == Inf) {
# Pure control - FIXED: use p0_draw instead of p0
# Pre-screen ramp
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk) # FIXED
}
Fcum[i0] <- p0_draw # FIXED
Sprev <- 1 - p0_draw # FIXED
for(i in (i0+1):n) {
t0 <- time_ext[i]
h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else if(delay_weeks == -Inf) {
# Pure intervention (Instant TPT) - FIXED: use p0_draw instead of p0
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk) # FIXED
}
Fcum[i0] <- p0_draw # FIXED
Sprev <- 1 - p0_draw # FIXED
for(i in (i0+1):n) {
t0 <- time_ext[i]
h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else if(delay_weeks < 0) {
# CORRECTED: Negative delays (pre-screen intervention) - FIXED: use p0_draw instead of p0
# This means intervention starts BEFORE time 0, providing earlier protection
# Find delay point index
id_match <- which(time_ext == delay_weeks)
if(length(id_match) == 0) {
id <- which.min(abs(time_ext - delay_weeks))
} else {
id <- id_match[1]
}
# Before delay point: ramp (no intervention yet)
if(id > 1) {
s_up <- time_ext[1:(id-1)] + back_wk
s_up[s_up < 0] <- 0
Fcum[1:(id-1)] <- p0_draw * sqrt(s_up / back_wk) # FIXED
}
# At delay point: start intervention
s_delay <- max(0, delay_weeks + back_wk)
Fcum[id] <- p0_draw * sqrt(s_delay / back_wk) # FIXED
Sprev <- 1 - Fcum[id]
# CORRECTED: After delay point, use intervention hazards consistently
# Map the post-delay timeline to intervention hazards starting from time 0
for(i in (id+1):n) {
current_time <- time_ext[i]
# Calculate how long intervention has been active
intervention_duration <- current_time - delay_weeks
if(intervention_duration >= 0 && intervention_duration <= 54) {
# Use intervention hazard corresponding to this duration
h_idx <- min(intervention_duration + 1, length(h_int_sim))
h_t <- h_int_sim[max(1, h_idx)]
} else if(intervention_duration > 54) {
# Use last available intervention hazard
h_t <- h_int_sim[length(h_int_sim)]
} else {
# This shouldn't happen with negative delays, but use first intervention hazard as fallback
h_t <- h_int_sim[1]
}
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else {
# Positive delays (post-screen delays) - FIXED: use p0_draw instead of p0
# Pre-screen ramp
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk) # FIXED
}
# At screen
Fcum[i0] <- p0_draw # FIXED
Sprev <- 1 - p0_draw # FIXED
# Post-screen: control until delay point, then intervention
for(i in (i0+1):n) {
t0 <- time_ext[i]
if(t0 <= delay_weeks) {
# Use control hazard until delay point
h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
} else {
# Use intervention hazard after delay point
# Map to intervention timeline: intervention has been active for (t0 - delay_weeks) weeks
intervention_duration <- t0 - delay_weeks
h_idx <- min(intervention_duration + 1, length(h_int_sim))
h_t <- h_int_sim[max(1, h_idx)]
}
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
}
# Return final value at week 26
week_26_idx <- which(time_ext == 26)
return(Fcum[week_26_idx])
}
# ——————————————————————————————————————————————————————
# 3) Run Monte Carlo Simulations - Every 2 Week Delays
# ——————————————————————————————————————————————————————
cat("Running Monte Carlo simulations for 2-week delay intervals...\n")
# Reset seed right before simulations for exact reproducibility
set.seed(42)
# Set up time vectors
time0 <- 0:26
back_wk <- 26
time_ext <- seq(-back_wk, 26, by = 1)
# Create 2-week delay intervals from -26 to +26 weeks
delays_2wk <- seq(-26, 26, by = 2)
# Generate simulated predictions for both models
cat("Generating GAM simulations...\n")
control_sims <- simulate_gam_predictions(control_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
intervention_sims <- simulate_gam_predictions(intervention_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
cat("Generated", ncol(control_sims), "control simulations and", ncol(intervention_sims), "intervention simulations\n")
# Define all delay scenarios (including baseline at week 0 = no delay)
all_delays <- setNames(delays_2wk, paste0(delays_2wk, " wk"))
# Run simulations for all scenarios
cat("Running scenario simulations...\n")
simulation_results <- list()
for(scenario_name in names(all_delays)) {
cat("Processing scenario:", scenario_name, "\n")
delay_val <- all_delays[scenario_name]
# Run simulations for this scenario
scenario_sims <- sapply(1:n_simulations, function(i) {
bootstrap_scenario(i, control_sims, intervention_sims, delay_val)
})
simulation_results[[scenario_name]] <- scenario_sims
}
cat("Completed all simulations!\n\n")
# Check if "0 wk" exists and compare a few values
print(simulation_results[["0 wk"]][1:5]) # First 5 simulation results
print(mean(simulation_results[["0 wk"]])) # Mean for "0 wk" scenario
# ——————————————————————————————————————————————————————
# 4) Calculate Key Metrics with Week 0 as Baseline
# ——————————————————————————————————————————————————————
# Get baseline (Week 0 = no delay) for relative calculations
baseline_sims <- simulation_results[["0 wk"]]
baseline_mean <- mean(baseline_sims)
# Calculate metrics for each scenario
results_list <- list()
for(scenario_name in names(simulation_results)) {
scenario_sims <- simulation_results[[scenario_name]]
delay_value <- as.numeric(sub(" wk", "", scenario_name))
# Basic statistics
mean_incidence <- mean(scenario_sims)
median_incidence <- median(scenario_sims)
ci_lower <- quantile(scenario_sims, 0.025)
ci_upper <- quantile(scenario_sims, 0.975)
# Use intervention PY and sample size for all scenarios
py <- py_intervention
sample_size <- n_intervention
# 1. Incidence rate per 100 person-yrs (95% CI)
rate_per_100py <- (mean_incidence * sample_size / py) * 100
rate_ci_lower <- (ci_lower * sample_size / py) * 100
rate_ci_upper <- (ci_upper * sample_size / py) * 100
# 2. Mean Expected Cases per 100,000 (95% CI)
cases_per_100k <- mean_incidence * 100000
cases_per_100k_ci_lower <- ci_lower * 100000
cases_per_100k_ci_upper <- ci_upper * 100000
# 3. Excess calculations (compared to Week 0)
if(delay_value != 0) {
# Excess incidence simulations
excess_sims <- scenario_sims - baseline_sims
excess_mean <- mean(excess_sims)
excess_ci_lower <- quantile(excess_sims, 0.025)
excess_ci_upper <- quantile(excess_sims, 0.975)
# Excess rate per 100 person-yrs
excess_rate_per_100py <- (excess_mean * sample_size / py) * 100
excess_rate_ci_lower <- (excess_ci_lower * sample_size / py) * 100
excess_rate_ci_upper <- (excess_ci_upper * sample_size / py) * 100
# Excess cases per 100,000
excess_cases_per_100k <- excess_mean * 100000
excess_cases_per_100k_ci_lower <- excess_ci_lower * 100000
excess_cases_per_100k_ci_upper <- excess_ci_upper * 100000
} else {
# Week 0 is the baseline, so excess = 0
excess_rate_per_100py <- 0
excess_rate_ci_lower <- 0
excess_rate_ci_upper <- 0
excess_cases_per_100k <- 0
excess_cases_per_100k_ci_lower <- 0
excess_cases_per_100k_ci_upper <- 0
}
# 4. Relative Risk
relative_risk <- mean_incidence / baseline_mean
# Store results
results_list[[scenario_name]] <- list(
delay_weeks = delay_value,
incidence_rate_per_100py = rate_per_100py,
rate_ci_lower = rate_ci_lower,
rate_ci_upper = rate_ci_upper,
excess_rate_per_100py = excess_rate_per_100py,
excess_rate_ci_lower = excess_rate_ci_lower,
excess_rate_ci_upper = excess_rate_ci_upper,
cases_per_100k = cases_per_100k,
cases_per_100k_ci_lower = cases_per_100k_ci_lower,
cases_per_100k_ci_upper = cases_per_100k_ci_upper,
excess_cases_per_100k = excess_cases_per_100k,
excess_cases_per_100k_ci_lower = excess_cases_per_100k_ci_lower,
excess_cases_per_100k_ci_upper = excess_cases_per_100k_ci_upper,
relative_risk = relative_risk
)
}
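# Worked example of the rate arithmetic above (hypothetical numbers): a mean
# 26-week cumulative incidence of 0.02 implies 0.02 * 451 = 9.02 expected
# cases in the cohort; divided by 406 person-years and scaled, that is
# (0.02 * 451 / 406) * 100 = 2.22 cases per 100 person-years, and
# 0.02 * 100000 = 2000 expected cases per 100,000 contacts.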
# ——————————————————————————————————————————————————————
# 5) Create Export Table for Word
# ——————————————————————————————————————————————————————
# Build the export table
export_table <- data.frame()
for(scenario_name in names(results_list)) {
r <- results_list[[scenario_name]]
# Format all confidence intervals
incidence_rate_ci <- sprintf("%.2f (%.2f-%.2f)",
r$incidence_rate_per_100py, r$rate_ci_lower, r$rate_ci_upper)
if(r$delay_weeks == 0) {
excess_rate_ci <- "BASELINE"
excess_cases_ci <- "BASELINE"
} else {
excess_rate_ci <- sprintf("%+.2f (%+.2f to %+.2f)",
r$excess_rate_per_100py, r$excess_rate_ci_lower, r$excess_rate_ci_upper)
excess_cases_ci <- sprintf("%+.0f (%+.0f to %+.0f)",
r$excess_cases_per_100k, r$excess_cases_per_100k_ci_lower, r$excess_cases_per_100k_ci_upper)
}
cases_per_100k_ci <- sprintf("%.0f (%.0f-%.0f)",
r$cases_per_100k, r$cases_per_100k_ci_lower, r$cases_per_100k_ci_upper)
relative_risk_formatted <- sprintf("%.2f", r$relative_risk)
# Add row to export table
export_table <- rbind(export_table, data.frame(
`Delay (weeks)` = r$delay_weeks,
`Incidence Rate per 100 Person-Yrs (95% CI)` = incidence_rate_ci,
`Excess Incidence Rate per 100 Person-Yrs (95% CI)` = excess_rate_ci,
`Mean Expected Cases per 100,000 (95% CI)` = cases_per_100k_ci,
`Excess Cases per 100,000 (95% CI)` = excess_cases_ci,
`Relative Risk` = relative_risk_formatted,
stringsAsFactors = FALSE,
check.names = FALSE
))
}
# Sort by delay weeks
export_table <- export_table[order(export_table$`Delay (weeks)`), ]
# Display the table
cat("\n=== EXPORT TABLE FOR WORD ===\n\n")
print(export_table, row.names = FALSE)
# ——————————————————————————————————————————————————————
# 6) Export Options
# ——————————————————————————————————————————————————————
# Option 1: Save as CSV for easy import to Word
write.csv(export_table, "TBCHAMP_tb_delays_analysis_table.csv", row.names = FALSE)
cat("\nTable saved as 'tb_delays_analysis_table.csv'\n")
# Option 2: Create a formatted version for direct copy-paste
cat("\n=== FORMATTED FOR COPY-PASTE TO WORD ===\n")
cat("Delay (weeks)\tIncidence Rate per 100 Person-Yrs (95% CI)\tExcess Incidence Rate per 100 Person-Yrs (95% CI)\tMean Expected Cases per 100,000 (95% CI)\tExcess Cases per 100,000 (95% CI)\tRelative Risk\n")
for(i in 1:nrow(export_table)) {
cat(paste(export_table[i, ], collapse = "\t"), "\n")
}
# Option 3: Summary statistics
cat("\n=== ANALYSIS SUMMARY ===\n")
cat("Monte Carlo simulations:", n_simulations, "\n")
cat("Delay intervals: Every 2 weeks from -26 to +26 weeks\n")
cat("Baseline scenario: Week 0 (no delay)\n")
cat("Sample size:", n_intervention, "\n")
cat("Person-years:", py_intervention, "\n")
cat("Total scenarios analyzed:", nrow(export_table), "\n")
# Option 4: Key findings
best_scenario <- export_table[which.min(as.numeric(gsub("[^-0-9.]", "", export_table$`Delay (weeks)`))), ]
worst_scenario <- export_table[which.max(as.numeric(gsub("[^-0-9.]", "", export_table$`Delay (weeks)`))), ]
cat("\nBest scenario (most negative delay):", best_scenario$`Delay (weeks)`, "weeks\n")
cat("Worst scenario (most positive delay):", worst_scenario$`Delay (weeks)`, "weeks\n")
cat("\nNote: Negative delays = treatment starts before screening\n")
cat("Positive delays = treatment starts after screening\n")
cat("All excess calculations are relative to Week 0 (no delay)\n")Delay scenarios bar plot with UIs (Figure 6)
library(mgcv)
library(ggplot2)
library(dplyr)
library(RColorBrewer)
library(scales)
# ——————————————————————————————————————————————————————
# Enhanced TB Delays Analysis with 2-week intervals - Using Real GAM Models
# TBCHAMP ANALYSIS
# ——————————————————————————————————————————————————————
# Input Parameters (TBCHAMP data)
n_control <- 465
n_intervention <- 451
py_control <- 418
py_intervention <- 406
n_simulations <- 5000
set.seed(123)
cat("Enhanced Delays Analysis Parameters (TBCHAMP):\n")
cat("Control sample size:", n_control, "\n")
cat("Intervention sample size:", n_intervention, "\n")
cat("Control person-years:", py_control, "\n")
cat("Intervention person-years:", py_intervention, "\n")
cat("Number of simulations:", n_simulations, "\n\n")
# ——————————————————————————————————————————————————————
# 1) Monte Carlo Simulation Function (from your working code)
# ——————————————————————————————————————————————————————
simulate_gam_predictions <- function(gam_model, newdata, n_sim = n_simulations) {
# Get mean predictions and covariance matrix
pred_mean <- predict(gam_model, newdata = newdata, type = "response")
# Get the covariance matrix of the GAM coefficients
Vp <- vcov(gam_model, unconditional = TRUE)
# Get the linear predictor matrix
Xp <- predict(gam_model, newdata = newdata, type = "lpmatrix")
# Simulate from multivariate normal distribution
# Generate random coefficients
coef_sims <- MASS::mvrnorm(n_sim, coef(gam_model), Vp)
# Calculate predictions for each simulation
pred_sims <- tcrossprod(Xp, coef_sims)
return(pred_sims)
}
# ——————————————————————————————————————————————————————
# 2) Bootstrap Scenario Builder (exactly from your working code)
# ——————————————————————————————————————————————————————
bootstrap_scenario <- function(sim_idx, control_sims, intervention_sims, delay_weeks) {
# Extract simulated predictions for this iteration
Fc0_sim <- control_sims[, sim_idx]
Fi0_sim <- intervention_sims[, sim_idx]
# Apply p0 transformation with a beta draw for p0 to consider uncertainty in p0
p0_draw <- rbeta(1, 32 + 0.5, 1120 - 32 + 0.5) # TBCHAMP p0 values 32 and 1120
Fc0_sim <- p0_draw + (1 - p0_draw) * Fc0_sim
Fi0_sim <- p0_draw + (1 - p0_draw) * Fi0_sim
#enforce monotonicity (Simulated cumulative curves can wiggle; discrete hazards can become negative - preventing this)
Fc0_sim <- pmin(pmax(Fc0_sim, 0), 0.999)
Fi0_sim <- pmin(pmax(Fi0_sim, 0), 0.999)
Fc0_sim <- cummax(Fc0_sim); Fi0_sim <- cummax(Fi0_sim)
# Calculate hazards for this simulation
h_ctrl_sim <- diff(c(0, Fc0_sim)) / (1 - head(c(0, Fc0_sim), -1))
h_int_sim <- diff(c(0, Fi0_sim)) / (1 - head(c(0, Fi0_sim), -1))
# Build curve for this delay scenario
n <- length(time_ext)
Fcum <- numeric(n)
# Find indices
i0 <- which(time_ext == 0)
back_wk <- 26
# Special handling for pure scenarios
if(delay_weeks == Inf) {
# Pure control - FIXED: use p0_draw instead of p0
# Pre-screen ramp
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk) # FIXED
}
Fcum[i0] <- p0_draw # FIXED
Sprev <- 1 - p0_draw # FIXED
for(i in (i0+1):n) {
t0 <- time_ext[i]
h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else if(delay_weeks == -Inf) {
# Pure intervention (Instant TPT) - FIXED: use p0_draw instead of p0
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk) # FIXED
}
Fcum[i0] <- p0_draw # FIXED
Sprev <- 1 - p0_draw # FIXED
for(i in (i0+1):n) {
t0 <- time_ext[i]
h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else if(delay_weeks < 0) {
# CORRECTED: Negative delays (pre-screen intervention) - FIXED: use p0_draw instead of p0
# This means intervention starts BEFORE time 0, providing earlier protection
# Find delay point index
id_match <- which(time_ext == delay_weeks)
if(length(id_match) == 0) {
id <- which.min(abs(time_ext - delay_weeks))
} else {
id <- id_match[1]
}
# Before delay point: ramp (no intervention yet)
if(id > 1) {
s_up <- time_ext[1:(id-1)] + back_wk
s_up[s_up < 0] <- 0
Fcum[1:(id-1)] <- p0_draw * sqrt(s_up / back_wk) # FIXED
}
# At delay point: start intervention
s_delay <- max(0, delay_weeks + back_wk)
Fcum[id] <- p0_draw * sqrt(s_delay / back_wk) # FIXED
Sprev <- 1 - Fcum[id]
# CORRECTED: After delay point, use intervention hazards consistently
# Map the post-delay timeline to intervention hazards starting from time 0
for(i in (id+1):n) {
current_time <- time_ext[i]
# Calculate how long intervention has been active
intervention_duration <- current_time - delay_weeks
if(intervention_duration >= 0 && intervention_duration <= 54) {
# Use intervention hazard corresponding to this duration
h_idx <- min(intervention_duration + 1, length(h_int_sim))
h_t <- h_int_sim[max(1, h_idx)]
} else if(intervention_duration > 54) {
# Use last available intervention hazard
h_t <- h_int_sim[length(h_int_sim)]
} else {
# This shouldn't happen with negative delays, but use first intervention hazard as fallback
h_t <- h_int_sim[1]
}
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else {
# Positive delays (post-screen delays) - FIXED: use p0_draw instead of p0
# Pre-screen ramp
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk) # FIXED
}
# At screen
Fcum[i0] <- p0_draw # FIXED
Sprev <- 1 - p0_draw # FIXED
# Post-screen: control until delay point, then intervention
for(i in (i0+1):n) {
t0 <- time_ext[i]
if(t0 <= delay_weeks) {
# Use control hazard until delay point
h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
} else {
# Use intervention hazard after delay point
# Map to intervention timeline: intervention has been active for (t0 - delay_weeks) weeks
intervention_duration <- t0 - delay_weeks
h_idx <- min(intervention_duration + 1, length(h_int_sim))
h_t <- h_int_sim[max(1, h_idx)]
}
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
}
# Return final value at week 26
week_26_idx <- which(time_ext == 26)
return(Fcum[week_26_idx])
}
# ——————————————————————————————————————————————————————
# 3) Run Enhanced Analysis with 2-week intervals using REAL GAM models
# ——————————————————————————————————————————————————————
cat("Running enhanced Monte Carlo simulations with 2-week intervals...\n")
# Reset seed right before simulations for exact reproducibility
set.seed(42)
# Set up time vectors
time0 <- 0:26
back_wk <- 26
time_ext <- seq(-back_wk, 26, by = 1)
# Create 2-week interval delays from -26 to +26
delays_2wk <- seq(-26, 26, by = 2)
# Generate simulated predictions using your ACTUAL GAM models
cat("Generating GAM simulations using your trained models...\n")
# NOTE: Make sure your GAM models are loaded: control_gam_k5_GCV_TBCHAMP and intervention_gam_k5_GCV_TBCHAMP
control_sims <- simulate_gam_predictions(control_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
intervention_sims <- simulate_gam_predictions(intervention_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
cat("Generated", ncol(control_sims), "control simulations and", ncol(intervention_sims), "intervention simulations\n")
# Define all scenarios including baseline
# In Code 2, change to only include delay scenarios:
all_delays <- setNames(delays_2wk, paste0(delays_2wk, " wk"))
# Run simulations for all scenarios
cat("Running scenario simulations...\n")
simulation_results <- list()
for(scenario_name in names(all_delays)) {
cat("Processing scenario:", scenario_name, "\n")
delay_val <- all_delays[scenario_name]
scenario_sims <- sapply(1:n_simulations, function(i) {
bootstrap_scenario(i, control_sims, intervention_sims, delay_val)
})
simulation_results[[scenario_name]] <- scenario_sims
}
cat("Completed all simulations!\n\n")
# Check if "0 wk" exists and compare a few values
print(simulation_results[["0 wk"]][1:5]) # First 5 simulation results
print(mean(simulation_results[["0 wk"]])) # Mean for "0 wk" scenario
# ——————————————————————————————————————————————————————
# 4) Calculate Excess Cases PER 100,000 using your exact methodology
# ——————————————————————————————————————————————————————
# Get baseline for relative calculations
baseline_sims <- simulation_results[["0 wk"]] # Uses "0 wk" as baseline
baseline_mean <- mean(baseline_sims)
# Calculate metrics for each scenario using your exact approach
results_list <- list()
for(scenario_name in names(simulation_results)) {
scenario_sims <- simulation_results[[scenario_name]]
# Basic statistics
mean_incidence <- mean(scenario_sims)
median_incidence <- median(scenario_sims)
ci_lower <- quantile(scenario_sims, 0.025)
ci_upper <- quantile(scenario_sims, 0.975)
# Person-years and sample size: every scenario here is a delay scenario, so
# the intervention values always apply (the "Control" branch is a leftover
# from a script variant that had a named Control scenario)
py <- ifelse(scenario_name == "Control", py_control, py_intervention)
sample_size <- ifelse(scenario_name == "Control", n_control, n_intervention)
# 1. Incidence rate per 100 person-yrs (95% CI)
rate_per_100py <- (mean_incidence * sample_size / py) * 100
rate_ci_lower <- (ci_lower * sample_size / py) * 100
rate_ci_upper <- (ci_upper * sample_size / py) * 100
# 2. Mean Expected Cases (95% CI) - absolute numbers in original cohort
mean_cases <- mean_incidence * sample_size
cases_ci_lower <- ci_lower * sample_size
cases_ci_upper <- ci_upper * sample_size
# 3. Excess calculations (relative to the "0 wk" baseline; no scenario here is
# named "Instant TPT", so this branch runs for every scenario, and the "0 wk"
# excess draws are identically zero)
if(scenario_name != "Instant TPT") {
# Excess incidence simulations
excess_sims <- scenario_sims - baseline_sims
excess_mean <- mean(excess_sims)
excess_ci_lower <- quantile(excess_sims, 0.025)
excess_ci_upper <- quantile(excess_sims, 0.975)
# Excess rate per 100 person-yrs
excess_rate_per_100py <- (excess_mean * sample_size / py) * 100
excess_rate_ci_lower <- (excess_ci_lower * sample_size / py) * 100
excess_rate_ci_upper <- (excess_ci_upper * sample_size / py) * 100
# MODIFIED: Excess cases per 100,000 contacts (not absolute numbers)
excess_cases_per_100k <- excess_mean * 100000
excess_cases_per_100k_ci_lower <- excess_ci_lower * 100000
excess_cases_per_100k_ci_upper <- excess_ci_upper * 100000
} else {
# Unreachable with the "X wk" scenario names used here (kept from a variant with a named "Instant TPT" scenario)
excess_rate_per_100py <- 0
excess_rate_ci_lower <- 0
excess_rate_ci_upper <- 0
excess_cases_per_100k <- 0
excess_cases_per_100k_ci_lower <- 0
excess_cases_per_100k_ci_upper <- 0
}
# 4. Relative Risk
relative_risk <- mean_incidence / baseline_mean
# Store results
results_list[[scenario_name]] <- list(
scenario = scenario_name,
incidence_rate_per_100py = rate_per_100py,
incidence_rate_ci_lower = rate_ci_lower,
incidence_rate_ci_upper = rate_ci_upper,
excess_rate_per_100py = excess_rate_per_100py,
excess_rate_ci_lower = excess_rate_ci_lower,
excess_rate_ci_upper = excess_rate_ci_upper,
mean_cases = mean_cases,
cases_ci_lower = cases_ci_lower,
cases_ci_upper = cases_ci_upper,
excess_cases_per_100k = excess_cases_per_100k,
excess_cases_per_100k_ci_lower = excess_cases_per_100k_ci_lower,
excess_cases_per_100k_ci_upper = excess_cases_per_100k_ci_upper,
relative_risk = relative_risk
)
}
# ——————————————————————————————————————————————————————
# 5) Prepare Plot Data using your results (MODIFIED for per 100,000)
# ——————————————————————————————————————————————————————
plot_data <- data.frame()
for(scenario_name in names(results_list)) {
# Don't skip any scenarios this time - keep baseline (week 0) on plot
r <- results_list[[scenario_name]]
# Extract delay value for ordering
if(scenario_name == "26 wk") { # CHANGED: Check for "26 wk" instead of "Control"
delay_weeks <- 999 # Put at the end
delay_label <- "No TPT"
delay_type <- "No Treatment"
} else {
delay_weeks <- as.numeric(sub(" wk", "", scenario_name))
delay_label <- as.character(delay_weeks)
# Assign delay types
if(delay_weeks == 0) {
delay_type <- "Baseline (0 wk)" # Special case for baseline
} else if(delay_weeks < 0) {
delay_type <- "Pre-screen"
} else {
delay_type <- "Post-screen"
}
}
plot_data <- rbind(plot_data, data.frame(
scenario = scenario_name,
delay_weeks = delay_weeks,
delay_label = delay_label,
excess_cases_per_100k = r$excess_cases_per_100k,
ci_lower = r$excess_cases_per_100k_ci_lower,
ci_upper = r$excess_cases_per_100k_ci_upper,
delay_type = delay_type,
stringsAsFactors = FALSE
))
}
# Order by delay weeks
plot_data <- plot_data[order(plot_data$delay_weeks), ]
# ——————————————————————————————————————————————————————
# 6) Create Enhanced Color-Blind Friendly Bar Plot (UPDATED LABELS)
# ——————————————————————————————————————————————————————
# Set up RColorBrewer Paired palette
paired_colors <- brewer.pal(12, "Paired")
# Assign colors to delay types
color_mapping <- c(
"Pre-screen" = paired_colors[2], # Light blue
"Post-screen" = paired_colors[6], # Light orange
"No Treatment" = paired_colors[8] # Light r
)
# Create the enhanced bar plot
enhanced_plot <- ggplot(plot_data, aes(x = reorder(delay_label, delay_weeks),
y = excess_cases_per_100k,
fill = delay_type)) +
geom_col(width = 0.8, alpha = 0.8, color = "white", size = 0.3) +
geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper),
width = 0.4, color = "black", size = 0.6, alpha = 0.8) +
# Add value labels on bars with smart positioning
geom_text(aes(label = sprintf("%.0f", excess_cases_per_100k),
y = ifelse(excess_cases_per_100k >= 0,
pmax(ci_upper + 150, excess_cases_per_100k + 150), # Above positive bars
pmin(ci_lower - 150, excess_cases_per_100k - 150))), # Below negative bars
size = 4.2, fontface = "bold", color = "black") +
# Styling
scale_fill_manual(values = color_mapping, name = "Treatment Timing") +
scale_y_continuous(labels = number_format(accuracy = 1),
expand = expansion(mult = c(0.1, 0.15))) +
# Reference line at zero
geom_hline(yintercept = 0, linetype = "solid", color = "black", size = 0.8) +
# UPDATED Labels and theme
labs(
title = "TBCHAMP: Excess TB Cases per 100,000 Contacts by Treatment Delay",
subtitle = paste0("Compared to Instant TPT (0 week delay) (95% UI from ",
format(n_simulations, big.mark = ","), " Monte Carlo simulations)"),
x = "Treatment Delay (weeks)",
y = "Excess TB Cases per 100,000 Contacts (26 weeks)",
caption = "Negative delays = pre-screen treatment | Positive delays = post-screen treatment"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
plot.subtitle = element_text(hjust = 0.5, size = 11, color = "black"),
plot.caption = element_text(size = 9, color = "black"),
axis.text.x = element_text(angle = 45, hjust = 1, size = 13, color = "black"),
axis.text.y = element_text(size = 13, color = "black"),
axis.title = element_text(size = 11, face = "bold"),
legend.position = "top",
legend.title = element_text(size = 10, face = "bold"),
legend.text = element_text(size = 9),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "gray80", fill = NA, size = 0.5)
)
# Display the plot
print(enhanced_plot)
# ——————————————————————————————————————————————————————
# 7) Summary Statistics Table (UPDATED for per 100,000)
# ——————————————————————————————————————————————————————
cat("\n=== ENHANCED DELAY ANALYSIS SUMMARY - TBCHAMP (Excess per 100,000 contacts) ===\n")
cat("Delay intervals: Every 2 weeks from -26 to +26 weeks\n")
cat("Monte Carlo simulations:", format(n_simulations, big.mark = ","), "\n")
cat("Results standardized to per 100,000 contacts over 26 weeks\n\n")
# Print key statistics
summary_table <- plot_data %>%
select(delay_label, delay_type, excess_cases_per_100k, ci_lower, ci_upper) %>%
mutate(
excess_cases_ci = sprintf("%.0f (%.0f to %.0f)", excess_cases_per_100k, ci_lower, ci_upper)
) %>%
select(delay_label, delay_type, excess_cases_ci)
colnames(summary_table) <- c("Delay (weeks)", "Type", "Excess Cases per 100,000 (95% CI)")
print(summary_table)
cat("\nNote: Negative delays indicate treatment started before screening\n")
cat("Positive delays indicate treatment started after screening\n")
cat("Results calculated using TBCHAMP GAM models and Monte Carlo approach\n")
cat("Units: Excess TB cases per 100,000 contacts over 26 weeks\n")Shortening
Curves with shortened scenarios (Figure 7)
library(mgcv)
# —————————————————————————————————————————————————————
# A) Fit‐based cumulative‐incidence at t = 0…54, with p0‐spike
# UPDATED: Using k=5 GCV GAM models
time0 <- 0:54
p0 <- 32/1120
# Use the new k=5 GCV GAM models
Fc0 <- predict(control_gam_k5_GCV_TBCHAMP,
newdata = data.frame(time = time0))
Fi0 <- predict(intervention_gam_k5_GCV_TBCHAMP,
newdata = data.frame(time = time0))
Fc0 <- p0 + (1 - p0)*Fc0
Fi0 <- p0 + (1 - p0)*Fi0
# discrete hazards, with head(..., -1) aligning the denominators to F(t-1)
h_ctrl <- diff(c(0, Fc0)) / (1 - head(c(0, Fc0), -1))
h_int <- diff(c(0, Fi0)) / (1 - head(c(0, Fi0), -1))
# —————————————————————————————————————————————————————
# B) "Pre‐screen" ramp length and treatment parameters
back_wk <- 26 # show 6 months before screen
shorten_weeks <- c(6, 13, 20) # weeks at which treatment is stopped (roughly every 7 weeks; use fewer scenarios for the bar plot)
# —————————————————————————————————————————————————————
# C) build a time axis from –back_wk … +54
time_ext <- seq(-back_wk, 54, by = 1)
# —————————————————————————————————————————————————————
# D) single‐curve builder: √ ramp on [–back_wk,0], then hazard‐switch at d
build_curve <- function(d) {
n <- length(time_ext)
Fcum <- numeric(n)
# find index of screen‐time t0 = 0
i0 <- which(time_ext == 0)
# 1) PRE‐SCREEN: √ ramp from (–back_wk → 0) up to p0
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0 * sqrt( s_up / back_wk )
# 2) AT SCREEN
Fcum[i0] <- p0
Sprev <- 1 - p0
# 3) POST‐SCREEN: control hazard until t0 ≤ d, then intervention hazard
for(i in (i0+1):n) {
t0 <- time_ext[i]
if (d > 0 && t0 <= d) {
h_t <- h_ctrl[t0 + 1]
} else {
idx <- if (d > 0) (t0 - d + 1) else (t0 + 1)
h_t <- h_int[ min(idx, length(h_int)) ]
}
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
Fcum
}
# —————————————————————————————————————————————————————
# E) UPDATED: shortened treatment curve builder using time-specific control slope
build_shortened_curve <- function(shorten_at) {
n <- length(time_ext)
Fcum <- numeric(n)
# find index of screen‐time t0 = 0
i0 <- which(time_ext == 0)
# 1) PRE‐SCREEN: √ ramp from (–back_wk → 0) up to p0
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0 * sqrt( s_up / back_wk )
# 2) AT SCREEN
Fcum[i0] <- p0
# 3) POST‐SCREEN: follow intervention until shorten_at, then follow control pattern from shortening timepoint
for(i in (i0+1):n) {
t0 <- time_ext[i]
if (t0 <= shorten_at) {
# Follow intervention curve
Fcum[i] <- Fi0[min(t0 + 1, length(Fi0))]
} else {
# After shortening point: take the value at shortening point and add
# the same increment that control would have from the shortening timepoint
value_at_shorten <- Fi0[min(shorten_at + 1, length(Fi0))]
# NEW: Use control increment from shortening timepoint, not from time 0
control_at_shorten <- Fc0[min(shorten_at + 1, length(Fc0))]
control_at_current <- Fc0[min(t0 + 1, length(Fc0))]
control_increment <- control_at_current - control_at_shorten
Fcum[i] <- value_at_shorten + control_increment
}
}
Fcum
}
# —————————————————————————————————————————————————————
# F) build all the curves
# 1) explicitly build Control & Instant
ctrl_ext <- build_curve( Inf) # pure control
inst_ext <- build_curve(-Inf) # pure intervention
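# Sanity check: cutting treatment at the end of follow-up should track the
# full-treatment (Instant TPT) curve; small gaps can remain because inst_ext
# is rebuilt from hazards with F(0) pinned to p0, while Fi0 at t = 0 is only
# approximately p0
print(max(abs(build_shortened_curve(54) - inst_ext)))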
# 2) shortened treatment curves
shorten_mat <- if(length(shorten_weeks) > 0) {
sapply(shorten_weeks, build_shortened_curve)
} else NULL
# Add column names for shortened treatments
if(!is.null(shorten_mat)) {
colnames(shorten_mat) <- paste0("TPT ", shorten_weeks, " wk")
}
# —————————————————————————————————————————————————————
# G) compressed x‐axis plot, up to +26 wk, now including shortened treatment curves
# how many "plot units" to devote to the negative side?
neg_width <- 30
# remap time_ext → x_plot:
x_plot <- ifelse(
time_ext < 0,
neg_width * (time_ext + back_wk) / back_wk, # −back_wk…0 → 0…30
neg_width + pmin(time_ext, 54) # 0…54 → 30…84 (capped at t = 54)
)
# bind together Control, Instant, and shortened treatments only:
if(!is.null(shorten_mat)) {
all_mat <- cbind(
Control = ctrl_ext,
`Instant TPT` = inst_ext,
shorten_mat
)
} else {
all_mat <- cbind(
Control = ctrl_ext,
`Instant TPT` = inst_ext
)
}
# subset to t ≤ 26 wk
keep <- which(time_ext <= 26)
x_sub <- x_plot[keep]
mat_sub <- all_mat[keep, , drop=FALSE]
# CORRECTED: Convert to percentages in the data
mat_sub_pct <- mat_sub * 100
# MODIFIED: Create gradient colors where shorter treatment is worse (red) and longer is better (green)
n_shorten_cols <- if(!is.null(shorten_mat)) ncol(shorten_mat) else 0
if(n_shorten_cols > 0) {
# Create a gradient from red (shorter/worse) to green (longer/better)
# Since shorten_weeks is ordered from shortest to longest: 6, 13, 20
shorten_colors <- colorRampPalette(c("#FF0000", "#FF7F00", "#00AA00"))(n_shorten_cols)
cols <- c("red", "blue", shorten_colors)
} else {
cols <- c("red", "blue")
}
ltys <- c(1, 1, rep(3, n_shorten_cols))
yr <- range(mat_sub_pct)
# CORRECTED: Use matplot with percentage data and larger axis text
matplot(
x_sub, mat_sub_pct,
type="l", col=cols, lty=ltys, lwd=2,
xaxt="n", ylim=yr,
xlab="Weeks since randomisation",
ylab="Cumulative incidence of TB (%)", # Updated label
main="TBCHAMP Shortened-TPT scenarios",
cex.axis = 1.0, # Size 12 for axis numbers
cex.lab = 1.0 # Size 12 for axis labels
)
# custom x-axis ticks:
neg_ticks <- seq(-back_wk, 0, by=2)
neg_at <- neg_width * (neg_ticks + back_wk) / back_wk
pos_ticks <- seq(0, 54, by=5)
pos_at <- neg_width + pos_ticks
axis(1,
at = c(neg_at, pos_at),
labels = c(neg_ticks, pos_ticks),
cex.axis = 1.0) # Size 12 for axis tick labels
# vertical line at t=0
abline(v=neg_width, lty=3, col="gray50")
# Add vertical lines to show shortening points
if(!is.null(shorten_mat)) {
for(sw in shorten_weeks) {
if(sw <= 54) { # only show if visible on plot
shorten_x <- neg_width + sw
abline(v=shorten_x, lty=4, col="gray70", lwd=1)
}
}
}
# legend with larger text
legend("topleft",
legend = colnames(all_mat),
col = cols,
lty = ltys,
lwd = 2,
bty = "n",
ncol = 1,
cex = 1.2, # Increased from 0.8 to 1.2 for larger legend
y.intersp = 0.8,
x.intersp = 0.5,
inset = c(0.02, 0.02))
t_target <- 26
idx_26 <- which(time_ext == t_target)
if (length(idx_26) > 0) {
ci_26 <- all_mat[idx_26, ] * 100 # convert to %
cat("Cumulative incidence at", t_target, "weeks (%):\n")
print(round(ci_26, 2))
} else {
warning("No exact match for 26 weeks in time_ext")
}
Appendix Table A2 of all 2-week shortened scenario statistics with UIs
(with the control increment taken from the shortening timepoint, not t = 0)
library(mgcv)
library(ggplot2)
library(dplyr)
library(parallel)
# ——————————————————————————————————————————————————————
# 0) Input Parameters - TBCHAMP Data
# ——————————————————————————————————————————————————————
# Sample sizes (TBCHAMP)
n_control <- 465 # TBCHAMP control sample size
n_intervention <- 451 # TBCHAMP intervention sample size
# Person-years of follow-up (TBCHAMP)
py_control <- 418 # TBCHAMP control person-years
py_intervention <- 406 # TBCHAMP intervention person-years
# Monte Carlo parameters
n_simulations <- 5000 # Number of bootstrap simulations
set.seed(123) # For reproducibility
cat("Shortened Treatment Analysis Parameters (TBCHAMP):\n")
cat("Control sample size:", n_control, "\n")
cat("Intervention sample size:", n_intervention, "\n")
cat("Control person-years:", py_control, "\n")
cat("Intervention person-years:", py_intervention, "\n")
cat("Number of simulations:", n_simulations, "\n\n")
# ——————————————————————————————————————————————————————
# 1) Monte Carlo Simulation Function
# ——————————————————————————————————————————————————————
simulate_gam_predictions <- function(gam_model, newdata, n_sim = n_simulations) {
# Get mean predictions and covariance matrix
pred_mean <- predict(gam_model, newdata = newdata, type = "response")
# Get the covariance matrix of the GAM coefficients
Vp <- vcov(gam_model, unconditional = TRUE)
# Get the linear predictor matrix
Xp <- predict(gam_model, newdata = newdata, type = "lpmatrix")
# Simulate from multivariate normal distribution
# Generate random coefficients
coef_sims <- MASS::mvrnorm(n_sim, coef(gam_model), Vp)
# Calculate predictions for each simulation
pred_sims <- tcrossprod(Xp, coef_sims)
return(pred_sims)
}
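# --- Illustrative sketch (not part of the analysis): reading UIs off the draws ---
# simulate_gam_predictions() returns a matrix with one row per prediction time
# and one column per coefficient draw, so pointwise 95% intervals are row-wise
# quantiles. Deterministic toy example (kept RNG-free so the seed above is
# untouched):
demo_sims <- matrix(seq(0.001, 0.027, length.out = 27 * 10), nrow = 27)
demo_ui <- t(apply(demo_sims, 1, quantile, probs = c(0.025, 0.5, 0.975)))
head(demo_ui, 3) # lower / median / upper per time point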
# ——————————————————————————————————————————————————————
# 2) Bootstrap Shortened Treatment Scenario Builder
# ——————————————————————————————————————————————————————
# CORRECTED: bootstrap_shortened_scenario function
bootstrap_shortened_scenario <- function(sim_idx, control_sims, intervention_sims, shorten_weeks) {
# Extract simulated predictions for this iteration
Fc0_sim <- control_sims[, sim_idx]
Fi0_sim <- intervention_sims[, sim_idx]
# Apply p0 transformation with a beta draw for p0 to consider uncertainty in p0
p0_draw <- rbeta(1, 32 + 0.5, 1120 - 32 + 0.5) # TBCHAMP p0 values
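# (rbeta(1, x + 0.5, n - x + 0.5) is a draw from the Beta(x + 0.5, n - x + 0.5)
# posterior for a binomial proportion under a Jeffreys prior, here with
# x = 32 events out of n = 1120 - the TBCHAMP p0 inputs.)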
Fc0_sim <- p0_draw + (1 - p0_draw) * Fc0_sim
Fi0_sim <- p0_draw + (1 - p0_draw) * Fi0_sim
# Enforce monotonicity
Fc0_sim <- pmin(pmax(Fc0_sim, 0), 0.999)
Fi0_sim <- pmin(pmax(Fi0_sim, 0), 0.999)
Fc0_sim <- cummax(Fc0_sim); Fi0_sim <- cummax(Fi0_sim)
# Calculate hazards for this simulation
h_ctrl_sim <- diff(c(0, Fc0_sim)) / (1 - head(c(0, Fc0_sim), -1)) # h(t) = [F(t)-F(t-1)] / [1-F(t-1)]
h_int_sim <- diff(c(0, Fi0_sim)) / (1 - head(c(0, Fi0_sim), -1))
# Build curve for this shortened treatment scenario
n <- length(time_ext)
Fcum <- numeric(n)
# Find indices
i0 <- which(time_ext == 0)
back_wk <- 26
# Special handling for reference scenarios
if(shorten_weeks == Inf) {
# Pure control - no treatment at all
# Pre-screen ramp
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)
}
Fcum[i0] <- p0_draw
Sprev <- 1 - p0_draw
for(i in (i0+1):n) {
t0 <- time_ext[i]
h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else if(shorten_weeks == -Inf) {
# Full treatment (26 weeks) - equivalent to instant TPT
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)
}
Fcum[i0] <- p0_draw
Sprev <- 1 - p0_draw
for(i in (i0+1):n) {
t0 <- time_ext[i]
h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else {
# Shortened treatment scenarios
# Pre-screen ramp
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)
}
# At screen
Fcum[i0] <- p0_draw
Sprev <- 1 - p0_draw
# Post-screen: intervention until shorten_weeks, then control
for(i in (i0+1):n) {
t0 <- time_ext[i]
if(t0 <= shorten_weeks) {
# Use intervention hazard until shortening point
h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
} else {
# CORRECTED: After shortening, use control hazard at CURRENT time point
# Not time since shortening, but the actual current time
h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
}
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
}
# Return final value at week 26
week_26_idx <- which(time_ext == 26)
return(Fcum[week_26_idx])
}
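# --- Illustrative sketch (not part of the analysis): the survival recursion ---
# The loops above apply S(t) = S(t-1) * (1 - h(t)); with a constant weekly
# hazard h this reproduces the closed form F(t) = 1 - (1 - h)^t:
h_demo <- 0.01
F_demo <- 1 - cumprod(rep(1 - h_demo, 5))
all.equal(F_demo, 1 - (1 - h_demo)^(1:5)) # TRUE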
# ——————————————————————————————————————————————————————
# 3) Run Monte Carlo Simulations - Every 2 Week Treatment Durations
# ——————————————————————————————————————————————————————
cat("Running Monte Carlo simulations for shortened treatments (every 2 weeks)...\n")
# Set up time vectors
time0 <- 0:26
back_wk <- 26
time_ext <- seq(-back_wk, 26, by = 1)
# Create treatment duration intervals every 2 weeks from 0 to 26
shortened_weeks <- seq(0, 26, by = 2) # 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26
# Generate simulated predictions for both models
cat("Generating GAM simulations...\n")
control_sims <- simulate_gam_predictions(control_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
intervention_sims <- simulate_gam_predictions(intervention_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
cat("Generated", ncol(control_sims), "control simulations and", ncol(intervention_sims), "intervention simulations\n")
# Define all scenarios - treat 26 weeks as the "Full TPT" baseline
all_scenarios <- setNames(shortened_weeks, paste0("TPT ", shortened_weeks, " wk"))
# Run simulations for all scenarios
cat("Running shortened treatment scenario simulations...\n")
simulation_results <- list()
for(scenario_name in names(all_scenarios)) {
cat("Processing scenario:", scenario_name, "\n")
shorten_val <- all_scenarios[scenario_name]
# For 26 weeks, use -Inf to trigger full treatment scenario
if(shorten_val == 26) {
shorten_val <- -Inf
}
# Run simulations for this scenario
scenario_sims <- sapply(1:n_simulations, function(i) {
bootstrap_shortened_scenario(i, control_sims, intervention_sims, shorten_val)
})
simulation_results[[scenario_name]] <- scenario_sims
}
cat("Completed all simulations!\n\n")
# ——————————————————————————————————————————————————————
# 4) Calculate Key Metrics with 26 weeks as Baseline
# ——————————————————————————————————————————————————————
# Get baseline (26 weeks = full treatment) for relative calculations
baseline_sims <- simulation_results[["TPT 26 wk"]]
baseline_mean <- mean(baseline_sims)
# Calculate metrics for each scenario
results_list <- list()
for(scenario_name in names(simulation_results)) {
scenario_sims <- simulation_results[[scenario_name]]
treatment_weeks <- as.numeric(sub("TPT (\\d+) wk", "\\1", scenario_name))
# Basic statistics
mean_incidence <- mean(scenario_sims)
median_incidence <- median(scenario_sims)
ci_lower <- quantile(scenario_sims, 0.025)
ci_upper <- quantile(scenario_sims, 0.975)
# Use intervention PY and sample size for all scenarios
py <- py_intervention
sample_size <- n_intervention
# 1. Incidence rate per 100 person-yrs (95% CI)
rate_per_100py <- (mean_incidence * sample_size / py) * 100
rate_ci_lower <- (ci_lower * sample_size / py) * 100
rate_ci_upper <- (ci_upper * sample_size / py) * 100
# 2. Mean Expected Cases per 100,000 (95% CI)
cases_per_100k <- mean_incidence * 100000
cases_per_100k_ci_lower <- ci_lower * 100000
cases_per_100k_ci_upper <- ci_upper * 100000
# 3. Excess calculations (compared to 26 weeks)
if(treatment_weeks != 26) {
# Excess incidence simulations
excess_sims <- scenario_sims - baseline_sims
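# (Because the same draw index is used for the scenario and the baseline, the
# GAM coefficient draws are shared, so much of the Monte Carlo noise cancels in
# this paired difference; note the p0 beta draw is redrawn per scenario call.)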
excess_mean <- mean(excess_sims)
excess_ci_lower <- quantile(excess_sims, 0.025)
excess_ci_upper <- quantile(excess_sims, 0.975)
# Excess rate per 100 person-yrs
excess_rate_per_100py <- (excess_mean * sample_size / py) * 100
excess_rate_ci_lower <- (excess_ci_lower * sample_size / py) * 100
excess_rate_ci_upper <- (excess_ci_upper * sample_size / py) * 100
# Excess cases per 100,000
excess_cases_per_100k <- excess_mean * 100000
excess_cases_per_100k_ci_lower <- excess_ci_lower * 100000
excess_cases_per_100k_ci_upper <- excess_ci_upper * 100000
} else {
# 26 weeks is the baseline, so excess = 0
excess_rate_per_100py <- 0
excess_rate_ci_lower <- 0
excess_rate_ci_upper <- 0
excess_cases_per_100k <- 0
excess_cases_per_100k_ci_lower <- 0
excess_cases_per_100k_ci_upper <- 0
}
# 4. Relative Risk
relative_risk <- mean_incidence / baseline_mean
# Store results
results_list[[scenario_name]] <- list(
treatment_weeks = treatment_weeks,
incidence_rate_per_100py = rate_per_100py,
rate_ci_lower = rate_ci_lower,
rate_ci_upper = rate_ci_upper,
excess_rate_per_100py = excess_rate_per_100py,
excess_rate_ci_lower = excess_rate_ci_lower,
excess_rate_ci_upper = excess_rate_ci_upper,
cases_per_100k = cases_per_100k,
cases_per_100k_ci_lower = cases_per_100k_ci_lower,
cases_per_100k_ci_upper = cases_per_100k_ci_upper,
excess_cases_per_100k = excess_cases_per_100k,
excess_cases_per_100k_ci_lower = excess_cases_per_100k_ci_lower,
excess_cases_per_100k_ci_upper = excess_cases_per_100k_ci_upper,
relative_risk = relative_risk
)
}
# ——————————————————————————————————————————————————————
# 5) Create Export Table for Word
# ——————————————————————————————————————————————————————
# Build the export table
export_table <- data.frame()
for(scenario_name in names(results_list)) {
r <- results_list[[scenario_name]]
# Format all confidence intervals
incidence_rate_ci <- sprintf("%.2f (%.2f-%.2f)",
r$incidence_rate_per_100py, r$rate_ci_lower, r$rate_ci_upper)
if(r$treatment_weeks == 26) {
excess_rate_ci <- "BASELINE"
excess_cases_ci <- "BASELINE"
} else {
excess_rate_ci <- sprintf("%+.2f (%+.2f to %+.2f)",
r$excess_rate_per_100py, r$excess_rate_ci_lower, r$excess_rate_ci_upper)
excess_cases_ci <- sprintf("%+.0f (%+.0f to %+.0f)",
r$excess_cases_per_100k, r$excess_cases_per_100k_ci_lower, r$excess_cases_per_100k_ci_upper)
}
cases_per_100k_ci <- sprintf("%.0f (%.0f-%.0f)",
r$cases_per_100k, r$cases_per_100k_ci_lower, r$cases_per_100k_ci_upper)
relative_risk_formatted <- sprintf("%.2f", r$relative_risk)
# Add row to export table
export_table <- rbind(export_table, data.frame(
`Treatment Duration (weeks)` = r$treatment_weeks,
`Incidence Rate per 100 Person-Yrs (95% CI)` = incidence_rate_ci,
`Excess Incidence Rate per 100 Person-Yrs (95% CI)` = excess_rate_ci,
`Mean Expected Cases per 100,000 (95% CI)` = cases_per_100k_ci,
`Excess Cases per 100,000 (95% CI)` = excess_cases_ci,
`Relative Risk` = relative_risk_formatted,
stringsAsFactors = FALSE,
check.names = FALSE
))
}
# Sort by treatment duration
export_table <- export_table[order(export_table$`Treatment Duration (weeks)`), ]
# Display the table
cat("\n=== EXPORT TABLE FOR WORD (TBCHAMP Shortened Treatment) ===\n\n")
print(export_table, row.names = FALSE)
# ——————————————————————————————————————————————————————
# 6) Export Options
# ——————————————————————————————————————————————————————
# Option 1: Save as CSV for easy import to Word
write.csv(export_table, "tbchamp_shortened_treatment_table.csv", row.names = FALSE)
cat("\nTable saved as 'tbchamp_shortened_treatment_table.csv'\n")
# Option 2: Create a formatted version for direct copy-paste
cat("\n=== FORMATTED FOR COPY-PASTE TO WORD ===\n")
cat("Treatment Duration (weeks)\tIncidence Rate per 100 Person-Yrs (95% CI)\tExcess Incidence Rate per 100 Person-Yrs (95% CI)\tMean Expected Cases per 100,000 (95% CI)\tExcess Cases per 100,000 (95% CI)\tRelative Risk\n")
for(i in 1:nrow(export_table)) {
cat(paste(export_table[i, ], collapse = "\t"), "\n")
}
# Option 3: Summary statistics
cat("\n=== ANALYSIS SUMMARY ===\n")
cat("Monte Carlo simulations:", n_simulations, "\n")
cat("Treatment durations: Every 2 weeks from 0 to 26 weeks\n")
cat("Baseline scenario: 26 weeks (full treatment)\n")
cat("Sample size:", n_intervention, "\n")
cat("Person-years:", py_intervention, "\n")
cat("Total scenarios analyzed:", nrow(export_table), "\n")
# Option 4: Key findings
shortest_idx <- which.min(export_table$`Treatment Duration (weeks)`)
longest_idx <- which.max(export_table$`Treatment Duration (weeks)`)
if(length(shortest_idx) > 0 && length(longest_idx) > 0) {
shortest_scenario <- export_table[shortest_idx, ]
longest_scenario <- export_table[longest_idx, ]
cat("\nShortest treatment:", shortest_scenario$`Treatment Duration (weeks)`, "weeks\n")
cat("Longest treatment:", longest_scenario$`Treatment Duration (weeks)`, "weeks\n")
}
}
cat("\nNote: All excess calculations are relative to 26 weeks (full treatment)\n")
cat("0 weeks = no treatment\n")
cat("Higher treatment duration generally leads to better outcomes\n")
# Option 5: Calculate relative risks compared to full treatment
cat("\nRelative Risks (compared to Full TPT 26 weeks):\n")
for(scenario in names(simulation_results)) {
rr <- mean(simulation_results[[scenario]]) / baseline_mean
weeks <- as.numeric(sub("TPT (\\d+) wk", "\\1", scenario))
cat(sprintf("%d weeks: RR = %.2f\n", weeks, rr))
}
Bar plot of shortened scenarios (Figure 8)
library(mgcv)
library(ggplot2)
library(dplyr)
library(RColorBrewer)
library(scales)
# ——————————————————————————————————————————————————————
# Enhanced TB Shortened Treatment Analysis - Using Real GAM Models
# ——————————————————————————————————————————————————————
# Input Parameters (TBCHAMP data)
n_control <- 465
n_intervention <- 451
py_control <- 418
py_intervention <- 406
n_simulations <- 5000
set.seed(123)
cat("Enhanced Shortened Treatment Analysis Parameters (TBCHAMP):\n")
cat("Control sample size:", n_control, "\n")
cat("Intervention sample size:", n_intervention, "\n")
cat("Control person-years:", py_control, "\n")
cat("Intervention person-years:", py_intervention, "\n")
cat("Number of simulations:", n_simulations, "\n\n")
# ——————————————————————————————————————————————————————
# 1) Monte Carlo Simulation Function (from your working code)
# ——————————————————————————————————————————————————————
simulate_gam_predictions <- function(gam_model, newdata, n_sim = n_simulations) {
# Get mean predictions and covariance matrix
pred_mean <- predict(gam_model, newdata = newdata, type = "response")
# Get the covariance matrix of the GAM coefficients
Vp <- vcov(gam_model, unconditional = TRUE)
# Get the linear predictor matrix
Xp <- predict(gam_model, newdata = newdata, type = "lpmatrix")
# Simulate from multivariate normal distribution
# Generate random coefficients
coef_sims <- MASS::mvrnorm(n_sim, coef(gam_model), Vp)
# Calculate predictions for each simulation
pred_sims <- tcrossprod(Xp, coef_sims)
return(pred_sims)
}
# ——————————————————————————————————————————————————————
# 2) Bootstrap Shortened Treatment Scenario Builder (from your code)
# ——————————————————————————————————————————————————————
bootstrap_shortened_scenario <- function(sim_idx, control_sims, intervention_sims, shorten_weeks) {
# Extract simulated predictions for this iteration
Fc0_sim <- control_sims[, sim_idx]
Fi0_sim <- intervention_sims[, sim_idx]
# Apply p0 transformation with a beta draw for p0 to consider uncertainty in p0
p0_draw <- rbeta(1, 32 + 0.5, 1120 - 32 + 0.5) # TBCHAMP p0 values
Fc0_sim <- p0_draw + (1 - p0_draw) * Fc0_sim
Fi0_sim <- p0_draw + (1 - p0_draw) * Fi0_sim
# Enforce monotonicity
Fc0_sim <- pmin(pmax(Fc0_sim, 0), 0.999)
Fi0_sim <- pmin(pmax(Fi0_sim, 0), 0.999)
Fc0_sim <- cummax(Fc0_sim); Fi0_sim <- cummax(Fi0_sim)
# Calculate hazards for this simulation
h_ctrl_sim <- diff(c(0, Fc0_sim)) / (1 - head(c(0, Fc0_sim), -1))
h_int_sim <- diff(c(0, Fi0_sim)) / (1 - head(c(0, Fi0_sim), -1))
# Build curve for this shortened treatment scenario
n <- length(time_ext)
Fcum <- numeric(n)
# Find indices
i0 <- which(time_ext == 0)
back_wk <- 26
# Special handling for reference scenarios
if(shorten_weeks == Inf) {
# Pure control - no treatment at all
# Pre-screen ramp
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)
}
Fcum[i0] <- p0_draw
Sprev <- 1 - p0_draw
for(i in (i0+1):n) {
t0 <- time_ext[i]
h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else if(shorten_weeks == -Inf) {
# Full treatment (26 weeks) - equivalent to full TPT
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)
}
Fcum[i0] <- p0_draw
Sprev <- 1 - p0_draw
for(i in (i0+1):n) {
t0 <- time_ext[i]
h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else {
# Shortened treatment scenarios
# Pre-screen ramp
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)
}
# At screen
Fcum[i0] <- p0_draw
Sprev <- 1 - p0_draw
# Post-screen: intervention until shorten_weeks, then control
for(i in (i0+1):n) {
t0 <- time_ext[i]
if(t0 <= shorten_weeks) {
# Use intervention hazard until shortening point
h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
} else {
# After shortening, use control hazard at CURRENT time point
h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
}
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
}
# Return final value at week 26
week_26_idx <- which(time_ext == 26)
return(Fcum[week_26_idx])
}
# ——————————————————————————————————————————————————————
# 3) Run Enhanced Analysis with extended treatment durations
# ——————————————————————————————————————————————————————
cat("Running enhanced Monte Carlo simulations for shortened treatments...\n")
# Set up time vectors
time0 <- 0:26
back_wk <- 26
time_ext <- seq(-back_wk, 26, by = 1)
# Create treatment duration intervals: every 2 weeks from 2 to 24
treatment_durations <- c(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24)
# Generate simulated predictions using your ACTUAL GAM models
cat("Generating GAM simulations using your trained models...\n")
# NOTE: Make sure your GAM models are loaded: control_gam_k5_GCV_TBCHAMP and intervention_gam_k5_GCV_TBCHAMP
control_sims <- simulate_gam_predictions(control_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
intervention_sims <- simulate_gam_predictions(intervention_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
cat("Generated", ncol(control_sims), "control simulations and", ncol(intervention_sims), "intervention simulations\n")
# Define all scenarios including baseline and control
all_scenarios <- c("Control" = Inf,
setNames(treatment_durations, paste0("TPT ", treatment_durations, " wk")),
"Full TPT (26 wk)" = -Inf)
# Run simulations for all scenarios
cat("Running shortened treatment scenario simulations...\n")
simulation_results <- list()
for(scenario_name in names(all_scenarios)) {
cat("Processing scenario:", scenario_name, "\n")
shorten_val <- all_scenarios[scenario_name]
scenario_sims <- sapply(1:n_simulations, function(i) {
bootstrap_shortened_scenario(i, control_sims, intervention_sims, shorten_val)
})
simulation_results[[scenario_name]] <- scenario_sims
}
cat("Completed all simulations!\n\n")
# ——————————————————————————————————————————————————————
# 4) Calculate Excess Cases PER 100,000 using your exact methodology
# ——————————————————————————————————————————————————————
# Get baseline (Full TPT 26 weeks) for relative calculations
baseline_sims <- simulation_results[["Full TPT (26 wk)"]]
baseline_mean <- mean(baseline_sims)
# Calculate metrics for each scenario using your exact approach
results_list <- list()
for(scenario_name in names(simulation_results)) {
scenario_sims <- simulation_results[[scenario_name]]
# Basic statistics
mean_incidence <- mean(scenario_sims)
median_incidence <- median(scenario_sims)
ci_lower <- quantile(scenario_sims, 0.025)
ci_upper <- quantile(scenario_sims, 0.975)
# Determine person-years (Control uses control PY, others use intervention PY)
py <- ifelse(scenario_name == "Control", py_control, py_intervention)
sample_size <- ifelse(scenario_name == "Control", n_control, n_intervention)
# 1. Incidence rate per 100 person-yrs (95% CI)
rate_per_100py <- (mean_incidence * sample_size / py) * 100
rate_ci_lower <- (ci_lower * sample_size / py) * 100
rate_ci_upper <- (ci_upper * sample_size / py) * 100
# 2. Mean Expected Cases (95% CI) - absolute numbers in original cohort
mean_cases <- mean_incidence * sample_size
cases_ci_lower <- ci_lower * sample_size
cases_ci_upper <- ci_upper * sample_size
# 3. Excess calculations (compared to Full TPT 26 weeks)
if(scenario_name != "Full TPT (26 wk)") {
# Excess incidence simulations
excess_sims <- scenario_sims - baseline_sims
excess_mean <- mean(excess_sims)
excess_ci_lower <- quantile(excess_sims, 0.025)
excess_ci_upper <- quantile(excess_sims, 0.975)
# Excess rate per 100 person-yrs
excess_rate_per_100py <- (excess_mean * sample_size / py) * 100
excess_rate_ci_lower <- (excess_ci_lower * sample_size / py) * 100
excess_rate_ci_upper <- (excess_ci_upper * sample_size / py) * 100
# MODIFIED: Excess cases per 100,000 contacts (not absolute numbers)
excess_cases_per_100k <- excess_mean * 100000
excess_cases_per_100k_ci_lower <- excess_ci_lower * 100000
excess_cases_per_100k_ci_upper <- excess_ci_upper * 100000
# Keep absolute numbers for compatibility
excess_cases_mean <- excess_mean * sample_size
excess_cases_ci_lower <- excess_ci_lower * sample_size
excess_cases_ci_upper <- excess_ci_upper * sample_size
} else {
# Full TPT is the baseline, so excess = 0
excess_rate_per_100py <- 0
excess_rate_ci_lower <- 0
excess_rate_ci_upper <- 0
excess_cases_per_100k <- 0
excess_cases_per_100k_ci_lower <- 0
excess_cases_per_100k_ci_upper <- 0
excess_cases_mean <- 0
excess_cases_ci_lower <- 0
excess_cases_ci_upper <- 0
}
# 4. Relative Risk
relative_risk <- mean_incidence / baseline_mean
# Store results
results_list[[scenario_name]] <- list(
scenario = scenario_name,
incidence_rate_per_100py = rate_per_100py,
incidence_rate_ci_lower = rate_ci_lower,
incidence_rate_ci_upper = rate_ci_upper,
excess_rate_per_100py = excess_rate_per_100py,
excess_rate_ci_lower = excess_rate_ci_lower,
excess_rate_ci_upper = excess_rate_ci_upper,
mean_cases = mean_cases,
cases_ci_lower = cases_ci_lower,
cases_ci_upper = cases_ci_upper,
excess_cases_per_100k = excess_cases_per_100k,
excess_cases_per_100k_ci_lower = excess_cases_per_100k_ci_lower,
excess_cases_per_100k_ci_upper = excess_cases_per_100k_ci_upper,
excess_cases_mean = excess_cases_mean,
excess_cases_ci_lower = excess_cases_ci_lower,
excess_cases_ci_upper = excess_cases_ci_upper,
relative_risk = relative_risk
)
}
# ——————————————————————————————————————————————————————
# 5) Prepare Plot Data using your results (MODIFIED for per 100,000)
# ——————————————————————————————————————————————————————
plot_data <- data.frame()
for(scenario_name in names(results_list)) {
if(scenario_name == "Full TPT (26 wk)") next # Skip baseline
r <- results_list[[scenario_name]]
# Extract treatment duration for ordering
if(scenario_name == "Control") {
treatment_weeks <- 0 # No treatment
treatment_label <- "0"
treatment_type <- "No Treatment"
} else {
treatment_weeks <- as.numeric(sub("TPT (\\d+) wk", "\\1", scenario_name))
treatment_label <- as.character(treatment_weeks)
treatment_type <- ifelse(treatment_weeks <= 12, "Short Treatment", "Moderate Treatment")
}
plot_data <- rbind(plot_data, data.frame(
scenario = scenario_name,
treatment_weeks = treatment_weeks,
treatment_label = treatment_label,
excess_cases_per_100k = r$excess_cases_per_100k,
ci_lower = r$excess_cases_per_100k_ci_lower,
ci_upper = r$excess_cases_per_100k_ci_upper,
treatment_type = treatment_type,
stringsAsFactors = FALSE
))
}
# Order by treatment duration
plot_data <- plot_data[order(plot_data$treatment_weeks), ]
# ——————————————————————————————————————————————————————
# 6) Create Enhanced Color-Blind Friendly Bar Plot (UPDATED LABELS)
# ——————————————————————————————————————————————————————
# Set up RColorBrewer Paired palette
paired_colors <- brewer.pal(12, "Paired")
# Assign colors to treatment types
color_mapping <- c(
"No Treatment" = paired_colors[8], # Light red
"Short Treatment" = paired_colors[6], # Light orange
"Moderate Treatment" = paired_colors[2] # Light blue
)
# Create the enhanced bar plot
enhanced_plot <- ggplot(plot_data, aes(x = reorder(treatment_label, treatment_weeks),
y = excess_cases_per_100k,
fill = treatment_type)) +
geom_col(width = 0.8, alpha = 0.8, color = "white", size = 0.3) +
geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper),
width = 0.4, color = "black", size = 0.6, alpha = 0.8) +
# Add value labels on bars with smart positioning
geom_text(aes(label = sprintf("%.0f", excess_cases_per_100k),
y = ifelse(excess_cases_per_100k >= 0,
pmax(ci_upper + 150, excess_cases_per_100k + 150), # Above positive bars
pmin(ci_lower - 150, excess_cases_per_100k - 150))), # Below negative bars
size = 4.2, fontface = "bold", color = "black") +
# Styling
scale_fill_manual(values = color_mapping, name = "Treatment Duration") +
scale_y_continuous(labels = number_format(accuracy = 1),
expand = expansion(mult = c(0.1, 0.15))) +
# Reference line at zero
geom_hline(yintercept = 0, linetype = "solid", color = "black", size = 0.8) +
# UPDATED Labels and theme
labs(
title = "TBCHAMP: Excess TB Cases per 100,000 Contacts by Treatment Duration",
subtitle = paste0("Compared to Full Treatment (26 weeks) - 95% UI from ",
format(n_simulations, big.mark = ","), " Monte Carlo simulations"),
x = "Treatment Duration (weeks)",
y = "Excess TB Cases per 100,000 Contacts (at 26 weeks)",
caption = "0 weeks = no treatment | Negative values indicate fewer cases than full treatment"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
plot.subtitle = element_text(hjust = 0.5, size = 11, color = "black"),
plot.caption = element_text(size = 9, color = "black"),
axis.text.x = element_text(angle = 45, hjust = 1, size = 13, color = "black"),
axis.text.y = element_text(size = 13, color = "black"),
axis.title = element_text(size = 11, face = "bold"),
legend.position = "top",
legend.title = element_text(size = 10, face = "bold"),
legend.text = element_text(size = 9),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "gray80", fill = NA, size = 0.5)
)
# Display the plot
print(enhanced_plot)
# ——————————————————————————————————————————————————————
# 7) Summary Statistics Table (UPDATED for per 100,000)
# ——————————————————————————————————————————————————————
cat("\n=== ENHANCED SHORTENED TREATMENT ANALYSIS SUMMARY (Excess per 100,000 contacts) ===\n")
cat("Treatment durations: Every 2 weeks from 2 to 24 weeks, plus no treatment (0 weeks)\n")
cat("Monte Carlo simulations:", format(n_simulations, big.mark = ","), "\n")
cat("Results standardized to per 100,000 contacts over 26 weeks\n")
cat("Baseline: Full TPT (26 weeks)\n\n")
# Print key statistics
summary_table <- plot_data %>%
select(treatment_label, treatment_type, excess_cases_per_100k, ci_lower, ci_upper) %>%
mutate(
excess_cases_ci = sprintf("%.0f (%.0f to %.0f)", excess_cases_per_100k, ci_lower, ci_upper)
) %>%
select(treatment_label, treatment_type, excess_cases_ci)
colnames(summary_table) <- c("Duration (weeks)", "Type", "Excess Cases per 100,000 (95% CI)")
print(summary_table)
cat("\nNote: All values are excess cases per 100,000 contacts compared to full 26-week treatment\n")
cat("Results calculated using TBCHAMP GAM models and Monte Carlo approach\n")
cat("Units: Excess TB cases per 100,000 contacts over 26 weeks\n")
# ——————————————————————————————————————————————————————
# 8) Key Findings Summary (UPDATED for per 100,000)
# ——————————————————————————————————————————————————————
cat("\n=== KEY FINDINGS (per 100,000 contacts) ===\n")
best_case <- plot_data[which.min(plot_data$excess_cases_per_100k), ]
worst_case <- plot_data[which.max(plot_data$excess_cases_per_100k), ]
cat("Best shortened treatment:", best_case$treatment_label, "weeks ->",
sprintf("%.0f excess cases per 100,000", best_case$excess_cases_per_100k),
sprintf("(95%% CI: %.0f to %.0f)", best_case$ci_lower, best_case$ci_upper), "\n")
cat("Worst case (No treatment):", worst_case$treatment_label, "weeks ->",
sprintf("%.0f excess cases per 100,000", worst_case$excess_cases_per_100k),
sprintf("(95%% CI: %.0f to %.0f)", worst_case$ci_lower, worst_case$ci_upper), "\n")
# Calculate relative risks compared to full treatment
full_tpt_mean <- mean(simulation_results[["Full TPT (26 wk)"]])
cat("\nRelative Risks (compared to Full TPT 26 weeks):\n")
for(scenario in names(simulation_results)) {
if(scenario != "Full TPT (26 wk)") {
rr <- mean(simulation_results[[scenario]]) / full_tpt_mean
cat(sprintf("%s: RR = %.2f\n", scenario, rr))
}
}
Sensitivity analysis
Method 1 – after the delay, the intervention hazard is indexed from time 0 (absolute time) rather than from the delay point; makes a table with 95% UIs for the delay scenarios
# ——————————————————————————————————————————————————————
# SENSITIVITY ANALYSIS: Modified Bootstrap Scenario Function
# Change: After delay, use intervention rate from time 0 (not from delay point)
# Output: Excess cases per 100,000 contacts (not absolute cases)
# ——————————————————————————————————————————————————————
bootstrap_scenario_sensitivity <- function(sim_idx, control_sims, intervention_sims, delay_weeks) {
# Extract simulated predictions for this iteration
Fc0_sim <- control_sims[, sim_idx]
Fi0_sim <- intervention_sims[, sim_idx]
# Apply p0 transformation with a beta draw for p0 to consider uncertainty in p0
p0_draw <- rbeta(1, 32 + 0.5, 1120 - 32 + 0.5) # TBCHAMP p0 values 32 and 1120
Fc0_sim <- p0_draw + (1 - p0_draw) * Fc0_sim
Fi0_sim <- p0_draw + (1 - p0_draw) * Fi0_sim
# Enforce monotonicity (simulated cumulative curves can wiggle, which would make discrete hazards negative)
Fc0_sim <- pmin(pmax(Fc0_sim, 0), 0.999)
Fi0_sim <- pmin(pmax(Fi0_sim, 0), 0.999)
Fc0_sim <- cummax(Fc0_sim); Fi0_sim <- cummax(Fi0_sim)
# Calculate hazards for this simulation
h_ctrl_sim <- diff(c(0, Fc0_sim)) / (1 - head(c(0, Fc0_sim), -1))
h_int_sim <- diff(c(0, Fi0_sim)) / (1 - head(c(0, Fi0_sim), -1))
# Build curve for this delay scenario
n <- length(time_ext)
Fcum <- numeric(n)
# Find indices
i0 <- which(time_ext == 0)
back_wk <- 26
# Special handling for pure scenarios
if(delay_weeks == Inf) {
# Pure control - FIXED: use p0_draw instead of p0
# Pre-screen ramp
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk) # FIXED
}
Fcum[i0] <- p0_draw # FIXED
Sprev <- 1 - p0_draw # FIXED
for(i in (i0+1):n) {
t0 <- time_ext[i]
h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else if(delay_weeks == -Inf) {
# Pure intervention (Instant TPT) - FIXED: use p0_draw instead of p0
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk) # FIXED
}
Fcum[i0] <- p0_draw # FIXED
Sprev <- 1 - p0_draw # FIXED
for(i in (i0+1):n) {
t0 <- time_ext[i]
h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else if(delay_weeks < 0) {
# CORRECTED: Negative delays (pre-screen intervention) - FIXED: use p0_draw instead of p0
# This means intervention starts BEFORE time 0, providing earlier protection
# Find delay point index
id_match <- which(time_ext == delay_weeks)
if(length(id_match) == 0) {
id <- which.min(abs(time_ext - delay_weeks))
} else {
id <- id_match[1]
}
# Before delay point: ramp (no intervention yet)
if(id > 1) {
s_up <- time_ext[1:(id-1)] + back_wk
s_up[s_up < 0] <- 0
Fcum[1:(id-1)] <- p0_draw * sqrt(s_up / back_wk) # FIXED
}
# At delay point: start intervention
s_delay <- max(0, delay_weeks + back_wk)
Fcum[id] <- p0_draw * sqrt(s_delay / back_wk) # FIXED
Sprev <- 1 - Fcum[id]
# CORRECTED: After delay point, use intervention hazards consistently
# Map the post-delay timeline to intervention hazards starting from time 0
for(i in (id+1):n) {
current_time <- time_ext[i]
# Calculate how long intervention has been active
intervention_duration <- current_time - delay_weeks
if(intervention_duration >= 0 && intervention_duration <= 54) {
# Use intervention hazard corresponding to this duration
h_idx <- min(intervention_duration + 1, length(h_int_sim))
h_t <- h_int_sim[max(1, h_idx)]
} else if(intervention_duration > 54) {
# Use last available intervention hazard
h_t <- h_int_sim[length(h_int_sim)]
} else {
# This shouldn't happen with negative delays, but use first intervention hazard as fallback
h_t <- h_int_sim[1]
}
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else {
# ——————————————————————————————————————————————————————
# SENSITIVITY ANALYSIS MODIFICATION HERE:
# Positive delays (post-screen delays) - MODIFIED LOGIC
# ——————————————————————————————————————————————————————
# Pre-screen ramp
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk) # FIXED
}
# At screen
Fcum[i0] <- p0_draw # FIXED
Sprev <- 1 - p0_draw # FIXED
# Post-screen: control until delay point, then intervention
for(i in (i0+1):n) {
t0 <- time_ext[i]
if(t0 <= delay_weeks) {
# Use control hazard until delay point
h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
} else {
# SENSITIVITY ANALYSIS CHANGE:
# Use intervention hazard corresponding to time from baseline (t0)
# NOT from delay point (t0 - delay_weeks)
intervention_time_from_baseline <- t0 # CHANGED: was t0 - delay_weeks
h_idx <- min(intervention_time_from_baseline + 1, length(h_int_sim))
h_t <- h_int_sim[max(1, h_idx)]
}
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
}
# Return final value at week 26
week_26_idx <- which(time_ext == 26)
return(Fcum[week_26_idx])
}
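# Worked contrast with the main analysis (e.g. delay_weeks = 4, t0 = 10):
# the main analysis indexes the intervention hazard by time since the delay,
#   h_int_sim[(10 - 4) + 1] = h_int_sim[7];
# this sensitivity indexes it by absolute time from baseline,
#   h_int_sim[10 + 1] = h_int_sim[11].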
# ——————————————————————————————————————————————————————
# Run Sensitivity Analysis with Modified Logic
# ——————————————————————————————————————————————————————
cat("Running SENSITIVITY ANALYSIS with modified intervention timing logic...\n")
cat("Change: After delay, use intervention rate from time 0 (not from delay point)\n")
cat("Output: Excess cases per 100,000 contacts\n\n")
# Set up time vectors (same as before)
time0 <- 0:26
back_wk <- 26
time_ext <- seq(-back_wk, 26, by = 1)
# Create 2-week interval delays from -26 to +24 (same as before)
delays_2wk <- seq(-26, 24, by = 2)
# Generate simulated predictions using your ACTUAL GAM models (same as before)
cat("Using existing GAM simulations...\n")
# NOTE: Assumes control_sims and intervention_sims are already generated
# Define all scenarios including baseline (same as before)
all_delays <- c("Instant TPT" = -Inf, setNames(delays_2wk, paste0(delays_2wk, " wk")), "Control" = Inf)
# Run simulations for all scenarios with MODIFIED FUNCTION
cat("Running SENSITIVITY scenario simulations...\n")
sensitivity_results <- list()
for(scenario_name in names(all_delays)) {
cat("Processing sensitivity scenario:", scenario_name, "\n")
delay_val <- all_delays[scenario_name]
scenario_sims <- sapply(1:n_simulations, function(i) {
bootstrap_scenario_sensitivity(i, control_sims, intervention_sims, delay_val) # MODIFIED FUNCTION
})
sensitivity_results[[scenario_name]] <- scenario_sims
}
cat("Completed SENSITIVITY simulations!\n\n")
# ——————————————————————————————————————————————————————
# Calculate Metrics for Sensitivity Analysis (PER 100,000)
# ——————————————————————————————————————————————————————
# Get baseline (Instant TPT) for relative calculations
sensitivity_baseline_sims <- sensitivity_results[["Instant TPT"]]
sensitivity_baseline_mean <- mean(sensitivity_baseline_sims)
# Calculate metrics for each scenario using per 100,000 conversion
sensitivity_results_list <- list()
for(scenario_name in names(sensitivity_results)) {
scenario_sims <- sensitivity_results[[scenario_name]]
# Basic statistics
mean_incidence <- mean(scenario_sims)
median_incidence <- median(scenario_sims)
ci_lower <- quantile(scenario_sims, 0.025)
ci_upper <- quantile(scenario_sims, 0.975)
# Determine person-years (Control uses control PY, others use intervention PY)
py <- ifelse(scenario_name == "Control", py_control, py_intervention)
sample_size <- ifelse(scenario_name == "Control", n_control, n_intervention)
# 1. Incidence rate per 100 person-yrs (95% CI)
rate_per_100py <- (mean_incidence * sample_size / py) * 100
rate_ci_lower <- (ci_lower * sample_size / py) * 100
rate_ci_upper <- (ci_upper * sample_size / py) * 100
# 2. Mean Expected Cases per 100,000 contacts (95% CI) - MODIFIED
mean_cases_per_100k <- mean_incidence * 100000 # CHANGED: was * sample_size
cases_ci_lower_per_100k <- ci_lower * 100000 # CHANGED: was * sample_size
cases_ci_upper_per_100k <- ci_upper * 100000 # CHANGED: was * sample_size
# 3. Excess calculations (compared to Instant TPT) - MODIFIED FOR PER 100,000
if(scenario_name != "Instant TPT") {
# Excess incidence simulations
excess_sims <- scenario_sims - sensitivity_baseline_sims
excess_mean <- mean(excess_sims)
excess_ci_lower <- quantile(excess_sims, 0.025)
excess_ci_upper <- quantile(excess_sims, 0.975)
# Excess rate per 100 person-yrs
excess_rate_per_100py <- (excess_mean * sample_size / py) * 100
excess_rate_ci_lower <- (excess_ci_lower * sample_size / py) * 100
excess_rate_ci_upper <- (excess_ci_upper * sample_size / py) * 100
# Excess cases per 100,000 contacts - MODIFIED
excess_cases_per_100k <- excess_mean * 100000 # CHANGED: was * sample_size
excess_cases_ci_lower_per_100k <- excess_ci_lower * 100000 # CHANGED: was * sample_size
excess_cases_ci_upper_per_100k <- excess_ci_upper * 100000 # CHANGED: was * sample_size
} else {
# Instant TPT is the baseline, so excess = 0
excess_rate_per_100py <- 0
excess_rate_ci_lower <- 0
excess_rate_ci_upper <- 0
excess_cases_per_100k <- 0
excess_cases_ci_lower_per_100k <- 0
excess_cases_ci_upper_per_100k <- 0
}
# 4. Relative Risk
relative_risk <- mean_incidence / sensitivity_baseline_mean
# Store results - UPDATED VARIABLE NAMES
sensitivity_results_list[[scenario_name]] <- list(
scenario = scenario_name,
incidence_rate_per_100py = rate_per_100py,
incidence_rate_ci_lower = rate_ci_lower,
incidence_rate_ci_upper = rate_ci_upper,
excess_rate_per_100py = excess_rate_per_100py,
excess_rate_ci_lower = excess_rate_ci_lower,
excess_rate_ci_upper = excess_rate_ci_upper,
mean_cases_per_100k = mean_cases_per_100k, # UPDATED
cases_ci_lower_per_100k = cases_ci_lower_per_100k, # UPDATED
cases_ci_upper_per_100k = cases_ci_upper_per_100k, # UPDATED
excess_cases_per_100k = excess_cases_per_100k, # UPDATED
excess_cases_ci_lower_per_100k = excess_cases_ci_lower_per_100k, # UPDATED
excess_cases_ci_upper_per_100k = excess_cases_ci_upper_per_100k, # UPDATED
relative_risk = relative_risk
)
}
# ——————————————————————————————————————————————————————
# Prepare Sensitivity Analysis Plot Data (PER 100,000)
# ——————————————————————————————————————————————————————
sensitivity_plot_data <- data.frame()
for(scenario_name in names(sensitivity_results_list)) {
if(scenario_name == "Instant TPT") next # Skip baseline
r <- sensitivity_results_list[[scenario_name]]
# Extract delay value for ordering
if(scenario_name == "Control") {
delay_weeks <- 999 # Put at the end
delay_label <- "No Tx"
delay_type <- "No Treatment"
} else {
delay_weeks <- as.numeric(sub(" wk", "", scenario_name))
delay_label <- as.character(delay_weeks)
delay_type <- ifelse(delay_weeks < 0, "Pre-screen", "Post-screen")
}
sensitivity_plot_data <- rbind(sensitivity_plot_data, data.frame(
scenario = scenario_name,
delay_weeks = delay_weeks,
delay_label = delay_label,
excess_cases_per_100k = r$excess_cases_per_100k, # UPDATED
ci_lower_per_100k = r$excess_cases_ci_lower_per_100k, # UPDATED
ci_upper_per_100k = r$excess_cases_ci_upper_per_100k, # UPDATED
delay_type = delay_type,
stringsAsFactors = FALSE
))
}
# Order by delay weeks
sensitivity_plot_data <- sensitivity_plot_data[order(sensitivity_plot_data$delay_weeks), ]
# ——————————————————————————————————————————————————————
# Create Sensitivity Analysis Plot (PER 100,000)
# ——————————————————————————————————————————————————————
# Set up RColorBrewer Paired palette
paired_colors <- brewer.pal(12, "Paired")
# Assign colors to delay types
color_mapping <- c(
"Pre-screen" = paired_colors[2], # Light blue
"Post-screen" = paired_colors[6], # Light orange
"No Treatment" = paired_colors[8] # Light red
)
# Create the sensitivity analysis bar plot - UPDATED LABELS
sensitivity_plot <- ggplot(sensitivity_plot_data, aes(x = reorder(delay_label, delay_weeks),
y = excess_cases_per_100k, # UPDATED
fill = delay_type)) +
geom_col(width = 0.8, alpha = 0.8, color = "white", size = 0.3) +
geom_errorbar(aes(ymin = ci_lower_per_100k, ymax = ci_upper_per_100k), # UPDATED
width = 0.4, color = "black", size = 0.6, alpha = 0.8) +
# Add value labels on bars with smart positioning - UPDATED
geom_text(aes(label = sprintf("%.0f", excess_cases_per_100k), # UPDATED - rounded to nearest whole
y = ifelse(excess_cases_per_100k >= 0,
pmax(ci_upper_per_100k + 50, excess_cases_per_100k + 50), # UPDATED
pmin(ci_lower_per_100k - 50, excess_cases_per_100k - 50))), # UPDATED
size = 3.2, fontface = "bold", color = "black") +
# Styling
scale_fill_manual(values = color_mapping, name = "Treatment Timing") +
scale_y_continuous(labels = number_format(accuracy = 1), # UPDATED - whole numbers
expand = expansion(mult = c(0.1, 0.15))) +
# Reference line at zero
geom_hline(yintercept = 0, linetype = "solid", color = "black", size = 0.8) +
# Labels and theme - UPDATED
labs(
title = "SENSITIVITY ANALYSIS: TBCHAMP Excess TB Cases by Treatment Delay",
subtitle = paste0("After delay, intervention follows rate from time 0 (not delay point) | 95% CI from ",
format(n_simulations, big.mark = ","), " Monte Carlo simulations"),
x = "Treatment Delay (weeks)",
y = "Excess TB Cases per 100,000 Contacts", # UPDATED
caption = "Negative delays = pre-screen treatment | Compared to instant TPT baseline" # UPDATED
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
plot.subtitle = element_text(hjust = 0.5, size = 11, color = "gray40"),
plot.caption = element_text(size = 9, color = "gray50"),
axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
axis.text.y = element_text(size = 10),
axis.title = element_text(size = 11, face = "bold"),
legend.position = "top",
legend.title = element_text(size = 10, face = "bold"),
legend.text = element_text(size = 9),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "gray80", fill = NA, size = 0.5)
)
# Display the sensitivity plot
print(sensitivity_plot)
# ——————————————————————————————————————————————————————
# Summary Statistics Table for Sensitivity Analysis (PER 100,000)
# ——————————————————————————————————————————————————————
cat("\n=== SENSITIVITY ANALYSIS SUMMARY ===\n")
cat("Modified logic: After delay, use intervention rate from time 0 (not from delay point)\n")
cat("Delay intervals: Every 2 weeks from -26 to +26 weeks\n")
cat("Monte Carlo simulations:", format(n_simulations, big.mark = ","), "\n")
cat("Output: Excess cases per 100,000 contacts\n\n")
# Print key statistics - UPDATED
sensitivity_summary_table <- sensitivity_plot_data %>%
select(delay_label, delay_type, excess_cases_per_100k, ci_lower_per_100k, ci_upper_per_100k) %>% # UPDATED
mutate(
excess_cases_ci = sprintf("%.0f (%.0f to %.0f)", excess_cases_per_100k, ci_lower_per_100k, ci_upper_per_100k) # UPDATED - rounded
) %>%
select(delay_label, delay_type, excess_cases_ci)
colnames(sensitivity_summary_table) <- c("Delay (weeks)", "Type", "Excess Cases per 100,000 (95% CI)") # UPDATED
print(sensitivity_summary_table)
# ——————————————————————————————————————————————————————
# Generate Range Comparison for Publication Table
# ——————————————————————————————————————————————————————
# Calculate range for sensitivity analysis
sensitivity_range_min <- min(sensitivity_plot_data$excess_cases_per_100k)
sensitivity_range_max <- max(sensitivity_plot_data$excess_cases_per_100k)
cat(sprintf("\n=== RANGE COMPARISON FOR PUBLICATION ===\n"))
cat(sprintf("Sensitivity analysis range: %.0f to %.0f cases per 100,000\n", sensitivity_range_min, sensitivity_range_max))
cat("Format for comparison to main results:\n")
cat(sprintf("\"Scenario range: %.0f-%.0f vs [MAIN RESULTS RANGE] cases per 100,000\"\n",
sensitivity_range_min, sensitivity_range_max))
cat("\nNote: This sensitivity analysis tests the assumption about intervention timing\n")
cat("Original: After delay, follow intervention rate from delay point\n")
cat("Sensitivity: After delay, follow intervention rate from time 0\n")Sensitivity analysis method 1 for shortened treatments
library(mgcv)
library(ggplot2)
library(dplyr)
library(parallel)
# ——————————————————————————————————————————————————————
# 0) Input Parameters - TBCHAMP Data
# ——————————————————————————————————————————————————————
# Sample sizes (TBCHAMP)
n_control <- 465 # TBCHAMP control sample size
n_intervention <- 451 # TBCHAMP intervention sample size
# Person-years of follow-up (TBCHAMP)
py_control <- 418 # TBCHAMP control person-years
py_intervention <- 406 # TBCHAMP intervention person-years
# Monte Carlo parameters
n_simulations <- 5000 # Number of bootstrap simulations
set.seed(123) # For reproducibility
cat("Shortened Treatment Analysis Parameters (TBCHAMP):\n")
cat("Control sample size:", n_control, "\n")
cat("Intervention sample size:", n_intervention, "\n")
cat("Control person-years:", py_control, "\n")
cat("Intervention person-years:", py_intervention, "\n")
cat("Number of simulations:", n_simulations, "\n\n")
# ——————————————————————————————————————————————————————
# 1) Monte Carlo Simulation Function
# ——————————————————————————————————————————————————————
simulate_gam_predictions <- function(gam_model, newdata, n_sim = n_simulations) {
# Get mean predictions and covariance matrix
pred_mean <- predict(gam_model, newdata = newdata, type = "response")
# Get the covariance matrix of the GAM coefficients
Vp <- vcov(gam_model, unconditional = TRUE)
# Get the linear predictor matrix
Xp <- predict(gam_model, newdata = newdata, type = "lpmatrix")
# Simulate from multivariate normal distribution
# Generate random coefficients
coef_sims <- MASS::mvrnorm(n_sim, coef(gam_model), Vp)
# Calculate predictions for each simulation
pred_sims <- tcrossprod(Xp, coef_sims)
return(pred_sims)
}
# ——————————————————————————————————————————————————————
# 2) Bootstrap Shortened Treatment Scenario Builder
# ——————————————————————————————————————————————————————
bootstrap_shortened_scenario <- function(sim_idx, control_sims, intervention_sims, shorten_weeks) {
# Extract simulated predictions for this iteration
Fc0_sim <- control_sims[, sim_idx]
Fi0_sim <- intervention_sims[, sim_idx]
# Apply p0 transformation with a beta draw for p0 to consider uncertainty in p0
p0_draw <- rbeta(1, 32 + 0.5, 1120 - 32 + 0.5) # TBCHAMP p0 values
Fc0_sim <- p0_draw + (1 - p0_draw) * Fc0_sim
Fi0_sim <- p0_draw + (1 - p0_draw) * Fi0_sim
# Enforce monotonicity (Simulated cumulative curves can wiggle; discrete hazards can become negative - preventing this)
Fc0_sim <- pmin(pmax(Fc0_sim, 0), 0.999)
Fi0_sim <- pmin(pmax(Fi0_sim, 0), 0.999)
Fc0_sim <- cummax(Fc0_sim); Fi0_sim <- cummax(Fi0_sim)
# Calculate hazards for this simulation
h_ctrl_sim <- diff(c(0, Fc0_sim)) / (1 - head(c(0, Fc0_sim), -1))
h_int_sim <- diff(c(0, Fi0_sim)) / (1 - head(c(0, Fi0_sim), -1))
# Build curve for this shortened treatment scenario
n <- length(time_ext)
Fcum <- numeric(n)
# Find indices
i0 <- which(time_ext == 0)
back_wk <- 26
# Special handling for reference scenarios
if(shorten_weeks == Inf) {
# Pure control - no treatment at all
# Pre-screen ramp
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)
}
Fcum[i0] <- p0_draw
Sprev <- 1 - p0_draw
for(i in (i0+1):n) {
t0 <- time_ext[i]
h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else if(shorten_weeks == -Inf) {
# Full treatment (26 weeks) - equivalent to instant TPT
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)
}
Fcum[i0] <- p0_draw
Sprev <- 1 - p0_draw
for(i in (i0+1):n) {
t0 <- time_ext[i]
h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else {
# Shortened treatment scenarios
# Pre-screen ramp
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)
}
# At screen
Fcum[i0] <- p0_draw
Sprev <- 1 - p0_draw
# Post-screen: intervention until shorten_weeks, then control
for(i in (i0+1):n) {
t0 <- time_ext[i]
if(t0 <= shorten_weeks) {
# Use intervention hazard until shortening point
h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
} else {
# After shortening: use control hazard
# Map time to control hazard based on time since shortening
time_since_shorten <- t0 - shorten_weeks
h_idx <- min(time_since_shorten + 1, length(h_ctrl_sim))
h_t <- h_ctrl_sim[max(1, h_idx)]
}
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
}
# Return final value at week 26
week_26_idx <- which(time_ext == 26)
return(Fcum[week_26_idx])
}
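# Worked contrast with the main analysis (e.g. shorten_weeks = 6, t0 = 10):
# the main analysis uses the control hazard at the current absolute time,
#   h_ctrl_sim[10 + 1] = h_ctrl_sim[11];
# this sensitivity re-indexes it by time since shortening,
#   h_ctrl_sim[(10 - 6) + 1] = h_ctrl_sim[5].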
# ——————————————————————————————————————————————————————
# 3) Run Monte Carlo Simulations
# ——————————————————————————————————————————————————————
cat("Running Monte Carlo simulations for shortened treatments...\n")
# Set up time vectors
time0 <- 0:26
back_wk <- 26
time_ext <- seq(-back_wk, 26, by = 1)
shortened_weeks <- seq(2, 24, by = 2) # Treatment every 2 weeks from 2-24 weeks
# Generate simulated predictions for both models
cat("Generating GAM simulations...\n")
control_sims <- simulate_gam_predictions(control_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
intervention_sims <- simulate_gam_predictions(intervention_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
cat("Generated", ncol(control_sims), "control simulations and", ncol(intervention_sims), "intervention simulations\n")
# Define all scenarios
all_scenarios <- c("Control" = Inf, "Full TPT (26 wk)" = -Inf, setNames(shortened_weeks, paste0("TPT ", shortened_weeks, " wk")))
# Run simulations for all scenarios
cat("Running shortened treatment scenario simulations...\n")
simulation_results <- list()
for(scenario_name in names(all_scenarios)) {
cat("Processing scenario:", scenario_name, "\n")
shorten_val <- all_scenarios[scenario_name]
# Run simulations for this scenario
scenario_sims <- sapply(1:n_simulations, function(i) {
bootstrap_shortened_scenario(i, control_sims, intervention_sims, shorten_val)
})
simulation_results[[scenario_name]] <- scenario_sims
}
cat("Completed all simulations!\n\n")
# ——————————————————————————————————————————————————————
# 4) Calculate Summary Statistics
# ——————————————————————————————————————————————————————
summary_stats <- data.frame(
Scenario = names(simulation_results),
Mean_Incidence = sapply(simulation_results, mean),
Median_Incidence = sapply(simulation_results, median),
SD_Incidence = sapply(simulation_results, sd),
CI_Lower = sapply(simulation_results, function(x) quantile(x, 0.025)),
CI_Upper = sapply(simulation_results, function(x) quantile(x, 0.975)),
stringsAsFactors = FALSE
)
cat("Summary Statistics (Final Incidence at Week 26):\n")
print(summary_stats)
cat("\n")
# ——————————————————————————————————————————————————————
# 5) Calculate Excess Incidence per 100,000 Contacts
# ——————————————————————————————————————————————————————
# Get baseline simulations (Full TPT 26 weeks)
baseline_sims <- simulation_results[["Full TPT (26 wk)"]]
# Calculate excess for each scenario
excess_stats <- summary_stats %>%
filter(Scenario != "Full TPT (26 wk)") %>%
mutate(
# Point estimates
Excess_Mean = Mean_Incidence - mean(baseline_sims),
Excess_Median = Median_Incidence - median(baseline_sims),
# Extract treatment duration information
Treatment_Weeks = case_when(
Scenario == "Control" ~ "No Treatment",
grepl("TPT", Scenario) ~ as.character(as.numeric(sub("TPT (\\d+) wk", "\\1", Scenario))),
TRUE ~ NA_character_
),
Order = case_when(
Scenario == "Control" ~ 999,
TRUE ~ as.numeric(sub("TPT (\\d+) wk", "\\1", Scenario))
)
) %>%
arrange(Order)
# Calculate bootstrap confidence intervals for excess
for(i in 1:nrow(excess_stats)) {
scenario_name <- excess_stats$Scenario[i]
scenario_sims <- simulation_results[[scenario_name]]
# Calculate excess for each simulation
excess_sims <- scenario_sims - baseline_sims
# Get percentiles
excess_stats$Excess_CI_Lower[i] <- quantile(excess_sims, 0.025)
excess_stats$Excess_CI_Upper[i] <- quantile(excess_sims, 0.975)
}
# Convert to per 100,000 contacts
excess_stats <- excess_stats %>%
mutate(
Excess_per_100k = Excess_Mean * 100000,
Excess_per_100k_CI_Lower = Excess_CI_Lower * 100000,
Excess_per_100k_CI_Upper = Excess_CI_Upper * 100000
)
# ——————————————————————————————————————————————————————
# 6) Final Results Output
# ——————————————————————————————————————————————————————
cat("=== SHORTENED TREATMENT ANALYSIS RESULTS ===\n")
cat("Excess TB Cases per 100,000 Contacts at 26 weeks (compared to Full TPT):\n\n")
# Create final results table
final_results <- excess_stats %>%
select(Treatment_Weeks, Excess_per_100k, Excess_per_100k_CI_Lower, Excess_per_100k_CI_Upper) %>%
arrange(as.numeric(ifelse(Treatment_Weeks == "No Treatment", 999, Treatment_Weeks)))
# Print results in a formatted table
cat(sprintf("%-15s %-15s %-20s\n", "Treatment", "Excess Cases", "95% Confidence Interval"))
cat(sprintf("%-15s %-15s %-20s\n", "(weeks)", "per 100,000", "(per 100,000)"))
cat(paste(rep("-", 50), collapse = ""), "\n")
for(i in 1:nrow(final_results)) {
row <- final_results[i, ]
cat(sprintf("%-15s %-15.0f (%-6.0f to %-6.0f)\n",
row$Treatment_Weeks,
row$Excess_per_100k,
row$Excess_per_100k_CI_Lower,
row$Excess_per_100k_CI_Upper))
}
cat("\n")
cat("Method: Monte Carlo Bootstrap with", n_simulations, "simulations\n")
cat("Baseline: Full TPT (26 weeks)\n")
cat("Sample size basis:", n_intervention, "intervention participants\n")
# ——————————————————————————————————————————————————————
# 7) Summary Statistics
# ——————————————————————————————————————————————————————
cat("\n=== KEY FINDINGS ===\n")
# Find minimum excess (best shortened treatment)
best_shortened <- final_results %>%
filter(Treatment_Weeks != "No Treatment") %>%
slice_min(Excess_per_100k, n = 1)
# Find maximum excess among shortened treatments
worst_shortened <- final_results %>%
filter(Treatment_Weeks != "No Treatment") %>%
slice_max(Excess_per_100k, n = 1)
# Control scenario
control_result <- final_results %>%
filter(Treatment_Weeks == "No Treatment")
cat(sprintf("Best shortened treatment: %s weeks -> %.0f excess cases per 100,000 (95%% CI: %.0f to %.0f)\n",
best_shortened$Treatment_Weeks,
best_shortened$Excess_per_100k,
best_shortened$Excess_per_100k_CI_Lower,
best_shortened$Excess_per_100k_CI_Upper))
cat(sprintf("Worst shortened treatment: %s weeks -> %.0f excess cases per 100,000 (95%% CI: %.0f to %.0f)\n",
worst_shortened$Treatment_Weeks,
worst_shortened$Excess_per_100k,
worst_shortened$Excess_per_100k_CI_Lower,
worst_shortened$Excess_per_100k_CI_Upper))
cat(sprintf("No treatment (Control): %.0f excess cases per 100,000 (95%% CI: %.0f to %.0f)\n",
control_result$Excess_per_100k,
control_result$Excess_per_100k_CI_Lower,
control_result$Excess_per_100k_CI_Upper))
# Keep the final results object available for further use (return() is only valid inside a function)
final_results
Getting the control value for a sense check against the 26-week delay scenario - confirms model consistency if the two are equal
library(mgcv)
library(ggplot2)
library(dplyr)
library(parallel)
# ——————————————————————————————————————————————————————
# 0) Input Parameters - MODIFY THESE WITH YOUR ACTUAL DATA
# ——————————————————————————————————————————————————————
# Sample sizes
n_control <- 465 # REPLACE with your actual control sample size
n_intervention <- 451 # REPLACE with your actual intervention sample size
# Person-years of follow-up
py_control <- 418 # REPLACE with your actual control person-years
py_intervention <- 406 # REPLACE with your actual intervention person-years
# Monte Carlo parameters
n_simulations <- 5000 # Number of bootstrap simulations
set.seed(123) # For reproducibility
cat("Analysis Parameters:\n")
cat("Control sample size:", n_control, "\n")
cat("Intervention sample size:", n_intervention, "\n")
cat("Control person-years:", py_control, "\n")
cat("Intervention person-years:", py_intervention, "\n")
cat("Number of simulations:", n_simulations, "\n\n")
# ——————————————————————————————————————————————————————
# 1) Monte Carlo Simulation Function
# ——————————————————————————————————————————————————————
simulate_gam_predictions <- function(gam_model, newdata, n_sim = n_simulations) {
# Get mean predictions and covariance matrix
pred_mean <- predict(gam_model, newdata = newdata, type = "response")
# Get the covariance matrix of the GAM coefficients
Vp <- vcov(gam_model, unconditional = TRUE)
# Get the linear predictor matrix
Xp <- predict(gam_model, newdata = newdata, type = "lpmatrix")
# Simulate from multivariate normal distribution
# Generate random coefficients
coef_sims <- MASS::mvrnorm(n_sim, coef(gam_model), Vp)
# Calculate predictions for each simulation
pred_sims <- tcrossprod(Xp, coef_sims)
return(pred_sims)
}
# ——————————————————————————————————————————————————————
# 2) Bootstrap Scenario Builder (FIXED p0 inconsistency)
# ——————————————————————————————————————————————————————
bootstrap_scenario <- function(sim_idx, control_sims, intervention_sims, delay_weeks) {
# Extract simulated predictions for this iteration
Fc0_sim <- control_sims[, sim_idx]
Fi0_sim <- intervention_sims[, sim_idx]
# Apply p0 transformation with a beta draw for p0 to consider uncertainty in p0
p0_draw <- rbeta(1, 32 + 0.5, 1120 - 32 + 0.5) # TBCHAMP p0 values 32 and 1120
Fc0_sim <- p0_draw + (1 - p0_draw) * Fc0_sim
Fi0_sim <- p0_draw + (1 - p0_draw) * Fi0_sim
# Enforce monotonicity (simulated cumulative curves can wiggle, which would make discrete hazards negative)
Fc0_sim <- pmin(pmax(Fc0_sim, 0), 0.999)
Fi0_sim <- pmin(pmax(Fi0_sim, 0), 0.999)
Fc0_sim <- cummax(Fc0_sim); Fi0_sim <- cummax(Fi0_sim)
# Calculate discrete hazards for this simulation: h(t) = [F(t) - F(t-1)] / [1 - F(t-1)]
h_ctrl_sim <- diff(c(0, Fc0_sim)) / (1 - head(c(0, Fc0_sim), -1))
h_int_sim <- diff(c(0, Fi0_sim)) / (1 - head(c(0, Fi0_sim), -1))
# Build curve for this delay scenario
n <- length(time_ext)
Fcum <- numeric(n)
# Find indices
i0 <- which(time_ext == 0)
back_wk <- 26
# Special handling for pure scenarios
if(delay_weeks == Inf) {
# Pure control - FIXED: use p0_draw instead of p0
# Pre-screen ramp
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk) # FIXED
}
Fcum[i0] <- p0_draw # FIXED
Sprev <- 1 - p0_draw # FIXED
for(i in (i0+1):n) {
t0 <- time_ext[i]
h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else if(delay_weeks == -Inf) {
# Pure intervention (Instant TPT) - FIXED: use p0_draw instead of p0
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk) # FIXED
}
Fcum[i0] <- p0_draw # FIXED
Sprev <- 1 - p0_draw # FIXED
for(i in (i0+1):n) {
t0 <- time_ext[i]
h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else if(delay_weeks < 0) {
# CORRECTED: Negative delays (pre-screen intervention) - FIXED: use p0_draw instead of p0
# This means intervention starts BEFORE time 0, providing earlier protection
# Find delay point index
id_match <- which(time_ext == delay_weeks)
if(length(id_match) == 0) {
id <- which.min(abs(time_ext - delay_weeks))
} else {
id <- id_match[1]
}
# Before delay point: ramp (no intervention yet)
if(id > 1) {
s_up <- time_ext[1:(id-1)] + back_wk
s_up[s_up < 0] <- 0
Fcum[1:(id-1)] <- p0_draw * sqrt(s_up / back_wk) # FIXED
}
# At delay point: start intervention
s_delay <- max(0, delay_weeks + back_wk)
Fcum[id] <- p0_draw * sqrt(s_delay / back_wk) # FIXED
Sprev <- 1 - Fcum[id]
# CORRECTED: After delay point, use intervention hazards consistently
# Map the post-delay timeline to intervention hazards starting from time 0
for(i in (id+1):n) {
current_time <- time_ext[i]
# Calculate how long intervention has been active
intervention_duration <- current_time - delay_weeks
if(intervention_duration >= 0 && intervention_duration <= 54) {
# Use intervention hazard corresponding to this duration
h_idx <- min(intervention_duration + 1, length(h_int_sim))
h_t <- h_int_sim[max(1, h_idx)]
} else if(intervention_duration > 54) {
# Use last available intervention hazard
h_t <- h_int_sim[length(h_int_sim)]
} else {
# This shouldn't happen with negative delays, but use first intervention hazard as fallback
h_t <- h_int_sim[1]
}
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else {
# Positive delays (post-screen delays) - FIXED: use p0_draw instead of p0
# Pre-screen ramp
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk) # FIXED
}
# At screen
Fcum[i0] <- p0_draw # FIXED
Sprev <- 1 - p0_draw # FIXED
# Post-screen: control until delay point, then intervention
for(i in (i0+1):n) {
t0 <- time_ext[i]
if(t0 <= delay_weeks) {
# Use control hazard until delay point
h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
} else {
# Use intervention hazard after delay point
# Map to intervention timeline: intervention has been active for (t0 - delay_weeks) weeks
intervention_duration <- t0 - delay_weeks
h_idx <- min(intervention_duration + 1, length(h_int_sim))
h_t <- h_int_sim[max(1, h_idx)]
}
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
}
# Return final value at week 26
week_26_idx <- which(time_ext == 26)
return(Fcum[week_26_idx])
}
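# (Worked illustration with made-up numbers, not trial data: the discrete
# hazard h(t) = [F(t) - F(t-1)] / [1 - F(t-1)] and the survival recursion
# S(t) = S(t-1) * (1 - h(t)) used inside bootstrap_scenario() are exact
# inverses, so rebuilding F from h recovers the input curve.)
F_demo <- c(0.01, 0.03, 0.05)
h_demo <- diff(c(0, F_demo)) / (1 - head(c(0, F_demo), -1))
stopifnot(all.equal(1 - cumprod(1 - h_demo), F_demo))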
# ——————————————————————————————————————————————————————
# 3) Run Monte Carlo Simulations - Every 2 Week Delays
# ——————————————————————————————————————————————————————
cat("Running Monte Carlo simulations for 2-week delay intervals...\n")
# Reset seed right before simulations for exact reproducibility
set.seed(42)
# Set up time vectors
time0 <- 0:26
back_wk <- 26
time_ext <- seq(-back_wk, 26, by = 1)
# Create 2-week delay intervals from -26 to +26 weeks
delays_2wk <- seq(-26, 26, by = 2)
# Generate simulated predictions for both models
cat("Generating GAM simulations...\n")
control_sims <- simulate_gam_predictions(control_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
intervention_sims <- simulate_gam_predictions(intervention_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
cat("Generated", ncol(control_sims), "control simulations and", ncol(intervention_sims), "intervention simulations\n")
# Define all delay scenarios (including baseline at week 0 = no delay)
all_delays <- c(setNames(delays_2wk, paste0(delays_2wk, " wk")),
"Control" = Inf) # Add pure control scenario
# Run simulations for all scenarios
cat("Running scenario simulations...\n")
simulation_results <- list()
for(scenario_name in names(all_delays)) {
cat("Processing scenario:", scenario_name, "\n")
delay_val <- all_delays[scenario_name]
# Run simulations for this scenario
scenario_sims <- sapply(1:n_simulations, function(i) {
bootstrap_scenario(i, control_sims, intervention_sims, delay_val)
})
simulation_results[[scenario_name]] <- scenario_sims
}
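# (Optional, a hedged sketch: library(parallel) is attached above but not
# otherwise used; on Unix-alikes the inner sapply could be swapped for a
# forked mclapply. Reproducibility under forking needs the L'Ecuyer RNG.)
# RNGkind("L'Ecuyer-CMRG"); set.seed(42)
# scenario_sims <- unlist(parallel::mclapply(
#   1:n_simulations,
#   function(i) bootstrap_scenario(i, control_sims, intervention_sims, delay_val),
#   mc.cores = max(1, parallel::detectCores() - 1)))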
cat("Completed all simulations!\n\n")
# Check if "0 wk" exists and compare a few values
print(simulation_results[["0 wk"]][1:5]) # First 5 simulation results
print(mean(simulation_results[["0 wk"]])) # Mean for "0 wk" scenario
# ——————————————————————————————————————————————————————
# 4) Calculate Key Metrics with Week 0 as Baseline
# ——————————————————————————————————————————————————————
# Get baseline (Week 0 = no delay) for relative calculations
baseline_sims <- simulation_results[["0 wk"]]
baseline_mean <- mean(baseline_sims)
# Calculate metrics for each scenario
results_list <- list()
for(scenario_name in names(simulation_results)) {
scenario_sims <- simulation_results[[scenario_name]]
# Handle special case for Control scenario
if(scenario_name == "Control") {
delay_value <- Inf # Or 999 for sorting purposes
} else {
delay_value <- as.numeric(sub(" wk", "", scenario_name))
}
# Basic statistics
mean_incidence <- mean(scenario_sims)
median_incidence <- median(scenario_sims)
ci_lower <- quantile(scenario_sims, 0.025)
ci_upper <- quantile(scenario_sims, 0.975)
# Use intervention PY and sample size for all scenarios
py <- py_intervention
sample_size <- n_intervention
# 1. Incidence rate per 100 person-yrs (95% CI)
rate_per_100py <- (mean_incidence * sample_size / py) * 100
rate_ci_lower <- (ci_lower * sample_size / py) * 100
rate_ci_upper <- (ci_upper * sample_size / py) * 100
# 2. Mean Expected Cases per 100,000 (95% CI)
cases_per_100k <- mean_incidence * 100000
cases_per_100k_ci_lower <- ci_lower * 100000
cases_per_100k_ci_upper <- ci_upper * 100000
# 3. Excess calculations (compared to Week 0)
if(delay_value != 0) {
# Excess incidence simulations
excess_sims <- scenario_sims - baseline_sims
excess_mean <- mean(excess_sims)
excess_ci_lower <- quantile(excess_sims, 0.025)
excess_ci_upper <- quantile(excess_sims, 0.975)
# Excess rate per 100 person-yrs
excess_rate_per_100py <- (excess_mean * sample_size / py) * 100
excess_rate_ci_lower <- (excess_ci_lower * sample_size / py) * 100
excess_rate_ci_upper <- (excess_ci_upper * sample_size / py) * 100
# Excess cases per 100,000
excess_cases_per_100k <- excess_mean * 100000
excess_cases_per_100k_ci_lower <- excess_ci_lower * 100000
excess_cases_per_100k_ci_upper <- excess_ci_upper * 100000
} else {
# Week 0 is the baseline, so all excess values are 0
excess_rate_per_100py <- 0
excess_rate_ci_lower <- 0
excess_rate_ci_upper <- 0
excess_cases_per_100k <- 0
excess_cases_per_100k_ci_lower <- 0
excess_cases_per_100k_ci_upper <- 0
}
# 4. Relative Risk
relative_risk <- mean_incidence / baseline_mean
# Store results
results_list[[scenario_name]] <- list(
delay_weeks = delay_value,
incidence_rate_per_100py = rate_per_100py,
rate_ci_lower = rate_ci_lower,
rate_ci_upper = rate_ci_upper,
excess_rate_per_100py = excess_rate_per_100py,
excess_rate_ci_lower = excess_rate_ci_lower,
excess_rate_ci_upper = excess_rate_ci_upper,
cases_per_100k = cases_per_100k,
cases_per_100k_ci_lower = cases_per_100k_ci_lower,
cases_per_100k_ci_upper = cases_per_100k_ci_upper,
excess_cases_per_100k = excess_cases_per_100k,
excess_cases_per_100k_ci_lower = excess_cases_per_100k_ci_lower,
excess_cases_per_100k_ci_upper = excess_cases_per_100k_ci_upper,
relative_risk = relative_risk
)
}
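# (Note, for the record: the 95% intervals above are percentile bootstrap
# intervals across the Monte Carlo draws; with n_simulations = 5000 the
# endpoints are approximately the 125th smallest and 125th largest values,
# since 0.025 * 5000 = 125.)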
# ——————————————————————————————————————————————————————
# 5) Create Export Table for Word
# ——————————————————————————————————————————————————————
# Build the export table
export_table <- data.frame()
for(scenario_name in names(results_list)) {
r <- results_list[[scenario_name]]
# Format all confidence intervals
incidence_rate_ci <- sprintf("%.2f (%.2f-%.2f)",
r$incidence_rate_per_100py, r$rate_ci_lower, r$rate_ci_upper)
if(r$delay_weeks == 0) {
excess_rate_ci <- "BASELINE"
excess_cases_ci <- "BASELINE"
} else {
excess_rate_ci <- sprintf("%+.2f (%+.2f to %+.2f)",
r$excess_rate_per_100py, r$excess_rate_ci_lower, r$excess_rate_ci_upper)
excess_cases_ci <- sprintf("%+.0f (%+.0f to %+.0f)",
r$excess_cases_per_100k, r$excess_cases_per_100k_ci_lower, r$excess_cases_per_100k_ci_upper)
}
cases_per_100k_ci <- sprintf("%.0f (%.0f-%.0f)",
r$cases_per_100k, r$cases_per_100k_ci_lower, r$cases_per_100k_ci_upper)
relative_risk_formatted <- sprintf("%.2f", r$relative_risk)
# Add row to export table
export_table <- rbind(export_table, data.frame(
`Delay (weeks)` = r$delay_weeks,
`Incidence Rate per 100 Person-Yrs (95% CI)` = incidence_rate_ci,
`Excess Incidence Rate per 100 Person-Yrs (95% CI)` = excess_rate_ci,
`Mean Expected Cases per 100,000 (95% CI)` = cases_per_100k_ci,
`Excess Cases per 100,000 (95% CI)` = excess_cases_ci,
`Relative Risk` = relative_risk_formatted,
stringsAsFactors = FALSE,
check.names = FALSE
))
}
# Sort by delay weeks
export_table <- export_table[order(export_table$`Delay (weeks)`), ]
# Display the table
cat("\n=== EXPORT TABLE FOR WORD ===\n\n")
print(export_table, row.names = FALSE)
# ——————————————————————————————————————————————————————
# 6) Export Options
# ——————————————————————————————————————————————————————
# Option 1: Save as CSV for easy import to Word
write.csv(export_table, "TBCHAMP_tb_delays_analysis_table.csv", row.names = FALSE)
cat("\nTable saved as 'tb_delays_analysis_table.csv'\n")
# Option 2: Create a formatted version for direct copy-paste
cat("\n=== FORMATTED FOR COPY-PASTE TO WORD ===\n")
cat("Delay (weeks)\tIncidence Rate per 100 Person-Yrs (95% CI)\tExcess Incidence Rate per 100 Person-Yrs (95% CI)\tMean Expected Cases per 100,000 (95% CI)\tExcess Cases per 100,000 (95% CI)\tRelative Risk\n")
for(i in 1:nrow(export_table)) {
cat(paste(export_table[i, ], collapse = "\t"), "\n")
}
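# (Option 2b, a minimal sketch: since this is a Quarto document, the same
# table can also be rendered directly in the output, assuming knitr is
# installed.)
# knitr::kable(export_table, row.names = FALSE)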
# Option 3: Summary statistics
cat("\n=== ANALYSIS SUMMARY ===\n")
cat("Monte Carlo simulations:", n_simulations, "\n")
cat("Delay intervals: Every 2 weeks from -26 to +26 weeks\n")
cat("Baseline scenario: Week 0 (no delay)\n")
cat("Sample size:", n_intervention, "\n")
cat("Person-years:", py_intervention, "\n")
cat("Total scenarios analyzed:", nrow(export_table), "\n")
# Option 4: Key findings
best_scenario <- export_table[which.min(as.numeric(gsub("[^-0-9.]", "", export_table$`Delay (weeks)`))), ]
worst_scenario <- export_table[which.max(as.numeric(gsub("[^-0-9.]", "", export_table$`Delay (weeks)`))), ]
cat("\nBest scenario (most negative delay):", best_scenario$`Delay (weeks)`, "weeks\n")
cat("Worst scenario (most positive delay):", worst_scenario$`Delay (weeks)`, "weeks\n")
cat("\nNote: Negative delays = treatment starts before screening\n")
cat("Positive delays = treatment starts after screening\n")
cat("All excess calculations are relative to Week 0 (no delay)\n")Method 2 – varying the time since diagnosis parameter (Figure 8)
Delays with L = 3 months (13-week pre-curve); the main analysis uses L = 6 months (26 weeks)
library(mgcv)
library(ggplot2)
library(dplyr)
library(readr)
library(tidyr)
# ——————————————————————————————————————————————————————
# TBCHAMP Pre-curve Comparison: Auto-detect CSV delays
# ——————————————————————————————————————————————————————
# Input Parameters - TBCHAMP
n_control <- 465
n_intervention <- 451
py_control <- 418
py_intervention <- 406
n_simulations <- 5000
set.seed(123)
cat("TBCHAMP Pre-curve Length Comparison:\n")
cat("Auto-detecting delays from CSV file for perfect alignment\n")
cat("Control sample size:", n_control, "\n")
cat("Intervention sample size:", n_intervention, "\n")
cat("Number of simulations:", n_simulations, "\n\n")
# ——————————————————————————————————————————————————————
# 1) First, load CSV and detect what delays are actually present
# ——————————————————————————————————————————————————————
cat("=== STEP 1: DETECTING CSV DELAYS ===\n")
if(file.exists("TBCHAMP_tb_delays_analysis_table.csv")) {
# Load CSV and inspect structure
tbchamp_csv_raw <- read_csv("TBCHAMP_tb_delays_analysis_table.csv")
cat("CSV loaded successfully\n")
cat("CSV columns found:\n")
print(colnames(tbchamp_csv_raw))
cat("\nFirst few delay values:\n")
print(head(tbchamp_csv_raw$`Delay (weeks)`, 10))
# Extract numeric delays from the CSV
parse_ci_value <- function(ci_string) {
as.numeric(sub(" \\(.*\\)", "", ci_string))
}
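# (Illustration with a made-up string: parse_ci_value() keeps only the point
# estimate in front of the parenthesised CI, matching the sprintf formats
# used when the CSV was written.)
stopifnot(parse_ci_value("12.3 (4.5-20.1)") == 12.3)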
tbchamp_26wk_clean <- tbchamp_csv_raw %>%
mutate(
delay_weeks = case_when(
`Delay (weeks)` == "Instant TPT" ~ -999,
`Delay (weeks)` == "Control" ~ 999,
TRUE ~ as.numeric(`Delay (weeks)`)
),
tbchamp_26wk_excess_cases_per_100k = parse_ci_value(`Excess Cases per 100,000 (95% CI)`),
tbchamp_26wk_incidence_rate = parse_ci_value(`Incidence Rate per 100 Person-Yrs (95% CI)`),
tbchamp_26wk_excess_rate = parse_ci_value(`Excess Incidence Rate per 100 Person-Yrs (95% CI)`),
tbchamp_26wk_relative_risk = `Relative Risk`
) %>%
select(delay_weeks, tbchamp_26wk_excess_cases_per_100k, tbchamp_26wk_incidence_rate, tbchamp_26wk_excess_rate, tbchamp_26wk_relative_risk) %>%
filter(!is.na(delay_weeks)) # Remove rows where delay parsing failed
# Extract the actual delays present in CSV (excluding baselines)
csv_delays <- tbchamp_26wk_clean %>%
filter(delay_weeks != -999 & delay_weeks != 999) %>%
arrange(delay_weeks) %>%
pull(delay_weeks)
cat("\nACTUAL DELAYS FOUND IN CSV:\n")
cat("Delays:", paste(csv_delays, collapse = ", "), "\n")
cat("Number of delay scenarios:", length(csv_delays), "\n\n")
} else {
stop("ERROR: TBCHAMP_tb_delays_analysis_table.csv not found in working directory")
}
# ——————————————————————————————————————————————————————
# 2) Run NEW analysis using EXACTLY the same delays as CSV
# ——————————————————————————————————————————————————————
cat("=== STEP 2: RUNNING NEW ANALYSIS WITH MATCHED DELAYS ===\n")
# Monte Carlo Simulation Function
simulate_gam_predictions <- function(gam_model, newdata, n_sim = n_simulations) {
pred_mean <- predict(gam_model, newdata = newdata, type = "response")
Vp <- vcov(gam_model, unconditional = TRUE)
Xp <- predict(gam_model, newdata = newdata, type = "lpmatrix")
coef_sims <- MASS::mvrnorm(n_sim, coef(gam_model), Vp)
pred_sims <- tcrossprod(Xp, coef_sims)
return(pred_sims)
}
# Bootstrap Scenario Builder - SQUARE-ROOT PRE-CURVE with a 13-week ramp
# (the "_linear" suffix in the function name below is historical; this variant keeps the square-root ramp and only shortens back_wk to 13)
bootstrap_scenario_linear <- function(sim_idx, control_sims, intervention_sims, delay_weeks) {
Fc0_sim <- control_sims[, sim_idx]
Fi0_sim <- intervention_sims[, sim_idx]
# TBCHAMP p0 values
p0_draw <- rbeta(1, 32 + 0.5, 1120 - 32 + 0.5)
Fc0_sim <- p0_draw + (1 - p0_draw) * Fc0_sim
Fi0_sim <- p0_draw + (1 - p0_draw) * Fi0_sim
# Enforce monotonicity
Fc0_sim <- pmin(pmax(Fc0_sim, 0), 0.999)
Fi0_sim <- pmin(pmax(Fi0_sim, 0), 0.999)
Fc0_sim <- cummax(Fc0_sim); Fi0_sim <- cummax(Fi0_sim)
# Calculate discrete hazards
h_ctrl_sim <- diff(c(0, Fc0_sim)) / (1 - head(c(0, Fc0_sim), -1))
h_int_sim <- diff(c(0, Fi0_sim)) / (1 - head(c(0, Fi0_sim), -1))
# Build curve
n <- length(time_ext)
Fcum <- numeric(n)
i0 <- which(time_ext == 0)
back_wk <- 13 # -13 week pre-curve
if(delay_weeks == Inf) {
# Pure control - SQUARE ROOT PRE-CURVE
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk) # SQUARE ROOT
}
Fcum[i0] <- p0_draw
Sprev <- 1 - p0_draw
for(i in (i0+1):n) {
t0 <- time_ext[i]
h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else if(delay_weeks == -Inf) {
# Pure intervention - SQUARE ROOT PRE-CURVE
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk) # SQUARE ROOT
}
Fcum[i0] <- p0_draw
Sprev <- 1 - p0_draw
for(i in (i0+1):n) {
t0 <- time_ext[i]
h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else if(delay_weeks < 0) {
# Negative delays - SQUARE ROOT PRE-CURVE
id_match <- which(time_ext == delay_weeks)
if(length(id_match) == 0) {
id <- which.min(abs(time_ext - delay_weeks))
} else {
id <- id_match[1]
}
if(id > 1) {
s_up <- time_ext[1:(id-1)] + back_wk
s_up[s_up < 0] <- 0
Fcum[1:(id-1)] <- p0_draw * sqrt(s_up / back_wk) # SQUARE ROOT
}
s_delay <- max(0, delay_weeks + back_wk)
Fcum[id] <- p0_draw * sqrt(s_delay / back_wk) # SQUARE ROOT
Sprev <- 1 - Fcum[id]
for(i in (id+1):n) {
current_time <- time_ext[i]
intervention_duration <- current_time - delay_weeks
if(intervention_duration >= 0 && intervention_duration <= 54) {
h_idx <- min(intervention_duration + 1, length(h_int_sim))
h_t <- h_int_sim[max(1, h_idx)]
} else if(intervention_duration > 54) {
h_t <- h_int_sim[length(h_int_sim)]
} else {
h_t <- h_int_sim[1]
}
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else {
# Positive delays - SQUARE ROOT PRE-CURVE
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk) # SQUARE ROOT
}
Fcum[i0] <- p0_draw
Sprev <- 1 - p0_draw
for(i in (i0+1):n) {
t0 <- time_ext[i]
if(t0 <= delay_weeks) {
h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
} else {
intervention_duration <- t0 - delay_weeks
h_idx <- min(intervention_duration + 1, length(h_int_sim))
h_t <- h_int_sim[max(1, h_idx)]
}
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
}
week_26_idx <- which(time_ext == 26)
return(Fcum[week_26_idx])
}
# Set up time vectors
time0 <- 0:26
back_wk <- 13
time_ext <- seq(-back_wk, 26, by = 1)
cat("Using EXACT CSV delays:", paste(csv_delays, collapse = ", "), "\n")
# Check GAM models
if(!exists("control_gam_k5_GCV_TBCHAMP") || !exists("intervention_gam_k5_GCV_TBCHAMP")) {
stop("ERROR: TBCHAMP GAM models not found. Please load:\n - control_gam_k5_GCV_TBCHAMP\n - intervention_gam_k5_GCV_TBCHAMP")
}
# Generate simulations
control_sims <- simulate_gam_predictions(control_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
intervention_sims <- simulate_gam_predictions(intervention_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
# Define scenarios using CSV delays
all_delays <- c("Instant TPT" = -Inf, setNames(csv_delays, paste0(csv_delays, " wk")), "Control" = Inf)
# Run simulations
cat("Running", length(all_delays), "scenarios...\n")
simulation_results <- list()
for(scenario_name in names(all_delays)) {
delay_val <- all_delays[scenario_name]
scenario_sims <- sapply(1:n_simulations, function(i) {
bootstrap_scenario_linear(i, control_sims, intervention_sims, delay_val)
})
simulation_results[[scenario_name]] <- scenario_sims
}
cat("Monte Carlo simulations completed!\n\n")
# ——————————————————————————————————————————————————————
# 3) Calculate metrics and create results for comparison
# ——————————————————————————————————————————————————————
baseline_sims <- simulation_results[["Instant TPT"]]
baseline_mean <- mean(baseline_sims)
results_list <- list()
for(scenario_name in names(simulation_results)) {
scenario_sims <- simulation_results[[scenario_name]]
mean_incidence <- mean(scenario_sims)
ci_lower <- quantile(scenario_sims, 0.025)
ci_upper <- quantile(scenario_sims, 0.975)
py <- ifelse(scenario_name == "Control", py_control, py_intervention)
sample_size <- ifelse(scenario_name == "Control", n_control, n_intervention)
rate_per_100py <- (mean_incidence * sample_size / py) * 100
if(scenario_name != "Instant TPT") {
excess_sims <- scenario_sims - baseline_sims
excess_mean <- mean(excess_sims)
excess_rate_per_100py <- (excess_mean * sample_size / py) * 100
# Convert excess cases to per 100,000 population
excess_cases_per_100k <- excess_mean * 100000
} else {
excess_rate_per_100py <- 0
excess_cases_per_100k <- 0
}
relative_risk <- mean_incidence / baseline_mean
results_list[[scenario_name]] <- list(
scenario = scenario_name,
incidence_rate_per_100py = rate_per_100py,
excess_rate_per_100py = excess_rate_per_100py,
excess_cases_per_100k = excess_cases_per_100k,
relative_risk = relative_risk
)
}
# Convert to comparable format
tbchamp_13wk_results <- data.frame()
for(scenario_name in names(results_list)) {
r <- results_list[[scenario_name]]
if(scenario_name == "Instant TPT") {
delay_weeks <- -999
} else if(scenario_name == "Control") {
delay_weeks <- 999
} else {
delay_weeks <- as.numeric(sub(" wk", "", scenario_name))
}
tbchamp_13wk_results <- rbind(tbchamp_13wk_results, data.frame(
scenario = scenario_name,
delay_weeks = delay_weeks,
tbchamp_13wk_excess_cases_per_100k = r$excess_cases_per_100k,
tbchamp_13wk_incidence_rate = r$incidence_rate_per_100py,
tbchamp_13wk_excess_rate = r$excess_rate_per_100py,
tbchamp_13wk_relative_risk = r$relative_risk,
stringsAsFactors = FALSE
))
}
# ——————————————————————————————————————————————————————
# 4) Perfect Comparison
# ——————————————————————————————————————————————————————
cat("=== STEP 3: PERFECT COMPARISON (NO NAs) ===\n")
# Merge datasets
comparison_data <- merge(tbchamp_26wk_clean, tbchamp_13wk_results, by = "delay_weeks", all = FALSE) # all=FALSE ensures only matching rows
# Focus on early benefit scenarios
early_benefit_comparison <- comparison_data %>%
filter(delay_weeks < 0 & delay_weeks != -999) %>%
select(delay_weeks, scenario,
tbchamp_26wk_excess_cases_per_100k, tbchamp_13wk_excess_cases_per_100k,
tbchamp_26wk_excess_rate, tbchamp_13wk_excess_rate,
tbchamp_26wk_relative_risk, tbchamp_13wk_relative_risk) %>%
mutate(
diff_excess_cases_per_100k = tbchamp_13wk_excess_cases_per_100k - tbchamp_26wk_excess_cases_per_100k,
diff_excess_rate = tbchamp_13wk_excess_rate - tbchamp_26wk_excess_rate,
diff_relative_risk = tbchamp_13wk_relative_risk - tbchamp_26wk_relative_risk,
pct_diff_excess_cases = ifelse(!is.na(tbchamp_26wk_excess_cases_per_100k) & tbchamp_26wk_excess_cases_per_100k != 0,
(diff_excess_cases_per_100k / abs(tbchamp_26wk_excess_cases_per_100k)) * 100, NA),
pct_diff_excess_rate = ifelse(!is.na(tbchamp_26wk_excess_rate) & tbchamp_26wk_excess_rate != 0,
(diff_excess_rate / abs(tbchamp_26wk_excess_rate)) * 100, NA)
) %>%
arrange(delay_weeks)
cat("COMPARISON DETAILS:\n")
cat("CSV data: -26 week pre-curve (square-root function)\n")
cat("New analysis: -13 week pre-curve (square-root function)\n")
cat("Population: Children, South Africa (TBCHAMP)\n")
cat("Perfect delay alignment: NO NAs!\n\n")
if(nrow(early_benefit_comparison) > 0) {
cat("EARLY BENEFIT SCENARIOS COMPARISON:\n")
cat("===================================\n")
cat("Number of matched scenarios:", nrow(early_benefit_comparison), "\n")
cat("Delay range:", min(early_benefit_comparison$delay_weeks), "to", max(early_benefit_comparison$delay_weeks), "weeks\n\n")
# Create detailed comparison table with clear per 100,000 labeling
detailed_comparison <- early_benefit_comparison %>%
mutate(
`Delay (weeks)` = delay_weeks,
`-26wk Excess Cases per 100k` = sprintf("%.1f", tbchamp_26wk_excess_cases_per_100k),
`-13wk Excess Cases per 100k` = sprintf("%.1f", tbchamp_13wk_excess_cases_per_100k),
`Difference per 100k` = sprintf("%+.1f", diff_excess_cases_per_100k),
`% Change` = sprintf("%+.1f%%", pct_diff_excess_cases),
`-26wk Rate` = sprintf("%.3f", tbchamp_26wk_excess_rate),
`-13wk Rate` = sprintf("%.3f", tbchamp_13wk_excess_rate),
`Rate Diff` = sprintf("%+.3f", diff_excess_rate)
) %>%
select(`Delay (weeks)`, `-26wk Excess Cases per 100k`, `-13wk Excess Cases per 100k`, `Difference per 100k`, `% Change`,
`-26wk Rate`, `-13wk Rate`, `Rate Diff`)
print(detailed_comparison)
# Summary statistics
finite_cases_diff <- early_benefit_comparison$diff_excess_cases_per_100k[is.finite(early_benefit_comparison$diff_excess_cases_per_100k)]
finite_pct_diff <- early_benefit_comparison$pct_diff_excess_cases[is.finite(early_benefit_comparison$pct_diff_excess_cases)]
if(length(finite_cases_diff) > 0) {
cat("\nSUMMARY STATISTICS:\n")
cat("==================\n")
cat("Mean difference (-13wk - -26wk):", sprintf("%.1f", mean(finite_cases_diff)), "excess cases per 100,000\n")
cat("Range:", sprintf("%.1f to %.1f", min(finite_cases_diff), max(finite_cases_diff)), "excess cases per 100,000\n")
if(length(finite_pct_diff) > 0) {
cat("Mean percentage change:", sprintf("%.1f%%", mean(finite_pct_diff)), "\n")
cat("Range:", sprintf("%.1f%% to %.1f%%", min(finite_pct_diff), max(finite_pct_diff)), "\n")
}
cat("\nINTERPRETATION:\n")
mean_diff <- mean(finite_cases_diff)
if(abs(mean_diff) < 50) {
cat("• MINIMAL impact: Pre-curve length difference shows <50 cases per 100,000 average change\n")
} else if(abs(mean_diff) < 200) {
cat("• MODERATE impact: Pre-curve length difference shows", sprintf("%.0f", abs(mean_diff)), "cases per 100,000 average change\n")
} else {
cat("• SUBSTANTIAL impact: Pre-curve length difference shows", sprintf("%.0f", abs(mean_diff)), "cases per 100,000 average change\n")
}
if(mean_diff > 0) {
cat("• Direction: -13 week pre-curve shows HIGHER excess cases per 100,000 (worse outcomes)\n")
} else if(mean_diff < 0) {
cat("• Direction: -13 week pre-curve shows LOWER excess cases per 100,000 (better outcomes)\n")
}
}
} else {
cat("No early benefit scenarios found for comparison.\n")
}
# ——————————————————————————————————————————————————————
# 5) Create TBCHAMP Sensitivity Analysis Barplot (like original format)
# ——————————————————————————————————————————————————————
cat("=== STEP 4: CREATING SENSITIVITY ANALYSIS BARPLOT ===\n")
# Calculate confidence intervals for all scenarios from simulation results
plot_results <- data.frame()
for(scenario_name in names(simulation_results)) {
scenario_sims <- simulation_results[[scenario_name]]
baseline_sims <- simulation_results[["Instant TPT"]]
if(scenario_name == "Instant TPT") {
delay_weeks <- -999
excess_cases_per_100k <- 0
ci_lower <- 0
ci_upper <- 0
} else if(scenario_name == "Control") {
delay_weeks <- 999
excess_sims <- scenario_sims - baseline_sims
excess_cases_per_100k <- mean(excess_sims) * 100000
ci_lower <- quantile(excess_sims, 0.025) * 100000
ci_upper <- quantile(excess_sims, 0.975) * 100000
} else {
delay_weeks <- as.numeric(sub(" wk", "", scenario_name))
excess_sims <- scenario_sims - baseline_sims
excess_cases_per_100k <- mean(excess_sims) * 100000
ci_lower <- quantile(excess_sims, 0.025) * 100000
ci_upper <- quantile(excess_sims, 0.975) * 100000
}
# Determine treatment timing category
treatment_timing <- case_when(
delay_weeks == 999 ~ "No Treatment",
delay_weeks > 0 ~ "Post-screen",
delay_weeks <= 0 & delay_weeks != -999 ~ "Pre-screen",
delay_weeks == -999 ~ "Instant TPT"
)
plot_results <- rbind(plot_results, data.frame(
delay_weeks = delay_weeks,
scenario_name = scenario_name,
excess_cases_per_100k = excess_cases_per_100k,
ci_lower = ci_lower,
ci_upper = ci_upper,
treatment_timing = treatment_timing,
stringsAsFactors = FALSE
))
}
# Filter and prepare for plotting (exclude instant TPT baseline and delays < -13)
plot_data <- plot_results %>%
filter(delay_weeks != -999 & delay_weeks >= -13 & delay_weeks != 0 & delay_weeks !=26) %>% # Exclude instant TPT, delays < -13, and 0 weeks
arrange(delay_weeks) %>%
mutate(
delay_label = ifelse(delay_weeks == 999, "No TPT", as.character(delay_weeks)),
delay_order = ifelse(delay_weeks == 999, max(delay_weeks[delay_weeks != 999]) + 5, delay_weeks)
) %>%
arrange(delay_order)
# Create sensitivity analysis barplot
sensitivity_plot <- ggplot(plot_data, aes(x = reorder(delay_label, delay_order), y = excess_cases_per_100k, fill = treatment_timing)) +
geom_col(alpha = 0.8, width = 0.7) +
geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2, color = "black", alpha = 0.7) +
scale_fill_manual(values = c("Pre-screen" = "#2E86AB", "Post-screen" = "#E74C3C", "No Treatment" = "#F39C12")) +
geom_text(aes(label = sprintf("%.0f", excess_cases_per_100k),
y = ifelse(excess_cases_per_100k >= 0,
pmax(ci_upper + 150, excess_cases_per_100k + 150), # Above positive bars
pmin(ci_lower - 150, excess_cases_per_100k - 150))), # Below negative bars
size = 4.2, fontface = "bold", color = "black") +
labs(
title = "SENSITIVITY ANALYSIS Method 2: TBCHAMP Excess TB Cases by Treatment Delay",
subtitle = "13-week pre-curve assumption (square root) | Excess cases per 100,000 contacts at 26 weeks",
x = "Treatment Delay (weeks)",
y = "Excess TB Cases per 100,000 Contacts",
fill = "Treatment Timing",
caption = "Sample size: 451 | Negative delays = pre-screen treatment"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
plot.subtitle = element_text(hjust = 0.5, size = 11, color = "black"),
plot.caption = element_text(size = 9, color = "black"),
axis.text.x = element_text(angle = 45, hjust = 1, size = 13, color = "black"),
axis.text.y = element_text(size = 13, color = "black"),
axis.title = element_text(size = 11, face = "bold"),
legend.position = "top",
legend.title = element_text(size = 10, face = "bold"),
legend.text = element_text(size = 9),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "gray80", fill = NA, size = 0.5) ) +
geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.5, color = "gray50")
print(sensitivity_plot)
cat("\nSensitivity analysis barplot created!\n")
cat("Format matches original TBCHAMP analysis\n")
cat("13-week pre-curve with square root function\n")
cat("All values shown as excess cases per 100,000 population\n")
cat("Error bars show 95% confidence intervals\n\n")
cat("\n=== PERFECT ALIGNMENT ACHIEVED ===\n")
cat("No NAs in comparison - all delays perfectly matched!\n")
cat("All excess cases displayed as per 100,000 population\n")
cat("Barplot comparison included!\n")Method 3 - trying a linear pre-curve with original trial L length
For the delay scenarios:
library(mgcv)
# —————————————————————————————————————————————————————
# A) Fit‐based cumulative‐incidence at t = 0…54, with p0‐spike
time0 <- 0:54
p0 <- 32/1120
back_wk <- 13 # note: overridden to 26 in section B below
# Use the k=5 GCV GAM models
Fc0 <- predict(control_gam_k5_GCV_TBCHAMP,
newdata = data.frame(time = time0))
Fi0 <- predict(intervention_gam_k5_GCV_TBCHAMP,
newdata = data.frame(time = time0))
Fc0 <- p0 + (1 - p0)*Fc0
Fi0 <- p0 + (1 - p0)*Fi0
# Discrete hazards: h(t) = [F(t) - F(t-1)] / [1 - F(t-1)]
h_ctrl <- diff(c(0, Fc0)) / (1 - head(c(0, Fc0), -1))
h_int <- diff(c(0, Fi0)) / (1 - head(c(0, Fi0), -1))
# —————————————————————————————————————————————————————
# B) "Pre‐screen" ramp length and nominal post‐screen delays
back_wk <- 26 # show 6 months before screen
nominal_delays <- c(-26, -20, -10, -5, 5, 10, 20) # weeks of *additional* delay after screen
# —————————————————————————————————————————————————————
# C) build a time axis from –back_wk … +54
time_ext <- seq(-back_wk, 54, by = 1)
# —————————————————————————————————————————————————————
# D) single‐curve builder: linear ramp on [–back_wk, 0], then hazard‐switch at d
build_curve <- function(d) {
n <- length(time_ext)
Fcum <- numeric(n)
# find index of screen‐time t0 = 0
i0 <- which(time_ext == 0)
# 1) PRE‐SCREEN: linear ramp from (–back_wk → 0) up to p0
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0 * (s_up / back_wk)
# 2) AT SCREEN
Fcum[i0] <- p0
Sprev <- 1 - p0
# 3) POST‐SCREEN: control hazard until t0 ≤ d, then intervention hazard
for(i in (i0+1):n) {
t0 <- time_ext[i]
if (d > 0 && t0 <= d) {
h_t <- h_ctrl[t0 + 1]
} else {
idx <- if (d > 0) (t0 - d + 1) else (t0 + 1)
h_t <- h_int[ min(idx, length(h_int)) ]
}
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
Fcum
}
# —————————————————————————————————————————————————————
# E) build all the curves
# 1) Control & Instant curves
ctrl_ext <- build_curve( Inf) # pure control
inst_ext <- build_curve(-Inf) # pure intervention
# and the linear ramp function for pre‐screen:
ramp <- p0 * (time_ext + back_wk) / back_wk
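# (Optional quick look, a sketch only: eyeball one delayed curve against the
# pure control / instant curves before assembling the full figure; the delay
# of 10 weeks is an arbitrary example value.)
# matplot(time_ext, cbind(ctrl_ext, inst_ext, build_curve(10)),
#   type = "l", lty = c(1, 1, 2), col = c("red", "blue", "darkgreen"),
#   xlab = "Weeks since randomisation", ylab = "Cumulative incidence")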
# 2) separate positive and negative delays
pos_delays <- nominal_delays[nominal_delays >= 0]
neg_delays <- nominal_delays[nominal_delays < 0]
# 3) positive delays:
pos_mat <- if(length(pos_delays) > 0) sapply(pos_delays, function(d) build_curve(d)) else NULL
# 4) negative delays: ramp until delay point, then copy instant TPT curve
neg_mat <- if(length(neg_delays) > 0) {
sapply(neg_delays, function(d) {
n <- length(time_ext)
Fcum <- numeric(n)
# find index of delay point d
id_match <- which(time_ext == d)
if(length(id_match) == 0) {
# If exact match not found, find closest index
id <- which.min(abs(time_ext - d))
} else {
id <- id_match[1]
}
i0 <- which(time_ext == 0) # screen time index
# Before delay point: follow the ramp
if(id > 1) {
s_up <- time_ext[1:(id-1)] + back_wk
s_up[s_up < 0] <- 0 # ensure non-negative
Fcum[1:(id-1)] <- p0 * (s_up / back_wk)
}
# At delay point: set starting value from ramp
s_delay <- max(0, d + back_wk)
start_value <- p0 * (s_delay / back_wk)
Fcum[id] <- start_value
# After delay point: copy the instant TPT curve pattern
if(id < n) {
# Find the baseline instant TPT value at time 0 (our reference point)
inst_at_zero <- inst_ext[i0]
for(i in (id+1):n) {
# Calculate how many steps from delay point
steps_from_delay <- i - id
# Find corresponding point in instant TPT curve (relative to time 0)
inst_index <- i0 + steps_from_delay
if(inst_index <= length(inst_ext) && inst_index > 0) {
# Get the instant TPT value at this point
inst_value <- inst_ext[inst_index]
# Calculate the change from instant TPT baseline
inst_change <- inst_value - inst_at_zero
# Apply the same change to our starting value
Fcum[i] <- start_value + inst_change
} else if(inst_index > length(inst_ext)) {
# If we've gone beyond available instant TPT data,
# use the last available change and keep it constant
last_inst_value <- inst_ext[length(inst_ext)]
last_change <- last_inst_value - inst_at_zero
Fcum[i] <- start_value + last_change
} else {
# If we somehow get a negative index, keep the start value
Fcum[i] <- start_value
}
}
}
Fcum
})
} else NULL
# Combine all delay matrices
if(!is.null(neg_mat) && !is.null(pos_mat)) {
delay_mat <- cbind(neg_mat, pos_mat)
colnames(delay_mat) <- c(
paste0(neg_delays, " wk delay"),
paste0(pos_delays, " wk delay"))
} else if(!is.null(neg_mat)) {
delay_mat <- neg_mat
colnames(delay_mat) <- paste0(neg_delays, " wk delay")
} else if(!is.null(pos_mat)) {
delay_mat <- pos_mat
colnames(delay_mat) <- paste0(pos_delays, " wk delay")
} else {
delay_mat <- NULL
}
# —————————————————————————————————————————————————————
# F') compressed x‐axis plot, truncated at +26 wk, now including the negative delay curves
# how many "plot units" to devote to the negative side?
neg_width <- 30
# remap time_ext → x_plot:
x_plot <- ifelse(
time_ext < 0,
neg_width * (time_ext + back_wk) / back_wk, # −back_wk…0 → 0…30
neg_width + pmin(time_ext, 54) # 0…54 → 30…84 (capped; only t ≤ 26 is kept below)
)
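# (Worked example of the remap: with back_wk = 26 and neg_width = 30,
# t = -13 maps to 30 * (-13 + 26) / 26 = 15, and t = 10 maps to 30 + 10 = 40,
# so the 26 pre-screen weeks are compressed into 30 plot units.)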
# bind together Control, Instant, and only your chosen delays:
if(!is.null(delay_mat)) {
all_mat <- cbind(
Control = ctrl_ext,
`Instant TPT` = inst_ext,
delay_mat
)
} else {
all_mat <- cbind(
Control = ctrl_ext,
`Instant TPT` = inst_ext
)
}
keep <- which(time_ext <= 26)
x_sub <- x_plot[keep]
mat_sub <- all_mat[keep, , drop=FALSE]
# colours and line‐types
n_delay_cols <- if(!is.null(delay_mat)) ncol(delay_mat) else 0
cols <- c("red","blue", rainbow(n_delay_cols))
ltys <- c(1,1, rep(2, n_delay_cols))
yr <- range(mat_sub)
# draw
matplot(
x_sub, mat_sub,
type="l", col=cols, lty=ltys, lwd=2,
xaxt="n", ylim=yr,
xlab="Weeks since randomisation ",
ylab="Cumulative incidence of TB",
main="SENSITIVITY ANALYSIS TBCHAMP Delayed-TPT scenarios, LINEAR pre-curve"
)
# custom x-axis ticks:
neg_ticks <- seq(-back_wk, 0, by=2)
neg_at <- neg_width * (neg_ticks + back_wk) / back_wk
pos_ticks <- seq(0, 26, by=2)
pos_at <- neg_width + pos_ticks
axis(1,
at = c(neg_at, pos_at),
labels = c(neg_ticks, pos_ticks)
)
# vertical line at t=0
abline(v=neg_width, lty=3, col="gray50")
# legend
legend("topleft",
legend = colnames(all_mat),
col = cols,
lty = ltys,
lwd = 2,
bty = "n",
ncol = 1,
cex = 0.8,
y.intersp = 0.4,
x.intersp = 0.5,
inset = c(0.02,0.02))
# Load required library for colorblind-friendly palettes
library(RColorBrewer)
# colours and line-types with colorblind-friendly palette
n_delay_cols <- if(!is.null(delay_mat)) ncol(delay_mat) else 0
# Keep control red and intervention blue, use colorblind-friendly palette for delays
if(n_delay_cols > 0) {
# Option 3: Manual colorblind-friendly colors
delay_colors <- c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6")[1:n_delay_cols]
cols <- c("red", "blue", delay_colors)
} else {
cols <- c("red", "blue")
}
ltys <- c(1, 1, rep(2, n_delay_cols))
yr <- range(mat_sub)
# Set up the plot area
plot(range(x_sub), yr, type="n",
xaxt="n", ylim=yr,
xlab="Weeks since randomisation",
ylab="Cumulative incidence of TB",
main="SENSITIVITY ANALYSIS: TBCHAMP Delayed-TPT scenarios LINEAR pre-curve")
# Plot all lines EXCEPT the control (first column) first
if(ncol(mat_sub) > 1) {
# Plot intervention and delay lines first (columns 2 onwards)
matlines(x_sub, mat_sub[, -1, drop=FALSE],
col=cols[-1], lty=ltys[-1], lwd=2)
}
# Plot the control line LAST so it appears on top
lines(x_sub, mat_sub[, 1],
col=cols[1], lty=ltys[1], lwd=2)
# custom x-axis ticks:
neg_ticks <- seq(-back_wk, 0, by=2)
neg_at <- neg_width * (neg_ticks + back_wk) / back_wk
pos_ticks <- seq(0, 26, by=2)
pos_at <- neg_width + pos_ticks
axis(1,
at = c(neg_at, pos_at),
labels = c(neg_ticks, pos_ticks)
)
# vertical line at t=0
abline(v=neg_width, lty=3, col="gray50")
# Reorder legend to match plotting order (control was plotted last)
legend_names <- colnames(mat_sub)
legend_cols <- cols
legend_ltys <- ltys
# If we have multiple columns, reorder to match plotting sequence
if(ncol(mat_sub) > 1) {
# We plotted columns 2+ first, then column 1 (control) last
new_order <- c(2:ncol(mat_sub), 1)
legend_names <- legend_names[new_order]
legend_cols <- legend_cols[new_order]
legend_ltys <- legend_ltys[new_order]
}
# legend
legend("topleft",
legend = legend_names,
col = legend_cols,
lty = legend_ltys,
lwd = 2,
bty = "n",
ncol = 1,
cex = 0.8,
y.intersp = 0.8,
x.intersp = 0.5,
inset = c(0.02, 0.02))
library(mgcv)
library(ggplot2)
library(dplyr)
library(RColorBrewer)
library(scales)
# ——————————————————————————————————————————————————————
# Enhanced TB Delays Analysis with 2-week intervals - Using Real GAM Models
# ——————————————————————————————————————————————————————
# Input Parameters (from your data)
n_control <- 465
n_intervention <- 451
py_control <- 418
py_intervention <- 406
n_simulations <- 5000
set.seed(123)
cat("Enhanced Delays Analysis Parameters:\n")
cat("Control sample size:", n_control, "\n")
cat("Intervention sample size:", n_intervention, "\n")
cat("Control person-years:", py_control, "\n")
cat("Intervention person-years:", py_intervention, "\n")
cat("Number of simulations:", n_simulations, "\n\n")
# ——————————————————————————————————————————————————————
# 1) Monte Carlo Simulation Function (from your working code)
# ——————————————————————————————————————————————————————
simulate_gam_predictions <- function(gam_model, newdata, n_sim = n_simulations) {
# Get mean predictions and covariance matrix
pred_mean <- predict(gam_model, newdata = newdata, type = "response")
# Get the covariance matrix of the GAM coefficients
Vp <- vcov(gam_model, unconditional = TRUE)
# Get the linear predictor matrix
Xp <- predict(gam_model, newdata = newdata, type = "lpmatrix")
# Simulate from multivariate normal distribution
# Generate random coefficients
coef_sims <- MASS::mvrnorm(n_sim, coef(gam_model), Vp)
# Calculate predictions for each simulation
pred_sims <- tcrossprod(Xp, coef_sims)
return(pred_sims)
}
# ——————————————————————————————————————————————————————
# 2) Bootstrap Scenario Builder (exactly from your working code)
# ——————————————————————————————————————————————————————
bootstrap_scenario <- function(sim_idx, control_sims, intervention_sims, delay_weeks) {
# Extract simulated predictions for this iteration
Fc0_sim <- control_sims[, sim_idx]
Fi0_sim <- intervention_sims[, sim_idx]
# Apply p0 transformation with a beta draw for p0 to consider uncertainty in p0
p0_draw <- rbeta(1, 32 + 0.5, 1120 - 32 + 0.5) # TBCHAMP p0 values 32 and 1120
Fc0_sim <- p0_draw + (1 - p0_draw) * Fc0_sim
Fi0_sim <- p0_draw + (1 - p0_draw) * Fi0_sim
# Enforce bounds and monotonicity: simulated cumulative curves can wiggle, which would make the discrete hazards negative
Fc0_sim <- pmin(pmax(Fc0_sim, 0), 0.999)
Fi0_sim <- pmin(pmax(Fi0_sim, 0), 0.999)
Fc0_sim <- cummax(Fc0_sim); Fi0_sim <- cummax(Fi0_sim)
# Calculate discrete hazards for this simulation: h(t) = [F(t) - F(t-1)] / [1 - F(t-1)]
h_ctrl_sim <- diff(c(0, Fc0_sim)) / (1 - head(c(0, Fc0_sim), -1))
h_int_sim <- diff(c(0, Fi0_sim)) / (1 - head(c(0, Fi0_sim), -1))
# Build curve for this delay scenario
n <- length(time_ext)
Fcum <- numeric(n)
# Find indices
i0 <- which(time_ext == 0)
back_wk <- 26
# Special handling for pure scenarios
if(delay_weeks == Inf) {
# Pure control - FIXED: use p0_draw instead of p0
# Pre-screen ramp
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * (s_up / back_wk) # FIXED
}
Fcum[i0] <- p0_draw # FIXED
Sprev <- 1 - p0_draw # FIXED
for(i in (i0+1):n) {
t0 <- time_ext[i]
h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else if(delay_weeks == -Inf) {
# Pure intervention (Instant TPT) - FIXED: use p0_draw instead of p0
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * (s_up / back_wk) # FIXED
}
Fcum[i0] <- p0_draw # FIXED
Sprev <- 1 - p0_draw # FIXED
for(i in (i0+1):n) {
t0 <- time_ext[i]
h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else if(delay_weeks < 0) {
# CORRECTED: Negative delays (pre-screen intervention) - FIXED: use p0_draw instead of p0
# This means intervention starts BEFORE time 0, providing earlier protection
# Find delay point index
id_match <- which(time_ext == delay_weeks)
if(length(id_match) == 0) {
id <- which.min(abs(time_ext - delay_weeks))
} else {
id <- id_match[1]
}
# Before delay point: ramp (no intervention yet)
if(id > 1) {
s_up <- time_ext[1:(id-1)] + back_wk
s_up[s_up < 0] <- 0
Fcum[1:(id-1)] <- p0_draw * (s_up / back_wk) # FIXED
}
# At delay point: start intervention
s_delay <- max(0, delay_weeks + back_wk)
Fcum[id] <- p0_draw * (s_delay / back_wk) # linear ramp value at the delay point, consistent with the linear pre-curve # FIXED
Sprev <- 1 - Fcum[id]
# CORRECTED: After delay point, use intervention hazards consistently
# Map the post-delay timeline to intervention hazards starting from time 0
for(i in (id+1):n) {
current_time <- time_ext[i]
# Calculate how long intervention has been active
intervention_duration <- current_time - delay_weeks
if(intervention_duration >= 0 && intervention_duration <= 54) {
# Use intervention hazard corresponding to this duration
h_idx <- min(intervention_duration + 1, length(h_int_sim))
h_t <- h_int_sim[max(1, h_idx)]
} else if(intervention_duration > 54) {
# Use last available intervention hazard
h_t <- h_int_sim[length(h_int_sim)]
} else {
# This shouldn't happen with negative delays, but use first intervention hazard as fallback
h_t <- h_int_sim[1]
}
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
} else {
# Positive delays (post-screen delays) - FIXED: use p0_draw instead of p0
# Pre-screen ramp
if(i0 > 1) {
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0_draw * (s_up / back_wk) # FIXED
}
# At screen
Fcum[i0] <- p0_draw # FIXED
Sprev <- 1 - p0_draw # FIXED
# Post-screen: control until delay point, then intervention
for(i in (i0+1):n) {
t0 <- time_ext[i]
if(t0 <= delay_weeks) {
# Use control hazard until delay point
h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
} else {
# Use intervention hazard after delay point
# Map to intervention timeline: intervention has been active for (t0 - delay_weeks) weeks
intervention_duration <- t0 - delay_weeks
h_idx <- min(intervention_duration + 1, length(h_int_sim))
h_t <- h_int_sim[max(1, h_idx)]
}
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
}
# Return final value at week 26
week_26_idx <- which(time_ext == 26)
return(Fcum[week_26_idx])
}
# ——————————————————————————————————————————————————————
# 3) Run Enhanced Analysis with 2-week intervals using REAL GAM models
# ——————————————————————————————————————————————————————
cat("Running enhanced Monte Carlo simulations with 2-week intervals...\n")
# Set up time vectors
time0 <- 0:26
back_wk <- 26
time_ext <- seq(-back_wk, 26, by = 1)
# Create 2-week interval delays from -26 to +24 weeks (week 26 is the evaluation endpoint)
delays_2wk <- seq(-26, 24, by = 2)
# Generate simulated predictions using your ACTUAL GAM models
cat("Generating GAM simulations using your trained models...\n")
# NOTE: Make sure your GAM models are loaded: control_gam_k5_GCV_TBCHAMP and intervention_gam_k5_GCV_TBCHAMP
control_sims <- simulate_gam_predictions(control_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
intervention_sims <- simulate_gam_predictions(intervention_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
cat("Generated", ncol(control_sims), "control simulations and", ncol(intervention_sims), "intervention simulations\n")
# Define all scenarios including baseline
all_delays <- c("Instant TPT" = -Inf, setNames(delays_2wk, paste0(delays_2wk, " wk")), "Control" = Inf)
# Run simulations for all scenarios
cat("Running scenario simulations...\n")
simulation_results <- list()
for(scenario_name in names(all_delays)) {
cat("Processing scenario:", scenario_name, "\n")
delay_val <- all_delays[scenario_name]
scenario_sims <- sapply(1:n_simulations, function(i) {
bootstrap_scenario(i, control_sims, intervention_sims, delay_val)
})
simulation_results[[scenario_name]] <- scenario_sims
}
cat("Completed all simulations!\n\n")
# ——————————————————————————————————————————————————————
# 4) Calculate Excess Cases using your exact methodology
# ——————————————————————————————————————————————————————
# Get baseline (Instant TPT) for relative calculations
baseline_sims <- simulation_results[["Instant TPT"]]
baseline_mean <- mean(baseline_sims)
# Calculate metrics for each scenario using your exact approach
results_list <- list()
for(scenario_name in names(simulation_results)) {
scenario_sims <- simulation_results[[scenario_name]]
# Basic statistics
mean_incidence <- mean(scenario_sims)
median_incidence <- median(scenario_sims)
ci_lower <- quantile(scenario_sims, 0.025)
ci_upper <- quantile(scenario_sims, 0.975)
# Determine person-years (Control uses control PY, others use intervention PY)
py <- ifelse(scenario_name == "Control", py_control, py_intervention)
sample_size <- ifelse(scenario_name == "Control", n_control, n_intervention)
# 1. Incidence rate per 100 person-yrs (95% CI)
rate_per_100py <- (mean_incidence * sample_size / py) * 100
rate_ci_lower <- (ci_lower * sample_size / py) * 100
rate_ci_upper <- (ci_upper * sample_size / py) * 100
# 2. Mean Expected Cases (95% CI)
mean_cases <- mean_incidence * sample_size
cases_ci_lower <- ci_lower * sample_size
cases_ci_upper <- ci_upper * sample_size
# 3. Excess calculations (compared to Instant TPT)
if(scenario_name != "Instant TPT") {
# Excess incidence simulations
excess_sims <- scenario_sims - baseline_sims
excess_mean <- mean(excess_sims)
excess_ci_lower <- quantile(excess_sims, 0.025)
excess_ci_upper <- quantile(excess_sims, 0.975)
# Excess rate per 100 person-yrs
excess_rate_per_100py <- (excess_mean * sample_size / py) * 100
excess_rate_ci_lower <- (excess_ci_lower * sample_size / py) * 100
excess_rate_ci_upper <- (excess_ci_upper * sample_size / py) * 100
# Excess cases
excess_cases_mean <- excess_mean * sample_size
excess_cases_ci_lower <- excess_ci_lower * sample_size
excess_cases_ci_upper <- excess_ci_upper * sample_size
} else {
# Instant TPT is the baseline, so excess = 0
excess_rate_per_100py <- 0
excess_rate_ci_lower <- 0
excess_rate_ci_upper <- 0
excess_cases_mean <- 0
excess_cases_ci_lower <- 0
excess_cases_ci_upper <- 0
}
# 4. Relative Risk
relative_risk <- mean_incidence / baseline_mean
# Store results
results_list[[scenario_name]] <- list(
scenario = scenario_name,
incidence_rate_per_100py = rate_per_100py,
incidence_rate_ci_lower = rate_ci_lower,
incidence_rate_ci_upper = rate_ci_upper,
excess_rate_per_100py = excess_rate_per_100py,
excess_rate_ci_lower = excess_rate_ci_lower,
excess_rate_ci_upper = excess_rate_ci_upper,
mean_cases = mean_cases,
cases_ci_lower = cases_ci_lower,
cases_ci_upper = cases_ci_upper,
excess_cases_mean = excess_cases_mean,
excess_cases_ci_lower = excess_cases_ci_lower,
excess_cases_ci_upper = excess_cases_ci_upper,
relative_risk = relative_risk
)
}
# ——————————————————————————————————————————————————————
# 5) Prepare Plot Data using your results
# ——————————————————————————————————————————————————————
plot_data <- data.frame()
for(scenario_name in names(results_list)) {
if(scenario_name == "Instant TPT") next # Skip baseline
r <- results_list[[scenario_name]]
# Extract delay value for ordering
if(scenario_name == "Control") {
delay_weeks <- 999 # Put at the end
delay_label <- "No Tx"
delay_type <- "No Treatment"
} else {
delay_weeks <- as.numeric(sub(" wk", "", scenario_name))
delay_label <- as.character(delay_weeks)
delay_type <- ifelse(delay_weeks < 0, "Pre-screen", "Post-screen")
}
plot_data <- rbind(plot_data, data.frame(
scenario = scenario_name,
delay_weeks = delay_weeks,
delay_label = delay_label,
excess_cases = r$excess_cases_mean,
ci_lower = r$excess_cases_ci_lower,
ci_upper = r$excess_cases_ci_upper,
delay_type = delay_type,
stringsAsFactors = FALSE
))
}
# Order by delay weeks
plot_data <- plot_data[order(plot_data$delay_weeks), ]
# ——————————————————————————————————————————————————————
# 6) Create Enhanced Color-Blind Friendly Bar Plot
# ——————————————————————————————————————————————————————
# Set up RColorBrewer Paired palette
paired_colors <- brewer.pal(12, "Paired")
# Assign colors to delay types
color_mapping <- c(
"Pre-screen" = paired_colors[2], # Light blue
"Post-screen" = paired_colors[6], # Light orange
"No Treatment" = paired_colors[8] # Light red
)
# Create the enhanced bar plot
enhanced_plot <- ggplot(plot_data, aes(x = reorder(delay_label, delay_weeks),
y = excess_cases,
fill = delay_type)) +
geom_col(width = 0.8, alpha = 0.8, color = "white", size = 0.3) +
geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper),
width = 0.4, color = "black", size = 0.6, alpha = 0.8) +
# Add value labels on bars with smart positioning
geom_text(aes(label = sprintf("%.1f", excess_cases),
y = ifelse(excess_cases >= 0,
pmax(ci_upper + 0.5, excess_cases + 0.5), # Above positive bars
pmin(ci_lower - 0.5, excess_cases - 0.5))), # Below negative bars
size = 3.2, fontface = "bold", color = "black") +
# Styling
scale_fill_manual(values = color_mapping, name = "Treatment Timing") +
scale_y_continuous(labels = number_format(accuracy = 0.1),
expand = expansion(mult = c(0.1, 0.15))) +
# Reference line at zero
geom_hline(yintercept = 0, linetype = "solid", color = "black", size = 0.8) +
# Labels and theme
labs(
title = "SENSITIVITY ANALYSIS: TBCHAMP Excess TB Cases by Treatment Delay",
subtitle = paste0("The duration from index diagnosis to contact's randomisation is a LINEAR instead of square-root function (95% CI from ",
format(n_simulations, big.mark = ","), " Monte Carlo simulations)"),
x = "Treatment Delay (weeks)",
y = "Excess TB Cases",
caption = paste0("Sample size: ", n_intervention, " | Negative delays = pre-screen treatment")
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
plot.subtitle = element_text(hjust = 0.5, size = 11, color = "gray40"),
plot.caption = element_text(size = 9, color = "gray50"),
axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
axis.text.y = element_text(size = 10),
axis.title = element_text(size = 11, face = "bold"),
legend.position = "top",
legend.title = element_text(size = 10, face = "bold"),
legend.text = element_text(size = 9),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "gray80", fill = NA, size = 0.5)
)
# Display the plot
print(enhanced_plot)
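# (Optional, a sketch: persist the figure to disk; the filename and
# dimensions are placeholders, not from the original analysis.)
# ggsave("TBCHAMP_sensitivity_linear_precurve.png", enhanced_plot,
#   width = 10, height = 6, dpi = 300)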
# ——————————————————————————————————————————————————————
# 7) Summary Statistics Table
# ——————————————————————————————————————————————————————
cat("\n=== ENHANCED DELAY ANALYSIS SUMMARY (using real GAM models) ===\n")
cat("Delay intervals: Every 2 weeks from -26 to +26 weeks\n")
cat("Monte Carlo simulations:", format(n_simulations, big.mark = ","), "\n")
cat("Sample size:", n_intervention, "\n\n")
# Print key statistics
summary_table <- plot_data %>%
select(delay_label, delay_type, excess_cases, ci_lower, ci_upper) %>%
mutate(
excess_cases_ci = sprintf("%.1f (%.1f to %.1f)", excess_cases, ci_lower, ci_upper)
) %>%
select(delay_label, delay_type, excess_cases_ci)
colnames(summary_table) <- c("Delay (weeks)", "Type", "Excess Cases (95% CI)")
print(summary_table)
cat("\nNote: Negative delays indicate treatment started before screening\n")
cat("Positive delays indicate treatment started after screening\n")
cat("Results calculated using your actual GAM models and Monte Carlo approach\n")Comparison of the linear pre-curve to main results:
# ——————————————————————————————————————————————————————
# Calculate ±X% Change in Early Benefit: Square-root vs Linear Pre-curve
# ——————————————————————————————————————————————————————
library(dplyr)
library(readr)
# ——————————————————————————————————————————————————————
# 1) Load Original Results (Square-root pre-curve) from CSV
# ——————————————————————————————————————————————————————
# Load the original results from your CSV file
original_results <- read_csv("TBCHAMP_tb_delays_analysis_table.csv")
# Clean and parse the original results
parse_ci_value <- function(ci_string) {
# Extract the main value from "X.X (Y.Y to Z.Z)" format
as.numeric(sub(" \\(.*\\)", "", ci_string))
}
original_clean <- original_results %>%
mutate(
delay_weeks = case_when(
`Delay (weeks)` == "Instant TPT" ~ -999,
`Delay (weeks)` == "Control" ~ 999,
TRUE ~ as.numeric(`Delay (weeks)`)
),
orig_excess_cases = parse_ci_value(`Excess Cases per 100,000 (95% CI)`),
orig_incidence_rate = parse_ci_value(`Incidence Rate per 100 Person-Yrs (95% CI)`),
orig_excess_rate = parse_ci_value(`Excess Incidence Rate per 100 Person-Yrs (95% CI)`),
orig_relative_risk = `Relative Risk`
) %>%
select(delay_weeks, orig_excess_cases, orig_incidence_rate, orig_excess_rate, orig_relative_risk)
cat("Original results (square-root pre-curve) loaded successfully\n")
cat("Number of scenarios:", nrow(original_clean), "\n\n")
# ——————————————————————————————————————————————————————
# 2) Extract Linear Pre-curve Results from Current Analysis
# ——————————————————————————————————————————————————————
# Check if results_list exists
if(!exists("results_list")) {
stop("Error: results_list not found. Please run your linear pre-curve analysis first.")
}
cat("Checking results_list structure...\n")
cat("Number of scenarios in results_list:", length(results_list), "\n")
cat("Scenario names:", paste(names(results_list), collapse = ", "), "\n\n")
# Convert results_list to a data frame with error checking
linear_results <- data.frame()
for(scenario_name in names(results_list)) {
cat("Processing scenario:", scenario_name, "\n")
r <- results_list[[scenario_name]]
# Check if r is a list and has the expected elements
if(!is.list(r)) {
cat(" Warning: Scenario", scenario_name, "is not a list. Skipping.\n")
next
}
# Check for required elements
required_elements <- c("excess_cases_mean", "incidence_rate_per_100py", "excess_rate_per_100py", "relative_risk")
missing_elements <- required_elements[!required_elements %in% names(r)]
if(length(missing_elements) > 0) {
cat(" Warning: Scenario", scenario_name, "missing elements:", paste(missing_elements, collapse = ", "), ". Skipping.\n")
next
}
# Extract delay value
if(scenario_name == "Instant TPT") {
delay_weeks <- -999
} else if(scenario_name == "Control") {
delay_weeks <- 999
} else {
# Extract delay weeks; as.numeric() returns NA (with a warning) rather
# than throwing, so parse directly and skip unparseable scenario names
delay_weeks <- suppressWarnings(as.numeric(sub(" wk", "", scenario_name)))
if(is.na(delay_weeks)) {
cat(" Warning: Could not parse delay weeks from", scenario_name, ". Skipping.\n")
next
}
}
# Check that all values are scalar (length 1)
values_to_check <- list(
excess_cases_mean = r$excess_cases_mean,
incidence_rate_per_100py = r$incidence_rate_per_100py,
excess_rate_per_100py = r$excess_rate_per_100py,
relative_risk = r$relative_risk
)
# Check each value
all_ok <- TRUE
for(val_name in names(values_to_check)) {
val <- values_to_check[[val_name]]
if(is.null(val) || length(val) != 1 || !is.numeric(val)) {
cat(" Warning: Invalid", val_name, "for scenario", scenario_name,
"(length:", length(val), ", class:", class(val), "). Skipping.\n")
all_ok <- FALSE
break
}
}
if(!all_ok) next
# Convert to per 100,000 to match original scale
tryCatch({
new_row <- data.frame(
scenario = scenario_name,
delay_weeks = delay_weeks,
linear_excess_cases = r$excess_cases_mean * (100000/451), # rescale from the 451-person base to per 100,000
linear_incidence_rate = r$incidence_rate_per_100py,
linear_excess_rate = r$excess_rate_per_100py,
linear_relative_risk = r$relative_risk,
stringsAsFactors = FALSE
)
linear_results <- rbind(linear_results, new_row)
cat(" Successfully processed scenario:", scenario_name, "\n")
}, error = function(e) {
cat(" Error creating data frame for scenario", scenario_name, ":", e$message, "\n")
})
}
cat("\nLinear pre-curve results extraction complete\n")
cat("Successfully processed scenarios:", nrow(linear_results), "\n")
if(nrow(linear_results) == 0) {
stop("Error: No scenarios were successfully processed. Please check your results_list structure.")
}
cat("Processed scenarios:", paste(linear_results$scenario, collapse = ", "), "\n\n")
# ——————————————————————————————————————————————————————
# 3) Merge and Calculate Percentage Changes
# ——————————————————————————————————————————————————————
# Merge the datasets
comparison_data <- merge(original_clean, linear_results, by = "delay_weeks", all = TRUE)
# Calculate percentage changes
comparison_data <- comparison_data %>%
mutate(
# Percentage change in excess cases (abs() in the denominator keeps the sign
# of the change meaningful if the original excess is negative)
pct_change_excess_cases = ifelse(orig_excess_cases != 0,
((linear_excess_cases - orig_excess_cases) / abs(orig_excess_cases)) * 100,
NA),
# Percentage change in incidence rate
pct_change_incidence_rate = ifelse(orig_incidence_rate != 0,
((linear_incidence_rate - orig_incidence_rate) / orig_incidence_rate) * 100,
NA),
# Percentage change in excess rate
pct_change_excess_rate = ifelse(orig_excess_rate != 0,
((linear_excess_rate - orig_excess_rate) / abs(orig_excess_rate)) * 100,
NA),
# Change in relative risk (absolute difference)
change_relative_risk = linear_relative_risk - orig_relative_risk,
# Classify delay type
delay_type = case_when(
delay_weeks == -999 ~ "Instant TPT",
delay_weeks == 999 ~ "Control",
delay_weeks < 0 ~ "Pre-screen (Early Benefit)",
delay_weeks >= 0 ~ "Post-screen"
)
) %>%
arrange(delay_weeks)
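# Worked example of the percentage-change formula with made-up numbers: an
# original excess of 40 cases and a linear-curve excess of 30 gives
((30 - 40) / abs(40)) * 100 # -25, i.e. a 25% reduction under the linear pre-curve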
# ——————————————————————————————————————————————————————
# 4) Focus on Early Benefit Scenarios (Negative Delays)
# ——————————————————————————————————————————————————————
early_benefit_scenarios <- comparison_data %>%
filter(delay_weeks < 0 & delay_weeks != -999) %>% # Exclude Instant TPT baseline
select(delay_weeks, scenario,
orig_excess_cases, linear_excess_cases, pct_change_excess_cases,
orig_excess_rate, linear_excess_rate, pct_change_excess_rate,
orig_relative_risk, linear_relative_risk, change_relative_risk)
cat("=== EARLY BENEFIT ANALYSIS: Square-root vs Linear Pre-curve ===\n\n")
cat("EARLY BENEFIT SCENARIOS (Pre-screen Treatment):\n")
cat("Delay scenarios where treatment starts BEFORE screening (negative weeks)\n\n")
# Summary statistics for early benefit changes
if(nrow(early_benefit_scenarios) > 0) {
# Overall summary
cat("SUMMARY STATISTICS FOR EARLY BENEFIT SCENARIOS:\n")
cat("================================================\n")
cat("Number of early benefit scenarios:", nrow(early_benefit_scenarios), "\n")
cat("Delay range:", min(early_benefit_scenarios$delay_weeks), "to", max(early_benefit_scenarios$delay_weeks), "weeks\n\n")
# Excess cases changes
cat("EXCESS CASES (per 100,000) CHANGES:\n")
cat("Mean change:", sprintf("%.1f%%", mean(early_benefit_scenarios$pct_change_excess_cases, na.rm = TRUE)), "\n")
cat("Range:", sprintf("%.1f%% to %.1f%%",
min(early_benefit_scenarios$pct_change_excess_cases, na.rm = TRUE),
max(early_benefit_scenarios$pct_change_excess_cases, na.rm = TRUE)), "\n")
# Excess rate changes
cat("\nEXCESS INCIDENCE RATE CHANGES:\n")
cat("Mean change:", sprintf("%.1f%%", mean(early_benefit_scenarios$pct_change_excess_rate, na.rm = TRUE)), "\n")
cat("Range:", sprintf("%.1f%% to %.1f%%",
min(early_benefit_scenarios$pct_change_excess_rate, na.rm = TRUE),
max(early_benefit_scenarios$pct_change_excess_rate, na.rm = TRUE)), "\n")
# Relative risk changes
cat("\nRELATIVE RISK CHANGES (absolute):\n")
cat("Mean change:", sprintf("%.3f", mean(early_benefit_scenarios$change_relative_risk, na.rm = TRUE)), "\n")
cat("Range:", sprintf("%.3f to %.3f",
min(early_benefit_scenarios$change_relative_risk, na.rm = TRUE),
max(early_benefit_scenarios$change_relative_risk, na.rm = TRUE)), "\n\n")
} else {
cat("No early benefit scenarios found in the data.\n\n")
}
# ——————————————————————————————————————————————————————
# 5) Detailed Results Table
# ——————————————————————————————————————————————————————
cat("DETAILED COMPARISON TABLE:\n")
cat("==========================\n\n")
# Create detailed comparison table
detailed_table <- early_benefit_scenarios %>%
mutate(
`Delay (weeks)` = delay_weeks,
`Original Excess Cases` = sprintf("%.1f", orig_excess_cases),
`Linear Excess Cases` = sprintf("%.1f", linear_excess_cases),
`% Change Cases` = sprintf("%+.1f%%", pct_change_excess_cases),
`Original Excess Rate` = sprintf("%.2f", orig_excess_rate),
`Linear Excess Rate` = sprintf("%.2f", linear_excess_rate),
`% Change Rate` = sprintf("%+.1f%%", pct_change_excess_rate),
`Change in RR` = sprintf("%+.3f", change_relative_risk)
) %>%
select(`Delay (weeks)`, `Original Excess Cases`, `Linear Excess Cases`, `% Change Cases`,
`Original Excess Rate`, `Linear Excess Rate`, `% Change Rate`, `Change in RR`)
print(detailed_table)
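# Optional, assuming knitr is installed: render the same table for a report
# knitr::kable(detailed_table, align = "r")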
# ——————————————————————————————————————————————————————
# 6) Key Findings Summary
# ——————————————————————————————————————————————————————
cat("\n=== KEY FINDINGS ===\n")
if(nrow(early_benefit_scenarios) > 0) {
# Find scenarios with largest changes
max_cases_change_idx <- which.max(abs(early_benefit_scenarios$pct_change_excess_cases))
max_rate_change_idx <- which.max(abs(early_benefit_scenarios$pct_change_excess_rate))
cat("\nLARGEST CHANGES IN EARLY BENEFIT:\n")
cat("• Excess cases: ", sprintf("%+.1f%%", early_benefit_scenarios$pct_change_excess_cases[max_cases_change_idx]),
" at ", early_benefit_scenarios$delay_weeks[max_cases_change_idx], " weeks\n", sep="")
cat("• Excess rate: ", sprintf("%+.1f%%", early_benefit_scenarios$pct_change_excess_rate[max_rate_change_idx]),
" at ", early_benefit_scenarios$delay_weeks[max_rate_change_idx], " weeks\n", sep="")
# Overall interpretation
mean_change <- mean(early_benefit_scenarios$pct_change_excess_cases, na.rm = TRUE)
cat("\nINTERPRETATION:\n")
if(abs(mean_change) < 5) {
cat("• Minimal impact: Linear vs square-root pre-curve shows <5% average change\n")
} else if(abs(mean_change) < 15) {
cat("• Moderate impact: Linear vs square-root pre-curve shows ", sprintf("%.1f%%", abs(mean_change)), " average change\n", sep="")
} else {
cat("• Substantial impact: Linear vs square-root pre-curve shows ", sprintf("%.1f%%", abs(mean_change)), " average change\n", sep="")
}
if(mean_change > 0) {
cat("• Direction: Linear pre-curve generally shows HIGHER excess cases (worse outcomes)\n")
} else if(mean_change < 0) {
cat("• Direction: Linear pre-curve generally shows LOWER excess cases (better outcomes)\n")
} else {
cat("• Direction: Minimal difference between curve shapes\n")
}
} else {
cat("No early benefit scenarios available for comparison.\n")
}
cat("\n=== ANALYSIS COMPLETE ===\n")
# ——————————————————————————————————————————————————————
# 7) Optional: Save Results
# ——————————————————————————————————————————————————————
# Uncomment to save results
# write_csv(comparison_data, "curve_shape_comparison_full.csv")
# write_csv(early_benefit_scenarios, "early_benefit_curve_comparison.csv")
# Store the comparison objects for further analysis (return() is only valid
# inside a function; at the top level of a script, assign instead)
comparison_results <- list(
full_comparison = comparison_data,
early_benefit_scenarios = early_benefit_scenarios,
summary_stats = list(
n_scenarios = nrow(early_benefit_scenarios),
mean_excess_cases_change = mean(early_benefit_scenarios$pct_change_excess_cases, na.rm = TRUE),
mean_excess_rate_change = mean(early_benefit_scenarios$pct_change_excess_rate, na.rm = TRUE),
mean_rr_change = mean(early_benefit_scenarios$change_relative_risk, na.rm = TRUE)
)
)
Plot of curves with linear pre-curve (Figure 10):
library(mgcv)
# ——————————————————————————————————————————————————————
# Use the original curve-construction method over the full 54-week range
# ——————————————————————————————————————————————————————
# Input Parameters - TBCHAMP
time0 <- 0:54 # full 54-week range, matching the main analysis
p0 <- 32/1120
back_wk <- 26
# Use the k=5 GCV GAM models - TBCHAMP
Fc0 <- predict(control_gam_k5_GCV_TBCHAMP,
newdata = data.frame(time = time0))
Fi0 <- predict(intervention_gam_k5_GCV_TBCHAMP,
newdata = data.frame(time = time0))
Fc0 <- p0 + (1 - p0)*Fc0
Fi0 <- p0 + (1 - p0)*Fi0
# Discrete hazards: h_t = (F_t - F_{t-1}) / (1 - F_{t-1}), padding with F = 0
# before week 0; head(..., -1) drops the final element of the pad so the
# denominator aligns (the previous [-length(Fc0)] indexing dropped the wrong element)
h_ctrl <- diff(c(0, Fc0)) / (1 - head(c(0, Fc0), -1))
h_int <- diff(c(0, Fi0)) / (1 - head(c(0, Fi0), -1))
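# Sanity check: the discrete-hazard identity F_t = 1 - prod_{s<=t} (1 - h_s)
# should reconstruct the cumulative curve exactly (the product telescopes)
stopifnot(isTRUE(all.equal(1 - cumprod(1 - h_ctrl), as.numeric(Fc0))))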
# ——————————————————————————————————————————————————————
# Updated delays and time axis
# ——————————————————————————————————————————————————————
nominal_delays <- c(-26, -20, -10, -5, 5, 10, 20) # TBCHAMP delays
time_ext <- seq(-back_wk, 54, by = 1)
# ——————————————————————————————————————————————————————
# Original curve builder, with a LINEAR pre-screen ramp (sqrt() removed)
# ——————————————————————————————————————————————————————
build_curve_linear <- function(d) {
n <- length(time_ext)
Fcum <- numeric(n)
# find index of screen‐time t0 = 0
i0 <- which(time_ext == 0)
# 1) PRE‐SCREEN: LINEAR ramp from (–back_wk → 0) up to p0
s_up <- time_ext[1:(i0-1)] + back_wk
Fcum[1:(i0-1)] <- p0 * (s_up / back_wk) # LINEAR: removed sqrt()
# 2) AT SCREEN
Fcum[i0] <- p0
Sprev <- 1 - p0
# 3) POST‐SCREEN: control hazard until t0 ≤ d, then intervention hazard
for(i in (i0+1):n) {
t0 <- time_ext[i]
if (d > 0 && t0 <= d) {
h_t <- h_ctrl[t0 + 1]
} else {
idx <- if (d > 0) (t0 - d + 1) else (t0 + 1)
h_t <- h_int[ min(idx, length(h_int)) ]
}
Snew <- Sprev * (1 - h_t)
Fcum[i] <- 1 - Snew
Sprev <- Snew
}
Fcum
}
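# Worked example of the post-screen indexing (illustrative): with d = 5 and
# t0 = 8, TPT started 3 weeks earlier, so idx = t0 - d + 1 = 4 selects the
# intervention hazard at 3 weeks on treatment (h_int is indexed from time 0)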
# ——————————————————————————————————————————————————————
# Build all curves using original method
# ——————————————————————————————————————————————————————
# 1) Control & Instant curves
ctrl_ext <- build_curve_linear( Inf) # pure control
inst_ext <- build_curve_linear(-Inf) # pure intervention
# 2) separate positive and negative delays
pos_delays <- nominal_delays[nominal_delays >= 0]
neg_delays <- nominal_delays[nominal_delays < 0]
# 3) positive delays:
pos_mat <- if(length(pos_delays) > 0) sapply(pos_delays, function(d) build_curve_linear(d)) else NULL
# 4) negative delays: LINEAR ramp until delay point, then copy instant TPT curve
neg_mat <- if(length(neg_delays) > 0) {
sapply(neg_delays, function(d) {
n <- length(time_ext)
Fcum <- numeric(n)
# find index of delay point d
id_match <- which(time_ext == d)
if(length(id_match) == 0) {
id <- which.min(abs(time_ext - d))
} else {
id <- id_match[1]
}
i0 <- which(time_ext == 0) # screen time index
# Before delay point: follow the LINEAR ramp
if(id > 1) {
s_up <- time_ext[1:(id-1)] + back_wk
s_up[s_up < 0] <- 0 # ensure non-negative
Fcum[1:(id-1)] <- p0 * (s_up / back_wk) # LINEAR: removed sqrt()
}
# At delay point: set starting value from LINEAR ramp
s_delay <- max(0, d + back_wk)
start_value <- p0 * (s_delay / back_wk) # LINEAR: removed sqrt()
Fcum[id] <- start_value
# After delay point: copy the instant TPT curve pattern
if(id < n) {
# Find the baseline instant TPT value at time 0 (our reference point)
inst_at_zero <- inst_ext[i0]
for(i in (id+1):n) {
# Calculate how many steps from delay point
steps_from_delay <- i - id
# Find corresponding point in instant TPT curve (relative to time 0)
inst_index <- i0 + steps_from_delay
if(inst_index <= length(inst_ext) && inst_index > 0) {
# Get the instant TPT value at this point
inst_value <- inst_ext[inst_index]
# Calculate the change from instant TPT baseline
inst_change <- inst_value - inst_at_zero
# Apply the same change to our starting value
Fcum[i] <- start_value + inst_change
} else if(inst_index > length(inst_ext)) {
# Use the last available change and keep it constant
last_inst_value <- inst_ext[length(inst_ext)]
last_change <- last_inst_value - inst_at_zero
Fcum[i] <- start_value + last_change
} else {
Fcum[i] <- start_value
}
}
}
Fcum
})
} else NULL
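# Illustrative reading of the negative-delay construction: for d = -5 (with
# back_wk = 26) the curve follows the linear ramp up to p0 * 21/26 at week -5,
# then tracks the week-on-week increments of the Instant TPT curve from there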
# Combine all delay matrices
if(!is.null(neg_mat) && !is.null(pos_mat)) {
delay_mat <- cbind(neg_mat, pos_mat)
colnames(delay_mat) <- c(
paste0(neg_delays, " wk delay"),
paste0(pos_delays, " wk delay"))
} else if(!is.null(neg_mat)) {
delay_mat <- neg_mat
colnames(delay_mat) <- paste0(neg_delays, " wk delay")
} else if(!is.null(pos_mat)) {
delay_mat <- pos_mat
colnames(delay_mat) <- paste0(pos_delays, " wk delay")
} else {
delay_mat <- NULL
}
# ——————————————————————————————————————————————————————
# CREATE VISUALIZATION
# ——————————————————————————————————————————————————————
# Combine all curves
if(!is.null(delay_mat)) {
all_mat <- cbind(
Control = ctrl_ext,
`Instant TPT` = inst_ext,
delay_mat
)
} else {
all_mat <- cbind(
Control = ctrl_ext,
`Instant TPT` = inst_ext
)
}
# Set up compressed x-axis
neg_width <- 30
x_plot <- ifelse(
time_ext < 0,
neg_width * (time_ext + back_wk) / back_wk, # −26…0 → 0…30
neg_width + pmin(time_ext, 54) # 0…54 → 30…84
)
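# Mapping check (illustrative): week -26 -> 0, week -13 -> 15, week 0 -> 30,
# and week 26 -> 56 on the compressed axis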
# Subset to t ≤ 26 wk
keep <- which(time_ext <= 26)
x_sub <- x_plot[keep]
mat_sub <- all_mat[keep, , drop=FALSE]
# Convert to percentages
mat_sub_pct <- mat_sub * 100
# Create gradient colors from better (green) to worse (red) delays
n_delay_cols <- if(!is.null(delay_mat)) ncol(delay_mat) else 0
if(n_delay_cols > 0) {
delay_colors <- colorRampPalette(c("#00AA00", "#FFAA00", "#FF0000"))(n_delay_cols)
cols <- c("red", "blue", delay_colors)
} else {
cols <- c("red", "blue")
}
ltys <- c(1, 1, rep(2, n_delay_cols))
yr <- range(mat_sub_pct)
# Create the plot
matplot(
x_sub, mat_sub_pct,
type="l", col=cols, lty=ltys, lwd=2,
xaxt="n", ylim=yr,
xlab="Weeks since randomisation",
ylab="Cumulative incidence of TB (%)",
main="Sensitivity Analysis: TBCHAMP Delayed-TPT scenarios with linear pre-curve",
cex.axis = 1.0, # default axis-number size
cex.lab = 1.0 # default axis-label size
)
# Custom x-axis ticks
neg_ticks <- seq(-back_wk, 0, by=2)
neg_at <- neg_width * (neg_ticks + back_wk) / back_wk
pos_ticks <- seq(0, 26, by=2)
pos_at <- neg_width + pos_ticks
axis(1,
at = c(neg_at, pos_at),
labels = c(neg_ticks, pos_ticks),
cex.axis = 1.0)
# Vertical line at t=0
abline(v=neg_width, lty=3, col="gray50")
# Legend with larger text
legend("topleft",
legend = colnames(all_mat),
col = cols,
lty = ltys,
lwd = 2,
bty = "n",
ncol = 1,
cex = 1.2, # Larger legend text
y.intersp = 0.8,
x.intersp = 0.5,
inset = c(0.02, 0.02))
cat("TBCHAMP plotting complete with original curve method!\n")
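# Optional: to save Figure 10, wrap the plotting calls in a graphics device,
# e.g. (hypothetical filename):
# png("figure10_linear_precurve.png", width = 900, height = 600, res = 100)
# ... matplot / axis / abline / legend calls above ...
# dev.off()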