TBCHAMP Code: Quarto file

library(mgcv)
library(ggplot2)
library(gridExtra)
library(tidyr)
library(patchwork)
#Dataframe TBCHAMP
dataTBCHAMP <- data.frame(
  time = c(0, 6, 12, 18, 24, 30, 36, 42, 48, 54),
  control = c(0.00,0.00, 0.006732487, 0.018114981, 0.02280548, 0.022702036, 0.022847634, 0.022830717, 0.022888679, 0.028322148),
  intervention = c(0.00, 0.00, 0.002269399, 0.002327916,0.002392257, 0.002286316, 0.002268567, 0.002249154, 0.005036885, 0.014024627))

#TBCHAMP
data_longTBCHAMP <- pivot_longer(dataTBCHAMP, cols = c(control, intervention), names_to = "group", values_to = "incidence")
data_longTBCHAMP$group <- factor(data_longTBCHAMP$group)

Fitting the GLM and GAM models (four fitted curves across three model types)

#Fit GLM LINEAR TBCHAMP
glm_controlTBCHAMP <- glm(control ~ time, data = dataTBCHAMP, family = gaussian())
glm_interventionTBCHAMP <- glm(intervention ~ time, data = dataTBCHAMP, family = gaussian())

#Fit GLM LOGISTIC TBCHAMP 
glm_control_logTBCHAMP <- glm(control ~ time, data = dataTBCHAMP, family = binomial(link = "logit"))
glm_intervention_logTBCHAMP <- glm(intervention ~ time, data = dataTBCHAMP, family = binomial(link = "logit"))
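# Note: fitting a binomial GLM to non-integer proportions triggers a harmless
# "non-integer #successes" warning; family = quasibinomial() would avoid it.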

#Fit GAM TBCHAMP
gam_modelTBCHAMP <- gam(incidence ~ s(time, by = group) + group, data = data_longTBCHAMP, method = "REML")
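# s(time, by = group) fits a separate smooth of time for each level of group;
# the additive group term provides the group-specific intercepts.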

Generating predictions - TBCHAMP

#Generating predictions TBCHAMP
pred_dataTBCHAMP <- expand.grid(time = seq(0, max(dataTBCHAMP$time), length.out = 100),
                         group = levels(data_longTBCHAMP$group))
pred_dataTBCHAMP$glm_predTBCHAMP <- ifelse(pred_dataTBCHAMP$group == "control",
                             predict(glm_controlTBCHAMP, newdata = pred_dataTBCHAMP, type = "response"),
                             predict(glm_interventionTBCHAMP, newdata = pred_dataTBCHAMP, type = "response"))
pred_dataTBCHAMP$glm_pred_logTBCHAMP <- ifelse(pred_dataTBCHAMP$group == "control",
                                 predict(glm_control_logTBCHAMP, newdata = pred_dataTBCHAMP, type = "response"),
                                 predict(glm_intervention_logTBCHAMP, newdata = pred_dataTBCHAMP, type = "response"))
pred_dataTBCHAMP$gam_predTBCHAMP <- predict(gam_modelTBCHAMP, newdata = pred_dataTBCHAMP, type = "response")
#Calculate RMSE for each model
calculate_rmse <- function(actual, predicted) {
  sqrt(mean((actual - predicted)^2))
}
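# e.g. calculate_rmse(c(0.01, 0.02), c(0.012, 0.018)) returns 0.002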
#RMSE for GLM Linear
rmse_glm_controlTBCHAMP <- calculate_rmse(dataTBCHAMP$control, predict(glm_controlTBCHAMP, newdata = dataTBCHAMP))
rmse_glm_interventionTBCHAMP <- calculate_rmse(dataTBCHAMP$intervention, predict(glm_interventionTBCHAMP, newdata = dataTBCHAMP))
rmse_glm_linearTBCHAMP <- mean(c(rmse_glm_controlTBCHAMP, rmse_glm_interventionTBCHAMP))

#RMSE for GLM Logistic
rmse_glm_control_logTBCHAMP <- calculate_rmse(dataTBCHAMP$control, predict(glm_control_logTBCHAMP, newdata = dataTBCHAMP, type = "response"))
rmse_glm_intervention_logTBCHAMP <- calculate_rmse(dataTBCHAMP$intervention, predict(glm_intervention_logTBCHAMP, newdata = dataTBCHAMP, type = "response"))
rmse_glm_logisticTBCHAMP <- mean(c(rmse_glm_control_logTBCHAMP, rmse_glm_intervention_logTBCHAMP))

#RMSE for GAM
gam_predictionsTBCHAMP <- predict(gam_modelTBCHAMP, newdata = data_longTBCHAMP, type = "response")
rmse_gamTBCHAMP <- calculate_rmse(data_longTBCHAMP$incidence, gam_predictionsTBCHAMP)

#Print RMSE values
cat("RMSE for GLM Linear Model (Control + Intervention):", rmse_glm_linearTBCHAMP, "\n") #0.0035  
cat("RMSE for GLM Logistic Model (Control + Intervention):", rmse_glm_logisticTBCHAMP, "\n") #0.004 
cat("RMSE for GAM Model:", rmse_gamTBCHAMP, "\n") #0.000000013 

TBCHAMP prediction plots (Figure 3)

# Create GLM plot (left - shows y-axis only)
glm_plotTBCHAMP <- ggplot(data_longTBCHAMP, aes(x = time, y = incidence * 100, color = group)) +
  geom_point() +
  geom_line(data = pred_dataTBCHAMP, aes(y = glm_predTBCHAMP * 100)) +
  labs(title = "GLM Regression - Linear",
       x = "", y = "Cumulative Incidence (%)") +
  scale_color_manual(values = c("control" = "red", "intervention" = "blue")) +
  theme_minimal() +
  ylim(0, 6) +
  theme(legend.position = "none")

# GAM plot (middle - no axes)
gam_plotTBCHAMP <- ggplot(data_longTBCHAMP, aes(x = time, y = incidence * 100, color = group)) +
  geom_point() +
  geom_line(data = pred_dataTBCHAMP, aes(y = gam_predTBCHAMP * 100)) +
  labs(title = "GAM Regression",
       x = "", y = "") +
  scale_color_manual(values = c("control" = "red", "intervention" = "blue")) +
  theme_minimal() +
  ylim(0, 6) +
  theme(legend.position = "none",
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank())

# GLM plot - logistic (right - no axes)
glm_plot_logTBCHAMP <- ggplot(data_longTBCHAMP, aes(x = time, y = incidence * 100, color = group)) +
  geom_point() +
  geom_line(data = pred_dataTBCHAMP, aes(y = glm_pred_logTBCHAMP * 100)) +
  labs(title = "GLM Regression - Logistic",
       x = "", y = "") +
  scale_color_manual(values = c("control" = "red", "intervention" = "blue")) +
  theme_minimal() +
  ylim(0, 6) +
  theme(legend.position = "none",
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank())

# Combine plots with shared x-axis label
combined_plotTBCHAMP <- glm_plotTBCHAMP + gam_plotTBCHAMP + glm_plot_logTBCHAMP +
  plot_layout(guides = "collect") &
  theme(legend.position = "bottom", 
        legend.text = element_text(size = 14))


# Display the combined plot
print(combined_plotTBCHAMP)
# Model summaries and goodness-of-fit statistics are printed in the next section

Goodness of fit of the models

TBCHAMP GOF

#Deviance-based pseudo R-squared for the linear GLMs (for a Gaussian GLM this equals ordinary R-squared)
pseudo_r2_controlTBCHAMP <- 1 - (glm_controlTBCHAMP$deviance / glm_controlTBCHAMP$null.deviance)
pseudo_r2_interventionTBCHAMP <- 1 - (glm_interventionTBCHAMP$deviance / glm_interventionTBCHAMP$null.deviance)

# Print pseudo R-squared for linear GLMs
cat("Pseudo R-squared for Linear GLM (Control):", pseudo_r2_controlTBCHAMP, "\n")
cat("Pseudo R-squared for Linear GLM (Intervention):", pseudo_r2_interventionTBCHAMP, "\n")

# Pseudo R-squared for Logistic GLMs
pseudo_r2_control_logTBCHAMP <- 1 - (glm_control_logTBCHAMP$deviance / glm_control_logTBCHAMP$null.deviance)
pseudo_r2_intervention_logTBCHAMP <- 1 - (glm_intervention_logTBCHAMP$deviance / glm_intervention_logTBCHAMP$null.deviance)

# Print Pseudo R-squared for Logistic GLMs
cat("Pseudo R-squared for Logistic GLM (Control):", pseudo_r2_control_logTBCHAMP, "\n")
cat("Pseudo R-squared for Logistic GLM (Intervention):", pseudo_r2_intervention_logTBCHAMP, "\n")

#AIC for the GAM model
aic_valueTBCHAMP <- AIC(gam_modelTBCHAMP)
print(aic_valueTBCHAMP)

#Pseudo R-squared for Linear GLM (Control): 0.8
#Pseudo R-squared for Linear GLM (Intervention): 0.54
#Pseudo R-squared for Logistic GLM (Control): 0.53
#Pseudo R-squared for Logistic GLM (Intervention): 0.74
#aic_valueTBCHAMP: -628.7966

GAM Cumulative Incidence Curves with 95% UI (Figure 4)

Using the final model choice: basis dimension k = 5 with GCV smoothness selection

library(mgcv)
library(ggplot2)
library(dplyr)

# Fixed function to add (0,0) point with matching column structure
add_zero_point <- function(data) {
  # Create a zero point with the same structure as the input data
  zero_point <- data[1, ]  # Take first row as template
  zero_point[1, ] <- NA    # Clear all values
  
  # Set the specific values we want
  zero_point$time <- 0
  zero_point$incidence <- 0
  zero_point$group <- unique(data$group)[1]  # Take first unique group value
  
  # Add weight column if it doesn't exist
  if(!"weight" %in% names(data)) {
    data$weight <- 1  # Default weight for existing data
    zero_point$weight <- 100  # High weight for zero point
  } else {
    zero_point$weight <- 100
  }
  
  # Fill any remaining NA values with appropriate defaults
  for(col in names(zero_point)) {
    if(is.na(zero_point[[col]]) && col != "time" && col != "incidence" && col != "group" && col != "weight") {
      if(is.numeric(data[[col]])) {
        zero_point[[col]] <- 0
      } else if(is.character(data[[col]]) || is.factor(data[[col]])) {
        zero_point[[col]] <- data[[col]][1]
      }
    }
  }
  
  # Combine the data
  rbind(data, zero_point)
}
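
# The heavily weighted synthetic (0, 0) row (weight 100 vs 1 elsewhere) pulls the
# fitted smooth towards zero at time 0 without hard-constraining the spline basis.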

# Prepare data with zero points (KEEP THIS DATA!)
control_data_TBCHAMP <- data_longTBCHAMP %>% 
  filter(group == "control") %>%
  add_zero_point()

intervention_data_TBCHAMP <- data_longTBCHAMP %>% 
  filter(group == "intervention") %>%
  add_zero_point()

# Fit GAM models with weights to the data that includes zero points
control_gam_k5_GCV_TBCHAMP <- gam(
  incidence ~ s(time, k = 5), 
  data = control_data_TBCHAMP,
  weights = control_data_TBCHAMP$weight,  # Use the weight column
  method = "GCV.Cp"
)

intervention_gam_k5_GCV_TBCHAMP <- gam(
  incidence ~ s(time, k = 5), 
  data = intervention_data_TBCHAMP,
  weights = intervention_data_TBCHAMP$weight,  # Use the weight column
  method = "GCV.Cp"
)

# Generate predictions for all time points
original_times <- sort(unique(data_longTBCHAMP$time))
new_data <- data.frame(time = original_times)

# Function to get predictions with CIs (MODIFIED TO CONVERT TO PERCENTAGES)
get_predictions <- function(model, group_name) {
  pred <- predict(model, newdata = new_data, se.fit = TRUE)
  data.frame(
    time = original_times,
    group = group_name,
    incidence = as.numeric(pred$fit) * 100,  # Convert to percentage
    se = as.numeric(pred$se.fit) * 100,      # Convert SE to percentage
    stringsAsFactors = FALSE
  ) %>%
    mutate(
      ci_lower = pmax(0, incidence - 1.96 * se),
      ci_upper = incidence + 1.96 * se,
      ci_width = ci_upper - ci_lower
    )
}
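
# The intervals are pointwise Wald-type approximations (fit ± 1.96 × SE) on the
# response scale, truncated below at zero; they are not simultaneous bands.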

# Get predictions for both groups
control_predictions <- get_predictions(control_gam_k5_GCV_TBCHAMP, "control")
intervention_predictions <- get_predictions(intervention_gam_k5_GCV_TBCHAMP, "intervention")

# Combine predictions
gam_predictions_k5_TBCHAMP <- rbind(control_predictions, intervention_predictions)

# Verify zero constraint
print("=== ZERO CONSTRAINT CHECK ===")
print("Zero time predictions:")
print(gam_predictions_k5_TBCHAMP %>% filter(time == 0))

# Display CI width comparison
control_ci_width <- mean(control_predictions$ci_width)
intervention_ci_width <- mean(intervention_predictions$ci_width)
print("=== CI WIDTH COMPARISON ===")
print(paste("Control mean CI width:", format(control_ci_width, scientific = TRUE)))
print(paste("Intervention mean CI width:", format(intervention_ci_width, scientific = TRUE)))
print(paste("Ratio (intervention/control):", round(intervention_ci_width / control_ci_width, 1)))

# Create plot (MODIFIED FOR PERCENTAGES)
p_tbchamp <- ggplot(gam_predictions_k5_TBCHAMP, aes(x = time, y = incidence, color = group, fill = group)) +
  geom_ribbon(aes(ymin = ci_lower, ymax = ci_upper), alpha = 0.4, color = NA) +
  geom_line(size = 1.2) +
  scale_color_manual(values = c("control" = "red", "intervention" = "blue"),
                     labels = c("Control group", "Intervention group")) +
  scale_fill_manual(values = c("control" = "red", "intervention" = "blue"),
                    labels = c("Control group", "Intervention group")) +
  labs(
    x = "Time since enrolment (weeks)",
    y = "Cumulative Incidence (%)",  # Updated label
    title = "TBCHAMP GAM Model (k=5, GCV) - Starting at 0"
  ) +
  theme_minimal() +
  theme(legend.title = element_blank()) +
  coord_cartesian(ylim = c(0, 5))  # Updated to percentage scale (0-5%)

print(p_tbchamp)
# Week 26 predictions
week_26_data <- data.frame(time = 26)

# Get week 26 predictions for both groups
control_pred_26 <- predict(control_gam_k5_GCV_TBCHAMP, newdata = week_26_data, se.fit = TRUE)
intervention_pred_26 <- predict(intervention_gam_k5_GCV_TBCHAMP, newdata = week_26_data, se.fit = TRUE)

# Create week 26 results table
week_26_results <- data.frame(
  Group = c("Control", "Intervention"),
  Week = 26,
  Predicted_Incidence = c(as.numeric(control_pred_26$fit), as.numeric(intervention_pred_26$fit)),
  Standard_Error = c(as.numeric(control_pred_26$se.fit), as.numeric(intervention_pred_26$se.fit))
) %>%
  mutate(
    CI_Lower_95 = pmax(0, Predicted_Incidence - 1.96 * Standard_Error),
    CI_Upper_95 = Predicted_Incidence + 1.96 * Standard_Error,
    CI_Width = CI_Upper_95 - CI_Lower_95
  )

print("=== WEEK 26 GAM PREDICTIONS ===")
print(week_26_results)

Curves with delay scenarios (Figure 5)

library(mgcv)

# —————————————————————————————————————————————————————
# A) Fit‐based cumulative‐incidence at t = 0…54, with p0‐spike

time0 <- 0:54
p0    <- 32/1120

# Use the k=5 GCV GAM models
Fc0 <- predict(control_gam_k5_GCV_TBCHAMP,
               newdata = data.frame(time = time0))
Fi0 <- predict(intervention_gam_k5_GCV_TBCHAMP,
               newdata = data.frame(time = time0))

Fc0 <- p0 + (1 - p0)*Fc0
Fi0 <- p0 + (1 - p0)*Fi0

h_ctrl <- diff(c(0, Fc0)) / (1 - head(c(0, Fc0), -1))
h_int  <- diff(c(0, Fi0)) / (1 - head(c(0, Fi0), -1))
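
For reference, these lines implement the discrete-time hazard and survival recursion used by all the curve builders below:

$$
h_t = \frac{F(t) - F(t-1)}{1 - F(t-1)}, \qquad S(t) = S(t-1)\,\bigl(1 - h_t\bigr), \qquad F(t) = 1 - S(t),
$$

with $F(-1) = 0$.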

# —————————————————————————————————————————————————————
# B) "Pre‐screen" ramp length and nominal post‐screen delays

back_wk <- 26            # show 6 months before screen
nominal_delays <- c(-26, -20, -10, -5, 5, 10, 20) # weeks of *additional* delay after screen

# —————————————————————————————————————————————————————
# C) build a time axis from –back_wk … +54

time_ext <- seq(-back_wk, 54, by = 1)

# —————————————————————————————————————————————————————
# D) single‐curve builder: √ ramp on [–back_wk,0], then hazard‐switch at d

build_curve <- function(d) {
  n    <- length(time_ext)
  Fcum <- numeric(n)

  # find index of screen‐time t0 = 0
  i0   <- which(time_ext == 0)

  # 1) PRE‐SCREEN: √ ramp from (–back_wk → 0) up to p0
  s_up <- time_ext[1:(i0-1)] + back_wk
  Fcum[1:(i0-1)] <- p0 * sqrt( s_up / back_wk )

  # 2) AT SCREEN
  Fcum[i0] <- p0
  Sprev    <- 1 - p0

  # 3) POST‐SCREEN: control hazard until t0 ≤ d, then intervention hazard
  for(i in (i0+1):n) {
    t0 <- time_ext[i]
    if (d > 0 && t0 <= d) {
      h_t <- h_ctrl[t0 + 1]
    } else {
      idx <- if (d > 0) (t0 - d + 1) else (t0 + 1)
      h_t <- h_int[ min(idx, length(h_int)) ]
    }
    Snew     <- Sprev * (1 - h_t)
    Fcum[i]  <- 1 - Snew
    Sprev    <- Snew
  }

  Fcum
}
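
# build_curve(Inf) reproduces the pure control curve, build_curve(-Inf) the
# instant-TPT curve, and e.g. build_curve(10) switches from the control to the
# intervention hazard 10 weeks after screening.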

# —————————————————————————————————————————————————————
# E) build all the curves

# 1) Control & Instant curves
ctrl_ext <- build_curve( Inf)   # pure control
inst_ext <- build_curve(-Inf)   # pure intervention

#   and the √ ramp function for pre‐screen:
ramp <- p0 * sqrt( (time_ext + back_wk) / back_wk )

# 2) separate positive and negative delays
pos_delays <- nominal_delays[nominal_delays >= 0]
neg_delays <- nominal_delays[nominal_delays < 0]

# 3) positive delays:
pos_mat <- if(length(pos_delays) > 0) sapply(pos_delays, function(d) build_curve(d)) else NULL

# 4) negative delays: ramp until delay point, then copy instant TPT curve
neg_mat <- if(length(neg_delays) > 0) {
  sapply(neg_delays, function(d) {
    n    <- length(time_ext)
    Fcum <- numeric(n)
    
    # find index of delay point d
    id_match <- which(time_ext == d)
    if(length(id_match) == 0) {
      # If exact match not found, find closest index
      id <- which.min(abs(time_ext - d))
    } else {
      id <- id_match[1]
    }
    
    i0   <- which(time_ext == 0)  # screen time index
    
    # Before delay point: follow the ramp
    if(id > 1) {
      s_up <- time_ext[1:(id-1)] + back_wk
      s_up[s_up < 0] <- 0  # ensure non-negative
      Fcum[1:(id-1)] <- p0 * sqrt( s_up / back_wk )
    }
    
    # At delay point: set starting value from ramp
    s_delay <- max(0, d + back_wk)
    start_value <- p0 * sqrt( s_delay / back_wk )
    Fcum[id] <- start_value
    
    # After delay point: copy the instant TPT curve pattern
    if(id < n) {
      # Find the baseline instant TPT value at time 0 (our reference point)
      inst_at_zero <- inst_ext[i0]
      
      for(i in (id+1):n) {
        # Calculate how many steps from delay point
        steps_from_delay <- i - id
        
        # Find corresponding point in instant TPT curve (relative to time 0)
        inst_index <- i0 + steps_from_delay
        
        if(inst_index <= length(inst_ext) && inst_index > 0) {
          # Get the instant TPT value at this point
          inst_value <- inst_ext[inst_index]
          
          # Calculate the change from instant TPT baseline
          inst_change <- inst_value - inst_at_zero
          
          # Apply the same change to our starting value
          Fcum[i] <- start_value + inst_change
        } else if(inst_index > length(inst_ext)) {
          # If we've gone beyond available instant TPT data, 
          # use the last available change and keep it constant
          last_inst_value <- inst_ext[length(inst_ext)]
          last_change <- last_inst_value - inst_at_zero
          Fcum[i] <- start_value + last_change
        } else {
          # If we somehow get a negative index, keep the start value
          Fcum[i] <- start_value
        }
      }
    }
    
    Fcum
  })
} else NULL

# Combine all delay matrices
if(!is.null(neg_mat) && !is.null(pos_mat)) {
  delay_mat <- cbind(neg_mat, pos_mat)
  colnames(delay_mat) <- c(
    paste0(neg_delays, " wk delay"),
    paste0(pos_delays, " wk delay"))
} else if(!is.null(neg_mat)) {
  delay_mat <- neg_mat
  colnames(delay_mat) <- paste0(neg_delays, " wk delay")
} else if(!is.null(pos_mat)) {
  delay_mat <- pos_mat
  colnames(delay_mat) <- paste0(pos_delays, " wk delay")
} else {
  delay_mat <- NULL
}

# —————————————————————————————————————————————————————
# F) compressed x-axis plot, up to +26 wk, now including the negative delay curves

# how many "plot units" to devote to the negative side?
neg_width <- 30

# remap time_ext → x_plot:
x_plot <- ifelse(
  time_ext < 0,
  neg_width * (time_ext + back_wk) / back_wk,  # -back_wk…0 → 0…30
  neg_width + pmin(time_ext, 54)               #  0…54 → 30…84
)

# bind together Control, Instant, and only your chosen delays:
if(!is.null(delay_mat)) {
  all_mat <- cbind(
    Control       = ctrl_ext,
    `Instant TPT` = inst_ext,
    delay_mat
  )
} else {
  all_mat <- cbind(
    Control       = ctrl_ext,
    `Instant TPT` = inst_ext
  )
}

# subset to t ≤ 26 wk 
keep   <- which(time_ext <= 26)
x_sub   <- x_plot[keep]
mat_sub <- all_mat[keep, , drop=FALSE]

# CORRECTED: Convert to percentages in the data, not during plotting
mat_sub_pct <- mat_sub * 100

# MODIFIED: Create gradient colors from better (green) to worse (red) delays
n_delay_cols <- if(!is.null(delay_mat)) ncol(delay_mat) else 0

if(n_delay_cols > 0) {
  # Create a gradient from green (better/earlier) to red (worse/later)
  delay_colors <- colorRampPalette(c("#00AA00", "#FFAA00", "#FF0000"))(n_delay_cols)
  cols <- c("red", "blue", delay_colors)
} else {
  cols <- c("red", "blue")
}

ltys <- c(1, 1, rep(2, n_delay_cols))
yr   <- range(mat_sub_pct)

# CORRECTED: Use original matplot approach with percentage data
matplot(
  x_sub, mat_sub_pct,
  type="l", col=cols, lty=ltys, lwd=2,
  xaxt="n", ylim=yr,
  xlab="Weeks since randomisation",
  ylab="Cumulative incidence of TB (%)",
  main="TBCHAMP Delayed-TPT scenarios",
  cex.axis = 1.0,  # Size 12 for axis numbers
  cex.lab = 1.0    # Size 12 for axis labels
)

# custom x-axis ticks:
neg_ticks <- seq(-back_wk, 0, by=2)     
neg_at    <- neg_width * (neg_ticks + back_wk) / back_wk
pos_ticks <- seq(0, 26, by=2) 
pos_at    <- neg_width + pos_ticks

axis(1,
     at     = c(neg_at, pos_at),
     labels = c(neg_ticks, pos_ticks),
     cex.axis = 1.0)  # Size 12 for axis tick labels

# vertical line at t=0
abline(v=neg_width, lty=3, col="gray50")

# legend with larger text
legend("topleft",
       legend    = colnames(all_mat),
       col       = cols,
       lty       = ltys,
       lwd       = 2,
       bty       = "n",
       ncol      = 1,        
       cex       = 1.2,      # Larger legend text
       y.intersp = 0.8,      
       x.intersp = 0.5,      
       inset     = c(0.02, 0.02))
# --------------------------------------------------------
# G) Print cumulative incidence at 26 weeks for all curves
# --------------------------------------------------------

t_target <- 26
idx_26 <- which(time_ext == t_target)

if (length(idx_26) > 0) {
  ci_26 <- all_mat[idx_26, ] * 100   # convert to %
  cat("Cumulative incidence at", t_target, "weeks (%):\n")
  print(round(ci_26, 2))
} else {
  warning("No exact match for 26 weeks in time_ext")
}

Appendix A1: Table of all 2-week delay scenario statistics with UIs

library(mgcv)
library(ggplot2)
library(dplyr)
library(parallel)

# ——————————————————————————————————————————————————————
# 0) Input Parameters - MODIFY THESE WITH YOUR ACTUAL DATA
# ——————————————————————————————————————————————————————

# Sample sizes
n_control <- 465      # REPLACE with your actual control sample size
n_intervention <- 451 # REPLACE with your actual intervention sample size

# Person-years of follow-up
py_control <- 418     # REPLACE with your actual control person-years
py_intervention <- 406 # REPLACE with your actual intervention person-years

# Monte Carlo parameters
n_simulations <- 5000  # Number of bootstrap simulations
set.seed(123)         # For reproducibility

cat("Analysis Parameters:\n")
cat("Control sample size:", n_control, "\n")
cat("Intervention sample size:", n_intervention, "\n")
cat("Control person-years:", py_control, "\n")
cat("Intervention person-years:", py_intervention, "\n")
cat("Number of simulations:", n_simulations, "\n\n")

# ——————————————————————————————————————————————————————
# 1) Monte Carlo Simulation Function
# ——————————————————————————————————————————————————————

simulate_gam_predictions <- function(gam_model, newdata, n_sim = n_simulations) {
  # Get mean predictions and covariance matrix
  pred_mean <- predict(gam_model, newdata = newdata, type = "response")
  
  # Get the covariance matrix of the GAM coefficients
  Vp <- vcov(gam_model, unconditional = TRUE)
  
  # Get the linear predictor matrix
  Xp <- predict(gam_model, newdata = newdata, type = "lpmatrix")
  
  # Simulate from multivariate normal distribution
  # Generate random coefficients
  coef_sims <- MASS::mvrnorm(n_sim, coef(gam_model), Vp)
  
  # Calculate predictions for each simulation
  pred_sims <- tcrossprod(Xp, coef_sims)
  
  return(pred_sims)
}
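
# This is the standard posterior simulation approach for mgcv: draw coefficient
# vectors from N(beta_hat, Vp) and map them through the linear predictor matrix
# Xp to obtain a matrix of simulated prediction curves (rows = times, cols = draws).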

# ——————————————————————————————————————————————————————
# 2) Bootstrap Scenario Builder (FIXED p0 inconsistency)
# ——————————————————————————————————————————————————————

bootstrap_scenario <- function(sim_idx, control_sims, intervention_sims, delay_weeks) {
  # Extract simulated predictions for this iteration
  Fc0_sim <- control_sims[, sim_idx]
  Fi0_sim <- intervention_sims[, sim_idx]
  
  # Apply the p0 transformation, drawing p0 from a beta distribution to account for its uncertainty
  p0_draw <- rbeta(1, 32 + 0.5, 1120 - 32 + 0.5)  # TBCHAMP p0: 32 events out of 1120 (Jeffreys prior)
  Fc0_sim <- p0_draw + (1 - p0_draw) * Fc0_sim
  Fi0_sim <- p0_draw + (1 - p0_draw) * Fi0_sim
  # Enforce bounds and monotonicity: simulated cumulative curves can wiggle, which would make the discrete hazards negative
  Fc0_sim <- pmin(pmax(Fc0_sim, 0), 0.999)
  Fi0_sim <- pmin(pmax(Fi0_sim, 0), 0.999)
  Fc0_sim <- cummax(Fc0_sim); Fi0_sim <- cummax(Fi0_sim)

  
  # Calculate hazards for this simulation
  h_ctrl_sim <- diff(c(0, Fc0_sim)) / (1 - head(c(0, Fc0_sim), -1))
  h_int_sim <- diff(c(0, Fi0_sim)) / (1 - head(c(0, Fi0_sim), -1))
  
  # Build curve for this delay scenario
  n <- length(time_ext)
  Fcum <- numeric(n)
  
  # Find indices
  i0 <- which(time_ext == 0)
  back_wk <- 26
  
  # Special handling for pure scenarios
  if(delay_weeks == Inf) {
    # Pure control - FIXED: use p0_draw instead of p0
    # Pre-screen ramp
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)  # FIXED
    }
    Fcum[i0] <- p0_draw  # FIXED
    Sprev <- 1 - p0_draw  # FIXED
    
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else if(delay_weeks == -Inf) {
    # Pure intervention (Instant TPT) - FIXED: use p0_draw instead of p0
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)  # FIXED
    }
    Fcum[i0] <- p0_draw  # FIXED
    Sprev <- 1 - p0_draw  # FIXED
    
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else if(delay_weeks < 0) {
    # CORRECTED: Negative delays (pre-screen intervention) - FIXED: use p0_draw instead of p0
    # This means intervention starts BEFORE time 0, providing earlier protection
    
    # Find delay point index
    id_match <- which(time_ext == delay_weeks)
    if(length(id_match) == 0) {
      id <- which.min(abs(time_ext - delay_weeks))
    } else {
      id <- id_match[1]
    }
    
    # Before delay point: ramp (no intervention yet)
    if(id > 1) {
      s_up <- time_ext[1:(id-1)] + back_wk
      s_up[s_up < 0] <- 0
      Fcum[1:(id-1)] <- p0_draw * sqrt(s_up / back_wk)  # FIXED
    }
    
    # At delay point: start intervention
    s_delay <- max(0, delay_weeks + back_wk)
    Fcum[id] <- p0_draw * sqrt(s_delay / back_wk)  # FIXED
    Sprev <- 1 - Fcum[id]
    
    # CORRECTED: After delay point, use intervention hazards consistently
    # Map the post-delay timeline to intervention hazards starting from time 0
    for(i in (id+1):n) {
      current_time <- time_ext[i]
      
      # Calculate how long intervention has been active
      intervention_duration <- current_time - delay_weeks
      
      if(intervention_duration >= 0 && intervention_duration <= 54) {
        # Use intervention hazard corresponding to this duration
        h_idx <- min(intervention_duration + 1, length(h_int_sim))
        h_t <- h_int_sim[max(1, h_idx)]
      } else if(intervention_duration > 54) {
        # Use last available intervention hazard
        h_t <- h_int_sim[length(h_int_sim)]
      } else {
        # This shouldn't happen with negative delays, but use first intervention hazard as fallback
        h_t <- h_int_sim[1]
      }
      
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else {
    # Positive delays (post-screen delays) - FIXED: use p0_draw instead of p0
    # Pre-screen ramp
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)  # FIXED
    }
    
    # At screen
    Fcum[i0] <- p0_draw  # FIXED
    Sprev <- 1 - p0_draw  # FIXED
    
    # Post-screen: control until delay point, then intervention
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      
      if(t0 <= delay_weeks) {
        # Use control hazard until delay point
        h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
      } else {
        # Use intervention hazard after delay point
        # Map to intervention timeline: intervention has been active for (t0 - delay_weeks) weeks
        intervention_duration <- t0 - delay_weeks
        h_idx <- min(intervention_duration + 1, length(h_int_sim))
        h_t <- h_int_sim[max(1, h_idx)]
      }
      
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
  }
  
  # Return final value at week 26
  week_26_idx <- which(time_ext == 26)
  return(Fcum[week_26_idx])
}
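
# e.g. bootstrap_scenario(1, control_sims, intervention_sims, 4) returns the
# simulated week-26 cumulative incidence under a 4-week post-screen delay.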

# ——————————————————————————————————————————————————————
# 3) Run Monte Carlo Simulations - Every 2 Week Delays
# ——————————————————————————————————————————————————————

cat("Running Monte Carlo simulations for 2-week delay intervals...\n")

# Reset seed right before simulations for exact reproducibility
set.seed(42)

# Set up time vectors
time0 <- 0:26
back_wk <- 26
time_ext <- seq(-back_wk, 26, by = 1)

# Create 2-week delay intervals from -26 to +26 weeks
delays_2wk <- seq(-26, 26, by = 2)

# Generate simulated predictions for both models
cat("Generating GAM simulations...\n")
control_sims <- simulate_gam_predictions(control_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
intervention_sims <- simulate_gam_predictions(intervention_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)

cat("Generated", ncol(control_sims), "control simulations and", ncol(intervention_sims), "intervention simulations\n")

# Define all delay scenarios (including baseline at week 0 = no delay)
all_delays <- setNames(delays_2wk, paste0(delays_2wk, " wk"))

# Run simulations for all scenarios
cat("Running scenario simulations...\n")
simulation_results <- list()

for(scenario_name in names(all_delays)) {
  cat("Processing scenario:", scenario_name, "\n")
  delay_val <- all_delays[scenario_name]
  
  # Run simulations for this scenario
  scenario_sims <- sapply(1:n_simulations, function(i) {
    bootstrap_scenario(i, control_sims, intervention_sims, delay_val)
  })
  
  simulation_results[[scenario_name]] <- scenario_sims
}

cat("Completed all simulations!\n\n")

# Check if "0 wk" exists and compare a few values
print(simulation_results[["0 wk"]][1:5])  # First 5 simulation results
print(mean(simulation_results[["0 wk"]]))   # Mean for "0 wk" scenario

# ——————————————————————————————————————————————————————
# 4) Calculate Key Metrics with Week 0 as Baseline
# ——————————————————————————————————————————————————————

# Get baseline (Week 0 = no delay) for relative calculations
baseline_sims <- simulation_results[["0 wk"]]
baseline_mean <- mean(baseline_sims)

# Calculate metrics for each scenario
results_list <- list()

for(scenario_name in names(simulation_results)) {
  scenario_sims <- simulation_results[[scenario_name]]
  delay_value <- as.numeric(sub(" wk", "", scenario_name))
  
  # Basic statistics
  mean_incidence <- mean(scenario_sims)
  median_incidence <- median(scenario_sims)
  ci_lower <- quantile(scenario_sims, 0.025)
  ci_upper <- quantile(scenario_sims, 0.975)
  
  # Use intervention PY and sample size for all scenarios
  py <- py_intervention
  sample_size <- n_intervention
  
  # 1. Incidence rate per 100 person-yrs (95% CI)
  rate_per_100py <- (mean_incidence * sample_size / py) * 100
  rate_ci_lower <- (ci_lower * sample_size / py) * 100
  rate_ci_upper <- (ci_upper * sample_size / py) * 100
  
  # 2. Mean Expected Cases per 100,000 (95% CI)
  cases_per_100k <- mean_incidence * 100000
  cases_per_100k_ci_lower <- ci_lower * 100000
  cases_per_100k_ci_upper <- ci_upper * 100000
  
  # 3. Excess calculations (compared to Week 0)
  if(delay_value != 0) {
    # Excess incidence simulations
    excess_sims <- scenario_sims - baseline_sims
    excess_mean <- mean(excess_sims)
    excess_ci_lower <- quantile(excess_sims, 0.025)
    excess_ci_upper <- quantile(excess_sims, 0.975)
    
    # Excess rate per 100 person-yrs
    excess_rate_per_100py <- (excess_mean * sample_size / py) * 100
    excess_rate_ci_lower <- (excess_ci_lower * sample_size / py) * 100
    excess_rate_ci_upper <- (excess_ci_upper * sample_size / py) * 100
    
    # Excess cases per 100,000
    excess_cases_per_100k <- excess_mean * 100000
    excess_cases_per_100k_ci_lower <- excess_ci_lower * 100000
    excess_cases_per_100k_ci_upper <- excess_ci_upper * 100000
  } else {
    # Week 0 is the baseline, so excess = 0
    excess_rate_per_100py <- 0
    excess_rate_ci_lower <- 0
    excess_rate_ci_upper <- 0
    excess_cases_per_100k <- 0
    excess_cases_per_100k_ci_lower <- 0
    excess_cases_per_100k_ci_upper <- 0
  }
  
  # 4. Relative Risk
  relative_risk <- mean_incidence / baseline_mean
  
  # Store results
  results_list[[scenario_name]] <- list(
    delay_weeks = delay_value,
    incidence_rate_per_100py = rate_per_100py,
    rate_ci_lower = rate_ci_lower,
    rate_ci_upper = rate_ci_upper,
    excess_rate_per_100py = excess_rate_per_100py,
    excess_rate_ci_lower = excess_rate_ci_lower,
    excess_rate_ci_upper = excess_rate_ci_upper,
    cases_per_100k = cases_per_100k,
    cases_per_100k_ci_lower = cases_per_100k_ci_lower,
    cases_per_100k_ci_upper = cases_per_100k_ci_upper,
    excess_cases_per_100k = excess_cases_per_100k,
    excess_cases_per_100k_ci_lower = excess_cases_per_100k_ci_lower,
    excess_cases_per_100k_ci_upper = excess_cases_per_100k_ci_upper,
    relative_risk = relative_risk
  )
}

# ——————————————————————————————————————————————————————
# 5) Create Export Table for Word
# ——————————————————————————————————————————————————————

# Build the export table
export_table <- data.frame()

for(scenario_name in names(results_list)) {
  r <- results_list[[scenario_name]]
  
  # Format all confidence intervals
  incidence_rate_ci <- sprintf("%.2f (%.2f-%.2f)", 
                              r$incidence_rate_per_100py, r$rate_ci_lower, r$rate_ci_upper)
  
  if(r$delay_weeks == 0) {
    excess_rate_ci <- "BASELINE"
    excess_cases_ci <- "BASELINE"
  } else {
    excess_rate_ci <- sprintf("%+.2f (%+.2f to %+.2f)", 
                             r$excess_rate_per_100py, r$excess_rate_ci_lower, r$excess_rate_ci_upper)
    excess_cases_ci <- sprintf("%+.0f (%+.0f to %+.0f)", 
                              r$excess_cases_per_100k, r$excess_cases_per_100k_ci_lower, r$excess_cases_per_100k_ci_upper)
  }
  
  cases_per_100k_ci <- sprintf("%.0f (%.0f-%.0f)", 
                              r$cases_per_100k, r$cases_per_100k_ci_lower, r$cases_per_100k_ci_upper)
  
  relative_risk_formatted <- sprintf("%.2f", r$relative_risk)
  
  # Add row to export table
  export_table <- rbind(export_table, data.frame(
    `Delay (weeks)` = r$delay_weeks,
    `Incidence Rate per 100 Person-Yrs (95% CI)` = incidence_rate_ci,
    `Excess Incidence Rate per 100 Person-Yrs (95% CI)` = excess_rate_ci,
    `Mean Expected Cases per 100,000 (95% CI)` = cases_per_100k_ci,
    `Excess Cases per 100,000 (95% CI)` = excess_cases_ci,
    `Relative Risk` = relative_risk_formatted,
    stringsAsFactors = FALSE,
    check.names = FALSE
  ))
}

# Sort by delay weeks
export_table <- export_table[order(export_table$`Delay (weeks)`), ]

# Display the table
cat("\n=== EXPORT TABLE FOR WORD ===\n\n")
print(export_table, row.names = FALSE)

# ——————————————————————————————————————————————————————
# 6) Export Options
# ——————————————————————————————————————————————————————

# Option 1: Save as CSV for easy import to Word
write.csv(export_table, "TBCHAMP_tb_delays_analysis_table.csv", row.names = FALSE)
cat("\nTable saved as 'tb_delays_analysis_table.csv'\n")

# Option 2: Create a formatted version for direct copy-paste
cat("\n=== FORMATTED FOR COPY-PASTE TO WORD ===\n")
cat("Delay (weeks)\tIncidence Rate per 100 Person-Yrs (95% CI)\tExcess Incidence Rate per 100 Person-Yrs (95% CI)\tMean Expected Cases per 100,000 (95% CI)\tExcess Cases per 100,000 (95% CI)\tRelative Risk\n")

for(i in 1:nrow(export_table)) {
  cat(paste(export_table[i, ], collapse = "\t"), "\n")
}

# Option 3: Summary statistics
cat("\n=== ANALYSIS SUMMARY ===\n")
cat("Monte Carlo simulations:", n_simulations, "\n")
cat("Delay intervals: Every 2 weeks from -26 to +26 weeks\n")
cat("Baseline scenario: Week 0 (no delay)\n")
cat("Sample size:", n_intervention, "\n")
cat("Person-years:", py_intervention, "\n")
cat("Total scenarios analyzed:", nrow(export_table), "\n")

# Option 4: Key findings
best_scenario <- export_table[which.min(as.numeric(gsub("[^-0-9.]", "", export_table$`Delay (weeks)`))), ]
worst_scenario <- export_table[which.max(as.numeric(gsub("[^-0-9.]", "", export_table$`Delay (weeks)`))), ]

cat("\nBest scenario (most negative delay):", best_scenario$`Delay (weeks)`, "weeks\n")
cat("Worst scenario (most positive delay):", worst_scenario$`Delay (weeks)`, "weeks\n")

cat("\nNote: Negative delays = treatment starts before screening\n")
cat("Positive delays = treatment starts after screening\n")
cat("All excess calculations are relative to Week 0 (no delay)\n")

Delay scenarios bar plot with UIs (Figure 6)

library(mgcv)
library(ggplot2)
library(dplyr)
library(RColorBrewer)
library(scales)

# ——————————————————————————————————————————————————————
# Enhanced TB Delays Analysis with 2-week intervals - Using Real GAM Models
# TBCHAMP ANALYSIS
# ——————————————————————————————————————————————————————

# Input Parameters (TBCHAMP data)
n_control <- 465
n_intervention <- 451
py_control <- 418
py_intervention <- 406
n_simulations <- 5000
set.seed(123)

cat("Enhanced Delays Analysis Parameters (TBCHAMP):\n")
cat("Control sample size:", n_control, "\n")
cat("Intervention sample size:", n_intervention, "\n")
cat("Control person-years:", py_control, "\n")
cat("Intervention person-years:", py_intervention, "\n")
cat("Number of simulations:", n_simulations, "\n\n")

# ——————————————————————————————————————————————————————
# 1) Monte Carlo Simulation Function (from your working code)
# ——————————————————————————————————————————————————————

simulate_gam_predictions <- function(gam_model, newdata, n_sim = n_simulations) {
  # Get mean predictions and covariance matrix
  pred_mean <- predict(gam_model, newdata = newdata, type = "response")
  
  # Get the covariance matrix of the GAM coefficients
  Vp <- vcov(gam_model, unconditional = TRUE)
  
  # Get the linear predictor matrix
  Xp <- predict(gam_model, newdata = newdata, type = "lpmatrix")
  
  # Simulate from multivariate normal distribution
  # Generate random coefficients
  coef_sims <- MASS::mvrnorm(n_sim, coef(gam_model), Vp)
  
  # Calculate predictions for each simulation
  pred_sims <- tcrossprod(Xp, coef_sims)
  
  return(pred_sims)
}

# ——————————————————————————————————————————————————————
# 2) Bootstrap Scenario Builder (exactly from your working code)
# ——————————————————————————————————————————————————————

bootstrap_scenario <- function(sim_idx, control_sims, intervention_sims, delay_weeks) {
  # Extract simulated predictions for this iteration
  Fc0_sim <- control_sims[, sim_idx]
  Fi0_sim <- intervention_sims[, sim_idx]
  
  # Apply the p0 transformation, drawing p0 from a beta distribution to account for its uncertainty
  p0_draw <- rbeta(1, 32 + 0.5, 1120 - 32 + 0.5)  # TBCHAMP p0 values 32 and 1120
  Fc0_sim <- p0_draw + (1 - p0_draw) * Fc0_sim
  Fi0_sim <- p0_draw + (1 - p0_draw) * Fi0_sim
  # Enforce bounds and monotonicity: simulated cumulative curves can wiggle, which would make the discrete hazards negative
  Fc0_sim <- pmin(pmax(Fc0_sim, 0), 0.999)
  Fi0_sim <- pmin(pmax(Fi0_sim, 0), 0.999)
  Fc0_sim <- cummax(Fc0_sim); Fi0_sim <- cummax(Fi0_sim)

  
  # Calculate hazards for this simulation
  h_ctrl_sim <- diff(c(0, Fc0_sim)) / (1 - head(c(0, Fc0_sim), -1))
  h_int_sim <- diff(c(0, Fi0_sim)) / (1 - head(c(0, Fi0_sim), -1))
  
  # Build curve for this delay scenario
  n <- length(time_ext)
  Fcum <- numeric(n)
  
  # Find indices
  i0 <- which(time_ext == 0)
  back_wk <- 26
  
  # Special handling for pure scenarios
  if(delay_weeks == Inf) {
    # Pure control - FIXED: use p0_draw instead of p0
    # Pre-screen ramp
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)  # FIXED
    }
    Fcum[i0] <- p0_draw  # FIXED
    Sprev <- 1 - p0_draw  # FIXED
    
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else if(delay_weeks == -Inf) {
    # Pure intervention (Instant TPT) - FIXED: use p0_draw instead of p0
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)  # FIXED
    }
    Fcum[i0] <- p0_draw  # FIXED
    Sprev <- 1 - p0_draw  # FIXED
    
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else if(delay_weeks < 0) {
    # CORRECTED: Negative delays (pre-screen intervention) - FIXED: use p0_draw instead of p0
    # This means intervention starts BEFORE time 0, providing earlier protection
    
    # Find delay point index
    id_match <- which(time_ext == delay_weeks)
    if(length(id_match) == 0) {
      id <- which.min(abs(time_ext - delay_weeks))
    } else {
      id <- id_match[1]
    }
    
    # Before delay point: ramp (no intervention yet)
    if(id > 1) {
      s_up <- time_ext[1:(id-1)] + back_wk
      s_up[s_up < 0] <- 0
      Fcum[1:(id-1)] <- p0_draw * sqrt(s_up / back_wk)  # FIXED
    }
    
    # At delay point: start intervention
    s_delay <- max(0, delay_weeks + back_wk)
    Fcum[id] <- p0_draw * sqrt(s_delay / back_wk)  # FIXED
    Sprev <- 1 - Fcum[id]
    
    # CORRECTED: After delay point, use intervention hazards consistently
    # Map the post-delay timeline to intervention hazards starting from time 0
    for(i in (id+1):n) {
      current_time <- time_ext[i]
      
      # Calculate how long intervention has been active
      intervention_duration <- current_time - delay_weeks
      
      if(intervention_duration >= 0 && intervention_duration <= 54) {
        # Use intervention hazard corresponding to this duration
        h_idx <- min(intervention_duration + 1, length(h_int_sim))
        h_t <- h_int_sim[max(1, h_idx)]
      } else if(intervention_duration > 54) {
        # Use last available intervention hazard
        h_t <- h_int_sim[length(h_int_sim)]
      } else {
        # This shouldn't happen with negative delays, but use first intervention hazard as fallback
        h_t <- h_int_sim[1]
      }
      
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else {
    # Positive delays (post-screen delays) - FIXED: use p0_draw instead of p0
    # Pre-screen ramp
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)  # FIXED
    }
    
    # At screen
    Fcum[i0] <- p0_draw  # FIXED
    Sprev <- 1 - p0_draw  # FIXED
    
    # Post-screen: control until delay point, then intervention
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      
      if(t0 <= delay_weeks) {
        # Use control hazard until delay point
        h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
      } else {
        # Use intervention hazard after delay point
        # Map to intervention timeline: intervention has been active for (t0 - delay_weeks) weeks
        intervention_duration <- t0 - delay_weeks
        h_idx <- min(intervention_duration + 1, length(h_int_sim))
        h_t <- h_int_sim[max(1, h_idx)]
      }
      
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
  }
  
  # Return final value at week 26
  week_26_idx <- which(time_ext == 26)
  return(Fcum[week_26_idx])
}

# ——————————————————————————————————————————————————————
# 3) Run Enhanced Analysis with 2-week intervals using REAL GAM models
# ——————————————————————————————————————————————————————

cat("Running enhanced Monte Carlo simulations with 2-week intervals...\n")

# Reset seed right before simulations for exact reproducibility
set.seed(42)

# Set up time vectors
time0 <- 0:26
back_wk <- 26
time_ext <- seq(-back_wk, 26, by = 1)

# Create 2-week interval delays from -26 to +26
delays_2wk <- seq(-26, 26, by = 2)

# Generate simulated predictions using your ACTUAL GAM models
cat("Generating GAM simulations using your trained models...\n")
# NOTE: Make sure your GAM models are loaded: control_gam_k5_GCV_TBCHAMP and intervention_gam_k5_GCV_TBCHAMP
control_sims <- simulate_gam_predictions(control_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
intervention_sims <- simulate_gam_predictions(intervention_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)

cat("Generated", ncol(control_sims), "control simulations and", ncol(intervention_sims), "intervention simulations\n")

# Define all delay scenarios (including baseline at week 0 = no delay)
all_delays <- setNames(delays_2wk, paste0(delays_2wk, " wk"))

# Run simulations for all scenarios
cat("Running scenario simulations...\n")
simulation_results <- list()

for(scenario_name in names(all_delays)) {
  cat("Processing scenario:", scenario_name, "\n")
  delay_val <- all_delays[scenario_name]
  
  scenario_sims <- sapply(1:n_simulations, function(i) {
    bootstrap_scenario(i, control_sims, intervention_sims, delay_val)
  })
  
  simulation_results[[scenario_name]] <- scenario_sims
}

cat("Completed all simulations!\n\n")

# Check if "0 wk" exists and compare a few values
print(simulation_results[["0 wk"]][1:5])  # First 5 simulation results
print(mean(simulation_results[["0 wk"]]))   # Mean for "0 wk" scenario

# ——————————————————————————————————————————————————————
# 4) Calculate Excess Cases PER 100,000 using your exact methodology
# ——————————————————————————————————————————————————————

# Get baseline for relative calculations
baseline_sims <- simulation_results[["0 wk"]]  # Uses "0 wk" as baseline
baseline_mean <- mean(baseline_sims)

# Calculate metrics for each scenario using your exact approach
results_list <- list()

for(scenario_name in names(simulation_results)) {
  scenario_sims <- simulation_results[[scenario_name]]
  
  # Basic statistics
  mean_incidence <- mean(scenario_sims)
  median_incidence <- median(scenario_sims)
  ci_lower <- quantile(scenario_sims, 0.025)
  ci_upper <- quantile(scenario_sims, 0.975)
  
  # Determine person-years (Control uses control PY, others use intervention PY)
  py <- ifelse(scenario_name == "Control", py_control, py_intervention)
  sample_size <- ifelse(scenario_name == "Control", n_control, n_intervention)
  
  # 1. Incidence rate per 100 person-yrs (95% CI)
  rate_per_100py <- (mean_incidence * sample_size / py) * 100
  rate_ci_lower <- (ci_lower * sample_size / py) * 100
  rate_ci_upper <- (ci_upper * sample_size / py) * 100
  
  # 2. Mean Expected Cases (95% CI) - absolute numbers in original cohort
  mean_cases <- mean_incidence * sample_size
  cases_ci_lower <- ci_lower * sample_size
  cases_ci_upper <- ci_upper * sample_size
  
  # 3. Excess calculations (compared to the "0 wk" baseline)
  if(scenario_name != "0 wk") {
    # Excess incidence simulations
    excess_sims <- scenario_sims - baseline_sims
    excess_mean <- mean(excess_sims)
    excess_ci_lower <- quantile(excess_sims, 0.025)
    excess_ci_upper <- quantile(excess_sims, 0.975)
    
    # Excess rate per 100 person-yrs
    excess_rate_per_100py <- (excess_mean * sample_size / py) * 100
    excess_rate_ci_lower <- (excess_ci_lower * sample_size / py) * 100
    excess_rate_ci_upper <- (excess_ci_upper * sample_size / py) * 100
    
    # MODIFIED: Excess cases per 100,000 contacts (not absolute numbers)
    excess_cases_per_100k <- excess_mean * 100000
    excess_cases_per_100k_ci_lower <- excess_ci_lower * 100000
    excess_cases_per_100k_ci_upper <- excess_ci_upper * 100000
  } else {
    # "0 wk" is the baseline, so excess = 0
    excess_rate_per_100py <- 0
    excess_rate_ci_lower <- 0
    excess_rate_ci_upper <- 0
    excess_cases_per_100k <- 0
    excess_cases_per_100k_ci_lower <- 0
    excess_cases_per_100k_ci_upper <- 0
  }
  
  # 4. Relative Risk
  relative_risk <- mean_incidence / baseline_mean
  
  # Store results
  results_list[[scenario_name]] <- list(
    scenario = scenario_name,
    incidence_rate_per_100py = rate_per_100py,
    incidence_rate_ci_lower = rate_ci_lower,
    incidence_rate_ci_upper = rate_ci_upper,
    excess_rate_per_100py = excess_rate_per_100py,
    excess_rate_ci_lower = excess_rate_ci_lower,
    excess_rate_ci_upper = excess_rate_ci_upper,
    mean_cases = mean_cases,
    cases_ci_lower = cases_ci_lower,
    cases_ci_upper = cases_ci_upper,
    excess_cases_per_100k = excess_cases_per_100k,
    excess_cases_per_100k_ci_lower = excess_cases_per_100k_ci_lower,
    excess_cases_per_100k_ci_upper = excess_cases_per_100k_ci_upper,
    relative_risk = relative_risk
  )
}

# ——————————————————————————————————————————————————————
# 5) Prepare Plot Data using your results (MODIFIED for per 100,000)
# ——————————————————————————————————————————————————————

plot_data <- data.frame()

for(scenario_name in names(results_list)) {
  # Don't skip any scenarios this time - keep baseline (week 0) on plot
  
  r <- results_list[[scenario_name]]
  
  # Extract delay value for ordering
  if(scenario_name == "26 wk") {  # CHANGED: Check for "26 wk" instead of "Control"
    delay_weeks <- 999  # Put at the end
    delay_label <- "No TPT"
    delay_type <- "No Treatment"
  } else {
    delay_weeks <- as.numeric(sub(" wk", "", scenario_name))
    delay_label <- as.character(delay_weeks)
    
    # Assign delay types
    if(delay_weeks == 0) {
      delay_type <- "Baseline (0 wk)"  # Special case for baseline
    } else if(delay_weeks < 0) {
      delay_type <- "Pre-screen"
    } else {
      delay_type <- "Post-screen"
    }
  }
  
  plot_data <- rbind(plot_data, data.frame(
    scenario = scenario_name,
    delay_weeks = delay_weeks,
    delay_label = delay_label,
    excess_cases_per_100k = r$excess_cases_per_100k,
    ci_lower = r$excess_cases_per_100k_ci_lower,
    ci_upper = r$excess_cases_per_100k_ci_upper,
    delay_type = delay_type,
    stringsAsFactors = FALSE
  ))
}

# Order by delay weeks
plot_data <- plot_data[order(plot_data$delay_weeks), ]

# ——————————————————————————————————————————————————————
# 6) Create Enhanced Color-Blind Friendly Bar Plot (UPDATED LABELS)
# ——————————————————————————————————————————————————————

# Set up RColorBrewer Paired palette
paired_colors <- brewer.pal(12, "Paired")

# Assign colors to delay types
color_mapping <- c(
  "Baseline (0 wk)" = paired_colors[4],  # green (included so the baseline bar is not dropped to the NA colour)
  "Pre-screen" = paired_colors[2],       # blue
  "Post-screen" = paired_colors[6],      # red
  "No Treatment" = paired_colors[8]      # orange
)

# Create the enhanced bar plot
enhanced_plot <- ggplot(plot_data, aes(x = reorder(delay_label, delay_weeks), 
                                      y = excess_cases_per_100k, 
                                      fill = delay_type)) +
  geom_col(width = 0.8, alpha = 0.8, color = "white", size = 0.3) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), 
                width = 0.4, color = "black", size = 0.6, alpha = 0.8) +
  
  # Add value labels on bars with smart positioning
geom_text(aes(label = sprintf("%.0f", excess_cases_per_100k),
              y = ifelse(excess_cases_per_100k >= 0, 
                        pmax(ci_upper + 150, excess_cases_per_100k + 150),  # Above positive bars
                        pmin(ci_lower - 150, excess_cases_per_100k - 150))), # Below negative bars
          size = 4.2, fontface = "bold", color = "black") +
  
  # Styling
  scale_fill_manual(values = color_mapping, name = "Treatment Timing") +
  scale_y_continuous(labels = number_format(accuracy = 1), 
                     expand = expansion(mult = c(0.1, 0.15))) +
  
  # Reference line at zero
  geom_hline(yintercept = 0, linetype = "solid", color = "black", size = 0.8) +
  
  # UPDATED Labels and theme
  labs(
    title = "TBCHAMP: Excess TB Cases per 100,000 Contacts by Treatment Delay",
    subtitle = paste0("Compared to Instant TPT (0 week delay) (95% UI from ", 
                     format(n_simulations, big.mark = ","), " Monte Carlo simulations)"),
    x = "Treatment Delay (weeks)",
    y = "Excess TB Cases per 100,000 Contacts (26 weeks)",
    caption = "Negative delays = pre-screen treatment | Positive delays = post-screen treatment"
  ) +
  
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 11, color = "black"),
    plot.caption = element_text(size = 9, color = "black"),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 13, color = "black"),
    axis.text.y = element_text(size = 13, color = "black"),
    axis.title = element_text(size = 11, face = "bold"),
    legend.position = "top",
    legend.title = element_text(size = 10, face = "bold"),
    legend.text = element_text(size = 9),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "gray80", fill = NA, size = 0.5)
  )

# Display the plot
print(enhanced_plot)
# ——————————————————————————————————————————————————————
# 7) Summary Statistics Table (UPDATED for per 100,000)
# ——————————————————————————————————————————————————————

cat("\n=== ENHANCED DELAY ANALYSIS SUMMARY - TBCHAMP (Excess per 100,000 contacts) ===\n")
cat("Delay intervals: Every 2 weeks from -26 to +26 weeks\n")
cat("Monte Carlo simulations:", format(n_simulations, big.mark = ","), "\n")
cat("Results standardized to per 100,000 contacts over 26 weeks\n\n")

# Print key statistics
summary_table <- plot_data %>%
  select(delay_label, delay_type, excess_cases_per_100k, ci_lower, ci_upper) %>%
  mutate(
    excess_cases_ci = sprintf("%.0f (%.0f to %.0f)", excess_cases_per_100k, ci_lower, ci_upper)
  ) %>%
  select(delay_label, delay_type, excess_cases_ci)

colnames(summary_table) <- c("Delay (weeks)", "Type", "Excess Cases per 100,000 (95% CI)")

print(summary_table)

cat("\nNote: Negative delays indicate treatment started before screening\n")
cat("Positive delays indicate treatment started after screening\n")
cat("Results calculated using TBCHAMP GAM models and Monte Carlo approach\n")
cat("Units: Excess TB cases per 100,000 contacts over 26 weeks\n")

Shortening

Curves with shortened scenarios (Figure 7)

library(mgcv)

# —————————————————————————————————————————————————————
# A) Fit‐based cumulative‐incidence at t = 0…54, with p0‐spike
# UPDATED: Using k=5 GCV GAM models

time0 <- 0:54
p0    <- 32/1120

# Use the new k=5 GCV GAM models
Fc0 <- predict(control_gam_k5_GCV_TBCHAMP,
               newdata = data.frame(time = time0))
Fi0 <- predict(intervention_gam_k5_GCV_TBCHAMP,
               newdata = data.frame(time = time0))

Fc0 <- p0 + (1 - p0)*Fc0
Fi0 <- p0 + (1 - p0)*Fi0

h_ctrl <- diff(c(0, Fc0)) / (1 - head(c(0, Fc0), -1))
h_int  <- diff(c(0, Fi0)) / (1 - head(c(0, Fi0), -1))

# —————————————————————————————————————————————————————
# B) "Pre‐screen" ramp length and treatment parameters

back_wk        <- 26            # show 6 months before screen
shorten_weeks  <- c(6, 13, 20)  # weeks at which treatment is shortened (roughly every 7 weeks; use fewer scenarios for the bar plot)

# —————————————————————————————————————————————————————
# C) build a time axis from –back_wk … +54

time_ext <- seq(-back_wk, 54, by = 1)

# —————————————————————————————————————————————————————
# D) single‐curve builder: √ ramp on [–back_wk,0], then hazard‐switch at d

build_curve <- function(d) {
  n    <- length(time_ext)
  Fcum <- numeric(n)

  # find index of screen‐time t0 = 0
  i0   <- which(time_ext == 0)

  # 1) PRE‐SCREEN: √ ramp from (–back_wk → 0) up to p0
  s_up <- time_ext[1:(i0-1)] + back_wk
  Fcum[1:(i0-1)] <- p0 * sqrt( s_up / back_wk )

  # 2) AT SCREEN
  Fcum[i0] <- p0
  Sprev    <- 1 - p0

  # 3) POST‐SCREEN: control hazard until t0 ≤ d, then intervention hazard
  for(i in (i0+1):n) {
    t0 <- time_ext[i]
    if (d > 0 && t0 <= d) {
      h_t <- h_ctrl[t0 + 1]
    } else {
      idx <- if (d > 0) (t0 - d + 1) else (t0 + 1)
      h_t <- h_int[ min(idx, length(h_int)) ]
    }
    Snew     <- Sprev * (1 - h_t)
    Fcum[i]  <- 1 - Snew
    Sprev    <- Snew
  }

  Fcum
}

# —————————————————————————————————————————————————————
# E) UPDATED: shortened treatment curve builder using time-specific control slope

build_shortened_curve <- function(shorten_at) {
  n    <- length(time_ext)
  Fcum <- numeric(n)

  # find index of screen‐time t0 = 0
  i0   <- which(time_ext == 0)

  # 1) PRE‐SCREEN: √ ramp from (–back_wk → 0) up to p0
  s_up <- time_ext[1:(i0-1)] + back_wk
  Fcum[1:(i0-1)] <- p0 * sqrt( s_up / back_wk )

  # 2) AT SCREEN
  Fcum[i0] <- p0

  # 3) POST‐SCREEN: follow intervention until shorten_at, then follow control pattern from shortening timepoint
  for(i in (i0+1):n) {
    t0 <- time_ext[i]
    if (t0 <= shorten_at) {
      # Follow intervention curve
      Fcum[i] <- Fi0[min(t0 + 1, length(Fi0))]
    } else {
      # After shortening point: take the value at shortening point and add 
      # the same increment that control would have from the shortening timepoint
      value_at_shorten <- Fi0[min(shorten_at + 1, length(Fi0))]
      
      # NEW: Use control increment from shortening timepoint, not from time 0
      control_at_shorten <- Fc0[min(shorten_at + 1, length(Fc0))]
      control_at_current <- Fc0[min(t0 + 1, length(Fc0))]
      control_increment <- control_at_current - control_at_shorten
      
      Fcum[i] <- value_at_shorten + control_increment
    }
  }

  Fcum
}
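
# e.g. build_shortened_curve(13) follows the intervention curve up to week 13,
# then adds the control curve's subsequent increments from week 13 onwards.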

# —————————————————————————————————————————————————————
# F) build all the curves

# 1) explicitly build Control & Instant
ctrl_ext <- build_curve( Inf)   # pure control
inst_ext <- build_curve(-Inf)   # pure intervention

# 2) shortened treatment curves
shorten_mat <- if(length(shorten_weeks) > 0) {
  sapply(shorten_weeks, build_shortened_curve)
} else NULL

# Add column names for shortened treatments
if(!is.null(shorten_mat)) {
  colnames(shorten_mat) <- paste0("TPT ", shorten_weeks, " wk")
}

# —————————————————————————————————————————————————————
# G) compressed x‐axis plot, up to +54 wk, now including shortened treatment curves

# how many "plot units" to devote to the negative side?
neg_width <- 30

# remap time_ext → x_plot:
x_plot <- ifelse(
  time_ext < 0,
  neg_width * (time_ext + back_wk) / back_wk,  # -back_wk…0 → 0…neg_width (0…30)
  neg_width + pmin(time_ext, 54)               #  0…54 → 30…84
)
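
Spot values of the compressed axis (with back_wk = 26 and neg_width = 30):

cbind(t = c(-26, -13, 0, 26, 54),
      x = x_plot[match(c(-26, -13, 0, 26, 54), time_ext)])
# t = -26 -> 0, t = -13 -> 15, t = 0 -> 30, t = 26 -> 56, t = 54 -> 84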

# bind together Control, Instant, and shortened treatments only:
if(!is.null(shorten_mat)) {
  all_mat <- cbind(
    Control       = ctrl_ext,
    `Instant TPT` = inst_ext,
    shorten_mat
  )
} else {
  all_mat <- cbind(
    Control       = ctrl_ext,
    `Instant TPT` = inst_ext
  )
}

# subset to t ≤ 26 wk
keep   <- which(time_ext <= 26)
x_sub   <- x_plot[keep]
mat_sub <- all_mat[keep, , drop=FALSE]

# CORRECTED: Convert to percentages in the data
mat_sub_pct <- mat_sub * 100

# MODIFIED: Create gradient colors where shorter treatment is worse (red) and longer is better (green)
n_shorten_cols <- if(!is.null(shorten_mat)) ncol(shorten_mat) else 0

if(n_shorten_cols > 0) {
  # Create a gradient from red (shorter/worse) to green (longer/better)
  # Since shorten_weeks is ordered from shortest to longest: 6, 13, 20
  shorten_colors <- colorRampPalette(c("#FF0000", "#FF7F00", "#00AA00"))(n_shorten_cols)
  cols <- c("red", "blue", shorten_colors)
} else {
  cols <- c("red", "blue")
}

ltys <- c(1, 1, rep(3, n_shorten_cols))
yr   <- range(mat_sub_pct)

# CORRECTED: Use matplot with percentage data and larger axis text
matplot(
  x_sub, mat_sub_pct,
  type="l", col=cols, lty=ltys, lwd=2,
  xaxt="n", ylim=yr,
  xlab="Weeks since randomisation",
  ylab="Cumulative incidence of TB (%)",  # Updated label
  main="TBCHAMP Shortened-TPT scenarios",
  cex.axis = 1.0,  # Size 12 for axis numbers
  cex.lab = 1.0    # Size 12 for axis labels
)

# custom x-axis ticks:
neg_ticks <- seq(-back_wk, 0, by=2)     
neg_at    <- neg_width * (neg_ticks + back_wk) / back_wk
pos_ticks <- seq(0, 54, by=5)
pos_at    <- neg_width + pos_ticks

axis(1,
     at     = c(neg_at, pos_at),
     labels = c(neg_ticks, pos_ticks),
     cex.axis = 1.0)  # Size 12 for axis tick labels

# vertical line at t=0
abline(v=neg_width, lty=3, col="gray50")

# Add vertical lines to show shortening points
if(!is.null(shorten_mat)) {
  for(sw in shorten_weeks) {
    if(sw <= 54) {  # only show if visible on plot
      shorten_x <- neg_width + sw
      abline(v=shorten_x, lty=4, col="gray70", lwd=1)
    }
  }
}

# legend with larger text
legend("topleft",
       legend    = colnames(all_mat),
       col       = cols,
       lty       = ltys,
       lwd       = 2,
       bty       = "n",
       ncol      = 1,        
       cex       = 1.2,      # Increased from 0.8 to 1.2 for larger legend
       y.intersp = 0.8,      
       x.intersp = 0.5,      
       inset     = c(0.02, 0.02))
t_target <- 26
idx_26 <- which(time_ext == t_target)

if (length(idx_26) > 0) {
  ci_26 <- all_mat[idx_26, ] * 100   # convert to %
  cat("Cumulative incidence at", t_target, "weeks (%):\n")
  print(round(ci_26, 2))
} else {
  warning("No exact match for 26 weeks in time_ext")
}

Appendix Table A2: statistics with 95% UIs for all 2-week shortened scenarios

(applying the control group's change from that point in time, not from t = 0)

library(mgcv)
library(ggplot2)
library(dplyr)
library(parallel)

# ——————————————————————————————————————————————————————
# 0) Input Parameters - TBCHAMP Data
# ——————————————————————————————————————————————————————

# Sample sizes (TBCHAMP)
n_control <- 465      # TBCHAMP control sample size
n_intervention <- 451 # TBCHAMP intervention sample size

# Person-years of follow-up (TBCHAMP)
py_control <- 418     # TBCHAMP control person-years
py_intervention <- 406 # TBCHAMP intervention person-years

# Monte Carlo parameters
n_simulations <- 5000  # Number of bootstrap simulations
set.seed(123)         # For reproducibility

cat("Shortened Treatment Analysis Parameters (TBCHAMP):\n")
cat("Control sample size:", n_control, "\n")
cat("Intervention sample size:", n_intervention, "\n")
cat("Control person-years:", py_control, "\n")
cat("Intervention person-years:", py_intervention, "\n")
cat("Number of simulations:", n_simulations, "\n\n")

# ——————————————————————————————————————————————————————
# 1) Monte Carlo Simulation Function
# ——————————————————————————————————————————————————————

simulate_gam_predictions <- function(gam_model, newdata, n_sim = n_simulations) {
  # Get mean predictions and covariance matrix
  pred_mean <- predict(gam_model, newdata = newdata, type = "response")
  
  # Get the covariance matrix of the GAM coefficients
  Vp <- vcov(gam_model, unconditional = TRUE)
  
  # Get the linear predictor matrix
  Xp <- predict(gam_model, newdata = newdata, type = "lpmatrix")
  
  # Simulate from multivariate normal distribution
  # Generate random coefficients
  coef_sims <- MASS::mvrnorm(n_sim, coef(gam_model), Vp)
  
  # Calculate predictions for each simulation
  pred_sims <- tcrossprod(Xp, coef_sims)
  
  return(pred_sims)
}
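
Each column of the returned matrix is one simulated curve on the linear-predictor scale (assuming the k = 5 models are Gaussian with identity link, as in the earlier fits, this is also the response scale). A minimal usage sketch for pointwise bands; note it advances the RNG stream, so re-run set.seed(123) before the main analysis if you execute it:

sims <- simulate_gam_predictions(control_gam_k5_GCV_TBCHAMP,
                                 data.frame(time = 0:26), n_sim = 200)
band <- apply(sims, 1, quantile, probs = c(0.025, 0.975))  # 2 x 27: lower/upper at each week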

# ——————————————————————————————————————————————————————
# 2) Bootstrap Shortened Treatment Scenario Builder
# ——————————————————————————————————————————————————————

# CORRECTED: bootstrap_shortened_scenario function
bootstrap_shortened_scenario <- function(sim_idx, control_sims, intervention_sims, shorten_weeks) {
  # Extract simulated predictions for this iteration
  Fc0_sim <- control_sims[, sim_idx]
  Fi0_sim <- intervention_sims[, sim_idx]
  
  # Draw p0 from its Jeffreys posterior, Beta(32.5, 1088.5), to propagate uncertainty in p0
  p0_draw <- rbeta(1, 32 + 0.5, 1120 - 32 + 0.5)  # TBCHAMP p0 = 32/1120
  Fc0_sim <- p0_draw + (1 - p0_draw) * Fc0_sim
  Fi0_sim <- p0_draw + (1 - p0_draw) * Fi0_sim
  
  # Enforce monotonicity
  Fc0_sim <- pmin(pmax(Fc0_sim, 0), 0.999)
  Fi0_sim <- pmin(pmax(Fi0_sim, 0), 0.999)
  Fc0_sim <- cummax(Fc0_sim); Fi0_sim <- cummax(Fi0_sim)

  # Calculate hazards for this simulation
  h_ctrl_sim <- diff(c(0, Fc0_sim)) / (1 - head(c(0, Fc0_sim), -1))
  h_int_sim <- diff(c(0, Fi0_sim)) / (1 - head(c(0, Fi0_sim), -1))
  
  # Build curve for this shortened treatment scenario
  n <- length(time_ext)
  Fcum <- numeric(n)
  
  # Find indices
  i0 <- which(time_ext == 0)
  back_wk <- 26
  
  # Special handling for reference scenarios
  if(shorten_weeks == Inf) {
    # Pure control - no treatment at all
    # Pre-screen ramp
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)
    }
    Fcum[i0] <- p0_draw
    Sprev <- 1 - p0_draw
    
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else if(shorten_weeks == -Inf) {
    # Full treatment (26 weeks) - equivalent to instant TPT
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)
    }
    Fcum[i0] <- p0_draw
    Sprev <- 1 - p0_draw
    
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else {
    # Shortened treatment scenarios
    # Pre-screen ramp
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)
    }
    
    # At screen
    Fcum[i0] <- p0_draw
    Sprev <- 1 - p0_draw
    
    # Post-screen: intervention until shorten_weeks, then control
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      
      if(t0 <= shorten_weeks) {
        # Use intervention hazard until shortening point
        h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
      } else {
        # CORRECTED: After shortening, use control hazard at CURRENT time point
        # Not time since shortening, but the actual current time
        h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
      }
      
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
  }
  
  # Return final value at week 26
  week_26_idx <- which(time_ext == 26)
  return(Fcum[week_26_idx])
}

# ——————————————————————————————————————————————————————
# 3) Run Monte Carlo Simulations - Every 2 Week Treatment Durations
# ——————————————————————————————————————————————————————

cat("Running Monte Carlo simulations for shortened treatments (every 2 weeks)...\n")

# Set up time vectors
time0 <- 0:26
back_wk <- 26
time_ext <- seq(-back_wk, 26, by = 1)

# Create treatment duration intervals every 2 weeks from 0 to 26
shortened_weeks <- seq(0, 26, by = 2)  # 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26

# Generate simulated predictions for both models
cat("Generating GAM simulations...\n")
control_sims <- simulate_gam_predictions(control_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
intervention_sims <- simulate_gam_predictions(intervention_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)

cat("Generated", ncol(control_sims), "control simulations and", ncol(intervention_sims), "intervention simulations\n")

# Define all scenarios - treat 26 weeks as the "Full TPT" baseline
all_scenarios <- setNames(shortened_weeks, paste0("TPT ", shortened_weeks, " wk"))

# Run simulations for all scenarios
cat("Running shortened treatment scenario simulations...\n")
simulation_results <- list()

for(scenario_name in names(all_scenarios)) {
  cat("Processing scenario:", scenario_name, "\n")
  shorten_val <- all_scenarios[scenario_name]
  
  # For 26 weeks, use -Inf to trigger full treatment scenario
  if(shorten_val == 26) {
    shorten_val <- -Inf
  }
  
  # Run simulations for this scenario
  scenario_sims <- sapply(1:n_simulations, function(i) {
    bootstrap_shortened_scenario(i, control_sims, intervention_sims, shorten_val)
  })
  
  simulation_results[[scenario_name]] <- scenario_sims
}

cat("Completed all simulations!\n\n")

# ——————————————————————————————————————————————————————
# 4) Calculate Key Metrics with 26 weeks as Baseline
# ——————————————————————————————————————————————————————

# Get baseline (26 weeks = full treatment) for relative calculations
baseline_sims <- simulation_results[["TPT 26 wk"]]
baseline_mean <- mean(baseline_sims)

# Calculate metrics for each scenario
results_list <- list()

for(scenario_name in names(simulation_results)) {
  scenario_sims <- simulation_results[[scenario_name]]
  treatment_weeks <- as.numeric(sub("TPT (\\d+) wk", "\\1", scenario_name))
  
  # Basic statistics
  mean_incidence <- mean(scenario_sims)
  median_incidence <- median(scenario_sims)
  ci_lower <- quantile(scenario_sims, 0.025)
  ci_upper <- quantile(scenario_sims, 0.975)
  
  # Use intervention PY and sample size for all scenarios
  py <- py_intervention
  sample_size <- n_intervention
  
  # 1. Incidence rate per 100 person-yrs (95% CI)
  rate_per_100py <- (mean_incidence * sample_size / py) * 100
  rate_ci_lower <- (ci_lower * sample_size / py) * 100
  rate_ci_upper <- (ci_upper * sample_size / py) * 100
  
  # 2. Mean Expected Cases per 100,000 (95% CI)
  cases_per_100k <- mean_incidence * 100000
  cases_per_100k_ci_lower <- ci_lower * 100000
  cases_per_100k_ci_upper <- ci_upper * 100000
  
  # 3. Excess calculations (compared to 26 weeks)
  if(treatment_weeks != 26) {
    # Excess incidence simulations
    excess_sims <- scenario_sims - baseline_sims
    excess_mean <- mean(excess_sims)
    excess_ci_lower <- quantile(excess_sims, 0.025)
    excess_ci_upper <- quantile(excess_sims, 0.975)
    
    # Excess rate per 100 person-yrs
    excess_rate_per_100py <- (excess_mean * sample_size / py) * 100
    excess_rate_ci_lower <- (excess_ci_lower * sample_size / py) * 100
    excess_rate_ci_upper <- (excess_ci_upper * sample_size / py) * 100
    
    # Excess cases per 100,000
    excess_cases_per_100k <- excess_mean * 100000
    excess_cases_per_100k_ci_lower <- excess_ci_lower * 100000
    excess_cases_per_100k_ci_upper <- excess_ci_upper * 100000
  } else {
    # 26 weeks is the baseline, so excess = 0
    excess_rate_per_100py <- 0
    excess_rate_ci_lower <- 0
    excess_rate_ci_upper <- 0
    excess_cases_per_100k <- 0
    excess_cases_per_100k_ci_lower <- 0
    excess_cases_per_100k_ci_upper <- 0
  }
  
  # 4. Relative Risk
  relative_risk <- mean_incidence / baseline_mean
  
  # Store results
  results_list[[scenario_name]] <- list(
    treatment_weeks = treatment_weeks,
    incidence_rate_per_100py = rate_per_100py,
    rate_ci_lower = rate_ci_lower,
    rate_ci_upper = rate_ci_upper,
    excess_rate_per_100py = excess_rate_per_100py,
    excess_rate_ci_lower = excess_rate_ci_lower,
    excess_rate_ci_upper = excess_rate_ci_upper,
    cases_per_100k = cases_per_100k,
    cases_per_100k_ci_lower = cases_per_100k_ci_lower,
    cases_per_100k_ci_upper = cases_per_100k_ci_upper,
    excess_cases_per_100k = excess_cases_per_100k,
    excess_cases_per_100k_ci_lower = excess_cases_per_100k_ci_lower,
    excess_cases_per_100k_ci_upper = excess_cases_per_100k_ci_upper,
    relative_risk = relative_risk
  )
}

# ——————————————————————————————————————————————————————
# 5) Create Export Table for Word
# ——————————————————————————————————————————————————————

# Build the export table
export_table <- data.frame()

for(scenario_name in names(results_list)) {
  r <- results_list[[scenario_name]]
  
  # Format all confidence intervals
  incidence_rate_ci <- sprintf("%.2f (%.2f-%.2f)", 
                              r$incidence_rate_per_100py, r$rate_ci_lower, r$rate_ci_upper)
  
  if(r$treatment_weeks == 26) {
    excess_rate_ci <- "BASELINE"
    excess_cases_ci <- "BASELINE"
  } else {
    excess_rate_ci <- sprintf("%+.2f (%+.2f to %+.2f)", 
                             r$excess_rate_per_100py, r$excess_rate_ci_lower, r$excess_rate_ci_upper)
    excess_cases_ci <- sprintf("%+.0f (%+.0f to %+.0f)", 
                              r$excess_cases_per_100k, r$excess_cases_per_100k_ci_lower, r$excess_cases_per_100k_ci_upper)
  }
  
  cases_per_100k_ci <- sprintf("%.0f (%.0f-%.0f)", 
                              r$cases_per_100k, r$cases_per_100k_ci_lower, r$cases_per_100k_ci_upper)
  
  relative_risk_formatted <- sprintf("%.2f", r$relative_risk)
  
  # Add row to export table
  export_table <- rbind(export_table, data.frame(
    `Treatment Duration (weeks)` = r$treatment_weeks,
    `Incidence Rate per 100 Person-Yrs (95% CI)` = incidence_rate_ci,
    `Excess Incidence Rate per 100 Person-Yrs (95% CI)` = excess_rate_ci,
    `Mean Expected Cases per 100,000 (95% CI)` = cases_per_100k_ci,
    `Excess Cases per 100,000 (95% CI)` = excess_cases_ci,
    `Relative Risk` = relative_risk_formatted,
    stringsAsFactors = FALSE,
    check.names = FALSE
  ))
}

# Sort by treatment duration
export_table <- export_table[order(export_table$`Treatment Duration (weeks)`), ]

# Display the table
cat("\n=== EXPORT TABLE FOR WORD (TBCHAMP Shortened Treatment) ===\n\n")
print(export_table, row.names = FALSE)

# ——————————————————————————————————————————————————————
# 6) Export Options
# ——————————————————————————————————————————————————————

# Option 1: Save as CSV for easy import to Word
write.csv(export_table, "tbchamp_shortened_treatment_table.csv", row.names = FALSE)
cat("\nTable saved as 'tbchamp_shortened_treatment_table.csv'\n")

# Option 2: Create a formatted version for direct copy-paste
cat("\n=== FORMATTED FOR COPY-PASTE TO WORD ===\n")
cat("Treatment Duration (weeks)\tIncidence Rate per 100 Person-Yrs (95% CI)\tExcess Incidence Rate per 100 Person-Yrs (95% CI)\tMean Expected Cases per 100,000 (95% CI)\tExcess Cases per 100,000 (95% CI)\tRelative Risk\n")

for(i in 1:nrow(export_table)) {
  cat(paste(export_table[i, ], collapse = "\t"), "\n")
}
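
An equivalent one-liner for the tab-separated dump (prints to the console for copy-paste):

write.table(export_table, file = "", sep = "\t", row.names = FALSE, quote = FALSE)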

# Option 3: Summary statistics
cat("\n=== ANALYSIS SUMMARY ===\n")
cat("Monte Carlo simulations:", n_simulations, "\n")
cat("Treatment durations: Every 2 weeks from 0 to 26 weeks\n")
cat("Baseline scenario: 26 weeks (full treatment)\n")
cat("Sample size:", n_intervention, "\n")
cat("Person-years:", py_intervention, "\n")
cat("Total scenarios analyzed:", nrow(export_table), "\n")

# Option 4: Key findings
shortest_idx <- which.min(export_table$`Treatment Duration (weeks)`)
longest_idx  <- which.max(export_table$`Treatment Duration (weeks)`)

if(length(shortest_idx) > 0 && length(longest_idx) > 0) {
  shortest_scenario <- export_table[shortest_idx, ]
  longest_scenario  <- export_table[longest_idx, ]
  
  cat("\nShortest treatment:", shortest_scenario$`Treatment Duration (weeks)`, "weeks\n")
  cat("Longest treatment:", longest_scenario$`Treatment Duration (weeks)`, "weeks\n")
}

cat("\nNote: All excess calculations are relative to 26 weeks (full treatment)\n")
cat("0 weeks = no treatment\n")
cat("Higher treatment duration generally leads to better outcomes\n")

# Option 5: Calculate relative risks compared to full treatment
cat("\nRelative Risks (compared to Full TPT 26 weeks):\n")
for(scenario in names(simulation_results)) {
  rr <- mean(simulation_results[[scenario]]) / baseline_mean
  weeks <- as.numeric(sub("TPT (\\d+) wk", "\\1", scenario))
  cat(sprintf("%d weeks: RR = %.2f\n", weeks, rr))
}

Bar plot of shortened scenarios (Figure 8)

library(mgcv)
library(ggplot2)
library(dplyr)
library(RColorBrewer)
library(scales)

# ——————————————————————————————————————————————————————
# Enhanced TB Shortened Treatment Analysis - Using the Fitted GAM Models
# ——————————————————————————————————————————————————————

# Input Parameters (TBCHAMP data)
n_control <- 465
n_intervention <- 451
py_control <- 418
py_intervention <- 406
n_simulations <- 5000
set.seed(123)

cat("Enhanced Shortened Treatment Analysis Parameters (TBCHAMP):\n")
cat("Control sample size:", n_control, "\n")
cat("Intervention sample size:", n_intervention, "\n")
cat("Control person-years:", py_control, "\n")
cat("Intervention person-years:", py_intervention, "\n")
cat("Number of simulations:", n_simulations, "\n\n")

# ——————————————————————————————————————————————————————
# 1) Monte Carlo Simulation Function (same as in the appendix table analysis above)
# ——————————————————————————————————————————————————————

simulate_gam_predictions <- function(gam_model, newdata, n_sim = n_simulations) {
  # Get mean predictions and covariance matrix
  pred_mean <- predict(gam_model, newdata = newdata, type = "response")
  
  # Get the covariance matrix of the GAM coefficients
  Vp <- vcov(gam_model, unconditional = TRUE)
  
  # Get the linear predictor matrix
  Xp <- predict(gam_model, newdata = newdata, type = "lpmatrix")
  
  # Simulate from multivariate normal distribution
  # Generate random coefficients
  coef_sims <- MASS::mvrnorm(n_sim, coef(gam_model), Vp)
  
  # Calculate predictions for each simulation
  pred_sims <- tcrossprod(Xp, coef_sims)
  
  return(pred_sims)
}

# ——————————————————————————————————————————————————————
# 2) Bootstrap Shortened Treatment Scenario Builder (as above)
# ——————————————————————————————————————————————————————

bootstrap_shortened_scenario <- function(sim_idx, control_sims, intervention_sims, shorten_weeks) {
  # Extract simulated predictions for this iteration
  Fc0_sim <- control_sims[, sim_idx]
  Fi0_sim <- intervention_sims[, sim_idx]
  
  # Draw p0 from its Jeffreys posterior, Beta(32.5, 1088.5), to propagate uncertainty in p0
  p0_draw <- rbeta(1, 32 + 0.5, 1120 - 32 + 0.5)  # TBCHAMP p0 = 32/1120
  Fc0_sim <- p0_draw + (1 - p0_draw) * Fc0_sim
  Fi0_sim <- p0_draw + (1 - p0_draw) * Fi0_sim
  
  # Enforce monotonicity
  Fc0_sim <- pmin(pmax(Fc0_sim, 0), 0.999)
  Fi0_sim <- pmin(pmax(Fi0_sim, 0), 0.999)
  Fc0_sim <- cummax(Fc0_sim); Fi0_sim <- cummax(Fi0_sim)

  # Calculate hazards for this simulation
  h_ctrl_sim <- diff(c(0, Fc0_sim)) / (1 - head(c(0, Fc0_sim), -1))
  h_int_sim <- diff(c(0, Fi0_sim)) / (1 - head(c(0, Fi0_sim), -1))
  
  # Build curve for this shortened treatment scenario
  n <- length(time_ext)
  Fcum <- numeric(n)
  
  # Find indices
  i0 <- which(time_ext == 0)
  back_wk <- 26
  
  # Special handling for reference scenarios
  if(shorten_weeks == Inf) {
    # Pure control - no treatment at all
    # Pre-screen ramp
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)
    }
    Fcum[i0] <- p0_draw
    Sprev <- 1 - p0_draw
    
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else if(shorten_weeks == -Inf) {
    # Full treatment (26 weeks) - equivalent to full TPT
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)
    }
    Fcum[i0] <- p0_draw
    Sprev <- 1 - p0_draw
    
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else {
    # Shortened treatment scenarios
    # Pre-screen ramp
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)
    }
    
    # At screen
    Fcum[i0] <- p0_draw
    Sprev <- 1 - p0_draw
    
    # Post-screen: intervention until shorten_weeks, then control
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      
      if(t0 <= shorten_weeks) {
        # Use intervention hazard until shortening point
        h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
      } else {
        # After shortening, use control hazard at CURRENT time point
        h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
      }
      
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
  }
  
  # Return final value at week 26
  week_26_idx <- which(time_ext == 26)
  return(Fcum[week_26_idx])
}

# ——————————————————————————————————————————————————————
# 3) Run Enhanced Analysis with extended treatment durations
# ——————————————————————————————————————————————————————

cat("Running enhanced Monte Carlo simulations for shortened treatments...\n")

# Set up time vectors
time0 <- 0:26
back_wk <- 26
time_ext <- seq(-back_wk, 26, by = 1)

# Create treatment duration intervals (every 2 weeks from 2 to 24, plus original values)
treatment_durations <- c(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24)

# Generate simulated predictions from the fitted GAM models
cat("Generating GAM simulations from the fitted models...\n")
# NOTE: requires control_gam_k5_GCV_TBCHAMP and intervention_gam_k5_GCV_TBCHAMP to be in the workspace
control_sims <- simulate_gam_predictions(control_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
intervention_sims <- simulate_gam_predictions(intervention_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)

cat("Generated", ncol(control_sims), "control simulations and", ncol(intervention_sims), "intervention simulations\n")

# Define all scenarios including baseline and control
all_scenarios <- c("Control" = Inf, 
                   setNames(treatment_durations, paste0("TPT ", treatment_durations, " wk")),
                   "Full TPT (26 wk)" = -Inf)

# Run simulations for all scenarios
cat("Running shortened treatment scenario simulations...\n")
simulation_results <- list()

for(scenario_name in names(all_scenarios)) {
  cat("Processing scenario:", scenario_name, "\n")
  shorten_val <- all_scenarios[scenario_name]
  
  scenario_sims <- sapply(1:n_simulations, function(i) {
    bootstrap_shortened_scenario(i, control_sims, intervention_sims, shorten_val)
  })
  
  simulation_results[[scenario_name]] <- scenario_sims
}

cat("Completed all simulations!\n\n")

# ——————————————————————————————————————————————————————
# 4) Calculate Excess Cases PER 100,000 using the same methodology as above
# ——————————————————————————————————————————————————————

# Get baseline (Full TPT 26 weeks) for relative calculations
baseline_sims <- simulation_results[["Full TPT (26 wk)"]]
baseline_mean <- mean(baseline_sims)

# Calculate metrics for each scenario using the same approach as above
results_list <- list()

for(scenario_name in names(simulation_results)) {
  scenario_sims <- simulation_results[[scenario_name]]
  
  # Basic statistics
  mean_incidence <- mean(scenario_sims)
  median_incidence <- median(scenario_sims)
  ci_lower <- quantile(scenario_sims, 0.025)
  ci_upper <- quantile(scenario_sims, 0.975)
  
  # Determine person-years (Control uses control PY, others use intervention PY)
  py <- ifelse(scenario_name == "Control", py_control, py_intervention)
  sample_size <- ifelse(scenario_name == "Control", n_control, n_intervention)
  
  # 1. Incidence rate per 100 person-yrs (95% CI)
  rate_per_100py <- (mean_incidence * sample_size / py) * 100
  rate_ci_lower <- (ci_lower * sample_size / py) * 100
  rate_ci_upper <- (ci_upper * sample_size / py) * 100
  
  # 2. Mean Expected Cases (95% CI) - absolute numbers in original cohort
  mean_cases <- mean_incidence * sample_size
  cases_ci_lower <- ci_lower * sample_size
  cases_ci_upper <- ci_upper * sample_size
  
  # 3. Excess calculations (compared to Full TPT 26 weeks)
  if(scenario_name != "Full TPT (26 wk)") {
    # Excess incidence simulations
    excess_sims <- scenario_sims - baseline_sims
    excess_mean <- mean(excess_sims)
    excess_ci_lower <- quantile(excess_sims, 0.025)
    excess_ci_upper <- quantile(excess_sims, 0.975)
    
    # Excess rate per 100 person-yrs
    excess_rate_per_100py <- (excess_mean * sample_size / py) * 100
    excess_rate_ci_lower <- (excess_ci_lower * sample_size / py) * 100
    excess_rate_ci_upper <- (excess_ci_upper * sample_size / py) * 100
    
    # MODIFIED: Excess cases per 100,000 contacts (not absolute numbers)
    excess_cases_per_100k <- excess_mean * 100000
    excess_cases_per_100k_ci_lower <- excess_ci_lower * 100000
    excess_cases_per_100k_ci_upper <- excess_ci_upper * 100000
    
    # Keep absolute numbers for compatibility
    excess_cases_mean <- excess_mean * sample_size
    excess_cases_ci_lower <- excess_ci_lower * sample_size
    excess_cases_ci_upper <- excess_ci_upper * sample_size
  } else {
    # Full TPT is the baseline, so excess = 0
    excess_rate_per_100py <- 0
    excess_rate_ci_lower <- 0
    excess_rate_ci_upper <- 0
    excess_cases_per_100k <- 0
    excess_cases_per_100k_ci_lower <- 0
    excess_cases_per_100k_ci_upper <- 0
    excess_cases_mean <- 0
    excess_cases_ci_lower <- 0
    excess_cases_ci_upper <- 0
  }
  
  # 4. Relative Risk
  relative_risk <- mean_incidence / baseline_mean
  
  # Store results
  results_list[[scenario_name]] <- list(
    scenario = scenario_name,
    incidence_rate_per_100py = rate_per_100py,
    incidence_rate_ci_lower = rate_ci_lower,
    incidence_rate_ci_upper = rate_ci_upper,
    excess_rate_per_100py = excess_rate_per_100py,
    excess_rate_ci_lower = excess_rate_ci_lower,
    excess_rate_ci_upper = excess_rate_ci_upper,
    mean_cases = mean_cases,
    cases_ci_lower = cases_ci_lower,
    cases_ci_upper = cases_ci_upper,
    excess_cases_per_100k = excess_cases_per_100k,
    excess_cases_per_100k_ci_lower = excess_cases_per_100k_ci_lower,
    excess_cases_per_100k_ci_upper = excess_cases_per_100k_ci_upper,
    excess_cases_mean = excess_cases_mean,
    excess_cases_ci_lower = excess_cases_ci_lower,
    excess_cases_ci_upper = excess_cases_ci_upper,
    relative_risk = relative_risk
  )
}

# ——————————————————————————————————————————————————————
# 5) Prepare Plot Data from the results above (MODIFIED for per 100,000)
# ——————————————————————————————————————————————————————

plot_data <- data.frame()

for(scenario_name in names(results_list)) {
  if(scenario_name == "Full TPT (26 wk)") next  # Skip baseline
  
  r <- results_list[[scenario_name]]
  
  # Extract treatment duration for ordering
  if(scenario_name == "Control") {
    treatment_weeks <- 0  # No treatment
    treatment_label <- "0"
    treatment_type <- "No Treatment"
  } else {
    treatment_weeks <- as.numeric(sub("TPT (\\d+) wk", "\\1", scenario_name))
    treatment_label <- as.character(treatment_weeks)
    treatment_type <- ifelse(treatment_weeks <= 12, "Short Treatment", "Moderate Treatment")
  }
  
  plot_data <- rbind(plot_data, data.frame(
    scenario = scenario_name,
    treatment_weeks = treatment_weeks,
    treatment_label = treatment_label,
    excess_cases_per_100k = r$excess_cases_per_100k,
    ci_lower = r$excess_cases_per_100k_ci_lower,
    ci_upper = r$excess_cases_per_100k_ci_upper,
    treatment_type = treatment_type,
    stringsAsFactors = FALSE
  ))
}

# Order by treatment duration
plot_data <- plot_data[order(plot_data$treatment_weeks), ]

# ——————————————————————————————————————————————————————
# 6) Create Enhanced Color-Blind Friendly Bar Plot (UPDATED LABELS)
# ——————————————————————————————————————————————————————

# Set up RColorBrewer Paired palette
paired_colors <- brewer.pal(12, "Paired")

# Assign colors to treatment types
color_mapping <- c(
  "No Treatment" = paired_colors[8],     # Orange
  "Short Treatment" = paired_colors[6],  # Red
  "Moderate Treatment" = paired_colors[2] # Dark blue
)

# Create the enhanced bar plot
enhanced_plot <- ggplot(plot_data, aes(x = reorder(treatment_label, treatment_weeks), 
                                      y = excess_cases_per_100k, 
                                      fill = treatment_type)) +
  geom_col(width = 0.8, alpha = 0.8, color = "white", size = 0.3) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), 
                width = 0.4, color = "black", size = 0.6, alpha = 0.8) +
  
  # Add value labels on bars with smart positioning
  geom_text(aes(label = sprintf("%.0f", excess_cases_per_100k),
                y = ifelse(excess_cases_per_100k >= 0, 
                          pmax(ci_upper + 150, excess_cases_per_100k + 150),  # Above positive bars
                          pmin(ci_lower - 150, excess_cases_per_100k - 150))), # Below negative bars
            size = 4.2, fontface = "bold", color = "black") +
  
  # Styling
  scale_fill_manual(values = color_mapping, name = "Treatment Duration") +
  scale_y_continuous(labels = number_format(accuracy = 1), 
                     expand = expansion(mult = c(0.1, 0.15))) +
  
  # Reference line at zero
  geom_hline(yintercept = 0, linetype = "solid", color = "black", size = 0.8) +
  
  # UPDATED Labels and theme
  labs(
    title = "TBCHAMP: Excess TB Cases per 100,000 Contacts by Treatment Duration",
    subtitle = paste0("Compared to Full Treatment (26 weeks) - 95% UI from ", 
                     format(n_simulations, big.mark = ","), " Monte Carlo simulations"),
    x = "Treatment Duration (weeks)",
    y = "Excess TB Cases per 100,000 Contacts (at 26 weeks)",
    caption = "0 weeks = no treatment | Negative values indicate fewer cases than full treatment"
  ) +
  
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 11, color = "black"),
    plot.caption = element_text(size = 9, color = "black"),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 13, color = "black"),
    axis.text.y = element_text(size = 13, color = "black"),
    axis.title = element_text(size = 11, face = "bold"),
    legend.position = "top",
    legend.title = element_text(size = 10, face = "bold"),
    legend.text = element_text(size = 9),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "gray80", fill = NA, size = 0.5)
  )

# Display the plot
print(enhanced_plot)
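
Optionally persist the figure (file name and dimensions are assumptions):

ggsave("tbchamp_shortened_treatment_barplot.png", enhanced_plot,
       width = 10, height = 6, dpi = 300)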
# ——————————————————————————————————————————————————————
# 7) Summary Statistics Table (UPDATED for per 100,000)
# ——————————————————————————————————————————————————————

cat("\n=== ENHANCED SHORTENED TREATMENT ANALYSIS SUMMARY (Excess per 100,000 contacts) ===\n")
cat("Treatment durations: Every 2 weeks from 2 to 24 weeks, plus no treatment (0 weeks)\n")
cat("Monte Carlo simulations:", format(n_simulations, big.mark = ","), "\n")
cat("Results standardized to per 100,000 contacts over 26 weeks\n")
cat("Baseline: Full TPT (26 weeks)\n\n")

# Print key statistics
summary_table <- plot_data %>%
  select(treatment_label, treatment_type, excess_cases_per_100k, ci_lower, ci_upper) %>%
  mutate(
    excess_cases_ci = sprintf("%.0f (%.0f to %.0f)", excess_cases_per_100k, ci_lower, ci_upper)
  ) %>%
  select(treatment_label, treatment_type, excess_cases_ci)

colnames(summary_table) <- c("Duration (weeks)", "Type", "Excess Cases per 100,000 (95% CI)")

print(summary_table)

cat("\nNote: All values are excess cases per 100,000 contacts compared to full 26-week treatment\n")
cat("Results calculated using TBCHAMP GAM models and Monte Carlo approach\n")
cat("Units: Excess TB cases per 100,000 contacts over 26 weeks\n")

# ——————————————————————————————————————————————————————
# 8) Key Findings Summary (UPDATED for per 100,000)
# ——————————————————————————————————————————————————————

cat("\n=== KEY FINDINGS (per 100,000 contacts) ===\n")
best_case <- plot_data[which.min(plot_data$excess_cases_per_100k), ]
worst_case <- plot_data[which.max(plot_data$excess_cases_per_100k), ]

cat("Best shortened treatment:", best_case$treatment_label, "weeks ->", 
    sprintf("%.0f excess cases per 100,000", best_case$excess_cases_per_100k), 
    sprintf("(95%% CI: %.0f to %.0f)", best_case$ci_lower, best_case$ci_upper), "\n")

cat("Worst case (No treatment):", worst_case$treatment_label, "weeks ->", 
    sprintf("%.0f excess cases per 100,000", worst_case$excess_cases_per_100k), 
    sprintf("(95%% CI: %.0f to %.0f)", worst_case$ci_lower, worst_case$ci_upper), "\n")

# Calculate relative risks compared to full treatment
full_tpt_mean <- mean(simulation_results[["Full TPT (26 wk)"]])
cat("\nRelative Risks (compared to Full TPT 26 weeks):\n")
for(scenario in names(simulation_results)) {
  if(scenario != "Full TPT (26 wk)") {
    rr <- mean(simulation_results[[scenario]]) / full_tpt_mean
    cat(sprintf("%s: RR = %.2f\n", scenario, rr))
  }
}

Sensitivity analysis

Method 1 – after the delay, use the intervention rate from time 0 rather than from the delay point; builds a table of delay scenarios with 95% UIs
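
The only change relative to the main analysis is how the intervention hazard is indexed after a delay of d weeks, sketched below (illustrative comments only):

# main analysis:  h_t <- h_int[(t0 - d) + 1]   # intervention clock restarts at the delay point
# sensitivity:    h_t <- h_int[t0 + 1]         # intervention hazard taken at time since baseline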

# ——————————————————————————————————————————————————————
# SENSITIVITY ANALYSIS: Modified Bootstrap Scenario Function
# Change: After delay, use intervention rate from time 0 (not from delay point)
# Output: Excess cases per 100,000 contacts (not absolute cases)
# ——————————————————————————————————————————————————————

bootstrap_scenario_sensitivity <- function(sim_idx, control_sims, intervention_sims, delay_weeks) {
  # Extract simulated predictions for this iteration
  Fc0_sim <- control_sims[, sim_idx]
  Fi0_sim <- intervention_sims[, sim_idx]
  
  # Draw p0 from its Jeffreys posterior, Beta(32.5, 1088.5), to propagate uncertainty in p0
  p0_draw <- rbeta(1, 32 + 0.5, 1120 - 32 + 0.5)  # TBCHAMP p0 = 32/1120
  Fc0_sim <- p0_draw + (1 - p0_draw) * Fc0_sim
  Fi0_sim <- p0_draw + (1 - p0_draw) * Fi0_sim
  # Enforce monotonicity (simulated cumulative curves can wiggle, which can make discrete hazards negative)
  Fc0_sim <- pmin(pmax(Fc0_sim, 0), 0.999)
  Fi0_sim <- pmin(pmax(Fi0_sim, 0), 0.999)
  Fc0_sim <- cummax(Fc0_sim); Fi0_sim <- cummax(Fi0_sim)

  # Calculate hazards for this simulation
  h_ctrl_sim <- diff(c(0, Fc0_sim)) / (1 - head(c(0, Fc0_sim), -1))
  h_int_sim <- diff(c(0, Fi0_sim)) / (1 - head(c(0, Fi0_sim), -1))
  
  # Build curve for this delay scenario
  n <- length(time_ext)
  Fcum <- numeric(n)
  
  # Find indices
  i0 <- which(time_ext == 0)
  back_wk <- 26
  
  # Special handling for pure scenarios
  if(delay_weeks == Inf) {
    # Pure control - FIXED: use p0_draw instead of p0
    # Pre-screen ramp
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)  # FIXED
    }
    Fcum[i0] <- p0_draw  # FIXED
    Sprev <- 1 - p0_draw  # FIXED
    
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else if(delay_weeks == -Inf) {
    # Pure intervention (Instant TPT) - FIXED: use p0_draw instead of p0
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)  # FIXED
    }
    Fcum[i0] <- p0_draw  # FIXED
    Sprev <- 1 - p0_draw  # FIXED
    
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else if(delay_weeks < 0) {
    # CORRECTED: Negative delays (pre-screen intervention) - FIXED: use p0_draw instead of p0
    # This means intervention starts BEFORE time 0, providing earlier protection
    
    # Find delay point index
    id_match <- which(time_ext == delay_weeks)
    if(length(id_match) == 0) {
      id <- which.min(abs(time_ext - delay_weeks))
    } else {
      id <- id_match[1]
    }
    
    # Before delay point: ramp (no intervention yet)
    if(id > 1) {
      s_up <- time_ext[1:(id-1)] + back_wk
      s_up[s_up < 0] <- 0
      Fcum[1:(id-1)] <- p0_draw * sqrt(s_up / back_wk)  # FIXED
    }
    
    # At delay point: start intervention
    s_delay <- max(0, delay_weeks + back_wk)
    Fcum[id] <- p0_draw * sqrt(s_delay / back_wk)  # FIXED
    Sprev <- 1 - Fcum[id]
    
    # CORRECTED: After delay point, use intervention hazards consistently
    # Map the post-delay timeline to intervention hazards starting from time 0
    for(i in (id+1):n) {
      current_time <- time_ext[i]
      
      # Calculate how long intervention has been active
      intervention_duration <- current_time - delay_weeks
      
      if(intervention_duration >= 0 && intervention_duration <= 54) {
        # Use intervention hazard corresponding to this duration
        h_idx <- min(intervention_duration + 1, length(h_int_sim))
        h_t <- h_int_sim[max(1, h_idx)]
      } else if(intervention_duration > 54) {
        # Use last available intervention hazard
        h_t <- h_int_sim[length(h_int_sim)]
      } else {
        # This shouldn't happen with negative delays, but use first intervention hazard as fallback
        h_t <- h_int_sim[1]
      }
      
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else {
    # ——————————————————————————————————————————————————————
    # SENSITIVITY ANALYSIS MODIFICATION HERE:
    # Positive delays (post-screen delays) - MODIFIED LOGIC
    # ——————————————————————————————————————————————————————
    # Pre-screen ramp
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)  # FIXED
    }
    
    # At screen
    Fcum[i0] <- p0_draw  # FIXED
    Sprev <- 1 - p0_draw  # FIXED
    
    # Post-screen: control until delay point, then intervention
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      
      if(t0 <= delay_weeks) {
        # Use control hazard until delay point
        h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
      } else {
        # SENSITIVITY ANALYSIS CHANGE:
        # Use intervention hazard corresponding to time from baseline (t0)
        # NOT from delay point (t0 - delay_weeks)
        intervention_time_from_baseline <- t0  # CHANGED: was t0 - delay_weeks
        h_idx <- min(intervention_time_from_baseline + 1, length(h_int_sim))
        h_t <- h_int_sim[max(1, h_idx)]
      }
      
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
  }
  
  # Return final value at week 26
  week_26_idx <- which(time_ext == 26)
  return(Fcum[week_26_idx])
}

# ——————————————————————————————————————————————————————
# Run Sensitivity Analysis with Modified Logic
# ——————————————————————————————————————————————————————

cat("Running SENSITIVITY ANALYSIS with modified intervention timing logic...\n")
cat("Change: After delay, use intervention rate from time 0 (not from delay point)\n")
cat("Output: Excess cases per 100,000 contacts\n\n")

# Set up time vectors (same as before)
time0 <- 0:26
back_wk <- 26
time_ext <- seq(-back_wk, 26, by = 1)

# Create 2-week interval delays from -26 to +24 (same grid as the main analysis)
delays_2wk <- seq(-26, 24, by = 2)

# Reuse the simulated predictions from the fitted GAM models (same as before)
cat("Using existing GAM simulations...\n")
# NOTE: Assumes control_sims and intervention_sims are already generated

# Define all scenarios including baseline (same as before)
all_delays <- c("Instant TPT" = -Inf, setNames(delays_2wk, paste0(delays_2wk, " wk")), "Control" = Inf)

# Run simulations for all scenarios with MODIFIED FUNCTION
cat("Running SENSITIVITY scenario simulations...\n")
sensitivity_results <- list()

for(scenario_name in names(all_delays)) {
  cat("Processing sensitivity scenario:", scenario_name, "\n")
  delay_val <- all_delays[scenario_name]
  
  scenario_sims <- sapply(1:n_simulations, function(i) {
    bootstrap_scenario_sensitivity(i, control_sims, intervention_sims, delay_val)  # MODIFIED FUNCTION
  })
  
  sensitivity_results[[scenario_name]] <- scenario_sims
}

cat("Completed SENSITIVITY simulations!\n\n")

# ——————————————————————————————————————————————————————
# Calculate Metrics for Sensitivity Analysis (PER 100,000)
# ——————————————————————————————————————————————————————

# Get baseline (Instant TPT) for relative calculations
sensitivity_baseline_sims <- sensitivity_results[["Instant TPT"]]
sensitivity_baseline_mean <- mean(sensitivity_baseline_sims)

# Calculate metrics for each scenario using per 100,000 conversion
sensitivity_results_list <- list()

for(scenario_name in names(sensitivity_results)) {
  scenario_sims <- sensitivity_results[[scenario_name]]
  
  # Basic statistics
  mean_incidence <- mean(scenario_sims)
  median_incidence <- median(scenario_sims)
  ci_lower <- quantile(scenario_sims, 0.025)
  ci_upper <- quantile(scenario_sims, 0.975)
  
  # Determine person-years (Control uses control PY, others use intervention PY)
  py <- ifelse(scenario_name == "Control", py_control, py_intervention)
  sample_size <- ifelse(scenario_name == "Control", n_control, n_intervention)
  
  # 1. Incidence rate per 100 person-yrs (95% CI)
  rate_per_100py <- (mean_incidence * sample_size / py) * 100
  rate_ci_lower <- (ci_lower * sample_size / py) * 100
  rate_ci_upper <- (ci_upper * sample_size / py) * 100
  
  # 2. Mean Expected Cases per 100,000 contacts (95% CI) - MODIFIED
  mean_cases_per_100k <- mean_incidence * 100000  # CHANGED: was * sample_size
  cases_ci_lower_per_100k <- ci_lower * 100000     # CHANGED: was * sample_size
  cases_ci_upper_per_100k <- ci_upper * 100000     # CHANGED: was * sample_size
  
  # 3. Excess calculations (compared to Instant TPT) - MODIFIED FOR PER 100,000
  if(scenario_name != "Instant TPT") {
    # Excess incidence simulations
    excess_sims <- scenario_sims - sensitivity_baseline_sims
    excess_mean <- mean(excess_sims)
    excess_ci_lower <- quantile(excess_sims, 0.025)
    excess_ci_upper <- quantile(excess_sims, 0.975)
    
    # Excess rate per 100 person-yrs
    excess_rate_per_100py <- (excess_mean * sample_size / py) * 100
    excess_rate_ci_lower <- (excess_ci_lower * sample_size / py) * 100
    excess_rate_ci_upper <- (excess_ci_upper * sample_size / py) * 100
    
    # Excess cases per 100,000 contacts - MODIFIED
    excess_cases_per_100k <- excess_mean * 100000        # CHANGED: was * sample_size
    excess_cases_ci_lower_per_100k <- excess_ci_lower * 100000  # CHANGED: was * sample_size
    excess_cases_ci_upper_per_100k <- excess_ci_upper * 100000  # CHANGED: was * sample_size
  } else {
    # Instant TPT is the baseline, so excess = 0
    excess_rate_per_100py <- 0
    excess_rate_ci_lower <- 0
    excess_rate_ci_upper <- 0
    excess_cases_per_100k <- 0
    excess_cases_ci_lower_per_100k <- 0
    excess_cases_ci_upper_per_100k <- 0
  }
  
  # 4. Relative Risk
  relative_risk <- mean_incidence / sensitivity_baseline_mean
  
  # Store results - UPDATED VARIABLE NAMES
  sensitivity_results_list[[scenario_name]] <- list(
    scenario = scenario_name,
    incidence_rate_per_100py = rate_per_100py,
    incidence_rate_ci_lower = rate_ci_lower,
    incidence_rate_ci_upper = rate_ci_upper,
    excess_rate_per_100py = excess_rate_per_100py,
    excess_rate_ci_lower = excess_rate_ci_lower,
    excess_rate_ci_upper = excess_rate_ci_upper,
    mean_cases_per_100k = mean_cases_per_100k,                     # UPDATED
    cases_ci_lower_per_100k = cases_ci_lower_per_100k,             # UPDATED
    cases_ci_upper_per_100k = cases_ci_upper_per_100k,             # UPDATED
    excess_cases_per_100k = excess_cases_per_100k,                 # UPDATED
    excess_cases_ci_lower_per_100k = excess_cases_ci_lower_per_100k, # UPDATED
    excess_cases_ci_upper_per_100k = excess_cases_ci_upper_per_100k, # UPDATED
    relative_risk = relative_risk
  )
}

# ——————————————————————————————————————————————————————
# Prepare Sensitivity Analysis Plot Data (PER 100,000)
# ——————————————————————————————————————————————————————

sensitivity_plot_data <- data.frame()

for(scenario_name in names(sensitivity_results_list)) {
  if(scenario_name == "Instant TPT") next  # Skip baseline
  
  r <- sensitivity_results_list[[scenario_name]]
  
  # Extract delay value for ordering
  if(scenario_name == "Control") {
    delay_weeks <- 999  # Put at the end
    delay_label <- "No Tx"
    delay_type <- "No Treatment"
  } else {
    delay_weeks <- as.numeric(sub(" wk", "", scenario_name))
    delay_label <- as.character(delay_weeks)
    delay_type <- ifelse(delay_weeks < 0, "Pre-screen", "Post-screen")
  }
  
  sensitivity_plot_data <- rbind(sensitivity_plot_data, data.frame(
    scenario = scenario_name,
    delay_weeks = delay_weeks,
    delay_label = delay_label,
    excess_cases_per_100k = r$excess_cases_per_100k,                     # UPDATED
    ci_lower_per_100k = r$excess_cases_ci_lower_per_100k,                # UPDATED
    ci_upper_per_100k = r$excess_cases_ci_upper_per_100k,                # UPDATED
    delay_type = delay_type,
    stringsAsFactors = FALSE
  ))
}

# Order by delay weeks
sensitivity_plot_data <- sensitivity_plot_data[order(sensitivity_plot_data$delay_weeks), ]

# ——————————————————————————————————————————————————————
# Create Sensitivity Analysis Plot (PER 100,000)
# ——————————————————————————————————————————————————————

# Set up RColorBrewer Paired palette
paired_colors <- brewer.pal(12, "Paired")

# Assign colors to delay types
color_mapping <- c(
  "Pre-screen" = paired_colors[2],    # Dark blue
  "Post-screen" = paired_colors[6],   # Red
  "No Treatment" = paired_colors[8]   # Orange
)

# Create the sensitivity analysis bar plot - UPDATED LABELS
sensitivity_plot <- ggplot(sensitivity_plot_data, aes(x = reorder(delay_label, delay_weeks), 
                                      y = excess_cases_per_100k,     # UPDATED
                                      fill = delay_type)) +
  geom_col(width = 0.8, alpha = 0.8, color = "white", size = 0.3) +
  geom_errorbar(aes(ymin = ci_lower_per_100k, ymax = ci_upper_per_100k),  # UPDATED
                width = 0.4, color = "black", size = 0.6, alpha = 0.8) +
  
  # Add value labels on bars with smart positioning - UPDATED
  geom_text(aes(label = sprintf("%.0f", excess_cases_per_100k),           # UPDATED - rounded to nearest whole
                y = ifelse(excess_cases_per_100k >= 0, 
                          pmax(ci_upper_per_100k + 50, excess_cases_per_100k + 50),  # UPDATED
                          pmin(ci_lower_per_100k - 50, excess_cases_per_100k - 50))), # UPDATED
            size = 3.2, fontface = "bold", color = "black") +
  
  # Styling
  scale_fill_manual(values = color_mapping, name = "Treatment Timing") +
  scale_y_continuous(labels = number_format(accuracy = 1),      # UPDATED - whole numbers
                     expand = expansion(mult = c(0.1, 0.15))) +
  
  # Reference line at zero
  geom_hline(yintercept = 0, linetype = "solid", color = "black", size = 0.8) +
  
  # Labels and theme - UPDATED
  labs(
    title = "SENSITIVITY ANALYSIS: TBCHAMP Excess TB Cases by Treatment Delay",
    subtitle = paste0("After delay, intervention follows rate from time 0 (not delay point) | 95% CI from ", 
                     format(n_simulations, big.mark = ","), " Monte Carlo simulations"),
    x = "Treatment Delay (weeks)",
    y = "Excess TB Cases per 100,000 Contacts",  # UPDATED
    caption = "Negative delays = pre-screen treatment | Compared to instant TPT baseline"  # UPDATED
  ) +
  
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 11, color = "gray40"),
    plot.caption = element_text(size = 9, color = "gray50"),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
    axis.text.y = element_text(size = 10),
    axis.title = element_text(size = 11, face = "bold"),
    legend.position = "top",
    legend.title = element_text(size = 10, face = "bold"),
    legend.text = element_text(size = 9),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "gray80", fill = NA, size = 0.5)
  )

# Display the sensitivity plot
print(sensitivity_plot)
# ——————————————————————————————————————————————————————
# Summary Statistics Table for Sensitivity Analysis (PER 100,000)
# ——————————————————————————————————————————————————————

cat("\n=== SENSITIVITY ANALYSIS SUMMARY ===\n")
cat("Modified logic: After delay, use intervention rate from time 0 (not from delay point)\n")
cat("Delay intervals: Every 2 weeks from -26 to +26 weeks\n")
cat("Monte Carlo simulations:", format(n_simulations, big.mark = ","), "\n")
cat("Output: Excess cases per 100,000 contacts\n\n")

# Print key statistics - UPDATED
sensitivity_summary_table <- sensitivity_plot_data %>%
  select(delay_label, delay_type, excess_cases_per_100k, ci_lower_per_100k, ci_upper_per_100k) %>%  # UPDATED
  mutate(
    excess_cases_ci = sprintf("%.0f (%.0f to %.0f)", excess_cases_per_100k, ci_lower_per_100k, ci_upper_per_100k)  # UPDATED - rounded
  ) %>%
  select(delay_label, delay_type, excess_cases_ci)

colnames(sensitivity_summary_table) <- c("Delay (weeks)", "Type", "Excess Cases per 100,000 (95% CI)")  # UPDATED

print(sensitivity_summary_table)
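
Optionally save the sensitivity table alongside the main results (file name is an assumption):

write.csv(sensitivity_summary_table, "tbchamp_sensitivity_delay_table.csv", row.names = FALSE)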

# ——————————————————————————————————————————————————————
# Generate Range Comparison for Publication Table
# ——————————————————————————————————————————————————————

# Calculate range for sensitivity analysis
sensitivity_range_min <- min(sensitivity_plot_data$excess_cases_per_100k)
sensitivity_range_max <- max(sensitivity_plot_data$excess_cases_per_100k)

cat(sprintf("\n=== RANGE COMPARISON FOR PUBLICATION ===\n"))
cat(sprintf("Sensitivity analysis range: %.0f to %.0f cases per 100,000\n", sensitivity_range_min, sensitivity_range_max))
cat("Format for comparison to main results:\n")
cat(sprintf("\"Scenario range: %.0f-%.0f vs [MAIN RESULTS RANGE] cases per 100,000\"\n", 
            sensitivity_range_min, sensitivity_range_max))

cat("\nNote: This sensitivity analysis tests the assumption about intervention timing\n")
cat("Original: After delay, follow intervention rate from delay point\n")
cat("Sensitivity: After delay, follow intervention rate from time 0\n")

Sensitivity analysis method 1 for shortened treatments

library(mgcv)
library(ggplot2)
library(dplyr)
library(parallel)

# ——————————————————————————————————————————————————————
# 0) Input Parameters - TBCHAMP Data
# ——————————————————————————————————————————————————————

# Sample sizes (TBCHAMP)
n_control <- 465      # TBCHAMP control sample size
n_intervention <- 451 # TBCHAMP intervention sample size

# Person-years of follow-up (TBCHAMP)
py_control <- 418     # TBCHAMP control person-years
py_intervention <- 406 # TBCHAMP intervention person-years

# Monte Carlo parameters
n_simulations <- 5000  # Number of bootstrap simulations
set.seed(123)         # For reproducibility

cat("Shortened Treatment Analysis Parameters (TBCHAMP):\n")
cat("Control sample size:", n_control, "\n")
cat("Intervention sample size:", n_intervention, "\n")
cat("Control person-years:", py_control, "\n")
cat("Intervention person-years:", py_intervention, "\n")
cat("Number of simulations:", n_simulations, "\n\n")

# ——————————————————————————————————————————————————————
# 1) Monte Carlo Simulation Function
# ——————————————————————————————————————————————————————

simulate_gam_predictions <- function(gam_model, newdata, n_sim = n_simulations) {
  # Get mean predictions and covariance matrix
  pred_mean <- predict(gam_model, newdata = newdata, type = "response")
  
  # Get the covariance matrix of the GAM coefficients
  Vp <- vcov(gam_model, unconditional = TRUE)
  
  # Get the linear predictor matrix
  Xp <- predict(gam_model, newdata = newdata, type = "lpmatrix")
  
  # Simulate from multivariate normal distribution
  # Generate random coefficients
  coef_sims <- MASS::mvrnorm(n_sim, coef(gam_model), Vp)
  
  # Calculate predictions for each simulation
  pred_sims <- tcrossprod(Xp, coef_sims)
  
  return(pred_sims)
}

# ——————————————————————————————————————————————————————
# 2) Bootstrap Shortened Treatment Scenario Builder
# ——————————————————————————————————————————————————————

bootstrap_shortened_scenario <- function(sim_idx, control_sims, intervention_sims, shorten_weeks) {
  # Extract simulated predictions for this iteration
  Fc0_sim <- control_sims[, sim_idx]
  Fi0_sim <- intervention_sims[, sim_idx]
  
  # Draw p0 from its Jeffreys posterior, Beta(32.5, 1088.5), to propagate uncertainty in p0
  p0_draw <- rbeta(1, 32 + 0.5, 1120 - 32 + 0.5)  # TBCHAMP p0 = 32/1120
  Fc0_sim <- p0_draw + (1 - p0_draw) * Fc0_sim
  Fi0_sim <- p0_draw + (1 - p0_draw) * Fi0_sim
  
  # Enforce monotonicity (simulated cumulative curves can wiggle, which can make discrete hazards negative)
  Fc0_sim <- pmin(pmax(Fc0_sim, 0), 0.999)
  Fi0_sim <- pmin(pmax(Fi0_sim, 0), 0.999)
  Fc0_sim <- cummax(Fc0_sim); Fi0_sim <- cummax(Fi0_sim)

  # Calculate hazards for this simulation
  h_ctrl_sim <- diff(c(0, Fc0_sim)) / (1 - head(c(0, Fc0_sim), -1))
  h_int_sim <- diff(c(0, Fi0_sim)) / (1 - head(c(0, Fi0_sim), -1))
  
  # Build curve for this shortened treatment scenario
  n <- length(time_ext)
  Fcum <- numeric(n)
  
  # Find indices
  i0 <- which(time_ext == 0)
  back_wk <- 26
  
  # Special handling for reference scenarios
  if(shorten_weeks == Inf) {
    # Pure control - no treatment at all
    # Pre-screen ramp
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)
    }
    Fcum[i0] <- p0_draw
    Sprev <- 1 - p0_draw
    
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else if(shorten_weeks == -Inf) {
    # Full treatment (26 weeks) - equivalent to instant TPT
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)
    }
    Fcum[i0] <- p0_draw
    Sprev <- 1 - p0_draw
    
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else {
    # Shortened treatment scenarios
    # Pre-screen ramp
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)
    }
    
    # At screen
    Fcum[i0] <- p0_draw
    Sprev <- 1 - p0_draw
    
    # Post-screen: intervention until shorten_weeks, then control
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      
      if(t0 <= shorten_weeks) {
        # Use intervention hazard until shortening point
        h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
      } else {
        # After shortening: use control hazard
        # Map time to control hazard based on time since shortening
        time_since_shorten <- t0 - shorten_weeks
        h_idx <- min(time_since_shorten + 1, length(h_ctrl_sim))
        h_t <- h_ctrl_sim[max(1, h_idx)]
      }
      
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
  }
  
  # Return final value at week 26
  week_26_idx <- which(time_ext == 26)
  return(Fcum[week_26_idx])
}
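# The scenario builders chain the discrete hazards through the survival
# recursion S(t) = S(t-1) * (1 - h(t)), with F(t) = 1 - S(t). Minimal sketch
# with made-up hazards (hypothetical numbers, for intuition only):
h_demo <- c(0.010, 0.020, 0.015)
F_demo <- 1 - cumprod(1 - h_demo)
print(round(F_demo, 4))  # cumulative incidence after weeks 1, 2, 3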

# ——————————————————————————————————————————————————————
# 3) Run Monte Carlo Simulations
# ——————————————————————————————————————————————————————

cat("Running Monte Carlo simulations for shortened treatments...\n")

# Set up time vectors
time0 <- 0:26
back_wk <- 26
time_ext <- seq(-back_wk, 26, by = 1)
shortened_weeks <- seq(2, 24, by = 2)  # Treatment durations from 2 to 24 weeks, in 2-week steps

# Generate simulated predictions for both models
cat("Generating GAM simulations...\n")
control_sims <- simulate_gam_predictions(control_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
intervention_sims <- simulate_gam_predictions(intervention_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)

cat("Generated", ncol(control_sims), "control simulations and", ncol(intervention_sims), "intervention simulations\n")

# Define all scenarios
all_scenarios <- c("Control" = Inf, "Full TPT (26 wk)" = -Inf, setNames(shortened_weeks, paste0("TPT ", shortened_weeks, " wk")))
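# Inf and -Inf act as sentinels: Inf routes bootstrap_shortened_scenario to its
# pure-control branch, -Inf to the full-treatment branch, and finite values to
# the shortened-treatment branch.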

# Run simulations for all scenarios
cat("Running shortened treatment scenario simulations...\n")
simulation_results <- list()

for(scenario_name in names(all_scenarios)) {
  cat("Processing scenario:", scenario_name, "\n")
  shorten_val <- all_scenarios[scenario_name]
  
  # Run simulations for this scenario
  scenario_sims <- sapply(1:n_simulations, function(i) {
    bootstrap_shortened_scenario(i, control_sims, intervention_sims, shorten_val)
  })
  
  simulation_results[[scenario_name]] <- scenario_sims
}

cat("Completed all simulations!\n\n")

# ——————————————————————————————————————————————————————
# 4) Calculate Summary Statistics
# ——————————————————————————————————————————————————————

summary_stats <- data.frame(
  Scenario = names(simulation_results),
  Mean_Incidence = sapply(simulation_results, mean),
  Median_Incidence = sapply(simulation_results, median),
  SD_Incidence = sapply(simulation_results, sd),
  CI_Lower = sapply(simulation_results, function(x) quantile(x, 0.025)),
  CI_Upper = sapply(simulation_results, function(x) quantile(x, 0.975)),
  stringsAsFactors = FALSE
)

cat("Summary Statistics (Final Incidence at Week 26):\n")
print(summary_stats)
cat("\n")

# ——————————————————————————————————————————————————————
# 5) Calculate Excess Incidence per 100,000 Contacts
# ——————————————————————————————————————————————————————

# Get baseline simulations (Full TPT 26 weeks)
baseline_sims <- simulation_results[["Full TPT (26 wk)"]]

# Calculate excess for each scenario
excess_stats <- summary_stats %>%
  filter(Scenario != "Full TPT (26 wk)") %>%
  mutate(
    # Point estimates
    Excess_Mean = Mean_Incidence - mean(baseline_sims),
    Excess_Median = Median_Incidence - median(baseline_sims),
    
    # Extract treatment duration information
    Treatment_Weeks = case_when(
      Scenario == "Control" ~ "No Treatment",
      grepl("TPT", Scenario) ~ as.character(as.numeric(sub("TPT (\\d+) wk", "\\1", Scenario))),
      TRUE ~ NA_character_
    ),
    Order = case_when(
      Scenario == "Control" ~ 999,
      TRUE ~ as.numeric(sub("TPT (\\d+) wk", "\\1", Scenario))
    )
  ) %>%
  arrange(Order)

# Calculate bootstrap confidence intervals for excess
for(i in 1:nrow(excess_stats)) {
  scenario_name <- excess_stats$Scenario[i]
  scenario_sims <- simulation_results[[scenario_name]]
  
  # Calculate excess for each simulation
  excess_sims <- scenario_sims - baseline_sims
  
  # Get percentiles
  excess_stats$Excess_CI_Lower[i] <- quantile(excess_sims, 0.025)
  excess_stats$Excess_CI_Upper[i] <- quantile(excess_sims, 0.975)
}
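# Note: excess_sims pairs each scenario draw with the same GAM coefficient draw
# as the baseline, so shared curve uncertainty partly cancels (the p0 draw is
# redrawn inside each scenario call). Illustrative spread comparison, using the
# first scenario row:
ex1 <- simulation_results[[excess_stats$Scenario[1]]] - baseline_sims
cat("Paired excess SD:", sd(ex1), "| unpaired SD approximation:",
    sqrt(var(simulation_results[[excess_stats$Scenario[1]]]) + var(baseline_sims)), "\n")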

# Convert to per 100,000 contacts
excess_stats <- excess_stats %>%
  mutate(
    Excess_per_100k = Excess_Mean * 100000,
    Excess_per_100k_CI_Lower = Excess_CI_Lower * 100000,
    Excess_per_100k_CI_Upper = Excess_CI_Upper * 100000
  )

# ——————————————————————————————————————————————————————
# 6) Final Results Output
# ——————————————————————————————————————————————————————

cat("=== SHORTENED TREATMENT ANALYSIS RESULTS ===\n")
cat("Excess TB Cases per 100,000 Contacts at 26 weeks (compared to Full TPT):\n\n")

# Create final results table
final_results <- excess_stats %>%
  select(Treatment_Weeks, Excess_per_100k, Excess_per_100k_CI_Lower, Excess_per_100k_CI_Upper) %>%
  arrange(as.numeric(ifelse(Treatment_Weeks == "No Treatment", 999, Treatment_Weeks)))

# Print results in a formatted table
cat(sprintf("%-15s %-15s %-20s\n", "Treatment", "Excess Cases", "95% Confidence Interval"))
cat(sprintf("%-15s %-15s %-20s\n", "(weeks)", "per 100,000", "(per 100,000)"))
cat(paste(rep("-", 50), collapse = ""), "\n")

for(i in 1:nrow(final_results)) {
  row <- final_results[i, ]
  cat(sprintf("%-15s %-15.0f (%-6.0f to %-6.0f)\n", 
              row$Treatment_Weeks, 
              row$Excess_per_100k,
              row$Excess_per_100k_CI_Lower,
              row$Excess_per_100k_CI_Upper))
}

cat("\n")
cat("Method: Monte Carlo Bootstrap with", n_simulations, "simulations\n")
cat("Baseline: Full TPT (26 weeks)\n")
cat("Sample size basis:", n_intervention, "intervention participants\n")

# ——————————————————————————————————————————————————————
# 7) Summary Statistics
# ——————————————————————————————————————————————————————

cat("\n=== KEY FINDINGS ===\n")

# Find minimum excess (best shortened treatment)
best_shortened <- final_results %>%
  filter(Treatment_Weeks != "No Treatment") %>%
  slice_min(Excess_per_100k, n = 1)

# Find maximum excess among shortened treatments
worst_shortened <- final_results %>%
  filter(Treatment_Weeks != "No Treatment") %>%
  slice_max(Excess_per_100k, n = 1)

# Control scenario
control_result <- final_results %>%
  filter(Treatment_Weeks == "No Treatment")

cat(sprintf("Best shortened treatment: %s weeks -> %.0f excess cases per 100,000 (95%% CI: %.0f to %.0f)\n",
            best_shortened$Treatment_Weeks, 
            best_shortened$Excess_per_100k,
            best_shortened$Excess_per_100k_CI_Lower,
            best_shortened$Excess_per_100k_CI_Upper))

cat(sprintf("Worst shortened treatment: %s weeks -> %.0f excess cases per 100,000 (95%% CI: %.0f to %.0f)\n",
            worst_shortened$Treatment_Weeks,
            worst_shortened$Excess_per_100k,
            worst_shortened$Excess_per_100k_CI_Lower,
            worst_shortened$Excess_per_100k_CI_Upper))

cat(sprintf("No treatment (Control): %.0f excess cases per 100,000 (95%% CI: %.0f to %.0f)\n",
            control_result$Excess_per_100k,
            control_result$Excess_per_100k_CI_Lower,
            control_result$Excess_per_100k_CI_Upper))

# Final results table, kept in the workspace for further use
# (a bare top-level return() would error outside a function)
final_results

Getting the control value as a sense check against the 26-week delay scenario; if the two agree, the model is internally consistent

library(mgcv)
library(ggplot2)
library(dplyr)
library(parallel)

# ——————————————————————————————————————————————————————
# 0) Input Parameters - MODIFY THESE WITH YOUR ACTUAL DATA
# ——————————————————————————————————————————————————————

# Sample sizes
n_control <- 465      # REPLACE with your actual control sample size
n_intervention <- 451 # REPLACE with your actual intervention sample size

# Person-years of follow-up
py_control <- 418     # REPLACE with your actual control person-years
py_intervention <- 406 # REPLACE with your actual intervention person-years

# Monte Carlo parameters
n_simulations <- 5000  # Number of bootstrap simulations
set.seed(123)         # For reproducibility

cat("Analysis Parameters:\n")
cat("Control sample size:", n_control, "\n")
cat("Intervention sample size:", n_intervention, "\n")
cat("Control person-years:", py_control, "\n")
cat("Intervention person-years:", py_intervention, "\n")
cat("Number of simulations:", n_simulations, "\n\n")

# ——————————————————————————————————————————————————————
# 1) Monte Carlo Simulation Function
# ——————————————————————————————————————————————————————

simulate_gam_predictions <- function(gam_model, newdata, n_sim = n_simulations) {
  # Get mean predictions and covariance matrix
  pred_mean <- predict(gam_model, newdata = newdata, type = "response")
  
  # Get the covariance matrix of the GAM coefficients
  Vp <- vcov(gam_model, unconditional = TRUE)
  
  # Get the linear predictor matrix
  Xp <- predict(gam_model, newdata = newdata, type = "lpmatrix")
  
  # Simulate from multivariate normal distribution
  # Generate random coefficients
  coef_sims <- MASS::mvrnorm(n_sim, coef(gam_model), Vp)
  
  # Calculate predictions for each simulation
  pred_sims <- tcrossprod(Xp, coef_sims)
  
  return(pred_sims)
}

# ——————————————————————————————————————————————————————
# 2) Bootstrap Scenario Builder (FIXED p0 inconsistency)
# ——————————————————————————————————————————————————————

bootstrap_scenario <- function(sim_idx, control_sims, intervention_sims, delay_weeks) {
  # Extract simulated predictions for this iteration
  Fc0_sim <- control_sims[, sim_idx]
  Fi0_sim <- intervention_sims[, sim_idx]
  
  # Apply p0 transformation with a beta draw for p0 to consider uncertainty in p0
  p0_draw <- rbeta(1, 32 + 0.5, 1120 - 32 + 0.5)  # TBCHAMP p0 values 32 and 1120
  Fc0_sim <- p0_draw + (1 - p0_draw) * Fc0_sim
  Fi0_sim <- p0_draw + (1 - p0_draw) * Fi0_sim
  # Enforce monotonicity: simulated cumulative curves can wiggle, which would make the discrete hazards below negative, so clamp to [0, 0.999] and take the running maximum
  Fc0_sim <- pmin(pmax(Fc0_sim, 0), 0.999)
  Fi0_sim <- pmin(pmax(Fi0_sim, 0), 0.999)
  Fc0_sim <- cummax(Fc0_sim); Fi0_sim <- cummax(Fi0_sim)

  
  # Calculate hazards for this simulation
  h_ctrl_sim <- diff(c(0, Fc0_sim)) / (1 - head(c(0, Fc0_sim), -1))
  h_int_sim <- diff(c(0, Fi0_sim)) / (1 - head(c(0, Fi0_sim), -1))
  
  # Build curve for this delay scenario
  n <- length(time_ext)
  Fcum <- numeric(n)
  
  # Find indices
  i0 <- which(time_ext == 0)
  back_wk <- 26
  
  # Special handling for pure scenarios
  if(delay_weeks == Inf) {
    # Pure control - FIXED: use p0_draw instead of p0
    # Pre-screen ramp
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)  # FIXED
    }
    Fcum[i0] <- p0_draw  # FIXED
    Sprev <- 1 - p0_draw  # FIXED
    
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else if(delay_weeks == -Inf) {
    # Pure intervention (Instant TPT) - FIXED: use p0_draw instead of p0
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)  # FIXED
    }
    Fcum[i0] <- p0_draw  # FIXED
    Sprev <- 1 - p0_draw  # FIXED
    
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else if(delay_weeks < 0) {
    # CORRECTED: Negative delays (pre-screen intervention) - FIXED: use p0_draw instead of p0
    # This means intervention starts BEFORE time 0, providing earlier protection
    
    # Find delay point index
    id_match <- which(time_ext == delay_weeks)
    if(length(id_match) == 0) {
      id <- which.min(abs(time_ext - delay_weeks))
    } else {
      id <- id_match[1]
    }
    
    # Before delay point: ramp (no intervention yet)
    if(id > 1) {
      s_up <- time_ext[1:(id-1)] + back_wk
      s_up[s_up < 0] <- 0
      Fcum[1:(id-1)] <- p0_draw * sqrt(s_up / back_wk)  # FIXED
    }
    
    # At delay point: start intervention
    s_delay <- max(0, delay_weeks + back_wk)
    Fcum[id] <- p0_draw * sqrt(s_delay / back_wk)  # FIXED
    Sprev <- 1 - Fcum[id]
    
    # CORRECTED: After delay point, use intervention hazards consistently
    # Map the post-delay timeline to intervention hazards starting from time 0
    for(i in (id+1):n) {
      current_time <- time_ext[i]
      
      # Calculate how long intervention has been active
      intervention_duration <- current_time - delay_weeks
      
      if(intervention_duration >= 0 && intervention_duration <= 54) {
        # Use intervention hazard corresponding to this duration
        h_idx <- min(intervention_duration + 1, length(h_int_sim))
        h_t <- h_int_sim[max(1, h_idx)]
      } else if(intervention_duration > 54) {
        # Use last available intervention hazard
        h_t <- h_int_sim[length(h_int_sim)]
      } else {
        # This shouldn't happen with negative delays, but use first intervention hazard as fallback
        h_t <- h_int_sim[1]
      }
      
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else {
    # Positive delays (post-screen delays) - FIXED: use p0_draw instead of p0
    # Pre-screen ramp
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)  # FIXED
    }
    
    # At screen
    Fcum[i0] <- p0_draw  # FIXED
    Sprev <- 1 - p0_draw  # FIXED
    
    # Post-screen: control until delay point, then intervention
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      
      if(t0 <= delay_weeks) {
        # Use control hazard until delay point
        h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
      } else {
        # Use intervention hazard after delay point
        # Map to intervention timeline: intervention has been active for (t0 - delay_weeks) weeks
        intervention_duration <- t0 - delay_weeks
        h_idx <- min(intervention_duration + 1, length(h_int_sim))
        h_t <- h_int_sim[max(1, h_idx)]
      }
      
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
  }
  
  # Return final value at week 26
  week_26_idx <- which(time_ext == 26)
  return(Fcum[week_26_idx])
}

# ——————————————————————————————————————————————————————
# 3) Run Monte Carlo Simulations - Every 2 Week Delays
# ——————————————————————————————————————————————————————

cat("Running Monte Carlo simulations for 2-week delay intervals...\n")

# Reset seed right before simulations for exact reproducibility
set.seed(42)

# Set up time vectors
time0 <- 0:26
back_wk <- 26
time_ext <- seq(-back_wk, 26, by = 1)

# Create 2-week delay intervals from -26 to +26 weeks
delays_2wk <- seq(-26, 26, by = 2)

# Generate simulated predictions for both models
cat("Generating GAM simulations...\n")
control_sims <- simulate_gam_predictions(control_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
intervention_sims <- simulate_gam_predictions(intervention_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)

cat("Generated", ncol(control_sims), "control simulations and", ncol(intervention_sims), "intervention simulations\n")

# Define all delay scenarios (including baseline at week 0 = no delay)
all_delays <- c(setNames(delays_2wk, paste0(delays_2wk, " wk")), 
                "Control" = Inf)  # Add pure control scenario

# Run simulations for all scenarios
cat("Running scenario simulations...\n")
simulation_results <- list()

for(scenario_name in names(all_delays)) {
  cat("Processing scenario:", scenario_name, "\n")
  delay_val <- all_delays[scenario_name]
  
  # Run simulations for this scenario
  scenario_sims <- sapply(1:n_simulations, function(i) {
    bootstrap_scenario(i, control_sims, intervention_sims, delay_val)
  })
  
  simulation_results[[scenario_name]] <- scenario_sims
}

cat("Completed all simulations!\n\n")

# Check if "0 wk" exists and compare a few values
print(simulation_results[["0 wk"]][1:5])  # First 5 simulation results
print(mean(simulation_results[["0 wk"]]))   # Mean for "0 wk" scenario

# ——————————————————————————————————————————————————————
# 4) Calculate Key Metrics with Week 0 as Baseline
# ——————————————————————————————————————————————————————

# Get baseline (Week 0 = no delay) for relative calculations
baseline_sims <- simulation_results[["0 wk"]]
baseline_mean <- mean(baseline_sims)

# Calculate metrics for each scenario
results_list <- list()

for(scenario_name in names(simulation_results)) {
  scenario_sims <- simulation_results[[scenario_name]]
  
  # Handle special case for Control scenario
  if(scenario_name == "Control") {
    delay_value <- Inf  # Or 999 for sorting purposes
  } else {
    delay_value <- as.numeric(sub(" wk", "", scenario_name))
  }
  
  # Basic statistics
  mean_incidence <- mean(scenario_sims)
  median_incidence <- median(scenario_sims)
  ci_lower <- quantile(scenario_sims, 0.025)
  ci_upper <- quantile(scenario_sims, 0.975)
  
  # Use intervention PY and sample size for all scenarios
  py <- py_intervention
  sample_size <- n_intervention
  
  # 1. Incidence rate per 100 person-yrs (95% CI)
  rate_per_100py <- (mean_incidence * sample_size / py) * 100
  rate_ci_lower <- (ci_lower * sample_size / py) * 100
  rate_ci_upper <- (ci_upper * sample_size / py) * 100
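  # Conversion logic: mean_incidence is a cumulative proportion, so multiplying
  # by sample_size gives expected cases, dividing by person-years gives a rate,
  # and * 100 scales to per 100 person-years. Hypothetical example:
  # 0.02 * 451 / 406 * 100 is roughly 2.2 per 100 PY.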
  
  # 2. Mean Expected Cases per 100,000 (95% CI)
  cases_per_100k <- mean_incidence * 100000
  cases_per_100k_ci_lower <- ci_lower * 100000
  cases_per_100k_ci_upper <- ci_upper * 100000
  
  # 3. Excess calculations (compared to Week 0)
  if(delay_value != 0) {
    # Excess incidence simulations
    excess_sims <- scenario_sims - baseline_sims
    excess_mean <- mean(excess_sims)
    excess_ci_lower <- quantile(excess_sims, 0.025)
    excess_ci_upper <- quantile(excess_sims, 0.975)
    
    # Excess rate per 100 person-yrs
    excess_rate_per_100py <- (excess_mean * sample_size / py) * 100
    excess_rate_ci_lower <- (excess_ci_lower * sample_size / py) * 100
    excess_rate_ci_upper <- (excess_ci_upper * sample_size / py) * 100
    
    # Excess cases per 100,000
    excess_cases_per_100k <- excess_mean * 100000
    excess_cases_per_100k_ci_lower <- excess_ci_lower * 100000
    excess_cases_per_100k_ci_upper <- excess_ci_upper * 100000
  } else {
    # Week 0 is the baseline, so all excess values are 0
    excess_rate_per_100py <- 0
    excess_rate_ci_lower <- 0
    excess_rate_ci_upper <- 0
    excess_cases_per_100k <- 0
    excess_cases_per_100k_ci_lower <- 0
    excess_cases_per_100k_ci_upper <- 0
  }
  
  # 4. Relative Risk
  relative_risk <- mean_incidence / baseline_mean
  
  # Store results
  results_list[[scenario_name]] <- list(
    delay_weeks = delay_value,
    incidence_rate_per_100py = rate_per_100py,
    rate_ci_lower = rate_ci_lower,
    rate_ci_upper = rate_ci_upper,
    excess_rate_per_100py = excess_rate_per_100py,
    excess_rate_ci_lower = excess_rate_ci_lower,
    excess_rate_ci_upper = excess_rate_ci_upper,
    cases_per_100k = cases_per_100k,
    cases_per_100k_ci_lower = cases_per_100k_ci_lower,
    cases_per_100k_ci_upper = cases_per_100k_ci_upper,
    excess_cases_per_100k = excess_cases_per_100k,
    excess_cases_per_100k_ci_lower = excess_cases_per_100k_ci_lower,
    excess_cases_per_100k_ci_upper = excess_cases_per_100k_ci_upper,
    relative_risk = relative_risk
  )
}

# ——————————————————————————————————————————————————————
# 5) Create Export Table for Word
# ——————————————————————————————————————————————————————

# Build the export table
export_table <- data.frame()

for(scenario_name in names(results_list)) {
  r <- results_list[[scenario_name]]
  
  # Format all confidence intervals
  incidence_rate_ci <- sprintf("%.2f (%.2f-%.2f)", 
                              r$incidence_rate_per_100py, r$rate_ci_lower, r$rate_ci_upper)
  
  if(r$delay_weeks == 0) {
    excess_rate_ci <- "BASELINE"
    excess_cases_ci <- "BASELINE"
  } else {
    excess_rate_ci <- sprintf("%+.2f (%+.2f to %+.2f)", 
                             r$excess_rate_per_100py, r$excess_rate_ci_lower, r$excess_rate_ci_upper)
    excess_cases_ci <- sprintf("%+.0f (%+.0f to %+.0f)", 
                              r$excess_cases_per_100k, r$excess_cases_per_100k_ci_lower, r$excess_cases_per_100k_ci_upper)
  }
  
  cases_per_100k_ci <- sprintf("%.0f (%.0f-%.0f)", 
                              r$cases_per_100k, r$cases_per_100k_ci_lower, r$cases_per_100k_ci_upper)
  
  relative_risk_formatted <- sprintf("%.2f", r$relative_risk)
  
  # Add row to export table
  export_table <- rbind(export_table, data.frame(
    `Delay (weeks)` = r$delay_weeks,
    `Incidence Rate per 100 Person-Yrs (95% CI)` = incidence_rate_ci,
    `Excess Incidence Rate per 100 Person-Yrs (95% CI)` = excess_rate_ci,
    `Mean Expected Cases per 100,000 (95% CI)` = cases_per_100k_ci,
    `Excess Cases per 100,000 (95% CI)` = excess_cases_ci,
    `Relative Risk` = relative_risk_formatted,
    stringsAsFactors = FALSE,
    check.names = FALSE
  ))
}

# Sort by delay weeks
export_table <- export_table[order(export_table$`Delay (weeks)`), ]

# Display the table
cat("\n=== EXPORT TABLE FOR WORD ===\n\n")
print(export_table, row.names = FALSE)

# ——————————————————————————————————————————————————————
# 6) Export Options
# ——————————————————————————————————————————————————————

# Option 1: Save as CSV for easy import to Word
write.csv(export_table, "TBCHAMP_tb_delays_analysis_table.csv", row.names = FALSE)
cat("\nTable saved as 'tb_delays_analysis_table.csv'\n")

# Option 2: Create a formatted version for direct copy-paste
cat("\n=== FORMATTED FOR COPY-PASTE TO WORD ===\n")
cat("Delay (weeks)\tIncidence Rate per 100 Person-Yrs (95% CI)\tExcess Incidence Rate per 100 Person-Yrs (95% CI)\tMean Expected Cases per 100,000 (95% CI)\tExcess Cases per 100,000 (95% CI)\tRelative Risk\n")

for(i in 1:nrow(export_table)) {
  cat(paste(export_table[i, ], collapse = "\t"), "\n")
}

# Option 3: Summary statistics
cat("\n=== ANALYSIS SUMMARY ===\n")
cat("Monte Carlo simulations:", n_simulations, "\n")
cat("Delay intervals: Every 2 weeks from -26 to +26 weeks\n")
cat("Baseline scenario: Week 0 (no delay)\n")
cat("Sample size:", n_intervention, "\n")
cat("Person-years:", py_intervention, "\n")
cat("Total scenarios analyzed:", nrow(export_table), "\n")

# Option 4: Key findings
best_scenario <- export_table[which.min(as.numeric(gsub("[^-0-9.]", "", export_table$`Delay (weeks)`))), ]
worst_scenario <- export_table[which.max(as.numeric(gsub("[^-0-9.]", "", export_table$`Delay (weeks)`))), ]

cat("\nBest scenario (most negative delay):", best_scenario$`Delay (weeks)`, "weeks\n")
cat("Worst scenario (most positive delay):", worst_scenario$`Delay (weeks)`, "weeks\n")

cat("\nNote: Negative delays = treatment starts before screening\n")
cat("Positive delays = treatment starts after screening\n")
cat("All excess calculations are relative to Week 0 (no delay)\n")

Method 2 – varying the time since diagnosis parameter (Figure 8)

Delays with L = 3 months, i.e. a 13-week pre-curve (the main analysis already uses L = 6 months, a 26-week pre-curve)

library(mgcv)
library(ggplot2)
library(dplyr)
library(readr)
library(tidyr)

# ——————————————————————————————————————————————————————
# TBCHAMP Pre-curve Comparison: Auto-detect CSV delays
# ——————————————————————————————————————————————————————

# Input Parameters - TBCHAMP
n_control <- 465
n_intervention <- 451
py_control <- 418
py_intervention <- 406
n_simulations <- 5000
set.seed(123)

cat("TBCHAMP Pre-curve Length Comparison:\n")
cat("Auto-detecting delays from CSV file for perfect alignment\n")
cat("Control sample size:", n_control, "\n")
cat("Intervention sample size:", n_intervention, "\n")
cat("Number of simulations:", n_simulations, "\n\n")

# ——————————————————————————————————————————————————————
# 1) First, load CSV and detect what delays are actually present
# ——————————————————————————————————————————————————————

cat("=== STEP 1: DETECTING CSV DELAYS ===\n")

if(file.exists("TBCHAMP_tb_delays_analysis_table.csv")) {
  # Load CSV and inspect structure
  tbchamp_csv_raw <- read_csv("TBCHAMP_tb_delays_analysis_table.csv")
  
  cat("CSV loaded successfully\n")
  cat("CSV columns found:\n")
  print(colnames(tbchamp_csv_raw))
  cat("\nFirst few delay values:\n")
  print(head(tbchamp_csv_raw$`Delay (weeks)`, 10))
  
  # Extract numeric delays from the CSV
  parse_ci_value <- function(ci_string) {
    as.numeric(sub(" \\(.*\\)", "", ci_string))
  }
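  # Example: parse_ci_value("1.23 (0.45-2.10)") returns 1.23; the regex drops
  # everything from the first " (" onward before coercing to numeric.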
  
  tbchamp_26wk_clean <- tbchamp_csv_raw %>%
    mutate(
      delay_weeks = case_when(
        `Delay (weeks)` %in% c("Instant TPT", "-Inf") ~ -999,
        `Delay (weeks)` %in% c("Control", "Inf") ~ 999,  # the CSV stores the Control row as Inf
        TRUE ~ as.numeric(`Delay (weeks)`)
      ),
      tbchamp_26wk_excess_cases_per_100k = parse_ci_value(`Excess Cases per 100,000 (95% CI)`),
      tbchamp_26wk_incidence_rate = parse_ci_value(`Incidence Rate per 100 Person-Yrs (95% CI)`),
      tbchamp_26wk_excess_rate = parse_ci_value(`Excess Incidence Rate per 100 Person-Yrs (95% CI)`),
      tbchamp_26wk_relative_risk = `Relative Risk`
    ) %>%
    select(delay_weeks, tbchamp_26wk_excess_cases_per_100k, tbchamp_26wk_incidence_rate, tbchamp_26wk_excess_rate, tbchamp_26wk_relative_risk) %>%
    filter(!is.na(delay_weeks))  # Remove rows where delay parsing failed
  
  # Extract the actual delays present in CSV (excluding baselines)
  csv_delays <- tbchamp_26wk_clean %>%
    filter(is.finite(delay_weeks) & delay_weeks != -999 & delay_weeks != 999) %>%  # drop baselines and any Inf sentinel
    arrange(delay_weeks) %>%
    pull(delay_weeks)
  
  cat("\nACTUAL DELAYS FOUND IN CSV:\n")
  cat("Delays:", paste(csv_delays, collapse = ", "), "\n")
  cat("Number of delay scenarios:", length(csv_delays), "\n\n")
  
} else {
  stop("ERROR: TBCHAMP_tb_delays_analysis_table.csv not found in working directory")
}

# ——————————————————————————————————————————————————————
# 2) Run NEW analysis using EXACTLY the same delays as CSV
# ——————————————————————————————————————————————————————

cat("=== STEP 2: RUNNING NEW ANALYSIS WITH MATCHED DELAYS ===\n")

# Monte Carlo Simulation Function
simulate_gam_predictions <- function(gam_model, newdata, n_sim = n_simulations) {
  pred_mean <- predict(gam_model, newdata = newdata, type = "response")
  Vp <- vcov(gam_model, unconditional = TRUE)
  Xp <- predict(gam_model, newdata = newdata, type = "lpmatrix")
  coef_sims <- MASS::mvrnorm(n_sim, coef(gam_model), Vp)
  pred_sims <- tcrossprod(Xp, coef_sims)
  return(pred_sims)
}

# Bootstrap Scenario Builder - SQUARE ROOT PRE-CURVE with a 13-week lookback
# (despite the "_linear" suffix below, this variant keeps the square-root ramp; only the pre-curve length changes)
bootstrap_scenario_linear <- function(sim_idx, control_sims, intervention_sims, delay_weeks) {
  Fc0_sim <- control_sims[, sim_idx]
  Fi0_sim <- intervention_sims[, sim_idx]
  
  # TBCHAMP p0 values
  p0_draw <- rbeta(1, 32 + 0.5, 1120 - 32 + 0.5)
  Fc0_sim <- p0_draw + (1 - p0_draw) * Fc0_sim
  Fi0_sim <- p0_draw + (1 - p0_draw) * Fi0_sim
  
  # Enforce monotonicity
  Fc0_sim <- pmin(pmax(Fc0_sim, 0), 0.999)
  Fi0_sim <- pmin(pmax(Fi0_sim, 0), 0.999)
  Fc0_sim <- cummax(Fc0_sim); Fi0_sim <- cummax(Fi0_sim)

  # Calculate hazards
  h_ctrl_sim <- diff(c(0, Fc0_sim)) / (1 - head(c(0, Fc0_sim), -1))
  h_int_sim <- diff(c(0, Fi0_sim)) / (1 - head(c(0, Fi0_sim), -1))
  
  # Build curve
  n <- length(time_ext)
  Fcum <- numeric(n)
  
  i0 <- which(time_ext == 0)
  back_wk <- 13  # -13 week pre-curve
  
  if(delay_weeks == Inf) {
    # Pure control - SQUARE ROOT PRE-CURVE
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)  # SQUARE ROOT
    }
    Fcum[i0] <- p0_draw
    Sprev <- 1 - p0_draw
    
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else if(delay_weeks == -Inf) {
    # Pure intervention - SQUARE ROOT PRE-CURVE
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)  # SQUARE ROOT
    }
    Fcum[i0] <- p0_draw
    Sprev <- 1 - p0_draw
    
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else if(delay_weeks < 0) {
    # Negative delays - SQUARE ROOT PRE-CURVE
    id_match <- which(time_ext == delay_weeks)
    if(length(id_match) == 0) {
      id <- which.min(abs(time_ext - delay_weeks))
    } else {
      id <- id_match[1]
    }
    
    if(id > 1) {
      s_up <- time_ext[1:(id-1)] + back_wk
      s_up[s_up < 0] <- 0
      Fcum[1:(id-1)] <- p0_draw * sqrt(s_up / back_wk)  # SQUARE ROOT
    }
    
    s_delay <- max(0, delay_weeks + back_wk)
    Fcum[id] <- p0_draw * sqrt(s_delay / back_wk)  # SQUARE ROOT
    Sprev <- 1 - Fcum[id]
    
    for(i in (id+1):n) {
      current_time <- time_ext[i]
      intervention_duration <- current_time - delay_weeks
      
      if(intervention_duration >= 0 && intervention_duration <= 54) {
        h_idx <- min(intervention_duration + 1, length(h_int_sim))
        h_t <- h_int_sim[max(1, h_idx)]
      } else if(intervention_duration > 54) {
        h_t <- h_int_sim[length(h_int_sim)]
      } else {
        h_t <- h_int_sim[1]
      }
      
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else {
    # Positive delays - SQUARE ROOT PRE-CURVE
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * sqrt(s_up / back_wk)  # SQUARE ROOT
    }
    
    Fcum[i0] <- p0_draw
    Sprev <- 1 - p0_draw
    
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      
      if(t0 <= delay_weeks) {
        h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
      } else {
        intervention_duration <- t0 - delay_weeks
        h_idx <- min(intervention_duration + 1, length(h_int_sim))
        h_t <- h_int_sim[max(1, h_idx)]
      }
      
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
  }
  
  week_26_idx <- which(time_ext == 26)
  return(Fcum[week_26_idx])
}

# Set up time vectors
time0 <- 0:26
back_wk <- 13
time_ext <- seq(-back_wk, 26, by = 1)

cat("Using EXACT CSV delays:", paste(csv_delays, collapse = ", "), "\n")

# Check GAM models
if(!exists("control_gam_k5_GCV_TBCHAMP") || !exists("intervention_gam_k5_GCV_TBCHAMP")) {
  stop("ERROR: TBCHAMP GAM models not found. Please load:\n  - control_gam_k5_GCV_TBCHAMP\n  - intervention_gam_k5_GCV_TBCHAMP")
}

# Generate simulations
control_sims <- simulate_gam_predictions(control_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
intervention_sims <- simulate_gam_predictions(intervention_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)

# Define scenarios using CSV delays
all_delays <- c("Instant TPT" = -Inf, setNames(csv_delays, paste0(csv_delays, " wk")), "Control" = Inf)

# Run simulations
cat("Running", length(all_delays), "scenarios...\n")
simulation_results <- list()

for(scenario_name in names(all_delays)) {
  delay_val <- all_delays[scenario_name]
  
  scenario_sims <- sapply(1:n_simulations, function(i) {
    bootstrap_scenario_linear(i, control_sims, intervention_sims, delay_val)
  })
  
  simulation_results[[scenario_name]] <- scenario_sims
}

cat("Monte Carlo simulations completed!\n\n")

# ——————————————————————————————————————————————————————
# 3) Calculate metrics and create results for comparison
# ——————————————————————————————————————————————————————

baseline_sims <- simulation_results[["Instant TPT"]]
baseline_mean <- mean(baseline_sims)

results_list <- list()

for(scenario_name in names(simulation_results)) {
  scenario_sims <- simulation_results[[scenario_name]]
  
  mean_incidence <- mean(scenario_sims)
  ci_lower <- quantile(scenario_sims, 0.025)
  ci_upper <- quantile(scenario_sims, 0.975)
  
  py <- ifelse(scenario_name == "Control", py_control, py_intervention)
  sample_size <- ifelse(scenario_name == "Control", n_control, n_intervention)
  
  rate_per_100py <- (mean_incidence * sample_size / py) * 100
  
  if(scenario_name != "Instant TPT") {
    excess_sims <- scenario_sims - baseline_sims
    excess_mean <- mean(excess_sims)
    excess_rate_per_100py <- (excess_mean * sample_size / py) * 100
    # Convert excess cases to per 100,000 population
    excess_cases_per_100k <- excess_mean * 100000
  } else {
    excess_rate_per_100py <- 0
    excess_cases_per_100k <- 0
  }
  
  relative_risk <- mean_incidence / baseline_mean
  
  results_list[[scenario_name]] <- list(
    scenario = scenario_name,
    incidence_rate_per_100py = rate_per_100py,
    excess_rate_per_100py = excess_rate_per_100py,
    excess_cases_per_100k = excess_cases_per_100k,
    relative_risk = relative_risk
  )
}

# Convert to comparable format
tbchamp_13wk_results <- data.frame()

for(scenario_name in names(results_list)) {
  r <- results_list[[scenario_name]]
  
  if(scenario_name == "Instant TPT") {
    delay_weeks <- -999
  } else if(scenario_name == "Control") {
    delay_weeks <- 999
  } else {
    delay_weeks <- as.numeric(sub(" wk", "", scenario_name))
  }
  
  tbchamp_13wk_results <- rbind(tbchamp_13wk_results, data.frame(
    scenario = scenario_name,
    delay_weeks = delay_weeks,
    tbchamp_13wk_excess_cases_per_100k = r$excess_cases_per_100k,
    tbchamp_13wk_incidence_rate = r$incidence_rate_per_100py,
    tbchamp_13wk_excess_rate = r$excess_rate_per_100py,
    tbchamp_13wk_relative_risk = r$relative_risk,
    stringsAsFactors = FALSE
  ))
}

# ——————————————————————————————————————————————————————
# 4) Perfect Comparison
# ——————————————————————————————————————————————————————

cat("=== STEP 3: PERFECT COMPARISON (NO NAs) ===\n")

# Merge datasets
comparison_data <- merge(tbchamp_26wk_clean, tbchamp_13wk_results, by = "delay_weeks", all = FALSE)  # all=FALSE ensures only matching rows

# Focus on early benefit scenarios
early_benefit_comparison <- comparison_data %>%
  filter(delay_weeks < 0 & delay_weeks != -999) %>%
  select(delay_weeks, scenario, 
         tbchamp_26wk_excess_cases_per_100k, tbchamp_13wk_excess_cases_per_100k,
         tbchamp_26wk_excess_rate, tbchamp_13wk_excess_rate,
         tbchamp_26wk_relative_risk, tbchamp_13wk_relative_risk) %>%
  mutate(
    diff_excess_cases_per_100k = tbchamp_13wk_excess_cases_per_100k - tbchamp_26wk_excess_cases_per_100k,
    diff_excess_rate = tbchamp_13wk_excess_rate - tbchamp_26wk_excess_rate,
    diff_relative_risk = tbchamp_13wk_relative_risk - tbchamp_26wk_relative_risk,
    pct_diff_excess_cases = ifelse(!is.na(tbchamp_26wk_excess_cases_per_100k) & tbchamp_26wk_excess_cases_per_100k != 0,
                                  (diff_excess_cases_per_100k / abs(tbchamp_26wk_excess_cases_per_100k)) * 100, NA),
    pct_diff_excess_rate = ifelse(!is.na(tbchamp_26wk_excess_rate) & tbchamp_26wk_excess_rate != 0,
                                 (diff_excess_rate / abs(tbchamp_26wk_excess_rate)) * 100, NA)
  ) %>%
  arrange(delay_weeks)

cat("COMPARISON DETAILS:\n")
cat("CSV data: -26 week pre-curve (square-root function)\n")
cat("New analysis: -13 week pre-curve (square-root function)\n")  
cat("Population: Children, South Africa (TBCHAMP)\n")
cat("Perfect delay alignment: NO NAs!\n\n")

if(nrow(early_benefit_comparison) > 0) {
  cat("EARLY BENEFIT SCENARIOS COMPARISON:\n")
  cat("===================================\n")
  cat("Number of matched scenarios:", nrow(early_benefit_comparison), "\n")
  cat("Delay range:", min(early_benefit_comparison$delay_weeks), "to", max(early_benefit_comparison$delay_weeks), "weeks\n\n")
  
  # Create detailed comparison table with clear per 100,000 labeling
  detailed_comparison <- early_benefit_comparison %>%
    mutate(
      `Delay (weeks)` = delay_weeks,
      `-26wk Excess Cases per 100k` = sprintf("%.1f", tbchamp_26wk_excess_cases_per_100k),
      `-13wk Excess Cases per 100k` = sprintf("%.1f", tbchamp_13wk_excess_cases_per_100k),
      `Difference per 100k` = sprintf("%+.1f", diff_excess_cases_per_100k),
      `% Change` = sprintf("%+.1f%%", pct_diff_excess_cases),
      `-26wk Rate` = sprintf("%.3f", tbchamp_26wk_excess_rate),
      `-13wk Rate` = sprintf("%.3f", tbchamp_13wk_excess_rate),
      `Rate Diff` = sprintf("%+.3f", diff_excess_rate)
    ) %>%
    select(`Delay (weeks)`, `-26wk Excess Cases per 100k`, `-13wk Excess Cases per 100k`, `Difference per 100k`, `% Change`,
           `-26wk Rate`, `-13wk Rate`, `Rate Diff`)
  
  print(detailed_comparison)
  
  # Summary statistics
  finite_cases_diff <- early_benefit_comparison$diff_excess_cases_per_100k[is.finite(early_benefit_comparison$diff_excess_cases_per_100k)]
  finite_pct_diff <- early_benefit_comparison$pct_diff_excess_cases[is.finite(early_benefit_comparison$pct_diff_excess_cases)]
  
  if(length(finite_cases_diff) > 0) {
    cat("\nSUMMARY STATISTICS:\n")
    cat("==================\n")
    cat("Mean difference (-13wk - -26wk):", sprintf("%.1f", mean(finite_cases_diff)), "excess cases per 100,000\n")
    cat("Range:", sprintf("%.1f to %.1f", min(finite_cases_diff), max(finite_cases_diff)), "excess cases per 100,000\n")
    
    if(length(finite_pct_diff) > 0) {
      cat("Mean percentage change:", sprintf("%.1f%%", mean(finite_pct_diff)), "\n")
      cat("Range:", sprintf("%.1f%% to %.1f%%", min(finite_pct_diff), max(finite_pct_diff)), "\n")
    }
    
    cat("\nINTERPRETATION:\n")
    mean_diff <- mean(finite_cases_diff)
    if(abs(mean_diff) < 50) {
      cat("• MINIMAL impact: Pre-curve length difference shows <50 cases per 100,000 average change\n")
    } else if(abs(mean_diff) < 200) {
      cat("• MODERATE impact: Pre-curve length difference shows", sprintf("%.0f", abs(mean_diff)), "cases per 100,000 average change\n")
    } else {
      cat("• SUBSTANTIAL impact: Pre-curve length difference shows", sprintf("%.0f", abs(mean_diff)), "cases per 100,000 average change\n")
    }
    
    if(mean_diff > 0) {
      cat("• Direction: -13 week pre-curve shows HIGHER excess cases per 100,000 (worse outcomes)\n")
    } else if(mean_diff < 0) {
      cat("• Direction: -13 week pre-curve shows LOWER excess cases per 100,000 (better outcomes)\n")
    }
  }
  
} else {
  cat("No early benefit scenarios found for comparison.\n")
}

# ——————————————————————————————————————————————————————
# 5) Create TBCHAMP Sensitivity Analysis Barplot (like original format)
# ——————————————————————————————————————————————————————

cat("=== STEP 4: CREATING SENSITIVITY ANALYSIS BARPLOT ===\n")

# Calculate confidence intervals for all scenarios from simulation results
plot_results <- data.frame()

for(scenario_name in names(simulation_results)) {
  scenario_sims <- simulation_results[[scenario_name]]
  baseline_sims <- simulation_results[["Instant TPT"]]
  
  if(scenario_name == "Instant TPT") {
    delay_weeks <- -999
    excess_cases_per_100k <- 0
    ci_lower <- 0
    ci_upper <- 0
  } else if(scenario_name == "Control") {
    delay_weeks <- 999
    excess_sims <- scenario_sims - baseline_sims
    excess_cases_per_100k <- mean(excess_sims) * 100000
    ci_lower <- quantile(excess_sims, 0.025) * 100000
    ci_upper <- quantile(excess_sims, 0.975) * 100000
  } else {
    delay_weeks <- as.numeric(sub(" wk", "", scenario_name))
    excess_sims <- scenario_sims - baseline_sims
    excess_cases_per_100k <- mean(excess_sims) * 100000
    ci_lower <- quantile(excess_sims, 0.025) * 100000
    ci_upper <- quantile(excess_sims, 0.975) * 100000
  }
  
  # Determine treatment timing category
  treatment_timing <- case_when(
    delay_weeks == 999 ~ "No Treatment",
    delay_weeks > 0 ~ "Post-screen",
    delay_weeks <= 0 & delay_weeks != -999 ~ "Pre-screen",
    delay_weeks == -999 ~ "Instant TPT"
  )
  
  plot_results <- rbind(plot_results, data.frame(
    delay_weeks = delay_weeks,
    scenario_name = scenario_name,
    excess_cases_per_100k = excess_cases_per_100k,
    ci_lower = ci_lower,
    ci_upper = ci_upper,
    treatment_timing = treatment_timing,
    stringsAsFactors = FALSE
  ))
}

# Filter and prepare for plotting (exclude instant TPT baseline and delays < -13)
plot_data <- plot_results %>%
  filter(delay_weeks != -999 & delay_weeks >= -13 & delay_weeks != 0 & delay_weeks != 26) %>%  # Exclude instant TPT, delays beyond -13, 0 weeks, and 26 weeks
  arrange(delay_weeks) %>%
  mutate(
    delay_label = ifelse(delay_weeks == 999, "No TPT", as.character(delay_weeks)),
    delay_order = ifelse(delay_weeks == 999, max(delay_weeks[delay_weeks != 999]) + 5, delay_weeks)
  ) %>%
  arrange(delay_order)

# Create sensitivity analysis barplot
sensitivity_plot <- ggplot(plot_data, aes(x = reorder(delay_label, delay_order), y = excess_cases_per_100k, fill = treatment_timing)) +
  geom_col(alpha = 0.8, width = 0.7) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2, color = "black", alpha = 0.7) +
  scale_fill_manual(values = c("Pre-screen" = "#2E86AB", "Post-screen" = "#E74C3C", "No Treatment" = "#F39C12")) +
  geom_text(aes(label = sprintf("%.0f", excess_cases_per_100k),
              y = ifelse(excess_cases_per_100k >= 0, 
                        pmax(ci_upper + 150, excess_cases_per_100k + 150),  # Above positive bars
                        pmin(ci_lower - 150, excess_cases_per_100k - 150))), # Below negative bars
          size = 4.2, fontface = "bold", color = "black") +
  labs(
    title = "SENSITIVITY ANALYSIS Method 2: TBCHAMP Excess TB Cases by Treatment Delay",
    subtitle = "13-week pre-curve assumption (square root) | Excess cases per 100,000 contacts at 26 weeks",
    x = "Treatment Delay (weeks)",
    y = "Excess TB Cases per 100,000 Contacts",
    fill = "Treatment Timing",
    caption = "Sample size: 451 | Negative delays = pre-screen treatment"
  ) +
  theme_minimal() +
  theme(
  plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 11, color = "black"),
    plot.caption = element_text(size = 9, color = "black"),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 13, color = "black"),
    axis.text.y = element_text(size = 13, color = "black"),
    axis.title = element_text(size = 11, face = "bold"),
    legend.position = "top",
    legend.title = element_text(size = 10, face = "bold"),
    legend.text = element_text(size = 9),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "gray80", fill = NA, size = 0.5) ) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.5, color = "gray50")

print(sensitivity_plot)
cat("\nSensitivity analysis barplot created!\n")
cat("Format matches original TBCHAMP analysis\n")
cat("13-week pre-curve with square root function\n")
cat("All values shown as excess cases per 100,000 population\n")
cat("Error bars show 95% confidence intervals\n\n")

cat("\n=== PERFECT ALIGNMENT ACHIEVED ===\n")
cat("No NAs in comparison - all delays perfectly matched!\n")
cat("All excess cases displayed as per 100,000 population\n")
cat("Barplot comparison included!\n")

Method 3 - trying a linear pre-curve with original trial L length

For the delay scenarios:

library(mgcv)

# —————————————————————————————————————————————————————
# A) Fit‐based cumulative‐incidence at t = 0…54, with p0‐spike

time0 <- 0:54
p0    <- 32/1120

# Use the k=5 GCV GAM models
Fc0 <- predict(control_gam_k5_GCV_TBCHAMP,
               newdata = data.frame(time = time0))
Fi0 <- predict(intervention_gam_k5_GCV_TBCHAMP,
               newdata = data.frame(time = time0))

Fc0 <- p0 + (1 - p0)*Fc0
Fi0 <- p0 + (1 - p0)*Fi0

h_ctrl <- diff(c(0, Fc0)) / (1 - head(c(0, Fc0), -1))
h_int  <- diff(c(0, Fi0)) / (1 - head(c(0, Fi0), -1))

# —————————————————————————————————————————————————————
# B) "Pre‐screen" ramp length and nominal post‐screen delays

back_wk <- 26            # show 6 months before screen
nominal_delays <- c(-26, -20, -10, -5, 5, 10, 20) # weeks of delay relative to screening (negative = treatment starts before screen)

# —————————————————————————————————————————————————————
# C) build a time axis from –back_wk … +54

time_ext <- seq(-back_wk, 54, by = 1)

# —————————————————————————————————————————————————————
# D) single‐curve builder: linear ramp on [–back_wk,0], then hazard‐switch at d

build_curve <- function(d) {
  n    <- length(time_ext)
  Fcum <- numeric(n)

  # find index of screen‐time t0 = 0
  i0   <- which(time_ext == 0)

  # 1) PRE‐SCREEN: linear ramp from (–back_wk → 0) up to p0
  s_up <- time_ext[1:(i0-1)] + back_wk
  Fcum[1:(i0-1)] <- p0 * (s_up / back_wk)

  # 2) AT SCREEN
  Fcum[i0] <- p0
  Sprev    <- 1 - p0

  # 3) POST‐SCREEN: control hazard until t0 ≤ d, then intervention hazard
  for(i in (i0+1):n) {
    t0 <- time_ext[i]
    if (d > 0 && t0 <= d) {
      h_t <- h_ctrl[t0 + 1]
    } else {
      idx <- if (d > 0) (t0 - d + 1) else (t0 + 1)
      h_t <- h_int[ min(idx, length(h_int)) ]
    }
    Snew     <- Sprev * (1 - h_t)
    Fcum[i]  <- 1 - Snew
    Sprev    <- Snew
  }

  Fcum
}

# —————————————————————————————————————————————————————
# E) build all the curves

# 1) Control & Instant curves
ctrl_ext <- build_curve( Inf)   # pure control
inst_ext <- build_curve(-Inf)   # pure intervention

#   and the linear ramp function for pre‐screen:
ramp <- p0 * (time_ext + back_wk) / back_wk 

# 2) separate positive and negative delays
pos_delays <- nominal_delays[nominal_delays >= 0]
neg_delays <- nominal_delays[nominal_delays < 0]

# 3) positive delays:
pos_mat <- if(length(pos_delays) > 0) sapply(pos_delays, function(d) build_curve(d)) else NULL

# 4) negative delays: ramp until delay point, then copy instant TPT curve
neg_mat <- if(length(neg_delays) > 0) {
  sapply(neg_delays, function(d) {
    n    <- length(time_ext)
    Fcum <- numeric(n)
    
    # find index of delay point d
    id_match <- which(time_ext == d)
    if(length(id_match) == 0) {
      # If exact match not found, find closest index
      id <- which.min(abs(time_ext - d))
    } else {
      id <- id_match[1]
    }
    
    i0   <- which(time_ext == 0)  # screen time index
    
    # Before delay point: follow the ramp
    if(id > 1) {
      s_up <- time_ext[1:(id-1)] + back_wk
      s_up[s_up < 0] <- 0  # ensure non-negative
      Fcum[1:(id-1)] <- p0 * (s_up / back_wk)  # index by id so lengths match s_up
    }
    
    # At delay point: set starting value from ramp
    s_delay <- max(0, d + back_wk)
    start_value <- p0 * (s_delay / back_wk)
    Fcum[id] <- start_value
    
    # After delay point: copy the instant TPT curve pattern
    if(id < n) {
      # Find the baseline instant TPT value at time 0 (our reference point)
      inst_at_zero <- inst_ext[i0]
      
      for(i in (id+1):n) {
        # Calculate how many steps from delay point
        steps_from_delay <- i - id
        
        # Find corresponding point in instant TPT curve (relative to time 0)
        inst_index <- i0 + steps_from_delay
        
        if(inst_index <= length(inst_ext) && inst_index > 0) {
          # Get the instant TPT value at this point
          inst_value <- inst_ext[inst_index]
          
          # Calculate the change from instant TPT baseline
          inst_change <- inst_value - inst_at_zero
          
          # Apply the same change to our starting value
          Fcum[i] <- start_value + inst_change
        } else if(inst_index > length(inst_ext)) {
          # If we've gone beyond available instant TPT data, 
          # use the last available change and keep it constant
          last_inst_value <- inst_ext[length(inst_ext)]
          last_change <- last_inst_value - inst_at_zero
          Fcum[i] <- start_value + last_change
        } else {
          # If we somehow get a negative index, keep the start value
          Fcum[i] <- start_value
        }
      }
    }
    
    Fcum
  })
} else NULL

# Combine all delay matrices
if(!is.null(neg_mat) && !is.null(pos_mat)) {
  delay_mat <- cbind(neg_mat, pos_mat)
  colnames(delay_mat) <- c(
    paste0(neg_delays, " wk delay"),
    paste0(pos_delays, " wk delay"))
} else if(!is.null(neg_mat)) {
  delay_mat <- neg_mat
  colnames(delay_mat) <- paste0(neg_delays, " wk delay")
} else if(!is.null(pos_mat)) {
  delay_mat <- pos_mat
  colnames(delay_mat) <- paste0(pos_delays, " wk delay")
} else {
  delay_mat <- NULL
}

# —————————————————————————————————————————————————————
# F') compressed x‐axis plot, up to +26 wk, now including the negative delay curves

# how many "plot units" to devote to the negative side?
neg_width <- 30

# remap time_ext → x_plot:
x_plot <- ifelse(
  time_ext < 0,
  neg_width * (time_ext + back_wk) / back_wk,  # −back_wk…0 → 0…30
  neg_width + pmin(time_ext, 54)              #   0…26 → 30…56 after the ≤26-week cut below
)
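# Worked example of the remap (back_wk = 26, neg_width = 30): t = -13 maps to
# 30 * (-13 + 26) / 26 = 15 plot units, t = 0 maps to 30, and t = 10 maps to 40.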

# bind together Control, Instant, and only your chosen delays:
if(!is.null(delay_mat)) {
  all_mat <- cbind(
    Control       = ctrl_ext,
    `Instant TPT` = inst_ext,
    delay_mat
  )
} else {
  all_mat <- cbind(
    Control       = ctrl_ext,
    `Instant TPT` = inst_ext
  )
}

keep   <- which(time_ext <= 26)
x_sub   <- x_plot[keep]
mat_sub <- all_mat[keep, , drop=FALSE]

# colours and line‐types
n_delay_cols <- if(!is.null(delay_mat)) ncol(delay_mat) else 0
cols <- c("red","blue", rainbow(n_delay_cols))
ltys <- c(1,1, rep(2, n_delay_cols))
yr   <- range(mat_sub)

# draw
matplot(
  x_sub, mat_sub,
  type="l", col=cols, lty=ltys, lwd=2,
  xaxt="n",  ylim=yr,
  xlab="Weeks since randomisation ",
  ylab="Cumulative incidence of TB",
  main="SENSITIVITY ANALYSIS TBCHAMP Delayed-TPT scenarios, LINEAR pre-curve"
)

# custom x-axis ticks:
neg_ticks <- seq(-back_wk, 0, by=2)     
neg_at    <- neg_width * (neg_ticks + back_wk) / back_wk
pos_ticks <- seq(0, 26, by=2) 
pos_at    <- neg_width + pos_ticks

axis(1,
     at     = c(neg_at, pos_at),
     labels = c(neg_ticks, pos_ticks)
)

# vertical line at t=0
abline(v=neg_width, lty=3, col="gray50")

# legend
legend("topleft",
       legend    = colnames(all_mat),
       col       = cols,
       lty       = ltys,
       lwd       = 2,
       bty       = "n",
       ncol      = 1,        
       cex       = 0.8,      
       y.intersp = 0.4,      
       x.intersp = 0.5,      
       inset     = c(0.02,0.02))
# Load required library for colorblind-friendly palettes
library(RColorBrewer)

# colours and line-types with colorblind-friendly palette
n_delay_cols <- if(!is.null(delay_mat)) ncol(delay_mat) else 0

# Keep control red and intervention blue, use colorblind-friendly palette for delays
if(n_delay_cols > 0) {
  # Manual colorblind-friendly colours (first nine of the RColorBrewer "Paired" palette)
  delay_colors <- c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6")[1:n_delay_cols]
  
  cols <- c("red", "blue", delay_colors)
} else {
  cols <- c("red", "blue")
}

ltys <- c(1, 1, rep(2, n_delay_cols))
yr   <- range(mat_sub)

# Set up the plot area
plot(range(x_sub), yr, type="n", 
     xaxt="n", ylim=yr,
     xlab="Weeks since randomisation",
     ylab="Cumulative incidence of TB",
     main="SENSITIVITY ANALYSIS: TBCHAMP Delayed-TPT scenarios LINEAR pre-curve")

# Plot all lines EXCEPT the control (first column) first
if(ncol(mat_sub) > 1) {
  # Plot intervention and delay lines first (columns 2 onwards)
  matlines(x_sub, mat_sub[, -1, drop=FALSE], 
           col=cols[-1], lty=ltys[-1], lwd=2)
}

# Plot the control line LAST so it appears on top
lines(x_sub, mat_sub[, 1], 
      col=cols[1], lty=ltys[1], lwd=2)

# custom x-axis ticks:
neg_ticks <- seq(-back_wk, 0, by=2)     
neg_at    <- neg_width * (neg_ticks + back_wk) / back_wk
pos_ticks <- seq(0, 26, by=2) 
pos_at    <- neg_width + pos_ticks

axis(1,
     at     = c(neg_at, pos_at),
     labels = c(neg_ticks, pos_ticks)
)

# vertical line at t=0
abline(v=neg_width, lty=3, col="gray50")

# Reorder legend to match plotting order (control was plotted last)
legend_names <- colnames(mat_sub)
legend_cols <- cols
legend_ltys <- ltys

# If we have multiple columns, reorder to match plotting sequence
if(ncol(mat_sub) > 1) {
  # We plotted columns 2+ first, then column 1 (control) last
  new_order <- c(2:ncol(mat_sub), 1)
  legend_names <- legend_names[new_order]
  legend_cols <- legend_cols[new_order] 
  legend_ltys <- legend_ltys[new_order]
}

# legend
legend("topleft",
       legend    = legend_names,
       col       = legend_cols,
       lty       = legend_ltys,
       lwd       = 2,
       bty       = "n",
       ncol      = 1,        
       cex       = 0.8,      
       y.intersp = 0.8,      
       x.intersp = 0.5,      
       inset     = c(0.02, 0.02))
library(mgcv)
library(ggplot2)
library(dplyr)
library(RColorBrewer)
library(scales)

# ——————————————————————————————————————————————————————
# Enhanced TB Delays Analysis with 2-week intervals - Using Real GAM Models
# ——————————————————————————————————————————————————————

# Input Parameters (from your data)
n_control <- 465
n_intervention <- 451
py_control <- 418
py_intervention <- 406
n_simulations <- 5000
set.seed(123)

cat("Enhanced Delays Analysis Parameters:\n")
cat("Control sample size:", n_control, "\n")
cat("Intervention sample size:", n_intervention, "\n")
cat("Control person-years:", py_control, "\n")
cat("Intervention person-years:", py_intervention, "\n")
cat("Number of simulations:", n_simulations, "\n\n")

# ——————————————————————————————————————————————————————
# 1) Monte Carlo Simulation Function (from your working code)
# ——————————————————————————————————————————————————————

simulate_gam_predictions <- function(gam_model, newdata, n_sim = n_simulations) {
  # Get mean predictions and covariance matrix
  pred_mean <- predict(gam_model, newdata = newdata, type = "response")
  
  # Get the covariance matrix of the GAM coefficients
  Vp <- vcov(gam_model, unconditional = TRUE)
  
  # Get the linear predictor matrix
  Xp <- predict(gam_model, newdata = newdata, type = "lpmatrix")
  
  # Simulate from multivariate normal distribution
  # Generate random coefficients
  coef_sims <- MASS::mvrnorm(n_sim, coef(gam_model), Vp)
  
  # Calculate predictions for each simulation
  pred_sims <- tcrossprod(Xp, coef_sims)
  
  return(pred_sims)
}

# ——————————————————————————————————————————————————————
# 2) Bootstrap Scenario Builder (exactly from your working code)
# ——————————————————————————————————————————————————————

bootstrap_scenario <- function(sim_idx, control_sims, intervention_sims, delay_weeks) {
  # Extract simulated predictions for this iteration
  Fc0_sim <- control_sims[, sim_idx]
  Fi0_sim <- intervention_sims[, sim_idx]
  
  # Apply p0 transformation with a beta draw for p0 to consider uncertainty in p0
  p0_draw <- rbeta(1, 32 + 0.5, 1120 - 32 + 0.5)  # TBCHAMP p0 values 32 and 1120
  Fc0_sim <- p0_draw + (1 - p0_draw) * Fc0_sim
  Fi0_sim <- p0_draw + (1 - p0_draw) * Fi0_sim
  # Enforce monotonicity: simulated cumulative curves can wiggle, which would make the discrete hazards below negative, so clamp to [0, 0.999] and take the running maximum
  Fc0_sim <- pmin(pmax(Fc0_sim, 0), 0.999)
  Fi0_sim <- pmin(pmax(Fi0_sim, 0), 0.999)
  Fc0_sim <- cummax(Fc0_sim); Fi0_sim <- cummax(Fi0_sim)

  
  # Calculate hazards for this simulation
  h_ctrl_sim <- diff(c(0, Fc0_sim)) / (1 - head(c(0, Fc0_sim), -1))
  h_int_sim <- diff(c(0, Fi0_sim)) / (1 - head(c(0, Fi0_sim), -1))
  
  # Build curve for this delay scenario
  n <- length(time_ext)
  Fcum <- numeric(n)
  
  # Find indices
  i0 <- which(time_ext == 0)
  back_wk <- 26
  
  # Special handling for pure scenarios
  if(delay_weeks == Inf) {
    # Pure control - FIXED: use p0_draw instead of p0
    # Pre-screen ramp
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * (s_up / back_wk)  # FIXED
    }
    Fcum[i0] <- p0_draw  # FIXED
    Sprev <- 1 - p0_draw  # FIXED
    
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else if(delay_weeks == -Inf) {
    # Pure intervention (Instant TPT) - FIXED: use p0_draw instead of p0
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * (s_up / back_wk)  # FIXED: p0_draw, not the global p0
    }
    Fcum[i0] <- p0_draw  # FIXED
    Sprev <- 1 - p0_draw  # FIXED
    
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      h_t <- h_int_sim[min(t0 + 1, length(h_int_sim))]
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else if(delay_weeks < 0) {
    # CORRECTED: Negative delays (pre-screen intervention) - FIXED: use p0_draw instead of p0
    # This means intervention starts BEFORE time 0, providing earlier protection
    
    # Find delay point index
    id_match <- which(time_ext == delay_weeks)
    if(length(id_match) == 0) {
      id <- which.min(abs(time_ext - delay_weeks))
    } else {
      id <- id_match[1]
    }
    
    # Before delay point: linear ramp (no intervention yet)
    if(id > 1) {
      s_up <- time_ext[1:(id-1)] + back_wk
      s_up[s_up < 0] <- 0
      Fcum[1:(id-1)] <- p0_draw * (s_up / back_wk)
    }
    
    # At delay point: start intervention from the linear-ramp value
    # (this sensitivity analysis uses a linear, not square-root, pre-curve)
    s_delay <- max(0, delay_weeks + back_wk)
    Fcum[id] <- p0_draw * (s_delay / back_wk)
    Sprev <- 1 - Fcum[id]
    
    # After the delay point, apply intervention hazards consistently by mapping
    # the post-delay timeline onto the intervention hazard scale (time 0 = start of TPT)
    for(i in (id+1):n) {
      current_time <- time_ext[i]
      
      # Calculate how long intervention has been active
      intervention_duration <- current_time - delay_weeks
      
      if(intervention_duration >= 0) {
        # Use the intervention hazard for this duration, capped at the last
        # available hazard once the simulated curve runs out
        h_idx <- min(intervention_duration + 1, length(h_int_sim))
        h_t <- h_int_sim[max(1, h_idx)]
      } else {
        # Shouldn't occur for i > id with negative delays; fall back to the first hazard
        h_t <- h_int_sim[1]
      }
      
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
    
  } else {
    # Positive delays (post-screen): linear pre-screen ramp, control hazards
    # until the delay point, then intervention hazards
    # Pre-screen ramp
    if(i0 > 1) {
      s_up <- time_ext[1:(i0-1)] + back_wk
      Fcum[1:(i0-1)] <- p0_draw * (s_up / back_wk)
    }
    
    # At screen
    Fcum[i0] <- p0_draw
    Sprev <- 1 - p0_draw
    
    # Post-screen: control until delay point, then intervention
    for(i in (i0+1):n) {
      t0 <- time_ext[i]
      
      if(t0 <= delay_weeks) {
        # Use control hazard until delay point
        h_t <- h_ctrl_sim[min(t0 + 1, length(h_ctrl_sim))]
      } else {
        # Use intervention hazard after delay point
        # Map to intervention timeline: intervention has been active for (t0 - delay_weeks) weeks
        intervention_duration <- t0 - delay_weeks
        h_idx <- min(intervention_duration + 1, length(h_int_sim))
        h_t <- h_int_sim[max(1, h_idx)]
      }
      
      Snew <- Sprev * (1 - h_t)
      Fcum[i] <- 1 - Snew
      Sprev <- Snew
    }
  }
  
  # Return final value at week 26
  week_26_idx <- which(time_ext == 26)
  return(Fcum[week_26_idx])
}
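
# Worked illustration (toy numbers, not trial data) of the discrete hazard
# identity used above: h_t = (F_t - F_{t-1}) / (1 - F_{t-1}), inverted by
# S_t = S_{t-1} * (1 - h_t) and F_t = 1 - S_t
F_toy <- c(0.02, 0.05, 0.07)
h_toy <- diff(c(0, F_toy)) / (1 - head(c(0, F_toy), -1))
all.equal(1 - cumprod(1 - h_toy), F_toy)  # TRUE: the hazards reconstruct the curve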

# ——————————————————————————————————————————————————————
# 3) Run Enhanced Analysis with 2-week intervals using the fitted GAM models
# ——————————————————————————————————————————————————————

cat("Running enhanced Monte Carlo simulations with 2-week intervals...\n")

# Set up time vectors
time0 <- 0:26
back_wk <- 26
time_ext <- seq(-back_wk, 26, by = 1)

# Create 2-week interval delays from -26 to +24 weeks
delays_2wk <- seq(-26, 24, by = 2)

# Generate simulated predictions from the fitted GAM models
cat("Generating GAM simulations from the fitted models...\n")
# NOTE: the fitted objects control_gam_k5_GCV_TBCHAMP and intervention_gam_k5_GCV_TBCHAMP,
# and n_simulations, must already exist from the earlier chunks
control_sims <- simulate_gam_predictions(control_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)
intervention_sims <- simulate_gam_predictions(intervention_gam_k5_GCV_TBCHAMP, data.frame(time = time0), n_simulations)

cat("Generated", ncol(control_sims), "control simulations and", ncol(intervention_sims), "intervention simulations\n")

# Define all scenarios including baseline
all_delays <- c("Instant TPT" = -Inf, setNames(delays_2wk, paste0(delays_2wk, " wk")), "Control" = Inf)

# Run simulations for all scenarios
cat("Running scenario simulations...\n")
simulation_results <- list()

for(scenario_name in names(all_delays)) {
  cat("Processing scenario:", scenario_name, "\n")
  delay_val <- all_delays[scenario_name]
  
  scenario_sims <- sapply(1:n_simulations, function(i) {
    bootstrap_scenario(i, control_sims, intervention_sims, delay_val)
  })
  
  simulation_results[[scenario_name]] <- scenario_sims
}

cat("Completed all simulations!\n\n")

# ——————————————————————————————————————————————————————
# 4) Calculate Excess Cases relative to the Instant TPT baseline
# ——————————————————————————————————————————————————————

# Get baseline (Instant TPT) for relative calculations
baseline_sims <- simulation_results[["Instant TPT"]]
baseline_mean <- mean(baseline_sims)

# Calculate metrics for each scenario
results_list <- list()

for(scenario_name in names(simulation_results)) {
  scenario_sims <- simulation_results[[scenario_name]]
  
  # Basic statistics
  mean_incidence <- mean(scenario_sims)
  median_incidence <- median(scenario_sims)
  ci_lower <- quantile(scenario_sims, 0.025)
  ci_upper <- quantile(scenario_sims, 0.975)
  
  # Determine person-years (Control uses control PY, others use intervention PY)
  py <- ifelse(scenario_name == "Control", py_control, py_intervention)
  sample_size <- ifelse(scenario_name == "Control", n_control, n_intervention)
  
  # 1. Incidence rate per 100 person-yrs (95% CI)
  rate_per_100py <- (mean_incidence * sample_size / py) * 100
  rate_ci_lower <- (ci_lower * sample_size / py) * 100
  rate_ci_upper <- (ci_upper * sample_size / py) * 100
  
  # 2. Mean Expected Cases (95% CI)
  mean_cases <- mean_incidence * sample_size
  cases_ci_lower <- ci_lower * sample_size
  cases_ci_upper <- ci_upper * sample_size
  
  # 3. Excess calculations (compared to Instant TPT)
  if(scenario_name != "Instant TPT") {
    # Excess incidence simulations
    excess_sims <- scenario_sims - baseline_sims
    excess_mean <- mean(excess_sims)
    excess_ci_lower <- quantile(excess_sims, 0.025)
    excess_ci_upper <- quantile(excess_sims, 0.975)
    
    # Excess rate per 100 person-yrs
    excess_rate_per_100py <- (excess_mean * sample_size / py) * 100
    excess_rate_ci_lower <- (excess_ci_lower * sample_size / py) * 100
    excess_rate_ci_upper <- (excess_ci_upper * sample_size / py) * 100
    
    # Excess cases
    excess_cases_mean <- excess_mean * sample_size
    excess_cases_ci_lower <- excess_ci_lower * sample_size
    excess_cases_ci_upper <- excess_ci_upper * sample_size
  } else {
    # Instant TPT is the baseline, so excess = 0
    excess_rate_per_100py <- 0
    excess_rate_ci_lower <- 0
    excess_rate_ci_upper <- 0
    excess_cases_mean <- 0
    excess_cases_ci_lower <- 0
    excess_cases_ci_upper <- 0
  }
  
  # 4. Relative Risk
  relative_risk <- mean_incidence / baseline_mean
  
  # Store results
  results_list[[scenario_name]] <- list(
    scenario = scenario_name,
    incidence_rate_per_100py = rate_per_100py,
    incidence_rate_ci_lower = rate_ci_lower,
    incidence_rate_ci_upper = rate_ci_upper,
    excess_rate_per_100py = excess_rate_per_100py,
    excess_rate_ci_lower = excess_rate_ci_lower,
    excess_rate_ci_upper = excess_rate_ci_upper,
    mean_cases = mean_cases,
    cases_ci_lower = cases_ci_lower,
    cases_ci_upper = cases_ci_upper,
    excess_cases_mean = excess_cases_mean,
    excess_cases_ci_lower = excess_cases_ci_lower,
    excess_cases_ci_upper = excess_cases_ci_upper,
    relative_risk = relative_risk
  )
}

# ——————————————————————————————————————————————————————
# 5) Prepare Plot Data
# ——————————————————————————————————————————————————————

plot_data <- data.frame()

for(scenario_name in names(results_list)) {
  if(scenario_name == "Instant TPT") next  # Skip baseline
  
  r <- results_list[[scenario_name]]
  
  # Extract delay value for ordering
  if(scenario_name == "Control") {
    delay_weeks <- 999  # Put at the end
    delay_label <- "No Tx"
    delay_type <- "No Treatment"
  } else {
    delay_weeks <- as.numeric(sub(" wk", "", scenario_name))
    delay_label <- as.character(delay_weeks)
    delay_type <- ifelse(delay_weeks < 0, "Pre-screen", "Post-screen")
  }
  
  plot_data <- rbind(plot_data, data.frame(
    scenario = scenario_name,
    delay_weeks = delay_weeks,
    delay_label = delay_label,
    excess_cases = r$excess_cases_mean,
    ci_lower = r$excess_cases_ci_lower,
    ci_upper = r$excess_cases_ci_upper,
    delay_type = delay_type,
    stringsAsFactors = FALSE
  ))
}

# Order by delay weeks
plot_data <- plot_data[order(plot_data$delay_weeks), ]

# ——————————————————————————————————————————————————————
# 6) Create Enhanced Color-Blind Friendly Bar Plot
# ——————————————————————————————————————————————————————

# Set up the RColorBrewer "Paired" palette
library(RColorBrewer)
library(scales)  # for number_format() on the y-axis below
paired_colors <- brewer.pal(12, "Paired")

# Assign colors to delay types
color_mapping <- c(
  "Pre-screen" = paired_colors[2],    # Dark blue
  "Post-screen" = paired_colors[6],   # Red
  "No Treatment" = paired_colors[8]   # Orange
)

# Create the enhanced bar plot
enhanced_plot <- ggplot(plot_data, aes(x = reorder(delay_label, delay_weeks), 
                                      y = excess_cases, 
                                      fill = delay_type)) +
  geom_col(width = 0.8, alpha = 0.8, color = "white", size = 0.3) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), 
                width = 0.4, color = "black", size = 0.6, alpha = 0.8) +
  
  # Add value labels on bars with smart positioning
  geom_text(aes(label = sprintf("%.1f", excess_cases),
                y = ifelse(excess_cases >= 0, 
                          pmax(ci_upper + 0.5, excess_cases + 0.5),  # Above positive bars
                          pmin(ci_lower - 0.5, excess_cases - 0.5))), # Below negative bars
            size = 3.2, fontface = "bold", color = "black") +
  
  # Styling
  scale_fill_manual(values = color_mapping, name = "Treatment Timing") +
  scale_y_continuous(labels = number_format(accuracy = 0.1), 
                     expand = expansion(mult = c(0.1, 0.15))) +
  
  # Reference line at zero
  geom_hline(yintercept = 0, linetype = "solid", color = "black", size = 0.8) +
  
  # Labels and theme
  labs(
    title = "SENSITIVITY ANALYSIS: TBCHAMP Excess TB Cases by Treatment Delay",
    subtitle = paste0("The duration from index diagnosis to contact's randomisation is a LINEAR instead of square-root function (95% CI from ", 
                     format(n_simulations, big.mark = ","), " Monte Carlo simulations)"),
    x = "Treatment Delay (weeks)",
    y = "Excess TB Cases",
    caption = paste0("Sample size: ", n_intervention, " | Negative delays = pre-screen treatment")
  ) +
  
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 11, color = "gray40"),
    plot.caption = element_text(size = 9, color = "gray50"),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
    axis.text.y = element_text(size = 10),
    axis.title = element_text(size = 11, face = "bold"),
    legend.position = "top",
    legend.title = element_text(size = 10, face = "bold"),
    legend.text = element_text(size = 9),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "gray80", fill = NA, size = 0.5)
  )

# Display the plot
print(enhanced_plot)
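# Optional: save the figure to disk (uncomment; the filename is an assumption)
# ggsave("TBCHAMP_sensitivity_linear_precurve_excess_cases.png", enhanced_plot,
#        width = 10, height = 6, dpi = 300)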
# ——————————————————————————————————————————————————————
# 7) Summary Statistics Table
# ——————————————————————————————————————————————————————

cat("\n=== ENHANCED DELAY ANALYSIS SUMMARY (using real GAM models) ===\n")
cat("Delay intervals: Every 2 weeks from -26 to +26 weeks\n")
cat("Monte Carlo simulations:", format(n_simulations, big.mark = ","), "\n")
cat("Sample size:", n_intervention, "\n\n")

# Print key statistics (dplyr is needed for the pipe and verbs below)
library(dplyr)
summary_table <- plot_data %>%
  select(delay_label, delay_type, excess_cases, ci_lower, ci_upper) %>%
  mutate(
    excess_cases_ci = sprintf("%.1f (%.1f to %.1f)", excess_cases, ci_lower, ci_upper)
  ) %>%
  select(delay_label, delay_type, excess_cases_ci)

colnames(summary_table) <- c("Delay (weeks)", "Type", "Excess Cases (95% CI)")

print(summary_table)

cat("\nNote: Negative delays indicate treatment started before screening\n")
cat("Positive delays indicate treatment started after screening\n")
cat("Results calculated using your actual GAM models and Monte Carlo approach\n")

Comparison of the linear pre-curve to the main (square-root pre-curve) results:

# ——————————————————————————————————————————————————————
# Calculate ±X% Change in Early Benefit: Square-root vs Linear Pre-curve
# ——————————————————————————————————————————————————————

library(dplyr)
library(readr)

# ——————————————————————————————————————————————————————
# 1) Load Original Results (Square-root pre-curve) from CSV
# ——————————————————————————————————————————————————————

# Load the original (square-root pre-curve) results from the saved CSV file
original_results <- read_csv("TBCHAMP_tb_delays_analysis_table.csv")

# Clean and parse the original results
parse_ci_value <- function(ci_string) {
  # Extract the main value from "X.X (Y.Y to Z.Z)" format
  as.numeric(sub(" \\(.*\\)", "", ci_string))
}
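
# Example on a toy string in the saved-table format (not a real result):
parse_ci_value("3.2 (1.1 to 5.4)")  # returns 3.2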

original_clean <- original_results %>%
  mutate(
    delay_weeks = case_when(
      `Delay (weeks)` == "Instant TPT" ~ -999,
      `Delay (weeks)` == "Control" ~ 999,
      TRUE ~ as.numeric(`Delay (weeks)`)
    ),
    orig_excess_cases = parse_ci_value(`Excess Cases per 100,000 (95% CI)`),
    orig_incidence_rate = parse_ci_value(`Incidence Rate per 100 Person-Yrs (95% CI)`),
    orig_excess_rate = parse_ci_value(`Excess Incidence Rate per 100 Person-Yrs (95% CI)`),
    orig_relative_risk = `Relative Risk`
  ) %>%
  select(delay_weeks, orig_excess_cases, orig_incidence_rate, orig_excess_rate, orig_relative_risk)

cat("Original results (square-root pre-curve) loaded successfully\n")
cat("Number of scenarios:", nrow(original_clean), "\n\n")

# ——————————————————————————————————————————————————————
# 2) Extract Linear Pre-curve Results from Current Analysis
# ——————————————————————————————————————————————————————

# Check if results_list exists
if(!exists("results_list")) {
  stop("Error: results_list not found. Please run your linear pre-curve analysis first.")
}

cat("Checking results_list structure...\n")
cat("Number of scenarios in results_list:", length(results_list), "\n")
cat("Scenario names:", paste(names(results_list), collapse = ", "), "\n\n")

# Convert results_list to a data frame with error checking
linear_results <- data.frame()

for(scenario_name in names(results_list)) {
  cat("Processing scenario:", scenario_name, "\n")
  
  r <- results_list[[scenario_name]]
  
  # Check if r is a list and has the expected elements
  if(!is.list(r)) {
    cat("  Warning: Scenario", scenario_name, "is not a list. Skipping.\n")
    next
  }
  
  # Check for required elements
  required_elements <- c("excess_cases_mean", "incidence_rate_per_100py", "excess_rate_per_100py", "relative_risk")
  missing_elements <- required_elements[!required_elements %in% names(r)]
  
  if(length(missing_elements) > 0) {
    cat("  Warning: Scenario", scenario_name, "missing elements:", paste(missing_elements, collapse = ", "), ". Skipping.\n")
    next
  }
  
  # Extract delay value
  if(scenario_name == "Instant TPT") {
    delay_weeks <- -999
  } else if(scenario_name == "Control") {
    delay_weeks <- 999
  } else {
    # Parse delay weeks; as.numeric() returns NA (with a warning) on failure,
    # so suppress the warning and skip any unparseable scenario names
    delay_weeks <- suppressWarnings(as.numeric(sub(" wk", "", scenario_name)))
    if(is.na(delay_weeks)) {
      cat("  Warning: Could not parse delay weeks from", scenario_name, ". Skipping.\n")
      next
    }
  }
  
  # Check that all values are scalar (length 1)
  values_to_check <- list(
    excess_cases_mean = r$excess_cases_mean,
    incidence_rate_per_100py = r$incidence_rate_per_100py,
    excess_rate_per_100py = r$excess_rate_per_100py,
    relative_risk = r$relative_risk
  )
  
  # Check each value
  all_ok <- TRUE
  for(val_name in names(values_to_check)) {
    val <- values_to_check[[val_name]]
    if(is.null(val) || length(val) != 1 || !is.numeric(val)) {
      cat("  Warning: Invalid", val_name, "for scenario", scenario_name, 
          "(length:", length(val), ", class:", class(val), "). Skipping.\n")
      all_ok <- FALSE
      break
    }
  }
  
  if(!all_ok) next
  
  # Convert to per 100,000 to match original scale
  tryCatch({
    new_row <- data.frame(
      scenario = scenario_name,
      delay_weeks = delay_weeks,
      linear_excess_cases = r$excess_cases_mean *
        (100000 / ifelse(scenario_name == "Control", n_control, n_intervention)), # scale to per 100,000 using the sample size that produced excess_cases_mean
      linear_incidence_rate = r$incidence_rate_per_100py,
      linear_excess_rate = r$excess_rate_per_100py,
      linear_relative_risk = r$relative_risk,
      stringsAsFactors = FALSE
    )
    
    linear_results <- rbind(linear_results, new_row)
    cat("  Successfully processed scenario:", scenario_name, "\n")
    
  }, error = function(e) {
    cat("  Error creating data frame for scenario", scenario_name, ":", e$message, "\n")
  })
}

cat("\nLinear pre-curve results extraction complete\n")
cat("Successfully processed scenarios:", nrow(linear_results), "\n")

if(nrow(linear_results) == 0) {
  stop("Error: No scenarios were successfully processed. Please check your results_list structure.")
}

cat("Processed scenarios:", paste(linear_results$scenario, collapse = ", "), "\n\n")

# ——————————————————————————————————————————————————————
# 3) Merge and Calculate Percentage Changes
# ——————————————————————————————————————————————————————

# Merge the datasets
comparison_data <- merge(original_clean, linear_results, by = "delay_weeks", all = TRUE)

# Calculate percentage changes
comparison_data <- comparison_data %>%
  mutate(
    # Percentage change in excess cases
    pct_change_excess_cases = ifelse(orig_excess_cases != 0, 
                                   ((linear_excess_cases - orig_excess_cases) / abs(orig_excess_cases)) * 100,
                                   NA),
    
    # Percentage change in incidence rate
    pct_change_incidence_rate = ifelse(orig_incidence_rate != 0,
                                     ((linear_incidence_rate - orig_incidence_rate) / orig_incidence_rate) * 100,
                                     NA),
    
    # Percentage change in excess rate
    pct_change_excess_rate = ifelse(orig_excess_rate != 0,
                                  ((linear_excess_rate - orig_excess_rate) / abs(orig_excess_rate)) * 100,
                                  NA),
    
    # Change in relative risk (absolute difference)
    change_relative_risk = linear_relative_risk - orig_relative_risk,
    
    # Classify delay type
    delay_type = case_when(
      delay_weeks == -999 ~ "Instant TPT",
      delay_weeks == 999 ~ "Control", 
      delay_weeks < 0 ~ "Pre-screen (Early Benefit)",
      delay_weeks >= 0 ~ "Post-screen"
    )
  ) %>%
  arrange(delay_weeks)
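
# Worked example of the percentage-change formula with toy numbers: if the
# square-root pre-curve gave 4.0 excess cases and the linear pre-curve gives
# 5.0, the change is ((5.0 - 4.0) / abs(4.0)) * 100 = +25%
((5.0 - 4.0) / abs(4.0)) * 100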

# ——————————————————————————————————————————————————————
# 4) Focus on Early Benefit Scenarios (Negative Delays)
# ——————————————————————————————————————————————————————

early_benefit_scenarios <- comparison_data %>%
  filter(delay_weeks < 0 & delay_weeks != -999) %>%  # Exclude Instant TPT baseline
  select(delay_weeks, scenario, 
         orig_excess_cases, linear_excess_cases, pct_change_excess_cases,
         orig_excess_rate, linear_excess_rate, pct_change_excess_rate,
         orig_relative_risk, linear_relative_risk, change_relative_risk)

cat("=== EARLY BENEFIT ANALYSIS: Square-root vs Linear Pre-curve ===\n\n")

cat("EARLY BENEFIT SCENARIOS (Pre-screen Treatment):\n")
cat("Delay scenarios where treatment starts BEFORE screening (negative weeks)\n\n")

# Summary statistics for early benefit changes
if(nrow(early_benefit_scenarios) > 0) {
  
  # Overall summary
  cat("SUMMARY STATISTICS FOR EARLY BENEFIT SCENARIOS:\n")
  cat("================================================\n")
  cat("Number of early benefit scenarios:", nrow(early_benefit_scenarios), "\n")
  cat("Delay range:", min(early_benefit_scenarios$delay_weeks), "to", max(early_benefit_scenarios$delay_weeks), "weeks\n\n")
  
  # Excess cases changes
  cat("EXCESS CASES (per 100,000) CHANGES:\n")
  cat("Mean change:", sprintf("%.1f%%", mean(early_benefit_scenarios$pct_change_excess_cases, na.rm = TRUE)), "\n")
  cat("Range:", sprintf("%.1f%% to %.1f%%", 
                      min(early_benefit_scenarios$pct_change_excess_cases, na.rm = TRUE),
                      max(early_benefit_scenarios$pct_change_excess_cases, na.rm = TRUE)), "\n")
  
  # Excess rate changes  
  cat("\nEXCESS INCIDENCE RATE CHANGES:\n")
  cat("Mean change:", sprintf("%.1f%%", mean(early_benefit_scenarios$pct_change_excess_rate, na.rm = TRUE)), "\n")
  cat("Range:", sprintf("%.1f%% to %.1f%%",
                      min(early_benefit_scenarios$pct_change_excess_rate, na.rm = TRUE),
                      max(early_benefit_scenarios$pct_change_excess_rate, na.rm = TRUE)), "\n")
  
  # Relative risk changes
  cat("\nRELATIVE RISK CHANGES (absolute):\n")
  cat("Mean change:", sprintf("%.3f", mean(early_benefit_scenarios$change_relative_risk, na.rm = TRUE)), "\n")
  cat("Range:", sprintf("%.3f to %.3f",
                      min(early_benefit_scenarios$change_relative_risk, na.rm = TRUE),
                      max(early_benefit_scenarios$change_relative_risk, na.rm = TRUE)), "\n\n")
  
} else {
  cat("No early benefit scenarios found in the data.\n\n")
}

# ——————————————————————————————————————————————————————
# 5) Detailed Results Table
# ——————————————————————————————————————————————————————

cat("DETAILED COMPARISON TABLE:\n")
cat("==========================\n\n")

# Create detailed comparison table
detailed_table <- early_benefit_scenarios %>%
  mutate(
    `Delay (weeks)` = delay_weeks,
    `Original Excess Cases` = sprintf("%.1f", orig_excess_cases),
    `Linear Excess Cases` = sprintf("%.1f", linear_excess_cases),
    `% Change Cases` = sprintf("%+.1f%%", pct_change_excess_cases),
    `Original Excess Rate` = sprintf("%.2f", orig_excess_rate),
    `Linear Excess Rate` = sprintf("%.2f", linear_excess_rate),
    `% Change Rate` = sprintf("%+.1f%%", pct_change_excess_rate),
    `Change in RR` = sprintf("%+.3f", change_relative_risk)
  ) %>%
  select(`Delay (weeks)`, `Original Excess Cases`, `Linear Excess Cases`, `% Change Cases`,
         `Original Excess Rate`, `Linear Excess Rate`, `% Change Rate`, `Change in RR`)

print(detailed_table)

# ——————————————————————————————————————————————————————
# 6) Key Findings Summary
# ——————————————————————————————————————————————————————

cat("\n=== KEY FINDINGS ===\n")

if(nrow(early_benefit_scenarios) > 0) {
  
  # Find scenarios with largest changes
  max_cases_change_idx <- which.max(abs(early_benefit_scenarios$pct_change_excess_cases))
  max_rate_change_idx <- which.max(abs(early_benefit_scenarios$pct_change_excess_rate))
  
  cat("\nLARGEST CHANGES IN EARLY BENEFIT:\n")
  cat("• Excess cases: ", sprintf("%+.1f%%", early_benefit_scenarios$pct_change_excess_cases[max_cases_change_idx]),
      " at ", early_benefit_scenarios$delay_weeks[max_cases_change_idx], " weeks\n", sep="")
  cat("• Excess rate: ", sprintf("%+.1f%%", early_benefit_scenarios$pct_change_excess_rate[max_rate_change_idx]),
      " at ", early_benefit_scenarios$delay_weeks[max_rate_change_idx], " weeks\n", sep="")
  
  # Overall interpretation
  mean_change <- mean(early_benefit_scenarios$pct_change_excess_cases, na.rm = TRUE)
  
  cat("\nINTERPRETATION:\n")
  if(abs(mean_change) < 5) {
    cat("• Minimal impact: Linear vs square-root pre-curve shows <5% average change\n")
  } else if(abs(mean_change) < 15) {
    cat("• Moderate impact: Linear vs square-root pre-curve shows ", sprintf("%.1f%%", abs(mean_change)), " average change\n", sep="")
  } else {
    cat("• Substantial impact: Linear vs square-root pre-curve shows ", sprintf("%.1f%%", abs(mean_change)), " average change\n", sep="")
  }
  
  if(mean_change > 0) {
    cat("• Direction: Linear pre-curve generally shows HIGHER excess cases (worse outcomes)\n")
  } else if(mean_change < 0) {
    cat("• Direction: Linear pre-curve generally shows LOWER excess cases (better outcomes)\n")
  } else {
    cat("• Direction: Minimal difference between curve shapes\n")
  }
  
} else {
  cat("No early benefit scenarios available for comparison.\n")
}

cat("\n=== ANALYSIS COMPLETE ===\n")

# ——————————————————————————————————————————————————————
# 7) Optional: Save Results
# ——————————————————————————————————————————————————————

# Uncomment to save results
# write_csv(comparison_data, "curve_shape_comparison_full.csv")
# write_csv(early_benefit_scenarios, "early_benefit_curve_comparison.csv")

# Collect the comparison data for further analysis (this code runs at the top
# level of the chunk, so assign to an object rather than calling return())
comparison_results <- list(
  full_comparison = comparison_data,
  early_benefit_scenarios = early_benefit_scenarios,
  summary_stats = list(
    n_scenarios = nrow(early_benefit_scenarios),
    mean_excess_cases_change = mean(early_benefit_scenarios$pct_change_excess_cases, na.rm = TRUE),
    mean_excess_rate_change = mean(early_benefit_scenarios$pct_change_excess_rate, na.rm = TRUE),
    mean_rr_change = mean(early_benefit_scenarios$change_relative_risk, na.rm = TRUE)
  )
)
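
# The collected results can then be inspected directly, e.g.:
# comparison_results$summary_stats$mean_excess_cases_change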

Plot of curves with linear pre-curve (Figure 10):

library(mgcv)

# ——————————————————————————————————————————————————————
# Rebuild the delay curves with the original method over the full 54-week range
# ——————————————————————————————————————————————————————

# Input Parameters - TBCHAMP
time0 <- 0:54  # full 54-week range, matching the main analysis
p0 <- 32/1120
back_wk <- 26

# Use the k=5 GCV GAM models - TBCHAMP
Fc0 <- predict(control_gam_k5_GCV_TBCHAMP,
               newdata = data.frame(time = time0))
Fi0 <- predict(intervention_gam_k5_GCV_TBCHAMP,
               newdata = data.frame(time = time0))

Fc0 <- p0 + (1 - p0)*Fc0
Fi0 <- p0 + (1 - p0)*Fi0

# Discrete hazards: h_t = (F_t - F_{t-1}) / (1 - F_{t-1}), with head(c(0, F), -1)
# giving the lagged values F_{t-1}
h_ctrl <- diff(c(0, Fc0)) / (1 - head(c(0, Fc0), -1))
h_int  <- diff(c(0, Fi0)) / (1 - head(c(0, Fi0), -1))

# ——————————————————————————————————————————————————————
# Updated delays and time axis
# ——————————————————————————————————————————————————————

nominal_delays <- c(-26, -20, -10, -5, 5, 10, 20) # TBCHAMP delays
time_ext <- seq(-back_wk, 54, by = 1)

# ——————————————————————————————————————————————————————
# Curve builder from the main analysis, modified to use a LINEAR pre-curve
# ——————————————————————————————————————————————————————

build_curve_linear <- function(d) {
  n    <- length(time_ext)
  Fcum <- numeric(n)

  # find index of screen‐time t0 = 0
  i0   <- which(time_ext == 0)

  # 1) PRE‐SCREEN: LINEAR ramp from (–back_wk → 0) up to p0
  s_up <- time_ext[1:(i0-1)] + back_wk
  Fcum[1:(i0-1)] <- p0 * (s_up / back_wk)  # LINEAR: removed sqrt()

  # 2) AT SCREEN
  Fcum[i0] <- p0
  Sprev    <- 1 - p0

  # 3) POST‐SCREEN: control hazard until t0 ≤ d, then intervention hazard
  for(i in (i0+1):n) {
    t0 <- time_ext[i]
    if (d > 0 && t0 <= d) {
      h_t <- h_ctrl[t0 + 1]
    } else {
      idx <- if (d > 0) (t0 - d + 1) else (t0 + 1)
      h_t <- h_int[ min(idx, length(h_int)) ]
    }
    Snew     <- Sprev * (1 - h_t)
    Fcum[i]  <- 1 - Snew
    Sprev    <- Snew
  }

  Fcum
}
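
# Quick sanity check (optional): the pure-control curve should start at 0 at
# week -26 and pass through p0 at week 0; post-screen values can wiggle
# slightly because the GAM fits are not constrained to be monotone
# chk <- build_curve_linear(Inf)
# c(chk[1], chk[which(time_ext == 0)], p0)  # expect 0, p0, p0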

# ——————————————————————————————————————————————————————
# Build all curves using original method
# ——————————————————————————————————————————————————————

# 1) Control & Instant curves
ctrl_ext <- build_curve_linear( Inf)   # pure control
inst_ext <- build_curve_linear(-Inf)   # pure intervention

# 2) separate positive and negative delays
pos_delays <- nominal_delays[nominal_delays >= 0]
neg_delays <- nominal_delays[nominal_delays < 0]

# 3) positive delays:
pos_mat <- if(length(pos_delays) > 0) sapply(pos_delays, function(d) build_curve_linear(d)) else NULL

# 4) negative delays: LINEAR ramp until delay point, then copy instant TPT curve
neg_mat <- if(length(neg_delays) > 0) {
  sapply(neg_delays, function(d) {
    n    <- length(time_ext)
    Fcum <- numeric(n)
    
    # find index of delay point d
    id_match <- which(time_ext == d)
    if(length(id_match) == 0) {
      id <- which.min(abs(time_ext - d))
    } else {
      id <- id_match[1]
    }
    
    i0   <- which(time_ext == 0)  # screen time index
    
    # Before delay point: follow the LINEAR ramp
    if(id > 1) {
      s_up <- time_ext[1:(id-1)] + back_wk
      s_up[s_up < 0] <- 0  # ensure non-negative
      Fcum[1:(id-1)] <- p0 * (s_up / back_wk)  # LINEAR: removed sqrt()
    }
    
    # At delay point: set starting value from LINEAR ramp
    s_delay <- max(0, d + back_wk)
    start_value <- p0 * (s_delay / back_wk)  # LINEAR: removed sqrt()
    Fcum[id] <- start_value
    
    # After delay point: copy the instant TPT curve pattern
    if(id < n) {
      # Find the baseline instant TPT value at time 0 (our reference point)
      inst_at_zero <- inst_ext[i0]
      
      for(i in (id+1):n) {
        # Calculate how many steps from delay point
        steps_from_delay <- i - id
        
        # Find corresponding point in instant TPT curve (relative to time 0)
        inst_index <- i0 + steps_from_delay
        
        if(inst_index <= length(inst_ext) && inst_index > 0) {
          # Get the instant TPT value at this point
          inst_value <- inst_ext[inst_index]
          
          # Calculate the change from instant TPT baseline
          inst_change <- inst_value - inst_at_zero
          
          # Apply the same change to our starting value
          Fcum[i] <- start_value + inst_change
        } else if(inst_index > length(inst_ext)) {
          # Use the last available change and keep it constant
          last_inst_value <- inst_ext[length(inst_ext)]
          last_change <- last_inst_value - inst_at_zero
          Fcum[i] <- start_value + last_change
        } else {
          Fcum[i] <- start_value
        }
      }
    }
    
    Fcum
  })
} else NULL

# Combine all delay matrices
if(!is.null(neg_mat) && !is.null(pos_mat)) {
  delay_mat <- cbind(neg_mat, pos_mat)
  colnames(delay_mat) <- c(
    paste0(neg_delays, " wk delay"),
    paste0(pos_delays, " wk delay"))
} else if(!is.null(neg_mat)) {
  delay_mat <- neg_mat
  colnames(delay_mat) <- paste0(neg_delays, " wk delay")
} else if(!is.null(pos_mat)) {
  delay_mat <- pos_mat
  colnames(delay_mat) <- paste0(pos_delays, " wk delay")
} else {
  delay_mat <- NULL
}

# ——————————————————————————————————————————————————————
# CREATE VISUALIZATION
# ——————————————————————————————————————————————————————

# Combine all curves
if(!is.null(delay_mat)) {
  all_mat <- cbind(
    Control       = ctrl_ext,
    `Instant TPT` = inst_ext,
    delay_mat
  )
} else {
  all_mat <- cbind(
    Control       = ctrl_ext,
    `Instant TPT` = inst_ext
  )
}

# Set up compressed x-axis
neg_width <- 30
x_plot <- ifelse(
  time_ext < 0,
  neg_width * (time_ext + back_wk) / back_wk,  # −26…0 → 0…30
  neg_width + pmin(time_ext, 54)              #   0…54 → 30…84
)
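
# Spot-check of the compressed axis mapping (illustrative): week -26 maps to 0,
# week 0 to 30, and week 54 to 84 on the plotting axis
x_plot[match(c(-26, 0, 54), time_ext)]  # 0 30 84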

# Subset to t ≤ 26 wk
keep   <- which(time_ext <= 26)
x_sub   <- x_plot[keep]
mat_sub <- all_mat[keep, , drop=FALSE]

# Convert to percentages
mat_sub_pct <- mat_sub * 100

# Create gradient colors from better (green) to worse (red) delays
n_delay_cols <- if(!is.null(delay_mat)) ncol(delay_mat) else 0
if(n_delay_cols > 0) {
  delay_colors <- colorRampPalette(c("#00AA00", "#FFAA00", "#FF0000"))(n_delay_cols)
  cols <- c("red", "blue", delay_colors)
} else {
  cols <- c("red", "blue")
}

ltys <- c(1, 1, rep(2, n_delay_cols))
yr   <- range(mat_sub_pct)

# Create the plot
matplot(
  x_sub, mat_sub_pct,
  type="l", col=cols, lty=ltys, lwd=2,
  xaxt="n", ylim=yr,
  xlab="Weeks since randomisation",
  ylab="Cumulative incidence of TB (%)",
  main="Sensitivity Analysis: TBCHAMP Delayed-TPT scenarios with linear pre-curve",
  cex.axis = 1.0,  # default-size axis tick labels
  cex.lab = 1.0    # default-size axis titles
)

# Custom x-axis ticks
neg_ticks <- seq(-back_wk, 0, by=2)     
neg_at    <- neg_width * (neg_ticks + back_wk) / back_wk
pos_ticks <- seq(0, 26, by=2) 
pos_at    <- neg_width + pos_ticks

axis(1,
     at     = c(neg_at, pos_at),
     labels = c(neg_ticks, pos_ticks),
     cex.axis = 1.0)

# Vertical line at t=0
abline(v=neg_width, lty=3, col="gray50")

# Legend with larger text
legend("topleft",
       legend    = colnames(all_mat),
       col       = cols,
       lty       = ltys,
       lwd       = 2,
       bty       = "n",
       ncol      = 1,        
       cex       = 1.2,      # Larger legend text
       y.intersp = 0.8,      
       x.intersp = 0.5,      
       inset     = c(0.02, 0.02))
cat("TBCHAMP plotting complete with original curve method!\n")