Comparison of Different Kenya CVD Model Structures: Structure 1 (Direct Risk) vs Structure 2 (Composite Risk) vs Structure 3 (Globorisk)

Introduction This analysis compares three health state modeling approaches:

Structure 1 (Direct Risk Computation): Stored in m_States

Structure 2 (Composite Risk): Stored in m_States_risk

Structure 3 (Globorisk): Stored in m_States_globorisk

All models simulate health state trajectories for 100,000 individuals over 61 cycles (cycles 0-60), with states including:

NE: No event (healthy)

MI: Myocardial infarction

ST: Stroke

PS: Post-stroke state

PM: Post-MI state

D: Death

Load the Data (No intervention Scenario)

# Load the datasets
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/m_States.rda') # No intervention
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/m_States_risk.rda') # No intervention
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/m_States_globorisk.rda') # No intervention


# Check the structure of the datasets
cat("Structure 1 (Direct Risk):\n")

## Structure 1 (Direct Risk):

str(m_States)

##  chr [1:100000, 1:61] "NE" "NE" "NE" "NE" "NE" "NE" "NE" "NE" "NE" "NE" ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:100000] "ind_1" "ind_2" "ind_3" "ind_4" ...
##   ..$ : chr [1:61] "cycle_0" "cycle_1" "cycle_2" "cycle_3" ...

cat("\nStructure 2 (Composite Risk):\n")

## 
## Structure 2 (Composite Risk):

str(m_States_risk)

##  chr [1:100000, 1:61] "NE" "NE" "NE" "NE" "NE" "NE" "NE" "NE" "NE" "NE" ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:100000] "ind_1" "ind_2" "ind_3" "ind_4" ...
##   ..$ : chr [1:61] "cycle_0" "cycle_1" "cycle_2" "cycle_3" ...

cat("\nStructure 3 (Globorisk):\n")

## 
## Structure 3 (Globorisk):

str(m_States_globorisk)

##  chr [1:100000, 1:61] "NE" "NE" "NE" "NE" "NE" "NE" "NE" "NE" "NE" "NE" ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:100000] "ind_1" "ind_2" "ind_3" "ind_4" ...
##   ..$ : chr [1:61] "cycle_0" "cycle_1" "cycle_2" "cycle_3" ...

Basic Structure Comparison

cat("========== 1. BASIC STRUCTURE COMPARISON ==========\n")

## ========== 1. BASIC STRUCTURE COMPARISON ==========

cat("Structure 1 (Direct Risk Computation):\n")

## Structure 1 (Direct Risk Computation):

cat(sprintf("  Dimensions: %d x %d\n", nrow(m_States), ncol(m_States)))

##   Dimensions: 100000 x 61

cat("\nStructure 2 (Composite Risk):\n")

## 
## Structure 2 (Composite Risk):

cat(sprintf("  Dimensions: %d x %d\n", nrow(m_States_risk), ncol(m_States_risk)))

##   Dimensions: 100000 x 61

cat("\nStructure 3 (Globorisk):\n")

## 
## Structure 3 (Globorisk):

cat(sprintf("  Dimensions: %d x %d\n", nrow(m_States_globorisk), ncol(m_States_globorisk)))

##   Dimensions: 100000 x 61

# Check dimensions consistency
dimensions_match <- identical(dim(m_States), dim(m_States_risk)) && 
                    identical(dim(m_States), dim(m_States_globorisk))
cat(sprintf("\nAll models have same dimensions: %s\n", dimensions_match))

## 
## All models have same dimensions: TRUE

State Space Comparison

states_struct1 <- unique(as.vector(m_States))
states_struct2 <- unique(as.vector(m_States_risk))
states_struct3 <- unique(as.vector(m_States_globorisk))

cat("States in each model:\n")

## States in each model:

cat("Structure 1 (Direct Risk):", paste(sort(states_struct1), collapse = ", "), "\n")

## Structure 1 (Direct Risk): D, MI, NE, PM, PS, ST

cat("Structure 2 (Composite Risk):", paste(sort(states_struct2), collapse = ", "), "\n")

## Structure 2 (Composite Risk): D, MI, NE, PM, PS, ST

cat("Structure 3 (Globorisk):", paste(sort(states_struct3), collapse = ", "), "\n")

## Structure 3 (Globorisk): D, MI, NE, PM, PS, ST

common_states <- Reduce(intersect, list(states_struct1, states_struct2, states_struct3))
cat(sprintf("\nCommon states across all models: %d\n", length(common_states)))

## 
## Common states across all models: 6

cat(paste(common_states, collapse = ", "), "\n")

## NE, D, ST, MI, PS, PM

Distribution Over Time

# Define time points to analyze
time_points <- c(1, 2, 5, 10, 20, 30, 61)  # cycle_0, cycle_1, cycle_4, etc.
time_points <- time_points[time_points <= ncol(m_States)]

# Store results for plotting
dist_results_all <- list()

for(t in time_points) {
  cycle_name <- colnames(m_States)[t]
  cycle_num <- t - 1
  
  cat(sprintf("\n### Cycle %d (%s)\n", cycle_num, cycle_name))
  
  dist_struct1 <- prop.table(table(m_States[, t]))
  dist_struct2 <- prop.table(table(m_States_risk[, t]))
  dist_struct3 <- prop.table(table(m_States_globorisk[, t]))
  
  # Create comparison table
  all_states <- sort(union(union(names(dist_struct1), names(dist_struct2)), names(dist_struct3)))
  comp_df <- data.frame(
    State = all_states,
    Structure1_Direct = sapply(all_states, function(s) ifelse(s %in% names(dist_struct1), dist_struct1[s], 0)),
    Structure2_Composite = sapply(all_states, function(s) ifelse(s %in% names(dist_struct2), dist_struct2[s], 0)),
    Structure3_Globorisk = sapply(all_states, function(s) ifelse(s %in% names(dist_struct3), dist_struct3[s], 0))
  )
  comp_df$Diff_1_2 <- comp_df$Structure1_Direct - comp_df$Structure2_Composite
  comp_df$Diff_1_3 <- comp_df$Structure1_Direct - comp_df$Structure3_Globorisk
  comp_df$Diff_2_3 <- comp_df$Structure2_Composite - comp_df$Structure3_Globorisk
  
  comp_df <- comp_df[order(comp_df$State), ]
  comp_df[, 2:4] <- round(comp_df[, 2:4], 4)
  comp_df[, 5:7] <- round(comp_df[, 5:7], 4)
  
  cat("\n**State Proportions:**\n\n")
  kable(comp_df[, c("State", "Structure1_Direct", "Structure2_Composite", "Structure3_Globorisk")], 
        caption = paste("Cycle", cycle_num, "State Distribution"),
        col.names = c("State", "Structure 1 (Direct)", "Structure 2 (Composite)", "Structure 3 (Globorisk)"),
        digits = 4) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
  
  # Store for plotting
  dist_results_all[[as.character(cycle_num)]] <- comp_df
}

## 
## ### Cycle 0 (cycle_0)
## 
## **State Proportions:**
## 
## 
## ### Cycle 1 (cycle_1)
## 
## **State Proportions:**
## 
## 
## ### Cycle 4 (cycle_4)
## 
## **State Proportions:**
## 
## 
## ### Cycle 9 (cycle_9)
## 
## **State Proportions:**
## 
## 
## ### Cycle 19 (cycle_19)
## 
## **State Proportions:**
## 
## 
## ### Cycle 29 (cycle_29)
## 
## **State Proportions:**
## 
## 
## ### Cycle 60 (cycle_60)
## 
## **State Proportions:**

3.1 Visualizing Distributions Over Time

# Prepare data for plotting
plot_data_all <- data.frame()
for(cycle in names(dist_results_all)) {
  df <- dist_results_all[[cycle]]
  df$Cycle <- as.numeric(cycle)
  plot_data_all <- rbind(plot_data_all, df)
}

# Melt data for faceted plotting
plot_data_melt <- reshape2::melt(plot_data_all, 
                       id.vars = c("Cycle", "State"), 
                       measure.vars = c("Structure1_Direct", "Structure2_Composite", "Structure3_Globorisk"),
                       variable.name = "Model", 
                       value.name = "Proportion")

# Recode model names
plot_data_melt$Model <- factor(plot_data_melt$Model,
                               levels = c("Structure1_Direct", "Structure2_Composite", "Structure3_Globorisk"),
                               labels = c("Structure 1 (Direct Risk)", "Structure 2 (Composite Risk)", "Structure 3 (Globorisk)"))

# Faceted plot by state
ggplot(plot_data_melt, aes(x = Cycle, y = Proportion, color = Model)) +
  geom_line(size = 1.2) +
  geom_point(size = 1.5) +
  facet_wrap(~ State, scales = "free_y", ncol = 3) +
  labs(title = "State Proportions Over Time by Model",
       x = "Cycle", y = "Proportion") +
  theme_minimal() +
  theme(legend.position = "bottom",
        strip.text = element_text(size = 12, face = "bold")) +
  scale_color_brewer(palette = "Set1")

3.2 Difference Plots

# Plot differences between models
diff_1_2 <- plot_data_all$Structure1_Direct - plot_data_all$Structure2_Composite
diff_1_3 <- plot_data_all$Structure1_Direct - plot_data_all$Structure3_Globorisk
diff_2_3 <- plot_data_all$Structure2_Composite - plot_data_all$Structure3_Globorisk

diff_data <- data.frame(
  Cycle = rep(plot_data_all$Cycle, 3),
  State = rep(plot_data_all$State, 3),
  Difference = c(diff_1_2, diff_1_3, diff_2_3),
  Comparison = factor(rep(c("Structure 1 (Direct) - Structure 2 (Composite)", 
                            "Structure 1 (Direct) - Structure 3 (Globorisk)", 
                            "Structure 2 (Composite) - Structure 3 (Globorisk)"), 
                          each = nrow(plot_data_all)))
)

ggplot(diff_data, aes(x = Cycle, y = Difference, color = State)) +
  geom_line(size = 1) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  facet_wrap(~ Comparison, ncol = 1, scales = "free_y") +
  labs(title = "Differences in State Proportions Between Models",
       x = "Cycle", y = "Difference in Proportion") +
  theme_minimal() +
  theme(legend.position = "bottom") +
  scale_color_brewer(palette = "Set2")

State Occupancy Analysis

# Calculate occupancy for all models
occupancy1 <- sapply(common_states, function(state) {
  rowSums(m_States == state, na.rm = TRUE)
})
occupancy2 <- sapply(common_states, function(state) {
  rowSums(m_States_risk == state, na.rm = TRUE)
})
occupancy3 <- sapply(common_states, function(state) {
  rowSums(m_States_globorisk == state, na.rm = TRUE)
})

# Create comprehensive occupancy summary
occupancy_summary <- data.frame(
  State = factor(common_states, levels = state_order),
  Structure1_Direct_Mean = colMeans(occupancy1),
  Structure1_Direct_Pct = 100 * colMeans(occupancy1) / ncol(m_States),
  Structure2_Composite_Mean = colMeans(occupancy2),
  Structure2_Composite_Pct = 100 * colMeans(occupancy2) / ncol(m_States),
  Structure3_Globorisk_Mean = colMeans(occupancy3),
  Structure3_Globorisk_Pct = 100 * colMeans(occupancy3) / ncol(m_States)
)

occupancy_summary <- occupancy_summary[order(occupancy_summary$State), ]
occupancy_summary[, 2:7] <- round(occupancy_summary[, 2:7], 2)

kable(occupancy_summary, 
      caption = "State Occupancy Statistics (Mean Cycles per Individual)",
      col.names = c("State", "Struct 1 Mean", "Struct 1 %", "Struct 2 Mean", "Struct 2 %", 
                    "Struct 3 Mean", "Struct 3 %"),
      digits = 2) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))

State Occupancy Statistics (Mean Cycles per Individual)
	State	Struct 1 Mean	Struct 1 %	Struct 2 Mean	Struct 2 %	Struct 3 Mean	Struct 3 %
NE	NE	20.48	33.57	18.71	30.68	19.83	32.51
MI	MI	0.16	0.27	0.23	0.38	0.20	0.33
ST	ST	0.18	0.29	0.22	0.37	0.20	0.32
PS	PS	0.57	0.93	1.05	1.72	0.71	1.17
PM	PM	0.40	0.66	0.78	1.29	0.55	0.91
D	D	39.21	64.28	40.00	65.57	39.51	64.76

4.1 Occupancy Visualization

# Prepare data for plotting
occupancy_plot <- data.frame(
  State = rep(occupancy_summary$State, 3),
  Mean_Cycles = c(occupancy_summary$Structure1_Direct_Mean, 
                  occupancy_summary$Structure2_Composite_Mean,
                  occupancy_summary$Structure3_Globorisk_Mean),
  Model = rep(c("Structure 1 (Direct Risk)", "Structure 2 (Composite Risk)", "Structure 3 (Globorisk)"), 
              each = nrow(occupancy_summary))
)

ggplot(occupancy_plot, aes(x = State, y = Mean_Cycles, fill = Model)) +
  geom_bar(stat = "identity", position = "dodge", alpha = 0.8) +
  labs(title = "Mean Cycles Spent in Each State",
       x = "Health State", y = "Mean Cycles per Individual") +
  theme_minimal() +
  theme(legend.position = "bottom") +
  scale_fill_brewer(palette = "Set1")

Time-to-Event Analysis

# Time to event function with NA handling
time_to_event <- function(mat, state) {
  result <- apply(mat, 1, function(row) {
    idx <- which(row == state)[1]
    if(length(idx) == 0 || is.na(idx)) {
      return(ncol(mat) + 1)  # Never experience the event
    } else {
      return(idx)
    }
  })
  return(result)
}

# Calculate for all events
dead_state <- "D"
mi_state <- "MI"
st_state <- "ST"

# Death
ttd1 <- time_to_event(m_States, dead_state)
ttd2 <- time_to_event(m_States_risk, dead_state)
ttd3 <- time_to_event(m_States_globorisk, dead_state)

# MI
ttmi1 <- time_to_event(m_States, mi_state)
ttmi2 <- time_to_event(m_States_risk, mi_state)
ttmi3 <- time_to_event(m_States_globorisk, mi_state)

# ST
ttst1 <- time_to_event(m_States, st_state)
ttst2 <- time_to_event(m_States_risk, st_state)
ttst3 <- time_to_event(m_States_globorisk, st_state)

# Identify individuals who ever experience events
ever_mi1 <- ttmi1 <= ncol(m_States)
ever_mi2 <- ttmi2 <= ncol(m_States)
ever_mi3 <- ttmi3 <= ncol(m_States)

ever_st1 <- ttst1 <= ncol(m_States)
ever_st2 <- ttst2 <= ncol(m_States)
ever_st3 <- ttst3 <= ncol(m_States)

# Create death summary
death_summary <- data.frame(
  Model = c("Structure 1 (Direct)", "Structure 2 (Composite)", "Structure 3 (Globorisk)"),
  Mean_Survival = c(mean(ttd1), mean(ttd2), mean(ttd3)),
  Median_Survival = c(median(ttd1), median(ttd2), median(ttd3)),
  SD_Survival = c(sd(ttd1), sd(ttd2), sd(ttd3))
)
death_summary[, 2:4] <- round(death_summary[, 2:4], 1)

kable(death_summary, 
      caption = "Time to Death Statistics",
      col.names = c("Model", "Mean (Cycles)", "Median (Cycles)", "SD"),
      digits = 1) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))

Time to Death Statistics
Model	Mean (Cycles)	Median (Cycles)	SD
Structure 1 (Direct)	22.8	22	12.0
Structure 2 (Composite)	22.0	21	12.0
Structure 3 (Globorisk)	22.5	22	11.9

# Create MI summary
mi_summary <- data.frame(
  Model = c("Structure 1 (Direct)", "Structure 2 (Composite)", "Structure 3 (Globorisk)"),
  Ever_MI_Pct = c(100 * mean(ever_mi1), 100 * mean(ever_mi2), 100 * mean(ever_mi3)),
  Mean_Time_MI = c(mean(ttmi1[ever_mi1]), mean(ttmi2[ever_mi2]), mean(ttmi3[ever_mi3])),
  Median_Time_MI = c(median(ttmi1[ever_mi1]), median(ttmi2[ever_mi2]), median(ttmi3[ever_mi3]))
)
mi_summary[, 2] <- round(mi_summary[, 2], 1)
mi_summary[, 3:4] <- round(mi_summary[, 3:4], 1)

kable(mi_summary, 
      caption = "MI Event Statistics (Among Those Who Experience MI)",
      col.names = c("Model", "Ever MI (%)", "Mean Time (Cycles)", "Median Time (Cycles)")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))

MI Event Statistics (Among Those Who Experience MI)
Model	Ever MI (%)	Mean Time (Cycles)	Median Time (Cycles)
Structure 1 (Direct)	12.2	24.2	24
Structure 2 (Composite)	14.9	15.5	14
Structure 3 (Globorisk)	14.8	21.7	21

# Create ST summary
st_summary <- data.frame(
  Model = c("Structure 1 (Direct)", "Structure 2 (Composite)", "Structure 3 (Globorisk)"),
  Ever_ST_Pct = c(100 * mean(ever_st1), 100 * mean(ever_st2), 100 * mean(ever_st3)),
  Mean_Time_ST = c(mean(ttst1[ever_st1]), mean(ttst2[ever_st2]), mean(ttst3[ever_st3])),
  Median_Time_ST = c(median(ttst1[ever_st1]), median(ttst2[ever_st2]), median(ttst3[ever_st3]))
)
st_summary[, 2] <- round(st_summary[, 2], 1)
st_summary[, 3:4] <- round(st_summary[, 3:4], 1)

kable(st_summary, 
      caption = "ST Event Statistics (Among Those Who Experience ST)",
      col.names = c("Model", "Ever ST (%)", "Mean Time (Cycles)", "Median Time (Cycles)")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))

ST Event Statistics (Among Those Who Experience ST)
Model	Ever ST (%)	Mean Time (Cycles)	Median Time (Cycles)
Structure 1 (Direct)	15.3	23.0	23
Structure 2 (Composite)	17.8	15.6	14
Structure 3 (Globorisk)	16.4	21.1	20

Survival curves

# Calculate survival curves
cycles <- 0:(ncol(m_States)-1)

# MI-free survival
mi_free1 <- sapply(1:ncol(m_States), function(t) mean(ttmi1 > t))
mi_free2 <- sapply(1:ncol(m_States), function(t) mean(ttmi2 > t))
mi_free3 <- sapply(1:ncol(m_States), function(t) mean(ttmi3 > t))

# ST-free survival
st_free1 <- sapply(1:ncol(m_States), function(t) mean(ttst1 > t))
st_free2 <- sapply(1:ncol(m_States), function(t) mean(ttst2 > t))
st_free3 <- sapply(1:ncol(m_States), function(t) mean(ttst3 > t))

# Overall survival
survival1 <- sapply(1:ncol(m_States), function(t) mean(ttd1 > t))
survival2 <- sapply(1:ncol(m_States), function(t) mean(ttd2 > t))
survival3 <- sapply(1:ncol(m_States), function(t) mean(ttd3 > t))

# Create layout for multiple plots
par(mfrow = c(1, 3), mar = c(4, 4, 4, 2))

# MI-free survival
plot(cycles, mi_free1, type = "l", col = "blue", 
     ylim = c(0, 1), xlab = "Cycle", ylab = "Event-Free Probability",
     main = "MI-Free Survival", lwd = 2)
lines(cycles, mi_free2, col = "red", lwd = 2)
lines(cycles, mi_free3, col = "darkgreen", lwd = 2)
legend("topright", legend = c("Structure 1 (Direct)", "Structure 2 (Composite)", "Structure 3 (Globorisk)"), 
       col = c("blue", "red", "darkgreen"), lty = 1, lwd = 2, cex = 0.8)
grid()

# ST-free survival
plot(cycles, st_free1, type = "l", col = "blue", 
     ylim = c(0, 1), xlab = "Cycle", ylab = "Event-Free Probability",
     main = "ST-Free Survival", lwd = 2)
lines(cycles, st_free2, col = "red", lwd = 2)
lines(cycles, st_free3, col = "darkgreen", lwd = 2)
legend("topright", legend = c("Structure 1 (Direct)", "Structure 2 (Composite)", "Structure 3 (Globorisk)"), 
       col = c("blue", "red", "darkgreen"), lty = 1, lwd = 2, cex = 0.8)
grid()

# Overall survival
plot(cycles, survival1, type = "l", col = "blue", 
     ylim = c(0, 1), xlab = "Cycle", ylab = "Survival Probability",
     main = "Overall Survival", lwd = 2)
lines(cycles, survival2, col = "red", lwd = 2)
lines(cycles, survival3, col = "darkgreen", lwd = 2)
legend("topright", legend = c("Structure 1 (Direct)", "Structure 2 (Composite)", "Structure 3 (Globorisk)"), 
       col = c("blue", "red", "darkgreen"), lty = 1, lwd = 2, cex = 0.8)
grid()

# Reset plotting parameters
par(mfrow = c(1, 1))

5.1 Compute and Visualize Time to Death (D) using survival curves

dead_state <- "D"

# Time to event function
time_to_event <- function(mat, state) {
  apply(mat, 1, function(row) {
    idx <- which(row == state)[1]
    ifelse(is.na(idx), ncol(mat) + 1, idx)
  })
}

ttd1 <- time_to_event(m_States, dead_state)
ttd2 <- time_to_event(m_States_risk, dead_state)
ttd3 <- time_to_event(m_States_globorisk, dead_state)

# Create summary table
death_summary <- data.frame(
  Model = c("Direct Risk", "Composite Risk", "Globorisk"),
  Mean_Time = c(mean(ttd1), mean(ttd2), mean(ttd3)),
  Median_Time = c(median(ttd1), median(ttd2), median(ttd3)),
  SD_Time = c(sd(ttd1), sd(ttd2), sd(ttd3)),
  Q1_Time = c(quantile(ttd1, 0.25), quantile(ttd2, 0.25), quantile(ttd3, 0.25)),
  Q3_Time = c(quantile(ttd1, 0.75), quantile(ttd2, 0.75), quantile(ttd3, 0.75))
)
# Survival curves
survival1 <- sapply(1:ncol(m_States), function(t) mean(ttd1 > t))
survival2 <- sapply(1:ncol(m_States), function(t) mean(ttd2 > t))
survival3 <- sapply(1:ncol(m_States), function(t) mean(ttd3 > t))

# Plot all survival curves
plot(0:(ncol(m_States)-1), survival1, type = "l", col = "blue", 
     ylim = c(0, 1), xlab = "Cycle", ylab = "Survival Probability",
     main = "Survival Comparison Across Models", lwd = 2)
lines(0:(ncol(m_States)-1), survival2, col = "red", lwd = 2)
lines(0:(ncol(m_States)-1), survival3, col = "darkgreen", lwd = 2)
legend("topright", legend = c("Direct Risk", "Composite Risk", "Globorisk"), 
       col = c("blue", "red", "darkgreen"), lty = 1, lwd = 2)
grid()

5.2 Compute and Visualize Time to MI using survival curves

mi_state <- "MI"

ttmi1 <- time_to_event(m_States, mi_state)
ttmi2 <- time_to_event(m_States_risk, mi_state)
ttmi3 <- time_to_event(m_States_globorisk, mi_state)

ever_mi1 <- ttmi1 <= ncol(m_States)
ever_mi2 <- ttmi2 <= ncol(m_States)
ever_mi3 <- ttmi3 <= ncol(m_States)

mi_summary <- data.frame(
  Model = c("Direct Risk", "Composite Risk", "Globorisk"),
  Ever_MI_Pct = c(100 * mean(ever_mi1), 100 * mean(ever_mi2), 100 * mean(ever_mi3)),
  Mean_Time_MI = c(mean(ttmi1[ever_mi1]), mean(ttmi2[ever_mi2]), mean(ttmi3[ever_mi3])),
  Median_Time_MI = c(median(ttmi1[ever_mi1]), median(ttmi2[ever_mi2]), median(ttmi3[ever_mi3]))
)
mi_free1 <- sapply(1:ncol(m_States), function(t) mean(ttmi1 > t))
mi_free2 <- sapply(1:ncol(m_States), function(t) mean(ttmi2 > t))
mi_free3 <- sapply(1:ncol(m_States), function(t) mean(ttmi3 > t))

plot(0:(ncol(m_States)-1), mi_free1, type = "l", col = "blue", 
     ylim = c(0, 1), xlab = "Cycle", ylab = "MI-Free Probability",
     main = "MI-Free Survival Across Models", lwd = 2)
lines(0:(ncol(m_States)-1), mi_free2, col = "red", lwd = 2)
lines(0:(ncol(m_States)-1), mi_free3, col = "darkgreen", lwd = 2)
legend("bottomright", legend = c("Direct Risk", "Composite Risk", "Globorisk"), 
       col = c("blue", "red", "darkgreen"), lty = 1, lwd = 2)
grid()

5.3 Computation and Visualize time to ST using survival curves

st_state <- "ST"

ttst1 <- time_to_event(m_States, st_state)
ttst2 <- time_to_event(m_States_risk, st_state)
ttst3 <- time_to_event(m_States_globorisk, st_state)

ever_st1 <- ttst1 <= ncol(m_States)
ever_st2 <- ttst2 <= ncol(m_States)
ever_st3 <- ttst3 <= ncol(m_States)

st_summary <- data.frame(
  Model = c("Direct Risk", "Composite Risk", "Globorisk"),
  Ever_ST_Pct = c(100 * mean(ever_st1), 100 * mean(ever_st2), 100 * mean(ever_st3)),
  Mean_Time_ST = c(mean(ttst1[ever_st1]), mean(ttst2[ever_st2]), mean(ttst3[ever_st3])),
  Median_Time_ST = c(median(ttst1[ever_st1]), median(ttst2[ever_st2]), median(ttst3[ever_st3]))
)

st_free1 <- sapply(1:ncol(m_States), function(t) mean(ttst1 > t))
st_free2 <- sapply(1:ncol(m_States), function(t) mean(ttst2 > t))
st_free3 <- sapply(1:ncol(m_States), function(t) mean(ttst3 > t))

plot(0:(ncol(m_States)-1), st_free1, type = "l", col = "blue", 
     ylim = c(0, 1), xlab = "Cycle", ylab = "ST-Free Probability",
     main = "ST-Free Survival Across Models", lwd = 2)
lines(0:(ncol(m_States)-1), st_free2, col = "red", lwd = 2)
lines(0:(ncol(m_States)-1), st_free3, col = "darkgreen", lwd = 2)
legend("topright", legend = c("Direct Risk", "Composite Risk", "Globorisk"), 
       col = c("blue", "red", "darkgreen"), lty = 1, lwd = 2)
grid()

Transition Probability Comparison

# Get transition matrices
trans1 <- prop.table(table(m_States[, 1], m_States[, 2]), margin = 1)
trans2 <- prop.table(table(m_States_risk[, 1], m_States_risk[, 2]), margin = 1)
trans3 <- prop.table(table(m_States_globorisk[, 1], m_States_globorisk[, 2]), margin = 1)

# Focus on transitions from NE
ne_transitions <- data.frame(
  To_State = c("NE", "MI", "ST", "D"),
  Structure1_Direct = c(
    ifelse("NE" %in% rownames(trans1) & "NE" %in% colnames(trans1), trans1["NE", "NE"], 0),
    ifelse("NE" %in% rownames(trans1) & "MI" %in% colnames(trans1), trans1["NE", "MI"], 0),
    ifelse("NE" %in% rownames(trans1) & "ST" %in% colnames(trans1), trans1["NE", "ST"], 0),
    ifelse("NE" %in% rownames(trans1) & "D" %in% colnames(trans1), trans1["NE", "D"], 0)
  ),
  Structure2_Composite = c(
    ifelse("NE" %in% rownames(trans2) & "NE" %in% colnames(trans2), trans2["NE", "NE"], 0),
    ifelse("NE" %in% rownames(trans2) & "MI" %in% colnames(trans2), trans2["NE", "MI"], 0),
    ifelse("NE" %in% rownames(trans2) & "ST" %in% colnames(trans2), trans2["NE", "ST"], 0),
    ifelse("NE" %in% rownames(trans2) & "D" %in% colnames(trans2), trans2["NE", "D"], 0)
  ),
  Structure3_Globorisk = c(
    ifelse("NE" %in% rownames(trans3) & "NE" %in% colnames(trans3), trans3["NE", "NE"], 0),
    ifelse("NE" %in% rownames(trans3) & "MI" %in% colnames(trans3), trans3["NE", "MI"], 0),
    ifelse("NE" %in% rownames(trans3) & "ST" %in% colnames(trans3), trans3["NE", "ST"], 0),
    ifelse("NE" %in% rownames(trans3) & "D" %in% colnames(trans3), trans3["NE", "D"], 0)
  )
)

ne_transitions[, 2:4] <- round(ne_transitions[, 2:4], 4)

kable(ne_transitions, 
      caption = "Transition Probabilities from NE (Cycle 0 to Cycle 1)",
      col.names = c("To State", "Structure 1 (Direct)", "Structure 2 (Composite)", "Structure 3 (Globorisk)"),
      digits = 4) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))

Transition Probabilities from NE (Cycle 0 to Cycle 1)
To State	Structure 1 (Direct)	Structure 2 (Composite)	Structure 3 (Globorisk)
NE	0.9800	0.9675	0.9779
MI	0.0013	0.0069	0.0023
ST	0.0020	0.0082	0.0032
D	0.0167	0.0175	0.0167

Summary Statistics

# Calculate overall differences
total_cells <- prod(dim(m_States))

diff_1_2 <- sum(m_States != m_States_risk, na.rm = TRUE)
diff_1_3 <- sum(m_States != m_States_globorisk, na.rm = TRUE)
diff_2_3 <- sum(m_States_risk != m_States_globorisk, na.rm = TRUE)

summary_stats <- data.frame(
  Comparison = c("Structure 1 vs Structure 2", "Structure 1 vs Structure 3", "Structure 2 vs Structure 3"),
  Different_Cells = c(diff_1_2, diff_1_3, diff_2_3),
  Percentage = c(100 * diff_1_2 / total_cells,
                 100 * diff_1_3 / total_cells,
                 100 * diff_2_3 / total_cells)
)

summary_stats$Percentage <- round(summary_stats$Percentage, 2)

kable(summary_stats, 
      caption = "Overall Model Differences",
      col.names = c("Comparison", "Different Cells", "Percentage (%)"),
      digits = 2) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))

Overall Model Differences
Comparison	Different Cells	Percentage (%)
Structure 1 vs Structure 2	1398176	22.92
Structure 1 vs Structure 3	125803	2.06
Structure 2 vs Structure 3	1408076	23.08

CVD Events Comparison Across Models

## CVD Events Comparison Across Models

# Function to count CVD events (MI and ST) per individual
count_cvd_events <- function(mat) {
  # Check if individual ever experiences MI or ST
  has_mi <- apply(mat, 1, function(row) any(row == "MI"))
  has_st <- apply(mat, 1, function(row) any(row == "ST"))
  
  # Count events (each individual can have both MI and ST)
  n_events <- has_mi + has_st
  
  # Time to first CVD event (either MI or ST)
  time_to_cvd <- apply(mat, 1, function(row) {
    mi_idx <- which(row == "MI")[1]
    st_idx <- which(row == "ST")[1]
    first_event <- min(mi_idx, st_idx, na.rm = TRUE)
    ifelse(is.infinite(first_event), ncol(mat) + 1, first_event)
  })
  
  return(list(
    n_events = n_events,
    has_mi = has_mi,
    has_st = has_st,
    time_to_cvd = time_to_cvd
  ))
}

# Calculate for all models
cvd1 <- count_cvd_events(m_States)  # Structure 1
cvd2 <- count_cvd_events(m_States_risk)  # Structure 2
cvd3 <- count_cvd_events(m_States_globorisk)  # Structure 3

# Create summary table
cvd_summary <- data.frame(
  Model = c("Structure 1 (Direct)", "Structure 2 (Composite)", "Structure 3 (Globorisk)"),
  Any_CVD_Pct = c(
    100 * mean(cvd1$n_events > 0),
    100 * mean(cvd2$n_events > 0),
    100 * mean(cvd3$n_events > 0)
  ),
  MI_Only_Pct = c(
    100 * mean(cvd1$has_mi & !cvd1$has_st),
    100 * mean(cvd2$has_mi & !cvd2$has_st),
    100 * mean(cvd3$has_mi & !cvd3$has_st)
  ),
  ST_Only_Pct = c(
    100 * mean(!cvd1$has_mi & cvd1$has_st),
    100 * mean(!cvd2$has_mi & cvd2$has_st),
    100 * mean(!cvd3$has_mi & cvd3$has_st)
  ),
  Both_MI_ST_Pct = c(
    100 * mean(cvd1$has_mi & cvd1$has_st),
    100 * mean(cvd2$has_mi & cvd2$has_st),
    100 * mean(cvd3$has_mi & cvd3$has_st)
  ),
  Mean_Events_Per_Individual = c(
    mean(cvd1$n_events),
    mean(cvd2$n_events),
    mean(cvd3$n_events)
  ),
  Mean_Time_to_First_CVD = c(
    mean(cvd1$time_to_cvd[cvd1$time_to_cvd <= ncol(m_States)]),
    mean(cvd2$time_to_cvd[cvd2$time_to_cvd <= ncol(m_States)]),
    mean(cvd3$time_to_cvd[cvd3$time_to_cvd <= ncol(m_States)])
  )
)

# Round for display
cvd_summary[, 2:6] <- round(cvd_summary[, 2:6], 1)
cvd_summary[, 7] <- round(cvd_summary[, 7], 1)

kable(cvd_summary, 
      caption = "CVD Events Comparison Across Models",
      col.names = c("Model", "Any CVD (%)", "MI Only (%)", "ST Only (%)", 
                    "Both MI & ST (%)", "Mean Events/Person", "Mean Time to First CVD"),
      align = c('l', 'r', 'r', 'r', 'r', 'r', 'r')) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  row_spec(1, background = "#f0f0f0") %>%
  row_spec(2, background = "#ffe6e6") %>%
  row_spec(3, background = "#e6f3ff")

CVD Events Comparison Across Models
Model	Any CVD (%)	MI Only (%)	ST Only (%)	Both MI & ST (%)	Mean Events/Person	Mean Time to First CVD
Structure 1 (Direct)	25.7	10.4	13.4	1.8	0.3	23.5
Structure 2 (Composite)	29.3	11.5	14.5	3.3	0.3	15.1
Structure 3 (Globorisk)	28.8	12.4	14.0	2.4	0.3	21.2

8.1 CVD Event Distribution

# Prepare data for plotting
cvd_plot_data <- data.frame(
  Model = rep(c("Structure 1 (Direct)", "Structure 2 (Composite)", "Structure 3 (Globorisk)"), 3),
  Event_Type = rep(c("MI Only", "ST Only", "Both MI & ST"), each = 3),
  Percentage = c(
    cvd_summary$MI_Only_Pct[1], cvd_summary$MI_Only_Pct[2], cvd_summary$MI_Only_Pct[3],
    cvd_summary$ST_Only_Pct[1], cvd_summary$ST_Only_Pct[2], cvd_summary$ST_Only_Pct[3],
    cvd_summary$Both_MI_ST_Pct[1], cvd_summary$Both_MI_ST_Pct[2], cvd_summary$Both_MI_ST_Pct[3]
  )
)

# Stacked bar plot
ggplot(cvd_plot_data, aes(x = Model, y = Percentage, fill = Event_Type)) +
  geom_bar(stat = "identity", position = "stack", alpha = 0.8) +
  labs(title = "CVD Event Distribution Across Models",
       x = "Model", y = "Percentage of Individuals (%)") +
  theme_minimal() +
  theme(legend.position = "bottom") +
  scale_fill_manual(values = c("MI Only" = "steelblue", 
                               "ST Only" = "darkred",
                               "Both MI & ST" = "purple"))

8.2 CVD Event-Free Survival

# Function to calculate CVD event-free survival
cvd_event_free <- function(mat) {
  # Time to first CVD event (MI or ST)
  time_to_cvd <- apply(mat, 1, function(row) {
    mi_idx <- which(row == "MI")[1]
    st_idx <- which(row == "ST")[1]
    first_event <- min(mi_idx, st_idx, na.rm = TRUE)
    ifelse(is.infinite(first_event), ncol(mat) + 1, first_event)
  })
  
  # Survival curve
  survival <- sapply(1:ncol(mat), function(t) mean(time_to_cvd > t))
  return(survival)
}

# Calculate for all models
cvd_free1 <- cvd_event_free(m_States)  # Structure 1
cvd_free2 <- cvd_event_free(m_States_risk)  # Structure 2
cvd_free3 <- cvd_event_free(m_States_globorisk)  # Structure 3

# Plot
cycles <- 0:(ncol(m_States)-1)
plot(cycles, cvd_free1, type = "l", col = "blue", 
     ylim = c(0, 1), xlab = "Cycle", ylab = "CVD-Free Survival Probability",
     main = "CVD Event-Free Survival Across Models", lwd = 2)
lines(cycles, cvd_free2, col = "red", lwd = 2)
lines(cycles, cvd_free3, col = "darkgreen", lwd = 2)
legend("topright", legend = c("Structure 1 (Direct)", "Structure 2 (Composite)", "Structure 3 (Globorisk)"), 
       col = c("blue", "red", "darkgreen"), lty = 1, lwd = 2)
grid()

9. Comprehensive CVD Events Analysis: First Events vs Total Events

# Function to count both first events and total events
analyze_cvd_events <- function(mat) {
  # First event analysis
  has_mi <- apply(mat, 1, function(row) any(row == "MI"))
  has_st <- apply(mat, 1, function(row) any(row == "ST"))
  
  # Time to first events
  time_to_mi <- apply(mat, 1, function(row) {
    idx <- which(row == "MI")[1]
    ifelse(is.na(idx), ncol(mat) + 1, idx)
  })
  
  time_to_st <- apply(mat, 1, function(row) {
    idx <- which(row == "ST")[1]
    ifelse(is.na(idx), ncol(mat) + 1, idx)
  })
  
  time_to_cvd <- apply(mat, 1, function(row) {
    mi_idx <- which(row == "MI")[1]
    st_idx <- which(row == "ST")[1]
    first_event <- min(mi_idx, st_idx, na.rm = TRUE)
    ifelse(is.infinite(first_event), ncol(mat) + 1, first_event)
  })
  
  # Determine first event type
  first_event_type <- rep("None", nrow(mat))
  for(i in 1:nrow(mat)) {
    if(time_to_cvd[i] <= ncol(mat)) {
      if(time_to_mi[i] < time_to_st[i]) {
        first_event_type[i] <- "MI"
      } else if(time_to_st[i] < time_to_mi[i]) {
        first_event_type[i] <- "ST"
      } else if(time_to_mi[i] == time_to_st[i] && time_to_mi[i] <= ncol(mat)) {
        first_event_type[i] <- "Both"
      }
    }
  }
  
  # Total events analysis (including multiple events)
  mi_count <- rowSums(mat == "MI", na.rm = TRUE)
  st_count <- rowSums(mat == "ST", na.rm = TRUE)
  total_cvd <- mi_count + st_count
  
  # Second events (individuals with at least 2 events)
  has_second_event <- total_cvd >= 2
  
  return(list(
    # First event statistics
    ever_mi = has_mi,
    ever_st = has_st,
    ever_cvd = has_mi | has_st,
    first_event_type = first_event_type,
    time_to_mi = time_to_mi,
    time_to_st = time_to_st,
    time_to_cvd = time_to_cvd,
    # Total event statistics
    mi_count = mi_count,
    st_count = st_count,
    total_cvd = total_cvd,
    has_second_event = has_second_event,
    # Summary counts
    total_mi_events = sum(mi_count),
    total_st_events = sum(st_count),
    total_cvd_events = sum(total_cvd),
    n_individuals = nrow(mat)
  ))
}

# Analyze all three models
cvd1 <- analyze_cvd_events(m_States)  # Structure 1
cvd2 <- analyze_cvd_events(m_States_risk)  # Structure 2
cvd3 <- analyze_cvd_events(m_States_globorisk)  # Structure 3

9.1 First Event Summary Table

first_event_summary <- data.frame(
  Model = c("Structure 1 (Direct)", "Structure 2 (Composite)", "Structure 3 (Globorisk)"),
  Any_CVD_First_Pct = c(
    100 * mean(cvd1$ever_cvd),
    100 * mean(cvd2$ever_cvd),
    100 * mean(cvd3$ever_cvd)
  ),
  MI_First_Pct = c(
    100 * mean(cvd1$first_event_type == "MI"),
    100 * mean(cvd2$first_event_type == "MI"),
    100 * mean(cvd3$first_event_type == "MI")
  ),
  ST_First_Pct = c(
    100 * mean(cvd1$first_event_type == "ST"),
    100 * mean(cvd2$first_event_type == "ST"),
    100 * mean(cvd3$first_event_type == "ST")
  ),
  Both_First_Pct = c(
    100 * mean(cvd1$first_event_type == "Both"),
    100 * mean(cvd2$first_event_type == "Both"),
    100 * mean(cvd3$first_event_type == "Both")
  ),
  Mean_Time_to_First_CVD = c(
    mean(cvd1$time_to_cvd[cvd1$time_to_cvd <= ncol(m_States)], na.rm = TRUE),
    mean(cvd2$time_to_cvd[cvd2$time_to_cvd <= ncol(m_States)], na.rm = TRUE),
    mean(cvd3$time_to_cvd[cvd3$time_to_cvd <= ncol(m_States)], na.rm = TRUE)
  ),
  Median_Time_to_First_CVD = c(
    median(cvd1$time_to_cvd[cvd1$time_to_cvd <= ncol(m_States)], na.rm = TRUE),
    median(cvd2$time_to_cvd[cvd2$time_to_cvd <= ncol(m_States)], na.rm = TRUE),
    median(cvd3$time_to_cvd[cvd3$time_to_cvd <= ncol(m_States)], na.rm = TRUE)
  )
)

# Round values
first_event_summary$Any_CVD_First_Pct <- round(first_event_summary$Any_CVD_First_Pct, 1)
first_event_summary$MI_First_Pct <- round(first_event_summary$MI_First_Pct, 1)
first_event_summary$ST_First_Pct <- round(first_event_summary$ST_First_Pct, 1)
first_event_summary$Both_First_Pct <- round(first_event_summary$Both_First_Pct, 1)
first_event_summary$Mean_Time_to_First_CVD <- round(first_event_summary$Mean_Time_to_First_CVD, 1)
first_event_summary$Median_Time_to_First_CVD <- round(first_event_summary$Median_Time_to_First_CVD, 1)

kable(first_event_summary, 
      caption = "First CVD Event Summary",
      col.names = c("Model", "Any CVD (%)", "MI First (%)", "ST First (%)", 
                    "Both First (%)", "Mean Time to First", "Median Time to First"),
      align = c('l', 'r', 'r', 'r', 'r', 'r', 'r')) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))

First CVD Event Summary
Model	Any CVD (%)	MI First (%)	ST First (%)	Mean Time to First	Median Time to First
Structure 1 (Direct)	25.7	11.3	14.3	23.5	23
Structure 2 (Composite)	29.3	13.3	16.0	15.1	13
Structure 3 (Globorisk)	28.8	13.6	15.2	21.2	20

9.2 First Event Distribution Visualization

# Prepare data for plotting
first_event_plot <- data.frame(
  Model = rep(c("Structure 1 (Direct)", "Structure 2 (Composite)", "Structure 3 (Globorisk)"), 3),
  Event_Type = rep(c("MI First", "ST First", "Both First"), each = 3),
  Percentage = c(
    first_event_summary$MI_First_Pct[1], first_event_summary$MI_First_Pct[2], first_event_summary$MI_First_Pct[3],
    first_event_summary$ST_First_Pct[1], first_event_summary$ST_First_Pct[2], first_event_summary$ST_First_Pct[3],
    first_event_summary$Both_First_Pct[1], first_event_summary$Both_First_Pct[2], first_event_summary$Both_First_Pct[3]
  )
)

ggplot(first_event_plot, aes(x = Model, y = Percentage, fill = Event_Type)) +
  geom_bar(stat = "identity", position = "stack", alpha = 0.8) +
  labs(title = "Distribution of First CVD Events",
       x = "Model", y = "Percentage of Individuals (%)") +
  theme_minimal() +
  theme(legend.position = "bottom") +
  scale_fill_manual(values = c("MI First" = "steelblue", 
                               "ST First" = "darkred",
                               "Both First" = "purple"))

9.3 Total Events Summary

total_events_summary <- data.frame(
  Model = c("Structure 1 (Direct)", "Structure 2 (Composite)", "Structure 3 (Globorisk)"),
  Total_MI_Events = c(
    cvd1$total_mi_events,
    cvd2$total_mi_events,
    cvd3$total_mi_events
  ),
  Total_ST_Events = c(
    cvd1$total_st_events,
    cvd2$total_st_events,
    cvd3$total_st_events
  ),
  Total_CVD_Events = c(
    cvd1$total_cvd_events,
    cvd2$total_cvd_events,
    cvd3$total_cvd_events
  ),
  Mean_Events_Per_Person = c(
    mean(cvd1$total_cvd),
    mean(cvd2$total_cvd),
    mean(cvd3$total_cvd)
  ),
  Pct_With_Multiple_Events = c(
    100 * mean(cvd1$has_second_event),
    100 * mean(cvd2$has_second_event),
    100 * mean(cvd3$has_second_event)
  )
)

# Format total events with commas
total_events_summary[, 2:4] <- lapply(total_events_summary[, 2:4], function(x) format(x, big.mark = ","))
total_events_summary[, 5] <- round(total_events_summary[, 5], 3)
total_events_summary[, 6] <- round(total_events_summary[, 6], 1)

kable(total_events_summary, 
      caption = "Total CVD Event Counts (All Events)",
      col.names = c("Model", "Total MI Events", "Total ST Events", "Total CVD Events",
                    "Mean Events/Person", "Multiple Events (%)"),
      align = c('l', 'r', 'r', 'r', 'r', 'r')) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))

Total CVD Event Counts (All Events)
Model	Total MI Events	Total ST Events	Total CVD Events	Mean Events/Person	Multiple Events (%)
Structure 1 (Direct)	16,323	17,758	34,081	0.341	6.2
Structure 2 (Composite)	22,918	22,449	45,367	0.454	10.6
Structure 3 (Globorisk)	20,420	19,678	40,098	0.401	8.1

9.4 Comparison: First Events vs Total Events

# Create comparison plot data
comparison_plot_data <- data.frame(
  Model = rep(c("Structure 1 (Direct)", "Structure 2 (Composite)", "Structure 3 (Globorisk)"), 2),
  Event_Type = rep(c("First Events", "Total Events"), each = 3),
  Count = c(
    sum(cvd1$ever_cvd),      # Structure 1 - First Events
    sum(cvd2$ever_cvd),      # Structure 2 - First Events  
    sum(cvd3$ever_cvd),      # Structure 3 - First Events
    cvd1$total_cvd_events,   # Structure 1 - Total Events
    cvd2$total_cvd_events,   # Structure 2 - Total Events
    cvd3$total_cvd_events    # Structure 3 - Total Events
  )
)

# Convert to thousands for better visualization
comparison_plot_data$Count_K <- comparison_plot_data$Count / 1000

# Create comparison table
comparison_table <- data.frame(
  Model = c("Structure 1 (Direct)", "Structure 2 (Composite)", "Structure 3 (Globorisk)"),
  First_Events = c(
    format(sum(cvd1$ever_cvd), big.mark = ","),
    format(sum(cvd2$ever_cvd), big.mark = ","),
    format(sum(cvd3$ever_cvd), big.mark = ",")
  ),
  Total_Events = c(
    format(cvd1$total_cvd_events, big.mark = ","),
    format(cvd2$total_cvd_events, big.mark = ","),
    format(cvd3$total_cvd_events, big.mark = ",")
  ),
  Additional_Events = c(
    format(cvd1$total_cvd_events - sum(cvd1$ever_cvd), big.mark = ","),
    format(cvd2$total_cvd_events - sum(cvd2$ever_cvd), big.mark = ","),
    format(cvd3$total_cvd_events - sum(cvd3$ever_cvd), big.mark = ",")
  ),
  Total_to_First_Ratio = c(
    round(cvd1$total_cvd_events / sum(cvd1$ever_cvd), 2),
    round(cvd2$total_cvd_events / sum(cvd2$ever_cvd), 2),
    round(cvd3$total_cvd_events / sum(cvd3$ever_cvd), 2)
  )
)

kable(comparison_table, 
      caption = "CVD Event Counts: First Events vs Total Events",
      col.names = c("Model", "First Events", "Total Events", "Additional Events", "Total/First Ratio"),
      align = c('l', 'r', 'r', 'r', 'r')) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  row_spec(0, background = "#f0f0f0") %>%
  row_spec(1, background = "#f0f8ff") %>%
  row_spec(2, background = "#fff0f0") %>%
  row_spec(3, background = "#f0fff0") %>%
  footnote(general = "Additional Events = Total Events - First Events")

CVD Event Counts: First Events vs Total Events
Model	First Events	Total Events	Additional Events	Total/First Ratio
Structure 1 (Direct)	25,655	34,081	8,426	1.33
Structure 2 (Composite)	29,342	45,367	16,025	1.55
Structure 3 (Globorisk)	28,784	40,098	11,314	1.39
Note:
Additional Events = Total Events - First Events

9.5 First Events vs Total Events Visualization

# Plot: Grouped by Event Type (3 bars for First Events, 3 for Total Events)
ggplot(comparison_plot_data, aes(x = Event_Type, y = Count_K, fill = Model)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9), alpha = 0.8, width = 0.8) +
  labs(title = "First Events vs Total CVD Events",
       subtitle = "Grouped by Event Type",
       x = "Event Type", 
       y = "Number of Events (Thousands)") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    axis.text.x = element_text(size = 11, face = "bold")
  ) +
  scale_fill_manual(values = c("Structure 1 (Direct)" = "steelblue", 
                               "Structure 2 (Composite)" = "darkred",
                               "Structure 3 (Globorisk)" = "darkgreen")) +
  geom_text(aes(label = round(Count_K, 1)), 
            position = position_dodge(width = 0.9), 
            vjust = -0.5, 
            size = 3)

10. Master Summary Table

## 10. Master Summary Table 


# Ensure all summary objects have numeric columns
# Re-calculate death_summary with explicit numeric conversion
ttd1 <- time_to_event(m_States, "D")
ttd2 <- time_to_event(m_States_risk, "D")
ttd3 <- time_to_event(m_States_globorisk, "D")

death_summary <- data.frame(
  Model = c("Structure 1 (Direct)", "Structure 2 (Composite)", "Structure 3 (Globorisk)"),
  Mean_Survival = as.numeric(c(mean(ttd1, na.rm = TRUE), 
                                mean(ttd2, na.rm = TRUE), 
                                mean(ttd3, na.rm = TRUE))),
  Median_Survival = as.numeric(c(median(ttd1, na.rm = TRUE), 
                                  median(ttd2, na.rm = TRUE), 
                                  median(ttd3, na.rm = TRUE))),
  SD_Survival = as.numeric(c(sd(ttd1, na.rm = TRUE), 
                              sd(ttd2, na.rm = TRUE), 
                              sd(ttd3, na.rm = TRUE)))
)

# Ensure mi_summary has numeric columns
mi_summary <- data.frame(
  Model = c("Structure 1 (Direct)", "Structure 2 (Composite)", "Structure 3 (Globorisk)"),
  Ever_MI_Pct = as.numeric(c(100 * mean(ttmi1 <= 61, na.rm = TRUE), 
                             100 * mean(ttmi2 <= 61, na.rm = TRUE), 
                             100 * mean(ttmi3 <= 61, na.rm = TRUE))),
  Mean_Time_MI = as.numeric(c(mean(ttmi1[ttmi1 <= 61], na.rm = TRUE), 
                               mean(ttmi2[ttmi2 <= 61], na.rm = TRUE), 
                               mean(ttmi3[ttmi3 <= 61], na.rm = TRUE))),
  Median_Time_MI = as.numeric(c(median(ttmi1[ttmi1 <= 61], na.rm = TRUE), 
                                 median(ttmi2[ttmi2 <= 61], na.rm = TRUE), 
                                 median(ttmi3[ttmi3 <= 61], na.rm = TRUE)))
)

# Ensure st_summary has numeric columns
st_summary <- data.frame(
  Model = c("Structure 1 (Direct)", "Structure 2 (Composite)", "Structure 3 (Globorisk)"),
  Ever_ST_Pct = as.numeric(c(100 * mean(ttst1 <= 61, na.rm = TRUE), 
                             100 * mean(ttst2 <= 61, na.rm = TRUE), 
                             100 * mean(ttst3 <= 61, na.rm = TRUE))),
  Mean_Time_ST = as.numeric(c(mean(ttst1[ttst1 <= 61], na.rm = TRUE), 
                               mean(ttst2[ttst2 <= 61], na.rm = TRUE), 
                               mean(ttst3[ttst3 <= 61], na.rm = TRUE))),
  Median_Time_ST = as.numeric(c(median(ttst1[ttst1 <= 61], na.rm = TRUE), 
                                 median(ttst2[ttst2 <= 61], na.rm = TRUE), 
                                 median(ttst3[ttst3 <= 61], na.rm = TRUE)))
)

# Ensure cvd_summary has numeric columns from earlier calculation
# Re-calculate CVD events if needed
if(!exists("cvd1")) {
  cvd1 <- count_cvd_events(m_States)
  cvd2 <- count_cvd_events(m_States_risk)
  cvd3 <- count_cvd_events(m_States_globorisk)
}

# Create master summary table with explicit numeric conversion
master_summary <- data.frame(
  Metric = c(
    "NE Occupancy (cycles)",
    "MI Incidence (%)",
    "Mean Time to MI (cycles)",
    "ST Incidence (%)",
    "Mean Time to ST (cycles)",
    "Any CVD Event (%)",
    "Mean Time to First CVD (cycles)",
    "Mean Survival (cycles)",
    "Total CVD Events (N)",
    "Mean Events per Person",
    "Multiple Events (%)"
  ),
  Structure1_Direct = c(
    round(as.numeric(occupancy_summary$Structure1_Direct_Mean[occupancy_summary$State == "NE"]), 1),
    round(as.numeric(mi_summary$Ever_MI_Pct[1]), 1),
    round(as.numeric(mi_summary$Mean_Time_MI[1]), 1),
    round(as.numeric(st_summary$Ever_ST_Pct[1]), 1),
    round(as.numeric(st_summary$Mean_Time_ST[1]), 1),
    round(as.numeric(cvd_summary$Any_CVD_Pct[1]), 1),
    round(as.numeric(cvd_summary$Mean_Time_to_First_CVD[1]), 1),
    round(as.numeric(death_summary$Mean_Survival[1]), 1),
    format(as.numeric(cvd1$total_cvd_events), big.mark = ","),
    round(as.numeric(mean(cvd1$total_cvd, na.rm = TRUE)), 3),
    round(as.numeric(100 * mean(cvd1$has_second_event, na.rm = TRUE)), 1)
  ),
  Structure2_Composite = c(
    round(as.numeric(occupancy_summary$Structure2_Composite_Mean[occupancy_summary$State == "NE"]), 1),
    round(as.numeric(mi_summary$Ever_MI_Pct[2]), 1),
    round(as.numeric(mi_summary$Mean_Time_MI[2]), 1),
    round(as.numeric(st_summary$Ever_ST_Pct[2]), 1),
    round(as.numeric(st_summary$Mean_Time_ST[2]), 1),
    round(as.numeric(cvd_summary$Any_CVD_Pct[2]), 1),
    round(as.numeric(cvd_summary$Mean_Time_to_First_CVD[2]), 1),
    round(as.numeric(death_summary$Mean_Survival[2]), 1),
    format(as.numeric(cvd2$total_cvd_events), big.mark = ","),
    round(as.numeric(mean(cvd2$total_cvd, na.rm = TRUE)), 3),
    round(as.numeric(100 * mean(cvd2$has_second_event, na.rm = TRUE)), 1)
  ),
  Structure3_Globorisk = c(
    round(as.numeric(occupancy_summary$Structure3_Globorisk_Mean[occupancy_summary$State == "NE"]), 1),
    round(as.numeric(mi_summary$Ever_MI_Pct[3]), 1),
    round(as.numeric(mi_summary$Mean_Time_MI[3]), 1),
    round(as.numeric(st_summary$Ever_ST_Pct[3]), 1),
    round(as.numeric(st_summary$Mean_Time_ST[3]), 1),
    round(as.numeric(cvd_summary$Any_CVD_Pct[3]), 1),
    round(as.numeric(cvd_summary$Mean_Time_to_First_CVD[3]), 1),
    round(as.numeric(death_summary$Mean_Survival[3]), 1),
    format(as.numeric(cvd3$total_cvd_events), big.mark = ","),
    round(as.numeric(mean(cvd3$total_cvd, na.rm = TRUE)), 3),
    round(as.numeric(100 * mean(cvd3$has_second_event, na.rm = TRUE)), 1)
  )
)

# Display the table
kable(master_summary, 
      caption = "Master Summary Table: Key Metrics Across All Three Structures",
      col.names = c("Metric", "Structure 1 (Direct)", "Structure 2 (Composite)", "Structure 3 (Globorisk)"),
      align = c('l', 'r', 'r', 'r')) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
  row_spec(0, background = "#f0f0f0", bold = TRUE) %>%
  footnote(general = "DALYs are Disability-Adjusted Life Years. CVD includes both MI and ST events.")

Master Summary Table: Key Metrics Across All Three Structures
Metric	Structure 1 (Direct)	Structure 2 (Composite)	Structure 3 (Globorisk)
NE Occupancy (cycles)	20.5	18.7	19.8
MI Incidence (%)	12.2	14.9	14.8
Mean Time to MI (cycles)	24.2	15.5	21.7
ST Incidence (%)	15.3	17.8	16.4
Mean Time to ST (cycles)	23	15.6	21.1
Any CVD Event (%)	25.7	29.3	28.8
Mean Time to First CVD (cycles)	23.5	15.1	21.2
Mean Survival (cycles)	22.8	22	22.5
Total CVD Events (N)	34,081	45,367	40,098
Mean Events per Person	0.341	0.454	0.401
Multiple Events (%)	6.2	10.6	8.1
Note:
DALYs are Disability-Adjusted Life Years. CVD includes both MI and ST events.

Interpretation Summary

cat("\n## Key Findings\n\n")

## 
## ## Key Findings

cat("### Model Rankings (Most to Least Aggressive)\n\n")

## ### Model Rankings (Most to Least Aggressive)

cat("Based on the analysis, the models rank as follows:\n")

## Based on the analysis, the models rank as follows:

cat("1. **Structure 2 (Composite Risk)** - Most aggressive (highest event rates, earliest events)\n")

## 1. **Structure 2 (Composite Risk)** - Most aggressive (highest event rates, earliest events)

cat("2. **Structure 3 (Globorisk)** - Intermediate\n")

## 2. **Structure 3 (Globorisk)** - Intermediate

cat("3. **Structure 1 (Direct Risk)** - Least aggressive (lowest event rates, latest events)\n\n")

## 3. **Structure 1 (Direct Risk)** - Least aggressive (lowest event rates, latest events)

cat("### Key Differences\n\n")

## ### Key Differences

ne_occupancy <- occupancy_summary$Structure1_Direct_Mean[occupancy_summary$State == "NE"]
ne_occupancy_comp <- occupancy_summary$Structure2_Composite_Mean[occupancy_summary$State == "NE"]
ne_occupancy_globo <- occupancy_summary$Structure3_Globorisk_Mean[occupancy_summary$State == "NE"]

cat("**NE (Healthy) State Occupancy:**\n")

## **NE (Healthy) State Occupancy:**

cat(sprintf("- Structure 1 (Direct): %.1f cycles\n", ne_occupancy))

## - Structure 1 (Direct): 20.5 cycles

cat(sprintf("- Structure 3 (Globorisk): %.1f cycles (%.1f%% less than Structure 1)\n", 
    ne_occupancy_globo, 100 * (1 - ne_occupancy_globo / ne_occupancy)))

## - Structure 3 (Globorisk): 19.8 cycles (3.2% less than Structure 1)

cat(sprintf("- Structure 2 (Composite): %.1f cycles (%.1f%% less than Structure 1)\n\n",
    ne_occupancy_comp, 100 * (1 - ne_occupancy_comp / ne_occupancy)))

## - Structure 2 (Composite): 18.7 cycles (8.6% less than Structure 1)

cat("**MI Events:**\n")

## **MI Events:**

cat(sprintf("- Structure 1 (Direct): %.1f%% experience MI, mean time %.1f cycles\n", 
    mi_summary$Ever_MI_Pct[1], mi_summary$Mean_Time_MI[1]))

## - Structure 1 (Direct): 12.2% experience MI, mean time 24.2 cycles

cat(sprintf("- Structure 3 (Globorisk): %.1f%% experience MI (%.1f%% increase), mean time %.1f cycles (%.1f cycles earlier)\n",
    mi_summary$Ever_MI_Pct[3], mi_summary$Ever_MI_Pct[3] - mi_summary$Ever_MI_Pct[1],
    mi_summary$Mean_Time_MI[3], mi_summary$Mean_Time_MI[1] - mi_summary$Mean_Time_MI[3]))

## - Structure 3 (Globorisk): 14.8% experience MI (2.5% increase), mean time 21.7 cycles (2.5 cycles earlier)

cat(sprintf("- Structure 2 (Composite): %.1f%% experience MI (%.1f%% increase), mean time %.1f cycles (%.1f cycles earlier)\n\n",
    mi_summary$Ever_MI_Pct[2], mi_summary$Ever_MI_Pct[2] - mi_summary$Ever_MI_Pct[1],
    mi_summary$Mean_Time_MI[2], mi_summary$Mean_Time_MI[1] - mi_summary$Mean_Time_MI[2]))

## - Structure 2 (Composite): 14.9% experience MI (2.6% increase), mean time 15.5 cycles (8.7 cycles earlier)

cat("**ST Events:**\n")

## **ST Events:**

cat(sprintf("- Structure 1 (Direct): %.1f%% experience ST, mean time %.1f cycles\n", 
    st_summary$Ever_ST_Pct[1], st_summary$Mean_Time_ST[1]))

## - Structure 1 (Direct): 15.3% experience ST, mean time 23.0 cycles

cat(sprintf("- Structure 3 (Globorisk): %.1f%% experience ST (%.1f%% increase), mean time %.1f cycles (%.1f cycles earlier)\n",
    st_summary$Ever_ST_Pct[3], st_summary$Ever_ST_Pct[3] - st_summary$Ever_ST_Pct[1],
    st_summary$Mean_Time_ST[3], st_summary$Mean_Time_ST[1] - st_summary$Mean_Time_ST[3]))

## - Structure 3 (Globorisk): 16.4% experience ST (1.2% increase), mean time 21.1 cycles (2.0 cycles earlier)

cat(sprintf("- Structure 2 (Composite): %.1f%% experience ST (%.1f%% increase), mean time %.1f cycles (%.1f cycles earlier)\n\n",
    st_summary$Ever_ST_Pct[2], st_summary$Ever_ST_Pct[2] - st_summary$Ever_ST_Pct[1],
    st_summary$Mean_Time_ST[2], st_summary$Mean_Time_ST[1] - st_summary$Mean_Time_ST[2]))

## - Structure 2 (Composite): 17.8% experience ST (2.6% increase), mean time 15.6 cycles (7.4 cycles earlier)

cat("**Mortality:**\n")

## **Mortality:**

cat(sprintf("- Structure 1 (Direct): %.1f cycles mean survival\n", death_summary$Mean_Survival[1]))

## - Structure 1 (Direct): 22.8 cycles mean survival

cat(sprintf("- Structure 3 (Globorisk): %.1f cycles (%.1f cycles shorter)\n", 
    death_summary$Mean_Survival[3], death_summary$Mean_Survival[1] - death_summary$Mean_Survival[3]))

## - Structure 3 (Globorisk): 22.5 cycles (0.3 cycles shorter)

cat(sprintf("- Structure 2 (Composite): %.1f cycles (%.1f cycles shorter)\n\n",
    death_summary$Mean_Survival[2], death_summary$Mean_Survival[1] - death_summary$Mean_Survival[2]))

## - Structure 2 (Composite): 22.0 cycles (0.8 cycles shorter)

cat("## Conclusions\n\n")

## ## Conclusions

cat("This comprehensive analysis reveals systematic differences between the three modeling approaches:\n\n")

## This comprehensive analysis reveals systematic differences between the three modeling approaches:

cat("**Structure 1 (Direct Risk)** provides the most conservative estimates, with:\n")

## **Structure 1 (Direct Risk)** provides the most conservative estimates, with:

cat("- Highest NE occupancy (", ne_occupancy, " cycles)\n")

## - Highest NE occupancy ( 20.48  cycles)

cat("- Lowest event rates (MI: ", mi_summary$Ever_MI_Pct[1], "%, ST: ", st_summary$Ever_ST_Pct[1], "%)\n")

## - Lowest event rates (MI:  12.214 %, ST:  15.253 %)

cat("- Latest time to events (mean time to first CVD: ", cvd_summary$Mean_Time_to_First_CVD[1], " cycles)\n")

## - Latest time to events (mean time to first CVD:  23.5  cycles)

cat("- Longest survival (", death_summary$Mean_Survival[1], " cycles)\n\n")

## - Longest survival ( 22.78968  cycles)

cat("**Structure 3 (Globorisk)** shows intermediate results:\n")

## **Structure 3 (Globorisk)** shows intermediate results:

cat("- Moderate reduction in NE occupancy (", ne_occupancy_globo, " cycles)\n")

## - Moderate reduction in NE occupancy ( 19.83  cycles)

cat("- Moderate increase in event rates (MI: ", mi_summary$Ever_MI_Pct[3], "%, ST: ", st_summary$Ever_ST_Pct[3], "%)\n")

## - Moderate increase in event rates (MI:  14.751 %, ST:  16.425 %)

cat("- Earlier events (mean time to first CVD: ", cvd_summary$Mean_Time_to_First_CVD[3], " cycles)\n\n")

## - Earlier events (mean time to first CVD:  21.2  cycles)

cat("**Structure 2 (Composite Risk)** produces the most aggressive outcomes:\n")

## **Structure 2 (Composite Risk)** produces the most aggressive outcomes:

cat("- Lowest NE occupancy (", ne_occupancy_comp, " cycles, ", round(100 * (1 - ne_occupancy_comp / ne_occupancy), 1), "% reduction vs Structure 1)\n")

## - Lowest NE occupancy ( 18.71  cycles,  8.6 % reduction vs Structure 1)

cat("- Highest event rates (MI: ", mi_summary$Ever_MI_Pct[2], "%, ST: ", st_summary$Ever_ST_Pct[2], "%)\n")

## - Highest event rates (MI:  14.857 %, ST:  17.829 %)

cat("- Earliest events (mean time to first CVD: ", cvd_summary$Mean_Time_to_First_CVD[2], " cycles)\n")

## - Earliest events (mean time to first CVD:  15.1  cycles)

cat("- Shortest survival (", death_summary$Mean_Survival[2], " cycles)\n\n")

## - Shortest survival ( 22.00248  cycles)

Comparing cost effectiveness of Empower Health vs Usual Care under the three structures

Load lists with cost and daly data

# Clear workspace
rm(list = ls())
# Load the lists from rda file
# Structure 1 (Direct Risk)
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/res_Empower_Health.rda')
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/res_Usual_Care.rda')
# Structure 2 (Composite Risk)
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/res_Empower_Health_risk.rda')
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/res_Usual_Care_risk.rda')
# Structure 3 (Globorisk)
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/res_Empower_Health_globorisk.rda')
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/res_Usual_Care_globorisk.rda')

11.1 Extracting Mean Cost and DALY Data

# Structure 1 (Direct Risk)
empower_cost1 <- res_Empower_Health$mean_Dcosts
empower_daly1 <- res_Empower_Health$mean_Ddalys
usual_cost1 <- res_Usual_Care$mean_Dcosts
usual_daly1 <- res_Usual_Care$mean_Ddalys
# Structure 2 (Composite Risk)
empower_cost2 <- res_Empower_Health_risk$mean_Dcosts
empower_daly2 <- res_Empower_Health_risk$mean_Ddalys
usual_cost2 <- res_Usual_Care_risk$mean_Dcosts
usual_daly2 <- res_Usual_Care_risk$mean_Ddalys
# Structure 3 (Globorisk)
empower_cost3 <- res_Empower_Health_globorisk$mean_Dcosts
empower_daly3 <- res_Empower_Health_globorisk$mean_Ddalys
usual_cost3 <- res_Usual_Care_globorisk$mean_Dcosts
usual_daly3 <- res_Usual_Care_globorisk$mean_Ddalys

# Remove loaded objects to free memory
rm(res_Empower_Health, res_Usual_Care, res_Empower_Health_risk, res_Usual_Care_risk,res_Empower_Health_globorisk,res_Usual_Care_globorisk)

11.2 Create Summary Table for Cost-Effectiveness

## 11.2 Cost-Effectiveness Analysis: Empower Health vs Usual Care
## Using Kenya GDP Per Capita Thresholds

# Kenya GDP per capita (USD)
gdp_per_capita <- 2000

# Define WTP thresholds based on Kenya GDP
wtp_thresholds <- data.frame(
  Threshold = c("Half GDP", "1x GDP", "3x GDP"),
  Value = c(gdp_per_capita * 0.5, gdp_per_capita * 1, gdp_per_capita * 3),
  Color = c("lightblue", "steelblue", "darkblue")
)

# Create summary table with correct column selection
cost_effectiveness_summary <- data.frame(
  Model = c("Structure 1 (Direct Risk)", "Structure 2 (Composite Risk)", "Structure 3 (Globorisk)"),
  Empower_Cost = c(empower_cost1, empower_cost2, empower_cost3),
  Empower_DALY = c(empower_daly1, empower_daly2, empower_daly3),
  Usual_Care_Cost = c(usual_cost1, usual_cost2, usual_cost3),
  Usual_Care_DALY = c(usual_daly1, usual_daly2, usual_daly3)
)

# Calculate incremental metrics
cost_effectiveness_summary <- cost_effectiveness_summary %>%
  mutate(
    Incremental_Cost = Empower_Cost - Usual_Care_Cost,
    Incremental_DALY = Usual_Care_DALY - Empower_DALY,  # Positive if DALYs averted
    ICER = ifelse(Incremental_DALY > 0, 
                  Incremental_Cost / Incremental_DALY,
                  ifelse(Incremental_Cost > 0, NA_real_, 0))
  )

# Format for display - ensure we select only the columns we need
cost_effectiveness_display <- cost_effectiveness_summary %>%
  mutate(
    Empower_Cost_Display = round(Empower_Cost, 2),
    Empower_DALY_Display = round(Empower_DALY, 2),
    Usual_Care_Cost_Display = round(Usual_Care_Cost, 2),
    Usual_Care_DALY_Display = round(Usual_Care_DALY, 2),
    Incremental_Cost_Display = round(Incremental_Cost, 2),
    Incremental_DALY_Display = round(Incremental_DALY, 4),
    ICER_Display = case_when(
      is.na(ICER) & Incremental_DALY < 0 & Incremental_Cost > 0 ~ "Dominated",
      is.na(ICER) & Incremental_DALY > 0 & Incremental_Cost < 0 ~ "Dominant",
      TRUE ~ paste0("$", format(round(ICER, 0), big.mark = ","))
    )
  ) %>%
  # Select only the columns we need for display
  select(Model, Empower_Cost_Display, Empower_DALY_Display, 
         Usual_Care_Cost_Display, Usual_Care_DALY_Display,
         Incremental_Cost_Display, Incremental_DALY_Display, ICER_Display)

# Display the table with correct column names
kable(cost_effectiveness_display,
      caption = "Cost-Effectiveness Summary: Empower Health vs Usual Care",
      col.names = c("Model", "Empower Cost (USD)", "Empower DALYs", "Usual Care Cost (USD)", 
                    "Usual Care DALYs", "Incremental Cost (USD)", "Incremental DALYs", "ICER (USD/DALY)"),
      align = c('l', 'r', 'r', 'r', 'r', 'r', 'r', 'c')) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
  row_spec(0, background = "#f0f0f0", bold = TRUE) %>%
  row_spec(which(cost_effectiveness_display$ICER_Display == "Dominant"), background = "#d4edda") %>%
  row_spec(which(cost_effectiveness_display$ICER_Display == "Dominated"), background = "#f8d7da")

Cost-Effectiveness Summary: Empower Health vs Usual Care
Model	Empower Cost (USD)	Empower DALYs	Usual Care Cost (USD)	Usual Care DALYs	Incremental Cost (USD)	Incremental DALYs	ICER (USD/DALY)
Structure 1 (Direct Risk)	4781.48	7.38	4212.30	7.49	569.18	0.1089	$5,227
Structure 2 (Composite Risk)	4069.40	7.76	3726.74	7.80	342.66	0.0432	$7,925
Structure 3 (Globorisk)	4979.66	7.50	4422.52	7.61	557.14	0.1115	$4,997

11.3 Cost-Effectiveness Plane with Kenya GDP Thresholds

## Cost-Effectiveness Plane with Adjusted ICER Label Positions

# Create cost-effectiveness plane data
ce_plane_data <- data.frame(
  Model = cost_effectiveness_summary$Model,
  Delta_Cost = cost_effectiveness_summary$Incremental_Cost,
  Delta_DALY = cost_effectiveness_summary$Incremental_DALY,
  ICER = cost_effectiveness_summary$ICER
)

# Calculate axis limits with padding to ensure all points are visible
x_range <- range(ce_plane_data$Delta_DALY, na.rm = TRUE)
y_range <- range(ce_plane_data$Delta_Cost, na.rm = TRUE)

# Add 20% padding to both axes
x_padding <- diff(x_range) * 0.2
y_padding <- diff(y_range) * 0.2

x_limits <- c(min(0, x_range[1] - x_padding), max(x_range[2] + x_padding, x_range[2] * 1.2))
y_limits <- c(min(0, y_range[1] - y_padding), max(y_range[2] + y_padding, y_range[2] * 1.2))

# Define label positions at 85% of the x-axis range
label_x_pos <- x_limits[2] * 0.85

# Calculate y positions for each threshold line at the label_x_pos point
y_half <- wtp_thresholds$Value[1] * label_x_pos
y_1x <- wtp_thresholds$Value[2] * label_x_pos
y_3x <- wtp_thresholds$Value[3] * label_x_pos

# Create custom label positions
ce_plane_data <- ce_plane_data %>%
  mutate(
    label_vjust = case_when(
      Model == "Structure 1 (Direct Risk)" ~ -1.5,  # Above
      Model == "Structure 2 (Composite Risk)" ~ 1.8,  # Below
      Model == "Structure 3 (Globorisk)" ~ 2.0,  # Below 
      TRUE ~ -1.8
    ),
    label_text = case_when(
      !is.na(ICER) & ICER > 0 ~ paste0("ICER: $", format(round(ICER, 0), big.mark = ",")),
      !is.na(Delta_DALY) & Delta_DALY > 0 & Delta_Cost < 0 ~ "Dominant",
      !is.na(Delta_DALY) & Delta_DALY < 0 & Delta_Cost > 0 ~ "Dominated",
      TRUE ~ ""
    )
  )

# Create the plot
ggplot(ce_plane_data, aes(x = Delta_DALY, y = Delta_Cost, color = Model, shape = Model)) +
  geom_point(size = 6, alpha = 0.9, stroke = 1.5) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50", size = 0.8) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray50", size = 0.8) +
  # Add WTP lines for Kenya GDP thresholds
  geom_abline(intercept = 0, slope = wtp_thresholds$Value[1], 
              linetype = "dotted", color = wtp_thresholds$Color[1], size = 1) +
  geom_abline(intercept = 0, slope = wtp_thresholds$Value[2], 
              linetype = "dashed", color = wtp_thresholds$Color[2], size = 1) +
  geom_abline(intercept = 0, slope = wtp_thresholds$Value[3], 
              linetype = "solid", color = wtp_thresholds$Color[3], size = 1) +
  # Add labels along the threshold lines
  annotate("text", 
           x = label_x_pos, 
           y = y_half, 
           label = paste0("Half GDP: $", format(wtp_thresholds$Value[1], big.mark = ","), "/DALY"), 
           size = 3.8, 
           color = wtp_thresholds$Color[1], 
           hjust = 0.5,
           vjust = -0.8,
           fontface = "bold") +
  annotate("text", 
           x = label_x_pos, 
           y = y_1x, 
           label = paste0("1x GDP: $", format(wtp_thresholds$Value[2], big.mark = ","), "/DALY"), 
           size = 3.8, 
           color = wtp_thresholds$Color[2], 
           hjust = 0.5,
           vjust = -0.8,
           fontface = "bold") +
  annotate("text", 
           x = label_x_pos, 
           y = y_3x, 
           label = paste0("3x GDP: $", format(wtp_thresholds$Value[3], big.mark = ","), "/DALY"), 
           size = 3.8, 
           color = wtp_thresholds$Color[3], 
           hjust = 0.5,
           vjust = -0.8,
           fontface = "bold") +
  # Set expanded axis limits
  scale_x_continuous(limits = x_limits, 
                     expand = expansion(mult = c(0.1, 0.15))) +
  scale_y_continuous(limits = y_limits, 
                     expand = expansion(mult = c(0.1, 0.15))) +
  labs(title = "Cost-Effectiveness Plane: Empower Health vs Usual Care",
       subtitle = paste0("Kenya GDP per capita = $", format(gdp_per_capita, big.mark = ","), " (USD)"),
       x = "Incremental DALYs Averted (Usual Care - Empower)", 
       y = "Incremental Cost (USD)") +
  theme_minimal() +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        plot.title = element_text(size = 14, face = "bold"),
        plot.subtitle = element_text(size = 11),
        plot.margin = unit(c(1, 1, 1, 1), "cm")) +
  scale_color_manual(values = c("Structure 1 (Direct Risk)" = "steelblue", 
                                "Structure 2 (Composite Risk)" = "darkred",
                                "Structure 3 (Globorisk)" = "darkgreen")) +
  scale_shape_manual(values = c(16, 17, 18)) +
  # Add ICER labels with custom vertical positions
  geom_text(aes(label = label_text, vjust = label_vjust), 
            size = 3.2,
            fontface = "bold",
            show.legend = FALSE)

11.6 Comparing Model Structures

# Extract data from the cost-effectiveness summary
ce_structure_summary <- cost_effectiveness_summary %>%
  mutate(
    Structure = Model,
    ICER_Display = case_when(
      is.na(ICER) & Incremental_DALY > 0 & Incremental_Cost < 0 ~ "Dominant",
      is.na(ICER) & Incremental_DALY < 0 & Incremental_Cost > 0 ~ "Dominated",
      TRUE ~ paste0("$", format(round(ICER, 0), big.mark = ","))
    ),
    DALYs_Averted = Incremental_DALY,
    Cost_per_DALY = ifelse(!is.na(ICER), round(ICER, 0), NA),
    Economic_Outcome = case_when(
      !is.na(ICER) ~ paste0("$", format(round(ICER, 0), big.mark = ","), " per DALY averted"),
      Incremental_DALY > 0 & Incremental_Cost < 0 ~ "Cost-Saving (Dominant)",
      Incremental_DALY < 0 & Incremental_Cost > 0 ~ "Dominated (More costly, less effective)",
      TRUE ~ "Inconclusive"
    )
  )

# Display the summary table
ce_summary_display <- ce_structure_summary %>%
  select(Structure, Empower_Cost, Usual_Care_Cost, Incremental_Cost,
         Empower_DALY, Usual_Care_DALY, Incremental_DALY, 
         Economic_Outcome)

kable(ce_summary_display,
      caption = "Cost-Effectiveness Results Across Model Structures",
      col.names = c("Model Structure", "Empower Cost (USD)", "Usual Care Cost (USD)", 
                    "Incremental Cost (USD)", "Empower DALYs", "Usual Care DALYs", 
                    "Incremental DALYs Averted", "Economic Outcome"),
      align = c('l', 'r', 'r', 'r', 'r', 'r', 'r', 'l'),
      digits = c(0, 2, 2, 2, 2, 2, 4, 0)) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  row_spec(0, background = "#f0f0f0", bold = TRUE)

Cost-Effectiveness Results Across Model Structures
Model Structure	Empower Cost (USD)	Usual Care Cost (USD)	Incremental Cost (USD)	Empower DALYs	Usual Care DALYs	Incremental DALYs Averted	Economic Outcome
Structure 1 (Direct Risk)	4781.48	4212.30	569.18	7.38	7.49	0.1089	$5,227 per DALY averted
Structure 2 (Composite Risk)	4069.40	3726.74	342.66	7.76	7.80	0.0432	$7,925 per DALY averted
Structure 3 (Globorisk)	4979.66	4422.52	557.14	7.50	7.61	0.1115	$4,997 per DALY averted

12.1 Comparative Analysis of Model Structure Outputs

cat("\n## Comparative Analysis of Model Structures\n\n")

## 
## ## Comparative Analysis of Model Structures

cat("### Structure 1 (Direct Risk) - Outputs\n\n")

## ### Structure 1 (Direct Risk) - Outputs

cat(sprintf("- **Empower Health cost:** $%.2f\n", ce_structure_summary$Empower_Cost[1]))

## - **Empower Health cost:** $4781.48

cat(sprintf("- **Usual Care cost:** $%.2f\n", ce_structure_summary$Usual_Care_Cost[1]))

## - **Usual Care cost:** $4212.30

cat(sprintf("- **Incremental cost:** $%.2f\n", ce_structure_summary$Incremental_Cost[1]))

## - **Incremental cost:** $569.18

cat(sprintf("- **Empower Health DALYs:** %.2f\n", ce_structure_summary$Empower_DALY[1]))

## - **Empower Health DALYs:** 7.38

cat(sprintf("- **Usual Care DALYs:** %.2f\n", ce_structure_summary$Usual_Care_DALY[1]))

## - **Usual Care DALYs:** 7.49

cat(sprintf("- **Incremental DALYs averted:** %.4f\n", ce_structure_summary$Incremental_DALY[1]))

## - **Incremental DALYs averted:** 0.1089

cat(sprintf("- **Economic outcome:** %s\n\n", ce_structure_summary$Economic_Outcome[1]))

## - **Economic outcome:** $5,227 per DALY averted

cat("### Structure 2 (Composite Risk) - Outputs\n\n")

## ### Structure 2 (Composite Risk) - Outputs

cat(sprintf("- **Empower Health cost:** $%.2f\n", ce_structure_summary$Empower_Cost[2]))

## - **Empower Health cost:** $4069.40

cat(sprintf("- **Usual Care cost:** $%.2f\n", ce_structure_summary$Usual_Care_Cost[2]))

## - **Usual Care cost:** $3726.74

cat(sprintf("- **Incremental cost:** $%.2f\n", ce_structure_summary$Incremental_Cost[2]))

## - **Incremental cost:** $342.66

cat(sprintf("- **Empower Health DALYs:** %.2f\n", ce_structure_summary$Empower_DALY[2]))

## - **Empower Health DALYs:** 7.76

cat(sprintf("- **Usual Care DALYs:** %.2f\n", ce_structure_summary$Usual_Care_DALY[2]))

## - **Usual Care DALYs:** 7.80

cat(sprintf("- **Incremental DALYs averted:** %.4f\n", ce_structure_summary$Incremental_DALY[2]))

## - **Incremental DALYs averted:** 0.0432

cat(sprintf("- **Economic outcome:** %s\n\n", ce_structure_summary$Economic_Outcome[2]))

## - **Economic outcome:** $7,925 per DALY averted

cat("### Structure 3 (Globorisk) - Outputs\n\n")

## ### Structure 3 (Globorisk) - Outputs

cat(sprintf("- **Empower Health cost:** $%.2f\n", ce_structure_summary$Empower_Cost[3]))

## - **Empower Health cost:** $4979.66

cat(sprintf("- **Usual Care cost:** $%.2f\n", ce_structure_summary$Usual_Care_Cost[3]))

## - **Usual Care cost:** $4422.52

cat(sprintf("- **Incremental cost:** $%.2f\n", ce_structure_summary$Incremental_Cost[3]))

## - **Incremental cost:** $557.14

cat(sprintf("- **Empower Health DALYs:** %.2f\n", ce_structure_summary$Empower_DALY[3]))

## - **Empower Health DALYs:** 7.50

cat(sprintf("- **Usual Care DALYs:** %.2f\n", ce_structure_summary$Usual_Care_DALY[3]))

## - **Usual Care DALYs:** 7.61

cat(sprintf("- **Incremental DALYs averted:** %.4f\n", ce_structure_summary$Incremental_DALY[3]))

## - **Incremental DALYs averted:** 0.1115

cat(sprintf("- **Economic outcome:** %s\n\n", ce_structure_summary$Economic_Outcome[3]))

## - **Economic outcome:** $4,997 per DALY averted

12.2 Key Differences Between Model Structures

# Create comparison data for visualization
comparison_data <- ce_structure_summary %>%
  select(Structure, Incremental_Cost, Incremental_DALY, ICER) %>%
  mutate(
    ICER_Display = ifelse(!is.na(ICER), paste0("$", format(round(ICER, 0), big.mark = ",")), 
                          ifelse(Incremental_DALY > 0 & Incremental_Cost < 0, "Dominant", "Dominated"))
  )

# Plot 1: Incremental DALYs Averted
p1 <- ggplot(comparison_data, aes(x = Structure, y = Incremental_DALY, fill = Structure)) +
  geom_bar(stat = "identity", alpha = 0.8) +
  labs(title = "Incremental DALYs Averted",
       x = "", y = "DALYs Averted") +
  theme_minimal() +
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 15, hjust = 1)) +
  scale_fill_manual(values = c("Structure 1 (Direct Risk)" = "steelblue", 
                               "Structure 2 (Composite Risk)" = "darkred",
                               "Structure 3 (Globorisk)" = "darkgreen")) +
  geom_text(aes(label = round(Incremental_DALY, 4)), vjust = -0.5, size = 3)

# Plot 2: Incremental Cost
p2 <- ggplot(comparison_data, aes(x = Structure, y = Incremental_Cost, fill = Structure)) +
  geom_bar(stat = "identity", alpha = 0.8) +
  labs(title = "Incremental Cost",
       x = "", y = "Cost (USD)") +
  theme_minimal() +
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 15, hjust = 1)) +
  scale_fill_manual(values = c("Structure 1 (Direct Risk)" = "steelblue", 
                               "Structure 2 (Composite Risk)" = "darkred",
                               "Structure 3 (Globorisk)" = "darkgreen")) +
  geom_text(aes(label = paste0("$", round(Incremental_Cost, 0))), vjust = -0.5, size = 3)

# Plot 3: ICER Comparison
p3 <- ggplot(comparison_data %>% filter(!is.na(ICER)), 
             aes(x = Structure, y = ICER, fill = Structure)) +
  geom_bar(stat = "identity", alpha = 0.8) +
  labs(title = "ICER (Cost per DALY Averted)",
       x = "", y = "ICER (USD/DALY)") +
  theme_minimal() +
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 15, hjust = 1)) +
  scale_fill_manual(values = c("Structure 1 (Direct Risk)" = "steelblue", 
                               "Structure 2 (Composite Risk)" = "darkred",
                               "Structure 3 (Globorisk)" = "darkgreen")) +
  geom_text(aes(label = paste0("$", format(round(ICER, 0), big.mark = ","))), 
            vjust = -0.5, size = 3)

# Arrange plots
library(gridExtra)
grid.arrange(p1, p2, p3, ncol = 3)

Comparison of Different Kenya CVD Model Structures: Structure 1 (Direct Risk) vs Structure 2 (Composite Risk) vs Structure 3 (Globorisk)

James Oguta

2026-03-25