1. Introduction

This analysis evaluates the distributional impact of Empower Health intervention compared to usual care across wealth quintiles, regions, and sex in Kenya. The analysis includes:

Health benefits (DALYs averted) and cost burden by equity stratifiers

Concentration curves for health benefits and costs

Cost-effectiveness ICERs by equity metric

Discounting at 3% per year (Kenya GDP per capita = $2,200)

Clear Workspace

rm(list=ls())
  1. Load Required Libraries
library(tidyverse)
library(ggplot2)
library(gridExtra)
library(scales)
  1. Define Parameters and Load Data
# Discount rate (3% per year)
DISCOUNT_RATE <- 0.03

# Kenya GDP per capita (WTP threshold)
WTP_THRESHOLD <- 2200

# Health state definitions
death_states <- c("D")
morbidity_states <- c("MI", "ST", "PS", "PM")

# Define file paths
base_path <- "/Users/jamesoguta/Documents/James Oguta/PhD Thesis_James Oguta/James Oguta Model/Kenya CVD Model/KenyaCVDModelFinal/data/"

file_path_Emp <- "/Users/jamesoguta/Documents/James Oguta/PhD Thesis_James Oguta/James Oguta Model/Kenya CVD Model/KenyaCVDModelFinal/sim_results/Empower Health/sim_1_data.rds"
file_path_UC <- "/Users/jamesoguta/Documents/James Oguta/PhD Thesis_James Oguta/James Oguta Model/Kenya CVD Model/KenyaCVDModelFinal/sim_results/Usual Care/sim_1_data.rds"

# Load simulation data
cat("Loading simulation data...\n")
## Loading simulation data...
sim_data_Emp <- readRDS(file_path_Emp)
sim_data_UC <- readRDS(file_path_UC)

# Extract matrices for Empower Health
m_States_Emp <- sim_data_Emp$m_States
m_Costs_Emp  <- sim_data_Emp$m_Costs
m_Effs_Emp   <- sim_data_Emp$m_Effs

# Extract matrices for Usual Care
m_States_UC <- sim_data_UC$m_States
m_Costs_UC  <- sim_data_UC$m_Costs
m_Effs_UC   <- sim_data_UC$m_Effs

# Load individual characteristics
load(paste0(base_path, "m_indi_features_3.rda"))

cat("✓ All datasets loaded successfully\n")
## ✓ All datasets loaded successfully
cat(sprintf("  Individuals: %d\n", nrow(m_States_UC)))
##   Individuals: 100000
cat(sprintf("  Cycles: %d\n", ncol(m_States_UC)))
##   Cycles: 61
  1. Apply Discounting
# Create discount factors
n_cycles <- ncol(m_States_UC)
cycles <- 0:(n_cycles - 1)
discount_factors <- 1 / (1 + DISCOUNT_RATE)^cycles

# Discounting function
discount_matrix <- function(matrix_data, discount_factors) {
  sweep(matrix_data, 2, discount_factors, FUN = "*")
}

# Apply discounting to costs and DALYs
cat("Applying discounting...\n")
## Applying discounting...
m_Costs_Emp_disc <- discount_matrix(m_Costs_Emp, discount_factors)
m_Effs_Emp_disc <- discount_matrix(m_Effs_Emp, discount_factors)
m_Costs_UC_disc <- discount_matrix(m_Costs_UC, discount_factors)
m_Effs_UC_disc <- discount_matrix(m_Effs_UC, discount_factors)
cat("✓ Discounting applied\n")
## ✓ Discounting applied
  1. Process Individual Characteristics
features_df <- as.data.frame(m_indi_features_3, stringsAsFactors = FALSE)

# Clean wealth variable
wealth_clean <- gsub(".*?([0-9]+).*", "\\1", features_df$wealth)
features_df$wealth_quintile_num <- as.numeric(wealth_clean)
features_df$wealth_quintile <- factor(features_df$wealth_quintile_num,
                                       levels = 1:5,
                                       labels = c("Q1_Poorest", "Q2", "Q3", "Q4", "Q5_Richest"))

# Process other variables
features_df$region <- as.factor(features_df$region)
features_df$female <- as.numeric(features_df$female)
features_df$male <- as.numeric(features_df$male)
features_df$sex <- ifelse(features_df$female == 1, "Female", 
                          ifelse(features_df$male == 1, "Male", "Unknown"))
features_df$sex <- as.factor(features_df$sex)
features_df$individual_id <- rownames(m_States_UC)

# Display summary
cat("\n=== INDIVIDUAL CHARACTERISTICS ===\n")
## 
## === INDIVIDUAL CHARACTERISTICS ===
cat("Wealth distribution:\n")
## Wealth distribution:
print(table(features_df$wealth_quintile))
## 
## Q1_Poorest         Q2         Q3         Q4 Q5_Richest 
##      18251      23373      23267      22430      12679
cat("\nSex distribution:\n")
## 
## Sex distribution:
print(table(features_df$sex))
## 
## Female   Male 
##  47597  52403
cat("\nRegion distribution:\n")
## 
## Region distribution:
print(table(features_df$region))
## 
##       Central         Coast       Eastern       Nairobi North Eastern 
##         15468          4444         19148          9419          2842 
##        Nyanza   Rift Valley       Western 
##         10214         24660         13805
  1. Process Intervention Arms
process_arm <- function(states_matrix, costs_matrix_disc, dalys_matrix_disc, arm_name) {
  n_ind <- nrow(states_matrix)
  n_cycles <- ncol(states_matrix)
  
  cat(sprintf("Processing %s: %d individuals\n", arm_name, n_ind))
  
  results <- data.frame(
    individual_id = rownames(states_matrix),
    arm = arm_name,
    total_costs = rowSums(costs_matrix_disc, na.rm = TRUE),
    total_dalys = rowSums(dalys_matrix_disc, na.rm = TRUE)
  )
  
  # Detect death
  is_death_matrix <- matrix(states_matrix %in% death_states, nrow = n_ind, ncol = n_cycles)
  death_cycle <- apply(is_death_matrix, 1, function(x) {
    idx <- which(x)[1]
    ifelse(is.na(idx), n_cycles + 1, idx)
  })
  
  # Detect morbidity
  is_morbidity_matrix <- matrix(states_matrix %in% morbidity_states, nrow = n_ind, ncol = n_cycles)
  morbidity_count <- rowSums(is_morbidity_matrix, na.rm = TRUE)
  
  results$death_cycle <- death_cycle
  results$cycles_survived <- pmin(death_cycle, n_cycles)
  results$alive_at_end <- death_cycle > n_cycles
  results$had_morbidity <- morbidity_count > 0
  
  return(results)
}

# Process both arms
cat("\n=== PROCESSING INTERVENTION ARMS ===\n")
## 
## === PROCESSING INTERVENTION ARMS ===
results_Emp <- process_arm(m_States_Emp, m_Costs_Emp_disc, m_Effs_Emp_disc, "Empower")
## Processing Empower: 100000 individuals
results_UC <- process_arm(m_States_UC, m_Costs_UC_disc, m_Effs_UC_disc, "UsualCare")
## Processing UsualCare: 100000 individuals
  1. Create Comparison Dataset
df_compare <- results_Emp %>%
  select(individual_id, total_costs_Emp = total_costs, total_dalys_Emp = total_dalys,
         alive_at_end_Emp = alive_at_end, had_morbidity_Emp = had_morbidity) %>%
  left_join(
    results_UC %>% select(individual_id, total_costs_UC = total_costs, 
                          total_dalys_UC = total_dalys, alive_at_end_UC = alive_at_end,
                          had_morbidity_UC = had_morbidity),
    by = "individual_id"
  ) %>%
  left_join(features_df, by = "individual_id") %>%
  mutate(
    delta_costs = total_costs_Emp - total_costs_UC,
    delta_dalys = total_dalys_Emp - total_dalys_UC,
    dalys_averted = -delta_dalys
  )

cat(sprintf("Comparison dataframe: %d rows\n", nrow(df_compare)))
## Comparison dataframe: 100000 rows
  1. Subgroup Analysis Function
subgroup_analysis <- function(data, group_var) {
  
  groups <- unique(data[[group_var]])
  groups <- groups[!is.na(groups)]
  results <- list()
  
  for (g in groups) {
    sub_data <- data[data[[group_var]] == g, ]
    n <- nrow(sub_data)
    if (n == 0) next
    
    # Mean outcomes
    mean_delta_costs <- mean(sub_data$delta_costs, na.rm = TRUE)
    mean_dalys_averted <- mean(sub_data$dalys_averted, na.rm = TRUE)
    
    # Standard errors
    se_costs <- sd(sub_data$delta_costs, na.rm = TRUE) / sqrt(n)
    se_dalys <- sd(sub_data$dalys_averted, na.rm = TRUE) / sqrt(n)
    
    # ICER
    if (mean_dalys_averted > 0) {
      icer <- mean_delta_costs / mean_dalys_averted
      icer_text <- sprintf("$%.0f", icer)
      cost_effective <- icer < WTP_THRESHOLD
    } else {
      icer <- NA
      icer_text <- "NA"
      cost_effective <- FALSE
    }
    
    # Confidence intervals
    costs_ci <- c(mean_delta_costs - 1.96 * se_costs, mean_delta_costs + 1.96 * se_costs)
    dalys_ci <- c(mean_dalys_averted - 1.96 * se_dalys, mean_dalys_averted + 1.96 * se_dalys)
    
    results[[as.character(g)]] <- data.frame(
      subgroup = as.character(g),
      n = n,
      delta_costs = mean_delta_costs,
      costs_ci_lower = costs_ci[1],
      costs_ci_upper = costs_ci[2],
      dalys_averted = mean_dalys_averted,
      dalys_ci_lower = dalys_ci[1],
      dalys_ci_upper = dalys_ci[2],
      icer = icer,
      icer_text = icer_text,
      cost_effective = cost_effective
    )
  }
  
  return(do.call(rbind, results))
}
  1. Run Subgroup Analyses
# Wealth quintile analysis
wealth_results <- subgroup_analysis(df_compare, "wealth_quintile")
wealth_results$subgroup <- factor(wealth_results$subgroup,
                                   levels = c("Q1_Poorest", "Q2", "Q3", "Q4", "Q5_Richest"))
wealth_results <- wealth_results %>% arrange(subgroup)

cat("\n=== WEALTH QUINTILE RESULTS ===\n")
## 
## === WEALTH QUINTILE RESULTS ===
print(wealth_results %>% select(subgroup, n, dalys_averted, delta_costs, icer_text))
##              subgroup     n dalys_averted delta_costs icer_text
## Q1_Poorest Q1_Poorest 18251    0.09988889    384.5282     $3850
## Q2                 Q2 23373    0.11187154    432.2140     $3863
## Q3                 Q3 23267    0.11779849    480.9004     $4082
## Q4                 Q4 22430    0.12649013    499.6531     $3950
## Q5_Richest Q5_Richest 12679    0.13123331    559.2831     $4262
# Sex analysis
sex_results <- subgroup_analysis(df_compare, "sex")
cat("\n=== SEX RESULTS ===\n")
## 
## === SEX RESULTS ===
print(sex_results %>% select(subgroup, n, dalys_averted, delta_costs, icer_text))
##        subgroup     n dalys_averted delta_costs icer_text
## Female   Female 47597     0.1150564    524.7948     $4561
## Male       Male 52403     0.1183788    412.7432     $3487
# Regional analysis
region_counts <- table(df_compare$region)
region_percent <- prop.table(region_counts) * 100
major_regions <- names(region_counts)[region_percent >= 1]
df_regions_major <- df_compare %>% filter(region %in% major_regions)
region_results <- subgroup_analysis(df_regions_major, "region")
region_results <- region_results %>% arrange(desc(dalys_averted))

cat("\n=== REGIONAL RESULTS (Top 10) ===\n")
## 
## === REGIONAL RESULTS (Top 10) ===
print(head(region_results %>% select(subgroup, n, dalys_averted, icer_text), 10))
##                    subgroup     n dalys_averted icer_text
## Nairobi             Nairobi  9419    0.13666225     $4260
## Coast                 Coast  4444    0.13272881     $3663
## Eastern             Eastern 19148    0.13168074     $3548
## Central             Central 15468    0.11966531     $4274
## Western             Western 13805    0.11803885     $3330
## Nyanza               Nyanza 10214    0.10238659     $4714
## Rift Valley     Rift Valley 24660    0.10155691     $4232
## North Eastern North Eastern  2842    0.08816788     $4602
  1. Overall Results
mean_cost_Emp <- mean(df_compare$total_costs_Emp, na.rm = TRUE)
mean_cost_UC <- mean(df_compare$total_costs_UC, na.rm = TRUE)
mean_daly_Emp <- mean(df_compare$total_dalys_Emp, na.rm = TRUE)
mean_daly_UC <- mean(df_compare$total_dalys_UC, na.rm = TRUE)

delta_costs <- mean_cost_Emp - mean_cost_UC
delta_dalys <- mean_daly_Emp - mean_daly_UC
dalys_averted <- -delta_dalys
icer_overall <- delta_costs / dalys_averted

cat("\n=== OVERALL RESULTS ===\n")
## 
## === OVERALL RESULTS ===
cat(sprintf("Empower: Cost = $%.2f, DALYs = %.6f\n", mean_cost_Emp, mean_daly_Emp))
## Empower: Cost = $5170.83, DALYs = 7.384656
cat(sprintf("Usual Care: Cost = $%.2f, DALYs = %.6f\n", mean_cost_UC, mean_daly_UC))
## Usual Care: Cost = $4704.75, DALYs = 7.501453
cat(sprintf("Incremental Cost: $%.2f\n", delta_costs))
## Incremental Cost: $466.08
cat(sprintf("DALYs Averted: %.6f\n", dalys_averted))
## DALYs Averted: 0.116797
cat(sprintf("ICER: $%.0f per DALY averted\n", icer_overall))
## ICER: $3990 per DALY averted
cat(sprintf("Cost-effective at Kenya GDP ($%.0f): %s\n", WTP_THRESHOLD, 
    ifelse(icer_overall < WTP_THRESHOLD, "YES", "NO")))
## Cost-effective at Kenya GDP ($2200): NO
  1. Concentration Curves
concentration_curve <- function(data, var_name, wealth_var = "wealth_quintile_num") {
  
  valid <- complete.cases(data[[var_name]], data[[wealth_var]])
  values <- data[[var_name]][valid]
  wealth <- data[[wealth_var]][valid]
  
  if (length(values) == 0) return(NULL)
  
  # Sort by wealth (poorest to richest)
  order_idx <- order(wealth)
  values_sorted <- values[order_idx]
  
  # Calculate cumulative shares
  n <- length(values_sorted)
  cum_pop <- (1:n) / n
  cum_values <- cumsum(values_sorted) / sum(values_sorted)
  
  # Calculate concentration index
  mu <- mean(values_sorted)
  if (mu != 0) {
    C <- (2 / (n * mu)) * sum(cum_pop * values_sorted) - 1
  } else {
    C <- NA
  }
  
  return(data.frame(Population = cum_pop, Cumulative = cum_values, C_index = C))
}

# Generate concentration curves
health_curve <- concentration_curve(df_compare, "dalys_averted", "wealth_quintile_num")
costs_curve <- concentration_curve(df_compare, "delta_costs", "wealth_quintile_num")

if (!is.null(health_curve)) {
  cat(sprintf("\nHealth benefits C-index: %.4f (POSITIVE = curve below diagonal)\n", unique(health_curve$C_index)))
  cat("Interpretation: PRO-RICH (richer individuals gain more health benefit)\n")
}
## 
## Health benefits C-index: 0.0451 (POSITIVE = curve below diagonal)
## Interpretation: PRO-RICH (richer individuals gain more health benefit)
if (!is.null(costs_curve)) {
  cat(sprintf("\nCosts C-index: %.4f (POSITIVE = curve below diagonal)\n", unique(costs_curve$C_index)))
  cat("Interpretation: PROGRESSIVE (richer individuals pay more - equitable)\n")
}
## 
## Costs C-index: 0.0646 (POSITIVE = curve below diagonal)
## Interpretation: PROGRESSIVE (richer individuals pay more - equitable)
  1. Costs and Benefits Analysis
# Overall means for reference lines
overall_dalys <- mean(df_compare$dalys_averted, na.rm = TRUE)
overall_costs <- mean(df_compare$delta_costs, na.rm = TRUE)

# Panel A: Health benefits by wealth
panel_A <- ggplot(wealth_results, aes(x = dalys_averted, y = subgroup)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red", size = 0.8) +
  geom_vline(xintercept = overall_dalys, linetype = "dotted", color = "blue", size = 0.8) +
  geom_point(size = 4, color = "steelblue") +
  geom_errorbarh(aes(xmin = dalys_ci_lower, xmax = dalys_ci_upper), height = 0.2, size = 0.8, color = "steelblue") +
  annotate("text", x = overall_dalys + 0.01, y = 5.2, label = sprintf("Mean = %.4f", overall_dalys), size = 3, color = "blue") +
  labs(x = "\nDALYs Averted", y = "Wealth Quintile\n",
       title = "A) Health Benefits by Wealth Quintile",
       subtitle = "Positive = beneficial for Empower | Blue dotted line = population mean") +
  theme_minimal(base_size = 11) +
  theme(plot.title = element_text(face = "bold"))

# Panel B: Costs by wealth
panel_B <- ggplot(wealth_results, aes(x = delta_costs, y = subgroup)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red", size = 0.8) +
  geom_vline(xintercept = overall_costs, linetype = "dotted", color = "blue", size = 0.8) +
  geom_point(size = 4, color = "darkred") +
  geom_errorbarh(aes(xmin = costs_ci_lower, xmax = costs_ci_upper), height = 0.2, size = 0.8, color = "darkred") +
  annotate("text", x = overall_costs + 10, y = 5.2, label = sprintf("Mean = $%.0f", overall_costs), size = 3, color = "blue") +
  labs(x = "\nIncremental Cost ($)", y = "Wealth Quintile\n",
       title = "B) Cost Burden by Wealth Quintile",
       subtitle = "Positive = higher cost for Empower | Blue dotted line = population mean") +
  theme_minimal(base_size = 11) +
  theme(plot.title = element_text(face = "bold"))

# Panel C: Health concentration curve
if (!is.null(health_curve)) {
  # Determine interpretation text based on curve position
  curve_position <- ifelse(unique(health_curve$C_index) > 0, "below", "above")
  
  panel_C <- ggplot(health_curve, aes(x = Population, y = Cumulative)) +
    geom_line(size = 1.2, color = "steelblue") +
    geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "gray50", size = 0.8) +
    annotate("text", x = 0.7, y = 0.2, 
             label = sprintf("C-index = %.4f (POSITIVE = curve below diagonal)", unique(health_curve$C_index)), 
             size = 3.5, fontface = "bold", color = "steelblue") +
    annotate("text", x = 0.7, y = 0.12, 
             label = ifelse(unique(health_curve$C_index) > 0, 
                           "Interpretation: PRO-RICH\n(Curve below diagonal = richer gain more benefit)\n→ INEQUITABLE (widens health inequalities)",
                           "Interpretation: PRO-POOR\n(Curve above diagonal = poorer gain more benefit)\n→ EQUITABLE (reduces health inequalities)"), 
             size = 2.8, color = "steelblue", lineheight = 1.2) +
    labs(x = "\nCumulative population (poorest to richest)", 
         y = "Cumulative DALYs averted\n",
         title = "C) Health Benefits Concentration Curve",
         subtitle = "Curve below diagonal (C>0) = PRO-RICH | Curve above diagonal (C<0) = PRO-POOR") +
    theme_minimal(base_size = 11) +
    theme(plot.title = element_text(face = "bold"))
} else {
  panel_C <- ggplot() + annotate("text", x=0.5, y=0.5, label="Insufficient data") + theme_void()
}

# Panel D: Costs concentration curve
if (!is.null(costs_curve)) {
  panel_D <- ggplot(costs_curve, aes(x = Population, y = Cumulative)) +
    geom_line(size = 1.2, color = "darkred") +
    geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "gray50", size = 0.8) +
    annotate("text", x = 0.7, y = 0.2, 
             label = sprintf("C-index = %.4f (POSITIVE = curve below diagonal)", unique(costs_curve$C_index)), 
             size = 3.5, fontface = "bold", color = "darkred") +
    annotate("text", x = 0.7, y = 0.12, 
             label = ifelse(unique(costs_curve$C_index) > 0, 
                           "Interpretation: PROGRESSIVE\n(Curve below diagonal = richer pay more)\n→ EQUITABLE (fair financing)",
                           "Interpretation: REGRESSIVE\n(Curve above diagonal = poorer pay more)\n→ INEQUITABLE (unfair burden)"), 
             size = 2.8, color = "darkred", lineheight = 1.2) +
    labs(x = "\nCumulative population (poorest to richest)", 
         y = "Cumulative incremental costs\n",
         title = "D) Cost Burden Concentration Curve",
         subtitle = "Curve below diagonal (C>0) = PROGRESSIVE | Curve above diagonal (C<0) = REGRESSIVE") +
    theme_minimal(base_size = 11) +
    theme(plot.title = element_text(face = "bold"))
} else {
  panel_D <- ggplot() + annotate("text", x=0.5, y=0.5, label="Insufficient data") + theme_void()
}

# Arrange all panels
grid.arrange(panel_A, panel_B, panel_C, panel_D, ncol = 2, nrow = 2,
             top = grid::textGrob("Equity Analysis: Costs and Health Benefits", 
                                 gp = grid::gpar(fontsize = 14, fontface = "bold")))

13. Sex and Regional Analyses

# Overall means for reference lines
overall_dalys <- mean(df_compare$dalys_averted, na.rm = TRUE)
overall_costs <- mean(df_compare$delta_costs, na.rm = TRUE)

# Panel A: Health benefits by wealth
panel_A <- ggplot(wealth_results, aes(x = dalys_averted, y = subgroup)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red", size = 0.8) +
  geom_vline(xintercept = overall_dalys, linetype = "dotted", color = "blue", size = 0.8) +
  geom_point(size = 4, color = "steelblue") +
  geom_errorbarh(aes(xmin = dalys_ci_lower, xmax = dalys_ci_upper), height = 0.2, size = 0.8, color = "steelblue") +
  annotate("text", x = overall_dalys + 0.01, y = 5.2, label = sprintf("Mean = %.4f", overall_dalys), size = 3, color = "blue") +
  labs(x = "\nDALYs Averted", y = "Wealth Quintile\n",
       title = "A) Health Benefits by Wealth Quintile",
       subtitle = "Positive = beneficial for Empower | Blue dotted line = population mean") +
  theme_minimal(base_size = 11) +
  theme(plot.title = element_text(face = "bold"))

# Panel B: Costs by wealth
panel_B <- ggplot(wealth_results, aes(x = delta_costs, y = subgroup)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red", size = 0.8) +
  geom_vline(xintercept = overall_costs, linetype = "dotted", color = "blue", size = 0.8) +
  geom_point(size = 4, color = "darkred") +
  geom_errorbarh(aes(xmin = costs_ci_lower, xmax = costs_ci_upper), height = 0.2, size = 0.8, color = "darkred") +
  annotate("text", x = overall_costs + 10, y = 5.2, label = sprintf("Mean = $%.0f", overall_costs), size = 3, color = "blue") +
  labs(x = "\nIncremental Cost ($)", y = "Wealth Quintile\n",
       title = "B) Cost Burden by Wealth Quintile",
       subtitle = "Positive = higher cost for Empower | Blue dotted line = population mean") +
  theme_minimal(base_size = 11) +
  theme(plot.title = element_text(face = "bold"))

# Panel C: Health concentration curve
if (!is.null(health_curve)) {
  panel_C <- ggplot(health_curve, aes(x = Population, y = Cumulative)) +
    geom_line(size = 1.2, color = "steelblue") +
    geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "gray50", size = 0.8) +
    annotate("text", x = 0.7, y = 0.2, 
             label = sprintf("C-index = %.4f", unique(health_curve$C_index)), 
             size = 4, fontface = "bold", color = "steelblue") +
    annotate("text", x = 0.7, y = 0.12, 
             label = ifelse(unique(health_curve$C_index) > 0, 
                           "Interpretation: PRO-RICH\n(Richer gain more benefit)",
                           "Interpretation: PRO-POOR\n(Poorer gain more benefit)"), 
             size = 3, color = "steelblue") +
    labs(x = "\nCumulative population (poorest to richest)", 
         y = "Cumulative DALYs averted\n",
         title = "C) Health Benefits Concentration Curve",
         subtitle = "Curve below diagonal = PRO-RICH | Above diagonal = PRO-POOR") +
    theme_minimal(base_size = 11) +
    theme(plot.title = element_text(face = "bold"))
} else {
  panel_C <- ggplot() + annotate("text", x=0.5, y=0.5, label="Insufficient data") + theme_void()
}

# Panel D: Costs concentration curve
if (!is.null(costs_curve)) {
  panel_D <- ggplot(costs_curve, aes(x = Population, y = Cumulative)) +
    geom_line(size = 1.2, color = "darkred") +
    geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "gray50", size = 0.8) +
    annotate("text", x = 0.7, y = 0.2, 
             label = sprintf("C-index = %.4f", unique(costs_curve$C_index)), 
             size = 4, fontface = "bold", color = "darkred") +
    annotate("text", x = 0.7, y = 0.12, 
             label = ifelse(unique(costs_curve$C_index) > 0, 
                           "Interpretation: PROGRESSIVE\n(Richer pay more - fair)",
                           "Interpretation: REGRESSIVE\n(Poorer pay more - unfair)"), 
             size = 3, color = "darkred") +
    labs(x = "\nCumulative population (poorest to richest)", 
         y = "Cumulative incremental costs\n",
         title = "D) Cost Burden Concentration Curve",
         subtitle = "Curve above diagonal = REGRESSIVE | Below diagonal = PROGRESSIVE") +
    theme_minimal(base_size = 11) +
    theme(plot.title = element_text(face = "bold"))
} else {
  panel_D <- ggplot() + annotate("text", x=0.5, y=0.5, label="Insufficient data") + theme_void()
}

# Arrange all panels
grid.arrange(panel_A, panel_B, panel_C, panel_D, ncol = 2, nrow = 2,
             top = grid::textGrob("Equity Analysis: Costs and Health Benefits", 
                                 gp = grid::gpar(fontsize = 14, fontface = "bold")))

14. ICERs by Equity Metric

# Prepare ICER data
icer_wealth <- wealth_results %>%
  filter(!is.na(icer) & is.finite(icer)) %>%
  mutate(category = "Wealth Quintile", 
         label = as.character(subgroup),
         ce_status = ifelse(icer < WTP_THRESHOLD, "Cost-effective", "Not cost-effective"))

icer_sex <- sex_results %>%
  filter(!is.na(icer) & is.finite(icer)) %>%
  mutate(category = "Sex", 
         label = as.character(subgroup),
         ce_status = ifelse(icer < WTP_THRESHOLD, "Cost-effective", "Not cost-effective"))

icer_region <- region_results %>%
  filter(!is.na(icer) & is.finite(icer)) %>%
  mutate(category = "Region", 
         label = as.character(subgroup),
         ce_status = ifelse(icer < WTP_THRESHOLD, "Cost-effective", "Not cost-effective")) %>%
  head(8)

# Panel I: ICERs by wealth quintile
panel_I <- ggplot(icer_wealth, aes(x = icer, y = reorder(label, -as.numeric(subgroup)), fill = ce_status)) +
  geom_bar(stat = "identity", alpha = 0.8, width = 0.7) +
  geom_vline(xintercept = icer_overall, linetype = "dotted", color = "gray50", size = 0.8) +
  geom_text(aes(label = sprintf("$%.0f", icer)), hjust = -0.2, size = 3) +
  scale_fill_manual(values = c("Cost-effective" = "green", "Not cost-effective" = "red")) +
  annotate("text", x = icer_overall + 50, y = 0.5, label = sprintf("Overall ICER = $%.0f", icer_overall), size = 3, color = "gray50") +
  labs(x = "ICER ($ per DALY averted)", y = "",
       title = "A) ICERs by Wealth Quintile") +
  theme_minimal(base_size = 11) +
  theme(legend.position = "bottom", plot.title = element_text(face = "bold"))

# Panel J: ICERs by sex
panel_J <- ggplot(icer_sex, aes(x = icer, y = label, fill = ce_status)) +
  geom_bar(stat = "identity", alpha = 0.8, width = 0.6) +
  geom_vline(xintercept = icer_overall, linetype = "dotted", color = "gray50", size = 0.8) +
  geom_text(aes(label = sprintf("$%.0f", icer)), hjust = -0.2, size = 3) +
  scale_fill_manual(values = c("Cost-effective" = "green", "Not cost-effective" = "red")) +
  annotate("text", x = icer_overall + 50, y = 2.2, label = sprintf("Overall ICER = $%.0f", icer_overall), size = 3, color = "gray50") +
  labs(x = "ICER ($ per DALY averted)", y = "",
       title = "B) ICERs by Sex") +
  theme_minimal(base_size = 11) +
  theme(legend.position = "bottom", plot.title = element_text(face = "bold"))

# Panel K: ICERs by region
if (nrow(icer_region) >= 2) {
  panel_K <- ggplot(icer_region, aes(x = icer, y = reorder(label, icer), fill = ce_status)) +
    geom_bar(stat = "identity", alpha = 0.8, width = 0.6) +
    geom_vline(xintercept = icer_overall, linetype = "dotted", color = "gray50", size = 0.8) +
    geom_text(aes(label = sprintf("$%.0f", icer)), hjust = -0.2, size = 2.5) +
    scale_fill_manual(values = c("Cost-effective" = "green", "Not cost-effective" = "red")) +
    annotate("text", x = icer_overall + 50, y = 8.5, label = sprintf("Overall ICER = $%.0f", icer_overall), size = 2.5, color = "gray50") +
    labs(x = "ICER ($ per DALY averted)", y = "",
         title = "C) ICERs by Region") +
    theme_minimal(base_size = 10) +
    theme(legend.position = "bottom", plot.title = element_text(face = "bold"))
} else {
  panel_K <- ggplot() + annotate("text", x=0.5, y=0.5, label="Insufficient data") + theme_void()
}

# Panel L: Overall ICER comparison
icer_summary <- data.frame(
  Metric = c("Overall", "Poorest (Q1)", "Richest (Q5)", "Male", "Female"),
  ICER = c(icer_overall,
           wealth_results$icer[wealth_results$subgroup == "Q1_Poorest"],
           wealth_results$icer[wealth_results$subgroup == "Q5_Richest"],
           sex_results$icer[sex_results$subgroup == "Male"],
           sex_results$icer[sex_results$subgroup == "Female"]),
  Group = c("Overall", "Wealth", "Wealth", "Sex", "Sex")
)

icer_summary <- icer_summary %>%
  mutate(ce_status = ifelse(ICER < WTP_THRESHOLD, "Cost-effective", "Not cost-effective"))

panel_L <- ggplot(icer_summary, aes(x = reorder(Metric, ICER), y = ICER, fill = ce_status)) +
  geom_bar(stat = "identity", alpha = 0.8, width = 0.7) +
  geom_text(aes(label = sprintf("$%.0f", ICER)), vjust = -0.5, size = 3) +
  scale_fill_manual(values = c("Cost-effective" = "green", "Not cost-effective" = "red")) +
  labs(x = "", y = "ICER ($ per DALY averted)\n",
       title = "D) ICER Comparison Across Subgroups") +
  theme_minimal(base_size = 11) +
  theme(legend.position = "bottom", plot.title = element_text(face = "bold"),
        axis.text.x = element_text(angle = 45, hjust = 1))

# Arrange all panels
grid.arrange(panel_I, panel_J, panel_K, panel_L, ncol = 2, nrow = 2,
             top = grid::textGrob("Cost-Effectiveness ICERs by Equity Metric", 
                                 gp = grid::gpar(fontsize = 14, fontface = "bold")))

15. Export Results

# Save data
write.csv(wealth_results, "kenya_wealth_results.csv", row.names = FALSE)
write.csv(sex_results, "kenya_sex_results.csv", row.names = FALSE)
write.csv(region_results, "kenya_region_results.csv", row.names = FALSE)

if (!is.null(health_curve)) {
  write.csv(health_curve, "kenya_concentration_curve_health.csv", row.names = FALSE)
}
if (!is.null(costs_curve)) {
  write.csv(costs_curve, "kenya_concentration_curve_costs.csv", row.names = FALSE)
}

# Policy summary
policy_summary <- data.frame(
  Metric = c(
    "Discount rate",
    "Overall ICER",
    "Kenya GDP per capita",
    "Cost-effective?",
    "Health Benefits C-index",
    "Health distribution interpretation",
    "Costs C-index",
    "Cost distribution interpretation",
    "Poorest quintile health benefit",
    "Richest quintile health benefit",
    "Poorest quintile cost burden",
    "Richest quintile cost burden",
    "Poorest quintile ICER",
    "Richest quintile ICER",
    "Male ICER",
    "Female ICER"
  ),
  Value = c(
    sprintf("%.1f%%", DISCOUNT_RATE * 100),
    sprintf("$%.0f per DALY", icer_overall),
    sprintf("$%.0f", WTP_THRESHOLD),
    ifelse(icer_overall < WTP_THRESHOLD, "YES", "NO"),
    sprintf("%.4f", ifelse(!is.null(health_curve), unique(health_curve$C_index), NA)),
    ifelse(!is.null(health_curve) && unique(health_curve$C_index) > 0, "PRO-RICH", "PRO-POOR"),
    sprintf("%.4f", ifelse(!is.null(costs_curve), unique(costs_curve$C_index), NA)),
    ifelse(!is.null(costs_curve) && unique(costs_curve$C_index) > 0, "PROGRESSIVE", "REGRESSIVE"),
    sprintf("%.6f", wealth_results$dalys_averted[wealth_results$subgroup == "Q1_Poorest"]),
    sprintf("%.6f", wealth_results$dalys_averted[wealth_results$subgroup == "Q5_Richest"]),
    sprintf("$%.2f", wealth_results$delta_costs[wealth_results$subgroup == "Q1_Poorest"]),
    sprintf("$%.2f", wealth_results$delta_costs[wealth_results$subgroup == "Q5_Richest"]),
    sprintf("$%.0f", wealth_results$icer[wealth_results$subgroup == "Q1_Poorest"]),
    sprintf("$%.0f", wealth_results$icer[wealth_results$subgroup == "Q5_Richest"]),
    sprintf("$%.0f", sex_results$icer[sex_results$subgroup == "Male"]),
    sprintf("$%.0f", sex_results$icer[sex_results$subgroup == "Female"])
  )
)

write.csv(policy_summary, "kenya_equity_policy_summary.csv", row.names = FALSE)
saveRDS(df_compare, "kenya_equity_analysis_data.rds")

cat("\n✓ All results exported successfully\n")
## 
## ✓ All results exported successfully
  1. Summary of Findings
cat("\n")
cat("========================================\n")
## ========================================
cat("EQUITY ANALYSIS COMPLETE\n")
## EQUITY ANALYSIS COMPLETE
cat("========================================\n")
## ========================================
cat(sprintf("Discount rate: %.1f%% per year\n", DISCOUNT_RATE * 100))
## Discount rate: 3.0% per year
cat(sprintf("Overall ICER: $%.0f per DALY averted\n", icer_overall))
## Overall ICER: $3990 per DALY averted
cat("\n--- HEALTH EQUITY ---\n")
## 
## --- HEALTH EQUITY ---
if (!is.null(health_curve)) {
  cat(sprintf("C-index: %.4f\n", unique(health_curve$C_index)))
  if (unique(health_curve$C_index) > 0) {
    cat("Interpretation: PRO-RICH (richer individuals gain more health benefit)\n")
  } else {
    cat("Interpretation: PRO-POOR (poorer individuals gain more health benefit)\n")
  }
}
## C-index: 0.0451
## Interpretation: PRO-RICH (richer individuals gain more health benefit)
cat("\n--- COST BURDEN EQUITY ---\n")
## 
## --- COST BURDEN EQUITY ---
if (!is.null(costs_curve)) {
  cat(sprintf("C-index: %.4f\n", unique(costs_curve$C_index)))
  if (unique(costs_curve$C_index) > 0) {
    cat("Interpretation: PROGRESSIVE (richer individuals bear higher costs - equitable)\n")
  } else {
    cat("Interpretation: REGRESSIVE (poorer individuals bear higher costs - inequitable)\n")
  }
}
## C-index: 0.0646
## Interpretation: PROGRESSIVE (richer individuals bear higher costs - equitable)
cat("\n--- KEY FINDINGS ---\n")
## 
## --- KEY FINDINGS ---
cat(sprintf("• Poorest quintile DALYs averted: %.6f\n", wealth_results$dalys_averted[wealth_results$subgroup == "Q1_Poorest"]))
## • Poorest quintile DALYs averted: 0.099889
cat(sprintf("• Richest quintile DALYs averted: %.6f\n", wealth_results$dalys_averted[wealth_results$subgroup == "Q5_Richest"]))
## • Richest quintile DALYs averted: 0.131233
cat(sprintf("• Wealth gap: %.6f DALYs\n", 
    wealth_results$dalys_averted[wealth_results$subgroup == "Q5_Richest"] - 
    wealth_results$dalys_averted[wealth_results$subgroup == "Q1_Poorest"]))
## • Wealth gap: 0.031344 DALYs
cat("\n========================================\n")
## 
## ========================================