0.1 Code Description

The following R script performs analyses of simulation outputs to assess the impact of varied patient numbers on the model results.

1 Clear the workspace

rm(list = ls())

1.1 Load Libraries

The script begins by loading the necessary R libraries:

library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## âś” dplyr     1.1.4     âś” readr     2.1.6
## âś” forcats   1.0.1     âś” stringr   1.6.0
## âś” ggplot2   4.0.2     âś” tibble    3.3.0
## âś” lubridate 1.9.4     âś” tidyr     1.3.1
## âś” purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## âś– dplyr::filter() masks stats::filter()
## âś– dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(reshape2) # For reshaping data
## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(ggplot2) # For plotting
library(dampack) # For processing PSA results 
library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
library(patchwork) # For combining plots
library(zoo) # For rolling calculationsi
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

2 Import PSA Datasets

# Import PSA datasets
load('/Users/jamesoguta/Documents/James Oguta/PhD Thesis_James Oguta/James Oguta Model/Kenya CVD Model/KenyaCVDModelFinal/data/results_df_numpats_no_trt.rda')
load('/Users/jamesoguta/Documents/James Oguta/PhD Thesis_James Oguta/James Oguta Model/Kenya CVD Model/KenyaCVDModelFinal/data/results_df_numpats_Emp_Health.rda')
load('/Users/jamesoguta/Documents/James Oguta/PhD Thesis_James Oguta/James Oguta Model/Kenya CVD Model/KenyaCVDModelFinal/data/results_df_numpats_UC.rda')

3 Rename datasets

num_pats_no_int <- results_df_numpats_no_trt
num_pats_Emp_Health <- results_df_numpats_Emp_Health
num_pats_UC <- results_df_numpats_UC
# Compute costs per DALY
num_pats_no_int$cost_daly <- num_pats_no_int$cost/num_pats_no_int$daly
num_pats_Emp_Health$cost_daly <- num_pats_Emp_Health$cost/num_pats_Emp_Health$daly
num_pats_UC$cost_daly <- num_pats_UC$cost/num_pats_UC$daly

head(num_pats_no_int)
##   num_patients     cost     daly cost_daly
## 1         5000 1404.409 7.225561  194.3668
## 2         5200 1572.424 7.290990  215.6668
## 3         5400 1530.815 7.309968  209.4148
## 4         5600 1523.123 7.250114  210.0826
## 5         5800 1515.568 7.259803  208.7615
## 6         6000 1579.671 7.433138  212.5174
head(num_pats_Emp_Health)
##   num_patients     cost     daly cost_daly
## 1         5000 5432.464 7.244207  749.9045
## 2         5200 5531.712 7.272240  760.6614
## 3         5400 5464.619 7.197389  759.2502
## 4         5600 5584.427 7.170733  778.7804
## 5         5800 5326.346 7.263128  733.3406
## 6         6000 5541.131 7.210286  768.5037
head(num_pats_UC)
##   num_patients     cost     daly cost_daly
## 1         5000 4871.973 7.374127  660.6848
## 2         5200 5010.039 7.401226  676.9201
## 3         5400 4942.220 7.326093  674.6051
## 4         5600 5013.450 7.291324  687.5911
## 5         5800 4784.787 7.367333  649.4599
## 6         6000 4992.048 7.331792  680.8769

4 Plot costs by number of patients-No intervention

ggplot(num_pats_no_int, aes(x = num_patients, y = cost)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted Costs",
    title = "Mean Discounted Costs by Number of Patients Simulated(No Intervention)"
  ) +
  theme_minimal()

# Plot Dalys by number of patients-No Intervention

ggplot(num_pats_no_int, aes(x = num_patients, y = daly)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted DALYs",
    title = "Mean Discounted DALYs by Number of Patients Simulated(No Intervention)"
  ) +
  theme_minimal()

# Plot Cost per Daly by number of patients-No Intervention

ggplot(num_pats_no_int, aes(x = num_patients, y = cost_daly)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted Cost per Daly",
    title = "Mean Discounted Cost per DALY by Number of Patients Simulated(No Intervention)"
  ) +
  theme_minimal()

# Plot costs by number of patients-Empower Health Intervention

ggplot(num_pats_Emp_Health, aes(x = num_patients, y = cost)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted Costs",
    title = "Mean Discounted Costs by Number of Patients Simulated(Empower Health Intervention)"
  ) +
  theme_minimal()

# Plot Dalys by number of patients-Empower Health Intervention

ggplot(num_pats_Emp_Health, aes(x = num_patients, y = daly)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted DALYs",
    title = "Mean Discounted DALYs by Number of Patients Simulated(Empower Health Intervention)"
  ) +
  theme_minimal()

# Plot Cost per Daly by number of patients-Empower Health Intervention

ggplot(num_pats_Emp_Health, aes(x = num_patients, y = cost_daly)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted Cost per Daly",
    title = "Mean Discounted Cost per DALY by Number of Patients Simulated(Empower Health Intervention)"
  ) +
  theme_minimal()

# Plot costs by number of patients-Usual Care Intervention

ggplot(num_pats_UC, aes(x = num_patients, y = cost)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_UC$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted Costs",
    title = "Mean Discounted Costs by Number of Patients Simulated(Usual Care Intervention)"
  ) +
  theme_minimal()

# Plot Dalys by number of patients-Usual Care Intervention

ggplot(num_pats_UC, aes(x = num_patients, y = daly)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_UC$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted DALYs",
    title = "Mean Discounted DALYs by Number of Patients Simulated(Usual Care Intervention)"
  ) +
  theme_minimal()

# Plot Cost per Daly by number of patients-Usual Care Intervention

ggplot(num_pats_UC, aes(x = num_patients, y = cost_daly)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_UC$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted Cost per Daly",
    title = "Mean Discounted Cost per DALY by Number of Patients Simulated(Usual Care Intervention)"
  ) +
  theme_minimal()

# Combining the datasets

# Add a group identifier to each dataset
num_pats_Emp_Health <- num_pats_Emp_Health %>%
  mutate(intervention = "Emp_Health")

num_pats_UC <- num_pats_UC %>%
  mutate(intervention = "UC")

num_pats_no_int <- num_pats_no_int %>%
  mutate(intervention = "No_Intervention")

# Combine all datasets
combined_data <- bind_rows(num_pats_Emp_Health, num_pats_UC, num_pats_no_int)
head(combined_data)
##   num_patients     cost     daly cost_daly intervention
## 1         5000 5432.464 7.244207  749.9045   Emp_Health
## 2         5200 5531.712 7.272240  760.6614   Emp_Health
## 3         5400 5464.619 7.197389  759.2502   Emp_Health
## 4         5600 5584.427 7.170733  778.7804   Emp_Health
## 5         5800 5326.346 7.263128  733.3406   Emp_Health
## 6         6000 5541.131 7.210286  768.5037   Emp_Health

5 Plot cost by number of patients simulated for all interventions

ggplot(combined_data, aes(x = num_patients, y = cost, color = intervention)) +
  geom_line(alpha = 0.8) +
  geom_point(size = 1) +
  scale_x_continuous(
    breaks = seq(0, max(combined_data$num_patients), by = 100000),
    labels = comma
  ) +
  labs(
    x = "Number of Patients",
    y = "Mean Discounted Costs (USD)",
    title = "Mean Discounted Costs by Number of Patients Simulated-All Interventions",
    color = "Scenario"
  ) +
  geom_smooth(se = FALSE, method = "loess", span = 0.2) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Plot DALYs by number of patients simulated for all interventions

ggplot(combined_data, aes(x = num_patients, y = daly, color = intervention)) +
  geom_line(alpha = 0.8) +
  geom_point(size = 1) +
  scale_x_continuous(
    breaks = seq(0, max(combined_data$num_patients), by = 100000),
    labels = comma
  ) +
  labs(
    x = "Number of Patients",
    y = "Mean Discounted DALYs ",
    title = "Mean Discounted DALYs by Number of Patients Simulated-All Interventions",
    color = "Scenario"
  ) + 
  geom_smooth(se = FALSE, method = "loess", span = 0.2) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

6 Plot Cost per DALY by number of patients simulated for all interventions

ggplot(combined_data, aes(x = num_patients, y = cost_daly, color = intervention)) +
  geom_line(alpha = 0.8) +
  geom_point(size = 1) +
  scale_x_continuous(
    breaks = seq(0, max(combined_data$num_patients), by = 100000),
    labels = comma
  ) +
  labs(
    x = "Number of Patients",
    y = "Mean Discounted Cost per DALY",
    title = "Mean Discounted Discounted Cost per DALY by Number of Patients Simulated-All Interventions",
    color = "Scenario"
  ) + 
  geom_smooth(se = FALSE, method = "loess", span = 0.2) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Plotting ICER Stabilization Curves

7 Merge the datasets by num_patients

icer_df <- num_pats_no_int %>%
  rename(cost_no_int = cost,
         dalys_no_int = daly) %>%
  select(num_patients, cost_no_int, dalys_no_int) %>%
  left_join(num_pats_UC %>%
              rename(cost_uc = cost,
                     dalys_uc = daly) %>%
              select(num_patients, cost_uc, dalys_uc),
            by = c("num_patients")) %>%
  left_join(num_pats_Emp_Health %>%
              rename(cost_emp = cost,
                     dalys_emp = daly) %>%
              select(num_patients, cost_emp, dalys_emp),
            by = c("num_patients"))
head(icer_df)
##   num_patients cost_no_int dalys_no_int  cost_uc dalys_uc cost_emp dalys_emp
## 1         5000    1404.409     7.225561 4871.973 7.374127 5432.464  7.244207
## 2         5200    1572.424     7.290990 5010.039 7.401226 5531.712  7.272240
## 3         5400    1530.815     7.309968 4942.220 7.326093 5464.619  7.197389
## 4         5600    1523.123     7.250114 5013.450 7.291324 5584.427  7.170733
## 5         5800    1515.568     7.259803 4784.787 7.367333 5326.346  7.263128
## 6         6000    1579.671     7.433138 4992.048 7.331792 5541.131  7.210286

8 Calculate ICER for each intervention vs no intervention

icer_df <- icer_df %>%
  mutate(
    incr_cost_uc = cost_uc - cost_no_int,
    incr_dalys_uc = dalys_no_int - dalys_uc,  # DALYs averted
    icer_uc = incr_cost_uc / incr_dalys_uc,

    incr_cost_emp = cost_emp - cost_no_int,
    incr_dalys_emp = dalys_no_int - dalys_emp,
    icer_emp = incr_cost_emp / incr_dalys_emp
  )
head(icer_df)
##   num_patients cost_no_int dalys_no_int  cost_uc dalys_uc cost_emp dalys_emp
## 1         5000    1404.409     7.225561 4871.973 7.374127 5432.464  7.244207
## 2         5200    1572.424     7.290990 5010.039 7.401226 5531.712  7.272240
## 3         5400    1530.815     7.309968 4942.220 7.326093 5464.619  7.197389
## 4         5600    1523.123     7.250114 5013.450 7.291324 5584.427  7.170733
## 5         5800    1515.568     7.259803 4784.787 7.367333 5326.346  7.263128
## 6         6000    1579.671     7.433138 4992.048 7.331792 5541.131  7.210286
##   incr_cost_uc incr_dalys_uc    icer_uc incr_cost_emp incr_dalys_emp
## 1     3467.564   -0.14856597  -23340.23      4028.054   -0.018646379
## 2     3437.615   -0.11023620  -31184.08      3959.288    0.018749674
## 3     3411.405   -0.01612480 -211562.60      3933.804    0.112578987
## 4     3490.327   -0.04121003  -84696.05      4061.304    0.079380966
## 5     3269.220   -0.10753059  -30402.69      3810.779   -0.003325285
## 6     3412.376    0.10134662   33670.35      3961.460    0.222852608
##      icer_emp
## 1  -216023.41
## 2   211165.71
## 3    34942.61
## 4    51162.19
## 5 -1146000.54
## 6    17776.14

9 Plot ICERs vs number of patients

# Reshape for plotting
icer_long <- icer_df %>%
  select(num_patients, icer_uc, icer_emp) %>%
  pivot_longer(cols = starts_with("icer_"), names_to = "strategy", values_to = "icer") %>%
  mutate(strategy = recode(strategy,
                           "icer_uc" = "Usual Care",
                           "icer_emp" = "Empowered Health"))

# Plot
icer_stabilization_curve <- ggplot(icer_long, aes(x = num_patients, y = icer, color = strategy)) +
  geom_point(alpha = 0.5, size = 1) +
  geom_smooth(method = "loess", se = FALSE, span = 0.2) +
  scale_y_continuous(labels = comma, limits = c(0, NA)) +
  scale_x_continuous(breaks = seq(0, max(icer_long$num_patients), by = 100000), labels = comma) +
  labs(
    title = "Mean ICER by Number of Patients Simulated",
    x = "Number of Patients Simulated",
    y = "ICER (USD per DALY averted)",
    color = "Intervention"
  ) +
  theme_minimal()
print(icer_stabilization_curve)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 48 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 48 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_smooth()`).

# Save the plot
ggsave("icer_stabilization_curve.png", width = 8, height = 6)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 48 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 48 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_smooth()`).
ggsave("icer_stabilization_curve.pdf", width = 8, height = 6)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 48 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 48 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_smooth()`).

10 Plotting ICER for Empower Health vs Usual Care by number of patients

11 Join the two datasets

icer_emp_vs_uc <- num_pats_Emp_Health %>%
  rename(cost_emp = cost,
         dalys_emp = daly) %>%
  select(num_patients, cost_emp, dalys_emp) %>%
  left_join(
    num_pats_UC %>%
      rename(cost_uc = cost,
             dalys_uc = daly) %>%
      select(num_patients, cost_uc, dalys_uc),
    by = c("num_patients")
  )
head(icer_emp_vs_uc)
##   num_patients cost_emp dalys_emp  cost_uc dalys_uc
## 1         5000 5432.464  7.244207 4871.973 7.374127
## 2         5200 5531.712  7.272240 5010.039 7.401226
## 3         5400 5464.619  7.197389 4942.220 7.326093
## 4         5600 5584.427  7.170733 5013.450 7.291324
## 5         5800 5326.346  7.263128 4784.787 7.367333
## 6         6000 5541.131  7.210286 4992.048 7.331792

12 Calculate ICER

icer_emp_vs_uc <- icer_emp_vs_uc %>%
  mutate(
    incr_cost = cost_emp - cost_uc,
    dalys_averted = dalys_uc - dalys_emp,  # positive means emp is better
    icer = incr_cost / dalys_averted
  )
head(icer_emp_vs_uc)
##   num_patients cost_emp dalys_emp  cost_uc dalys_uc incr_cost dalys_averted
## 1         5000 5432.464  7.244207 4871.973 7.374127  560.4906     0.1299196
## 2         5200 5531.712  7.272240 5010.039 7.401226  521.6735     0.1289859
## 3         5400 5464.619  7.197389 4942.220 7.326093  522.3991     0.1287038
## 4         5600 5584.427  7.170733 5013.450 7.291324  570.9770     0.1205910
## 5         5800 5326.346  7.263128 4784.787 7.367333  541.5592     0.1042053
## 6         6000 5541.131  7.210286 4992.048 7.331792  549.0833     0.1215060
##       icer
## 1 4314.134
## 2 4044.423
## 3 4058.925
## 4 4734.823
## 5 5197.040
## 6 4518.982

13 Plot the ICER vs number of patients (Empower Health vs Usual Care)

icer_emp_vs_uc_stabilization_curve <- ggplot(icer_emp_vs_uc, aes(x = num_patients, y = icer)) +
  geom_point(alpha = 0.5, size = 1, color = "#1b9e77") +
  geom_smooth(method = "loess", se = FALSE, span = 0.2, color = "#1b9e77") +
  scale_x_continuous(breaks = seq(0, max(icer_emp_vs_uc$num_patients), by = 100000),
                     labels = comma) +
  labs(
    title = NULL,
    x = "Number of Patients Simulated",
    y = "ICER (USD per DALY averted)"
  ) +
  theme_minimal()
print(icer_emp_vs_uc_stabilization_curve)
## `geom_smooth()` using formula = 'y ~ x'

# Save the plot
ggsave("icer_emp_vs_uc_stabilization_curve.png", width = 8, height = 6)
## `geom_smooth()` using formula = 'y ~ x'
ggsave("icer_emp_vs_uc_stabilization_curve.pdf", width = 8, height = 6)
## `geom_smooth()` using formula = 'y ~ x'

14 Plot costs for both interventions (Empower health and usual care) vs number of patients

ggplot(icer_emp_vs_uc, aes(x = num_patients)) +
  geom_line(aes(y = cost_emp, color = "Empower Health"), size = 1) +
  geom_line(aes(y = cost_uc, color = "Usual Care"), size = 1) +
  scale_x_continuous(breaks = seq(0, max(icer_emp_vs_uc$num_patients), by = 100000),
                     labels = comma) +
  labs(
    title = "Mean Discounted Costs by Number of Patients Simulated",
    x = "Number of Patients Simulated",
    y = "Mean Discounted Costs (USD)",
    color = "Intervention"
  ) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Creating a dataset for stabilization analysis

# Option 1: Using rollmeanr for simpler syntax
stabilization_analysis <- icer_emp_vs_uc %>%
  arrange(num_patients) %>%
  mutate(
    # Rolling mean (window of 20)
    rolling_mean = rollmeanr(icer, k = 20, fill = NA),
    # Rolling SD (window of 20)
    rolling_sd = rollapplyr(icer, width = 20, FUN = sd, fill = NA),
    # Rolling coefficient of variation
    rolling_cv = rolling_sd / abs(rolling_mean),
    # Percent change
    pct_change = (rolling_mean / lag(rolling_mean) - 1) * 100
  )

# Alternative: Use zoo's rollapply with proper alignment
stabilization_analysis <- icer_emp_vs_uc %>%
  arrange(num_patients) %>%
  mutate(
    rolling_mean = rollapply(icer, width = 20, 
                            FUN = mean, fill = NA, align = "right", partial = FALSE),
    rolling_sd = rollapply(icer, width = 20, 
                          FUN = sd, fill = NA, align = "right", partial = FALSE),
    rolling_cv = rolling_sd / abs(rolling_mean),
    pct_change = (rolling_mean / lag(rolling_mean) - 1) * 100
  )
head(stabilization_analysis)
##   num_patients cost_emp dalys_emp  cost_uc dalys_uc incr_cost dalys_averted
## 1         5000 5432.464  7.244207 4871.973 7.374127  560.4906     0.1299196
## 2         5200 5531.712  7.272240 5010.039 7.401226  521.6735     0.1289859
## 3         5400 5464.619  7.197389 4942.220 7.326093  522.3991     0.1287038
## 4         5600 5584.427  7.170733 5013.450 7.291324  570.9770     0.1205910
## 5         5800 5326.346  7.263128 4784.787 7.367333  541.5592     0.1042053
## 6         6000 5541.131  7.210286 4992.048 7.331792  549.0833     0.1215060
##       icer rolling_mean rolling_sd rolling_cv pct_change
## 1 4314.134           NA         NA         NA         NA
## 2 4044.423           NA         NA         NA         NA
## 3 4058.925           NA         NA         NA         NA
## 4 4734.823           NA         NA         NA         NA
## 5 5197.040           NA         NA         NA         NA
## 6 4518.982           NA         NA         NA         NA

15 Find ICER Stabilisation Point

# Find where CV drops below 5% and stays below
cv_threshold <- 0.05

# Get indices where CV is below threshold
cv_below <- which(stabilization_analysis$rolling_cv < cv_threshold & 
                  !is.na(stabilization_analysis$rolling_cv))

if (length(cv_below) > 0) {
  # Find the first index where CV drops below threshold AND remains below for next 20 simulations
  stabilization_idx <- cv_below[1]
  
  # Check if it stays below
  remaining <- cv_below[cv_below >= stabilization_idx]
  if (length(remaining) >= 20) {
    min_patients <- stabilization_analysis$num_patients[stabilization_idx]
    cat(sprintf("âś“ ICER stabilizes (CV < %.1f%%) at: %s patients\n", 
                cv_threshold * 100, comma(min_patients)))
  } else {
    cat("âš  ICER hasn't consistently stabilized within simulated range\n")
    min_patients <- stabilization_analysis$num_patients[cv_below[1]]
    cat(sprintf("First crossing at: %s patients (but may not be stable)\n", comma(min_patients)))
  }
} else {
  cat("âś— ICER hasn't reached CV < 5% within simulated range\n")
  cat("Consider simulating more patients\n")
}
## âś“ ICER stabilizes (CV < 5.0%) at: 36,000 patients
# Visualize stabilization (corrected)
if (exists("min_patients")) {
  p1 <- ggplot(stabilization_analysis, aes(x = num_patients)) +
    geom_line(aes(y = rolling_cv), color = "blue", size = 1) +
    geom_hline(yintercept = cv_threshold, linetype = "dashed", color = "red", size = 0.8) +
    geom_vline(xintercept = min_patients, linetype = "dotted", color = "darkgreen", size = 0.8) +
    scale_x_continuous(labels = comma, breaks = seq(0, max(stabilization_analysis$num_patients), by = 100000)) +
    scale_y_continuous(labels = scales::percent, limits = c(0, max(stabilization_analysis$rolling_cv, na.rm = TRUE) * 1.1)) +
    labs(
      title = "ICER Stabilization Analysis",
      subtitle = paste0("Stabilization threshold: CV < ", cv_threshold * 100, "%"),
      x = "Number of Patients Simulated",
      y = "Rolling Coefficient of Variation (CV)",
      caption = paste0("Stabilization at ", comma(min_patients), " patients")
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(face = "bold"),
      panel.grid.minor = element_blank()
    )
  
  print(p1)
  ggsave("icer_stabilization_analysis.png", width = 10, height = 6, dpi = 300)
  ggsave("icer_stabilization_analysis.pdf", width = 10, height = 6)
}
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Alternative stabilisation criteria

# Use multiple criteria for stabilization
stabilization_analysis <- stabilization_analysis %>%
  mutate(
    # Criterion 1: CV < 5%
    cv_stable = rolling_cv < 0.05,
    # Criterion 2: Percent change < 1% in absolute value
    pct_stable = abs(pct_change) < 1,
    # Criterion 3: Rolling mean within 2% of final value
    final_mean = tail(rolling_mean, 1),
    final_stable = abs((rolling_mean - final_mean) / final_mean * 100) < 2,
    # Combined stability (all criteria met)
    stable = cv_stable & pct_stable & final_stable
  )

# Find first point where all criteria are met
stable_idx <- which(stabilization_analysis$stable & !is.na(stabilization_analysis$stable))[1]

if (!is.na(stable_idx)) {
  min_patients_robust <- stabilization_analysis$num_patients[stable_idx]
  cat(sprintf("\n=== Robust Stabilization Analysis ===\n"))
  cat(sprintf("Minimum patients (all criteria): %s\n", comma(min_patients_robust)))
  cat(sprintf("  - CV < 5%%: âś“\n"))
  cat(sprintf("  - Change < 1%%: âś“\n"))
  cat(sprintf("  - Within 2%% of final: âś“\n"))
} else {
  cat("\n=== Robust Stabilization Analysis ===\n")
  cat("No stable point found with all criteria\n")
  cat("Current status at maximum patients:\n")
  last_row <- tail(stabilization_analysis, 1)
  cat(sprintf("  - CV: %.1f%%\n", last_row$rolling_cv * 100))
  cat(sprintf("  - Percent change: %.2f%%\n", last_row$pct_change))
  cat(sprintf("  - Difference from final: %.1f%%\n", 
      abs((last_row$rolling_mean - last_row$final_mean) / last_row$final_mean * 100)))
}
## 
## === Robust Stabilization Analysis ===
## Minimum patients (all criteria): 36,000
##   - CV < 5%: âś“
##   - Change < 1%: âś“
##   - Within 2% of final: âś“

16 Current Stabilisation Status

# Find where CV drops below 5% (0.05)
cv_threshold <- 0.05

# Get the stabilization point
stabilization_point <- stabilization_analysis %>%
  filter(!is.na(rolling_cv), rolling_cv < cv_threshold) %>%
  slice(1)

if(nrow(stabilization_point) > 0) {
  min_patients_needed <- stabilization_point$num_patients
  current_cv <- stabilization_point$rolling_cv
  
  cat("=== STABILIZATION ANALYSIS ===\n")
  cat(sprintf("Current CV at %s patients: %.1f%%\n", 
      comma(min_patients_needed), current_cv * 100))
  cat(sprintf("âś“ Target CV (< %.1f%%) achieved at: %s patients\n", 
      cv_threshold * 100, comma(min_patients_needed)))
} else {
  cat("CV still above 5% threshold\n")
  cat(sprintf("Latest CV: %.1f%% at %s patients\n",
      tail(stabilization_analysis$rolling_cv, 1) * 100,
      comma(tail(stabilization_analysis$num_patients, 1))))
}
## === STABILIZATION ANALYSIS ===
## Current CV at 36,000 patients: 4.8%
## âś“ Target CV (< 5.0%) achieved at: 36,000 patients
# Check the trend in CV
latest_cv <- tail(stabilization_analysis$rolling_cv, 1)
latest_patients <- tail(stabilization_analysis$num_patients, 1)

cat(sprintf("\nCurrent status at %s patients:\n", comma(latest_patients)))
## 
## Current status at 1,000,000 patients:
cat(sprintf("  - Rolling mean ICER: $%.0f\n", tail(stabilization_analysis$rolling_mean, 1)))
##   - Rolling mean ICER: $4068
cat(sprintf("  - Rolling CV: %.1f%%\n", latest_cv * 100))
##   - Rolling CV: 2.1%
cat(sprintf("  - Recent change: %.2f%%\n", tail(stabilization_analysis$pct_change, 1)))
##   - Recent change: 0.28%

17 ICER Stabilisation curve with Confidence Intervals

p_icer <- ggplot(stabilization_analysis, aes(x = num_patients)) +
  geom_point(aes(y = icer), alpha = 0.3, size = 0.8, color = "gray50") +
  geom_line(aes(y = rolling_mean), color = "#1b9e77", size = 1.2) +
  geom_ribbon(aes(ymin = rolling_mean - rolling_sd, 
                  ymax = rolling_mean + rolling_sd), 
              alpha = 0.2, fill = "#1b9e77") +
  geom_hline(yintercept = tail(stabilization_analysis$rolling_mean, 1), 
             linetype = "dashed", color = "darkgreen", alpha = 0.7) +
  scale_x_continuous(labels = comma, breaks = seq(0, max(stabilization_analysis$num_patients), by = 100000)) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "ICER Stabilization: Empowered Health vs Usual Care",
    subtitle = paste0("Stabilized ICER: $", round(tail(stabilization_analysis$rolling_mean, 1), 0),
                     " (±", round(tail(stabilization_analysis$rolling_sd, 1), 0), ")"),
    x = "Number of Patients Simulated",
    y = "ICER (USD per DALY averted)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    panel.grid.minor = element_blank()
  )

print(p_icer)
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).

ggsave("icer_stabilization_with_ci.png", width = 10, height = 6, dpi = 300)
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
ggsave("icer_stabilization_with_ci.pdf", width = 10, height = 6)
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).

Minimum patients needed

# Calculate required patients for different CV thresholds
cv_targets <- c(0.10, 0.07, 0.05, 0.03)
required_patients <- sapply(cv_targets, function(target) {
  idx <- which(stabilization_analysis$rolling_cv < target & !is.na(stabilization_analysis$rolling_cv))[1]
  if(!is.na(idx)) stabilization_analysis$num_patients[idx] else NA
})

requirements <- data.frame(
  Target_CV = paste0(cv_targets * 100, "%"),
  Patients_Needed = comma(required_patients),
  Status = ifelse(is.na(required_patients), "Not yet achieved", "Achieved")
)

cat("\n=== MINIMUM PATIENTS REQUIREMENTS ===\n")
## 
## === MINIMUM PATIENTS REQUIREMENTS ===
print(requirements)
##   Target_CV Patients_Needed   Status
## 1       10%           8,800 Achieved
## 2        7%          10,500 Achieved
## 3        5%          36,000 Achieved
## 4        3%          55,000 Achieved
# Final recommendation
latest_cv_value <- tail(stabilization_analysis$rolling_cv, 1)
latest_patients_value <- tail(stabilization_analysis$num_patients, 1)

cat("\n=== FINAL RECOMMENDATION ===\n")
## 
## === FINAL RECOMMENDATION ===
if(latest_cv_value <= 0.05) {
  cat(sprintf("âś“ Current simulation (%s patients) is SUFFICIENT\n", comma(latest_patients_value)))
  cat(sprintf("  ICER is stable with CV = %.1f%%\n", latest_cv_value * 100))
  cat(sprintf("  Minimum needed: %s patients\n", 
      comma(stabilization_analysis$num_patients[which(stabilization_analysis$rolling_cv < 0.05 & !is.na(stabilization_analysis$rolling_cv))[1]])))
} else if(latest_cv_value <= 0.07) {
  cat(sprintf("âš  Current simulation (%s patients) is ADEQUATE but consider more\n", comma(latest_patients_value)))
  cat(sprintf("  Current CV = %.1f%% (target < 5%%)\n", latest_cv_value * 100))
  cat(sprintf("  Recommended minimum: 500,000 patients\n"))
} else {
  cat(sprintf("âś— Current simulation (%s patients) is INSUFFICIENT\n", comma(latest_patients_value)))
  cat(sprintf("  Current CV = %.1f%% (too high)\n", latest_cv_value * 100))
  cat(sprintf("  Recommended minimum: 500,000 - 1,000,000 patients\n"))
}
## âś“ Current simulation (1,000,000 patients) is SUFFICIENT
##   ICER is stable with CV = 2.1%
##   Minimum needed: 36,000 patients

Visualising the convergence

# Create a comprehensive convergence plot


# Plot 1: CV over time
p1 <- ggplot(stabilization_analysis, aes(x = num_patients)) +
  geom_line(aes(y = rolling_cv), color = "blue", size = 1) +
  geom_hline(yintercept = 0.05, linetype = "dashed", color = "red", size = 0.8) +
  geom_hline(yintercept = 0.03, linetype = "dotted", color = "darkgreen", size = 0.8) +
  geom_vline(xintercept = 36000, linetype = "dashed", color = "purple", alpha = 0.5) +
  annotate("text", x = 36000, y = 0.07, label = "Stabilization at 36k", 
           angle = 90, vjust = -0.5, size = 3, color = "purple") +
  scale_x_continuous(labels = comma, breaks = seq(0, 1e6, by = 200000)) +
  scale_y_continuous(labels = scales::percent, limits = c(0, max(stabilization_analysis$rolling_cv, na.rm = TRUE) * 1.1)) +
  labs(
    title = "A. Coefficient of Variation (CV) Convergence",
    subtitle = "Target CV < 5% achieved at 36,000 patients",
    x = "Number of Patients Simulated",
    y = "Coefficient of Variation"
  ) +
  theme_minimal()

# Plot 2: ICER stabilization
p2 <- ggplot(stabilization_analysis, aes(x = num_patients)) +
  geom_point(aes(y = icer), alpha = 0.1, size = 0.5, color = "gray50") +
  geom_line(aes(y = rolling_mean), color = "#1b9e77", size = 1.2) +
  geom_ribbon(aes(ymin = rolling_mean - rolling_sd, 
                  ymax = rolling_mean + rolling_sd), 
              alpha = 0.2, fill = "#1b9e77") +
  geom_hline(yintercept = 4068, linetype = "dashed", color = "darkgreen", alpha = 0.7) +
  annotate("text", x = 1e6, y = 4068, label = "Final ICER: $4,068", 
           hjust = 1, vjust = -0.5, size = 3.5) +
  scale_x_continuous(labels = comma, breaks = seq(0, 1e6, by = 200000)) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "B. ICER Stabilization",
    subtitle = "Stable around $4,068 per DALY averted",
    x = "Number of Patients Simulated",
    y = "ICER (USD per DALY averted)"
  ) +
  theme_minimal()

# Plot 3: Zoom of early stabilization
p3 <- stabilization_analysis %>%
  filter(num_patients <= 200000) %>%
  ggplot(aes(x = num_patients)) +
  geom_line(aes(y = rolling_mean), color = "#1b9e77", size = 1) +
  geom_ribbon(aes(ymin = rolling_mean - rolling_sd, 
                  ymax = rolling_mean + rolling_sd), 
              alpha = 0.3, fill = "#1b9e77") +
  geom_vline(xintercept = 36000, linetype = "dashed", color = "red", alpha = 0.7) +
  annotate("text", x = 36000, y = 4200, label = "Stabilization point\n36,000 patients", 
           size = 3, hjust = -0.1) +
  scale_x_continuous(labels = comma, breaks = seq(0, 200000, by = 50000)) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "C. Early Stabilization (Zoomed)",
    subtitle = "ICER stabilizes rapidly",
    x = "Number of Patients Simulated",
    y = "ICER (USD per DALY averted)"
  ) +
  theme_minimal()

# Combine plots
combined_plot <- (p1 / p2 / p3) + 
  plot_annotation(
    title = "ICER Convergence Analysis: Empowered Health vs Usual Care",
    subtitle = paste0("Final ICER: $", round(tail(stabilization_analysis$rolling_mean, 1), 0), 
                     " per DALY averted | CV: ", round(tail(stabilization_analysis$rolling_cv, 1) * 100, 1), "%"),
    theme = theme(plot.title = element_text(face = "bold", size = 16))
  )

print(combined_plot)
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: annotation$theme is not a valid theme.
## Please use `theme()` to construct themes.

ggsave("icer_comprehensive_convergence.png", width = 12, height = 14, dpi = 300)
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: annotation$theme is not a valid theme.
## Please use `theme()` to construct themes.
ggsave("icer_comprehensive_convergence.pdf", width = 12, height = 14)
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: annotation$theme is not a valid theme.
## Please use `theme()` to construct themes.

Summary Table

# Create summary table of key metrics
summary_table <- stabilization_analysis %>%
  filter(num_patients %in% c(36000, 100000, 250000, 500000, 1000000)) %>%
  select(num_patients, rolling_mean, rolling_sd, rolling_cv, pct_change) %>%
  mutate(
    Patients = comma(num_patients),
    ICER = sprintf("$%.0f", rolling_mean),
    CI_95 = sprintf("$%.0f - $%.0f", rolling_mean - 1.96*rolling_sd, rolling_mean + 1.96*rolling_sd),
    CV = sprintf("%.1f%%", rolling_cv * 100),
    `Recent Change` = sprintf("%.2f%%", pct_change)
  ) %>%
  select(Patients, ICER, CI_95, CV, `Recent Change`)

cat("\n=== CONVERGENCE SUMMARY ===\n")
## 
## === CONVERGENCE SUMMARY ===
print(summary_table)
##    Patients  ICER         CI_95   CV Recent Change
## 1    36,000 $4088 $3703 - $4474 4.8%         0.42%
## 2   100,000 $4063 $3802 - $4324 3.3%         0.11%
## 3   250,000 $4027 $3815 - $4238 2.7%        -0.26%
## 4   500,000 $4050 $3845 - $4255 2.6%        -0.12%
## 5 1,000,000 $4068 $3901 - $4235 2.1%         0.28%