0.1 Code Description
1 Clear the workspace
- 1.1 Load Libraries
2 Import PSA Datasets
3 Rename datasets
4 Plot costs by number of patients-No intervention
5 Plot cost by number of patients simulated for all interventions
6 Plot Cost per DALY by number of patients simulated for all interventions
7 Merge the datasets by num_patients
8 Calculate ICER for each intervention vs no intervention
9 Plot ICERs vs number of patients
10 Plotting ICER for Empower Health vs Usual Care by number of patients
11 Join the two datasets
12 Calculate ICER
13 Plot the ICER vs number of patients (Empower Health vs Usual Care)
14 Plot costs for both interventions (Empower health and usual care) vs number of patients
15 Find ICER Stabilisation Point
16 Current Stabilisation Status
17 ICER Stabilisation curve with Confidence Intervals

0.1 Code Description

The following R script performs analyses of simulation outputs to assess the impact of varied patient numbers on the model results.

1 Clear the workspace

rm(list = ls())

1.1 Load Libraries

The script begins by loading the necessary R libraries:

library(tidyverse)

## Warning: package 'ggplot2' was built under R version 4.4.3

## Warning: package 'readr' was built under R version 4.4.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(reshape2) # For reshaping data

## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths

library(ggplot2) # For plotting
library(dampack) # For processing PSA results 
library(scales)

## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor

library(patchwork) # For combining plots
library(zoo) # For rolling calculationsi

## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

2 Import PSA Datasets

# Import PSA datasets
load('/Users/jamesoguta/Documents/James Oguta/PhD Thesis_James Oguta/James Oguta Model/Kenya CVD Model/KenyaCVDModelFinal/data/results_df_numpats_no_trt.rda')
load('/Users/jamesoguta/Documents/James Oguta/PhD Thesis_James Oguta/James Oguta Model/Kenya CVD Model/KenyaCVDModelFinal/data/results_df_numpats_Emp_Health.rda')
load('/Users/jamesoguta/Documents/James Oguta/PhD Thesis_James Oguta/James Oguta Model/Kenya CVD Model/KenyaCVDModelFinal/data/results_df_numpats_UC.rda')

3 Rename datasets

num_pats_no_int <- results_df_numpats_no_trt
num_pats_Emp_Health <- results_df_numpats_Emp_Health
num_pats_UC <- results_df_numpats_UC
# Compute costs per DALY
num_pats_no_int$cost_daly <- num_pats_no_int$cost/num_pats_no_int$daly
num_pats_Emp_Health$cost_daly <- num_pats_Emp_Health$cost/num_pats_Emp_Health$daly
num_pats_UC$cost_daly <- num_pats_UC$cost/num_pats_UC$daly

head(num_pats_no_int)

##   num_patients     cost     daly cost_daly
## 1         5000 1404.409 7.225561  194.3668
## 2         5200 1572.424 7.290990  215.6668
## 3         5400 1530.815 7.309968  209.4148
## 4         5600 1523.123 7.250114  210.0826
## 5         5800 1515.568 7.259803  208.7615
## 6         6000 1579.671 7.433138  212.5174

head(num_pats_Emp_Health)

##   num_patients     cost     daly cost_daly
## 1         5000 5432.464 7.244207  749.9045
## 2         5200 5531.712 7.272240  760.6614
## 3         5400 5464.619 7.197389  759.2502
## 4         5600 5584.427 7.170733  778.7804
## 5         5800 5326.346 7.263128  733.3406
## 6         6000 5541.131 7.210286  768.5037

head(num_pats_UC)

##   num_patients     cost     daly cost_daly
## 1         5000 4871.973 7.374127  660.6848
## 2         5200 5010.039 7.401226  676.9201
## 3         5400 4942.220 7.326093  674.6051
## 4         5600 5013.450 7.291324  687.5911
## 5         5800 4784.787 7.367333  649.4599
## 6         6000 4992.048 7.331792  680.8769

4 Plot costs by number of patients-No intervention

ggplot(num_pats_no_int, aes(x = num_patients, y = cost)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted Costs",
    title = "Mean Discounted Costs by Number of Patients Simulated(No Intervention)"
  ) +
  theme_minimal()

# Plot Dalys by number of patients-No Intervention

ggplot(num_pats_no_int, aes(x = num_patients, y = daly)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted DALYs",
    title = "Mean Discounted DALYs by Number of Patients Simulated(No Intervention)"
  ) +
  theme_minimal()

# Plot Cost per Daly by number of patients-No Intervention

ggplot(num_pats_no_int, aes(x = num_patients, y = cost_daly)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted Cost per Daly",
    title = "Mean Discounted Cost per DALY by Number of Patients Simulated(No Intervention)"
  ) +
  theme_minimal()

# Plot costs by number of patients-Empower Health Intervention

ggplot(num_pats_Emp_Health, aes(x = num_patients, y = cost)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted Costs",
    title = "Mean Discounted Costs by Number of Patients Simulated(Empower Health Intervention)"
  ) +
  theme_minimal()

# Plot Dalys by number of patients-Empower Health Intervention

ggplot(num_pats_Emp_Health, aes(x = num_patients, y = daly)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted DALYs",
    title = "Mean Discounted DALYs by Number of Patients Simulated(Empower Health Intervention)"
  ) +
  theme_minimal()

# Plot Cost per Daly by number of patients-Empower Health Intervention

ggplot(num_pats_Emp_Health, aes(x = num_patients, y = cost_daly)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted Cost per Daly",
    title = "Mean Discounted Cost per DALY by Number of Patients Simulated(Empower Health Intervention)"
  ) +
  theme_minimal()

# Plot costs by number of patients-Usual Care Intervention

ggplot(num_pats_UC, aes(x = num_patients, y = cost)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_UC$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted Costs",
    title = "Mean Discounted Costs by Number of Patients Simulated(Usual Care Intervention)"
  ) +
  theme_minimal()

# Plot Dalys by number of patients-Usual Care Intervention

ggplot(num_pats_UC, aes(x = num_patients, y = daly)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_UC$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted DALYs",
    title = "Mean Discounted DALYs by Number of Patients Simulated(Usual Care Intervention)"
  ) +
  theme_minimal()

# Plot Cost per Daly by number of patients-Usual Care Intervention

ggplot(num_pats_UC, aes(x = num_patients, y = cost_daly)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_UC$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted Cost per Daly",
    title = "Mean Discounted Cost per DALY by Number of Patients Simulated(Usual Care Intervention)"
  ) +
  theme_minimal()

# Combining the datasets

# Add a group identifier to each dataset
num_pats_Emp_Health <- num_pats_Emp_Health %>%
  mutate(intervention = "Emp_Health")

num_pats_UC <- num_pats_UC %>%
  mutate(intervention = "UC")

num_pats_no_int <- num_pats_no_int %>%
  mutate(intervention = "No_Intervention")

# Combine all datasets
combined_data <- bind_rows(num_pats_Emp_Health, num_pats_UC, num_pats_no_int)
head(combined_data)

##   num_patients     cost     daly cost_daly intervention
## 1         5000 5432.464 7.244207  749.9045   Emp_Health
## 2         5200 5531.712 7.272240  760.6614   Emp_Health
## 3         5400 5464.619 7.197389  759.2502   Emp_Health
## 4         5600 5584.427 7.170733  778.7804   Emp_Health
## 5         5800 5326.346 7.263128  733.3406   Emp_Health
## 6         6000 5541.131 7.210286  768.5037   Emp_Health

5 Plot cost by number of patients simulated for all interventions

ggplot(combined_data, aes(x = num_patients, y = cost, color = intervention)) +
  geom_line(alpha = 0.8) +
  geom_point(size = 1) +
  scale_x_continuous(
    breaks = seq(0, max(combined_data$num_patients), by = 100000),
    labels = comma
  ) +
  labs(
    x = "Number of Patients",
    y = "Mean Discounted Costs (USD)",
    title = "Mean Discounted Costs by Number of Patients Simulated-All Interventions",
    color = "Scenario"
  ) +
  geom_smooth(se = FALSE, method = "loess", span = 0.2) +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

# Plot DALYs by number of patients simulated for all interventions

ggplot(combined_data, aes(x = num_patients, y = daly, color = intervention)) +
  geom_line(alpha = 0.8) +
  geom_point(size = 1) +
  scale_x_continuous(
    breaks = seq(0, max(combined_data$num_patients), by = 100000),
    labels = comma
  ) +
  labs(
    x = "Number of Patients",
    y = "Mean Discounted DALYs ",
    title = "Mean Discounted DALYs by Number of Patients Simulated-All Interventions",
    color = "Scenario"
  ) + 
  geom_smooth(se = FALSE, method = "loess", span = 0.2) +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

6 Plot Cost per DALY by number of patients simulated for all interventions

ggplot(combined_data, aes(x = num_patients, y = cost_daly, color = intervention)) +
  geom_line(alpha = 0.8) +
  geom_point(size = 1) +
  scale_x_continuous(
    breaks = seq(0, max(combined_data$num_patients), by = 100000),
    labels = comma
  ) +
  labs(
    x = "Number of Patients",
    y = "Mean Discounted Cost per DALY",
    title = "Mean Discounted Discounted Cost per DALY by Number of Patients Simulated-All Interventions",
    color = "Scenario"
  ) + 
  geom_smooth(se = FALSE, method = "loess", span = 0.2) +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

# Plotting ICER Stabilization Curves

7 Merge the datasets by num_patients

icer_df <- num_pats_no_int %>%
  rename(cost_no_int = cost,
         dalys_no_int = daly) %>%
  select(num_patients, cost_no_int, dalys_no_int) %>%
  left_join(num_pats_UC %>%
              rename(cost_uc = cost,
                     dalys_uc = daly) %>%
              select(num_patients, cost_uc, dalys_uc),
            by = c("num_patients")) %>%
  left_join(num_pats_Emp_Health %>%
              rename(cost_emp = cost,
                     dalys_emp = daly) %>%
              select(num_patients, cost_emp, dalys_emp),
            by = c("num_patients"))
head(icer_df)

##   num_patients cost_no_int dalys_no_int  cost_uc dalys_uc cost_emp dalys_emp
## 1         5000    1404.409     7.225561 4871.973 7.374127 5432.464  7.244207
## 2         5200    1572.424     7.290990 5010.039 7.401226 5531.712  7.272240
## 3         5400    1530.815     7.309968 4942.220 7.326093 5464.619  7.197389
## 4         5600    1523.123     7.250114 5013.450 7.291324 5584.427  7.170733
## 5         5800    1515.568     7.259803 4784.787 7.367333 5326.346  7.263128
## 6         6000    1579.671     7.433138 4992.048 7.331792 5541.131  7.210286

8 Calculate ICER for each intervention vs no intervention

icer_df <- icer_df %>%
  mutate(
    incr_cost_uc = cost_uc - cost_no_int,
    incr_dalys_uc = dalys_no_int - dalys_uc,  # DALYs averted
    icer_uc = incr_cost_uc / incr_dalys_uc,

    incr_cost_emp = cost_emp - cost_no_int,
    incr_dalys_emp = dalys_no_int - dalys_emp,
    icer_emp = incr_cost_emp / incr_dalys_emp
  )
head(icer_df)

##   num_patients cost_no_int dalys_no_int  cost_uc dalys_uc cost_emp dalys_emp
## 1         5000    1404.409     7.225561 4871.973 7.374127 5432.464  7.244207
## 2         5200    1572.424     7.290990 5010.039 7.401226 5531.712  7.272240
## 3         5400    1530.815     7.309968 4942.220 7.326093 5464.619  7.197389
## 4         5600    1523.123     7.250114 5013.450 7.291324 5584.427  7.170733
## 5         5800    1515.568     7.259803 4784.787 7.367333 5326.346  7.263128
## 6         6000    1579.671     7.433138 4992.048 7.331792 5541.131  7.210286
##   incr_cost_uc incr_dalys_uc    icer_uc incr_cost_emp incr_dalys_emp
## 1     3467.564   -0.14856597  -23340.23      4028.054   -0.018646379
## 2     3437.615   -0.11023620  -31184.08      3959.288    0.018749674
## 3     3411.405   -0.01612480 -211562.60      3933.804    0.112578987
## 4     3490.327   -0.04121003  -84696.05      4061.304    0.079380966
## 5     3269.220   -0.10753059  -30402.69      3810.779   -0.003325285
## 6     3412.376    0.10134662   33670.35      3961.460    0.222852608
##      icer_emp
## 1  -216023.41
## 2   211165.71
## 3    34942.61
## 4    51162.19
## 5 -1146000.54
## 6    17776.14

9 Plot ICERs vs number of patients

# Reshape for plotting
icer_long <- icer_df %>%
  select(num_patients, icer_uc, icer_emp) %>%
  pivot_longer(cols = starts_with("icer_"), names_to = "strategy", values_to = "icer") %>%
  mutate(strategy = recode(strategy,
                           "icer_uc" = "Usual Care",
                           "icer_emp" = "Empowered Health"))

# Plot
icer_stabilization_curve <- ggplot(icer_long, aes(x = num_patients, y = icer, color = strategy)) +
  geom_point(alpha = 0.5, size = 1) +
  geom_smooth(method = "loess", se = FALSE, span = 0.2) +
  scale_y_continuous(labels = comma, limits = c(0, NA)) +
  scale_x_continuous(breaks = seq(0, max(icer_long$num_patients), by = 100000), labels = comma) +
  labs(
    title = "Mean ICER by Number of Patients Simulated",
    x = "Number of Patients Simulated",
    y = "ICER (USD per DALY averted)",
    color = "Intervention"
  ) +
  theme_minimal()
print(icer_stabilization_curve)

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 48 rows containing non-finite outside the scale range
## (`stat_smooth()`).

## Warning: Removed 48 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_smooth()`).

# Save the plot
ggsave("icer_stabilization_curve.png", width = 8, height = 6)

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 48 rows containing non-finite outside the scale range
## (`stat_smooth()`).

## Warning: Removed 48 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_smooth()`).

ggsave("icer_stabilization_curve.pdf", width = 8, height = 6)

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 48 rows containing non-finite outside the scale range
## (`stat_smooth()`).

## Warning: Removed 48 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_smooth()`).

10 Plotting ICER for Empower Health vs Usual Care by number of patients

11 Join the two datasets

icer_emp_vs_uc <- num_pats_Emp_Health %>%
  rename(cost_emp = cost,
         dalys_emp = daly) %>%
  select(num_patients, cost_emp, dalys_emp) %>%
  left_join(
    num_pats_UC %>%
      rename(cost_uc = cost,
             dalys_uc = daly) %>%
      select(num_patients, cost_uc, dalys_uc),
    by = c("num_patients")
  )
head(icer_emp_vs_uc)

##   num_patients cost_emp dalys_emp  cost_uc dalys_uc
## 1         5000 5432.464  7.244207 4871.973 7.374127
## 2         5200 5531.712  7.272240 5010.039 7.401226
## 3         5400 5464.619  7.197389 4942.220 7.326093
## 4         5600 5584.427  7.170733 5013.450 7.291324
## 5         5800 5326.346  7.263128 4784.787 7.367333
## 6         6000 5541.131  7.210286 4992.048 7.331792

12 Calculate ICER

icer_emp_vs_uc <- icer_emp_vs_uc %>%
  mutate(
    incr_cost = cost_emp - cost_uc,
    dalys_averted = dalys_uc - dalys_emp,  # positive means emp is better
    icer = incr_cost / dalys_averted
  )
head(icer_emp_vs_uc)

##   num_patients cost_emp dalys_emp  cost_uc dalys_uc incr_cost dalys_averted
## 1         5000 5432.464  7.244207 4871.973 7.374127  560.4906     0.1299196
## 2         5200 5531.712  7.272240 5010.039 7.401226  521.6735     0.1289859
## 3         5400 5464.619  7.197389 4942.220 7.326093  522.3991     0.1287038
## 4         5600 5584.427  7.170733 5013.450 7.291324  570.9770     0.1205910
## 5         5800 5326.346  7.263128 4784.787 7.367333  541.5592     0.1042053
## 6         6000 5541.131  7.210286 4992.048 7.331792  549.0833     0.1215060
##       icer
## 1 4314.134
## 2 4044.423
## 3 4058.925
## 4 4734.823
## 5 5197.040
## 6 4518.982

13 Plot the ICER vs number of patients (Empower Health vs Usual Care)

icer_emp_vs_uc_stabilization_curve <- ggplot(icer_emp_vs_uc, aes(x = num_patients, y = icer)) +
  geom_point(alpha = 0.5, size = 1, color = "#1b9e77") +
  geom_smooth(method = "loess", se = FALSE, span = 0.2, color = "#1b9e77") +
  scale_x_continuous(breaks = seq(0, max(icer_emp_vs_uc$num_patients), by = 100000),
                     labels = comma) +
  labs(
    title = NULL,
    x = "Number of Patients Simulated",
    y = "ICER (USD per DALY averted)"
  ) +
  theme_minimal()
print(icer_emp_vs_uc_stabilization_curve)

## `geom_smooth()` using formula = 'y ~ x'

# Save the plot
ggsave("icer_emp_vs_uc_stabilization_curve.png", width = 8, height = 6)

## `geom_smooth()` using formula = 'y ~ x'

ggsave("icer_emp_vs_uc_stabilization_curve.pdf", width = 8, height = 6)

## `geom_smooth()` using formula = 'y ~ x'

14 Plot costs for both interventions (Empower health and usual care) vs number of patients

ggplot(icer_emp_vs_uc, aes(x = num_patients)) +
  geom_line(aes(y = cost_emp, color = "Empower Health"), size = 1) +
  geom_line(aes(y = cost_uc, color = "Usual Care"), size = 1) +
  scale_x_continuous(breaks = seq(0, max(icer_emp_vs_uc$num_patients), by = 100000),
                     labels = comma) +
  labs(
    title = "Mean Discounted Costs by Number of Patients Simulated",
    x = "Number of Patients Simulated",
    y = "Mean Discounted Costs (USD)",
    color = "Intervention"
  ) +
  theme_minimal()

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Creating a dataset for stabilization analysis

# Option 1: Using rollmeanr for simpler syntax
stabilization_analysis <- icer_emp_vs_uc %>%
  arrange(num_patients) %>%
  mutate(
    # Rolling mean (window of 20)
    rolling_mean = rollmeanr(icer, k = 20, fill = NA),
    # Rolling SD (window of 20)
    rolling_sd = rollapplyr(icer, width = 20, FUN = sd, fill = NA),
    # Rolling coefficient of variation
    rolling_cv = rolling_sd / abs(rolling_mean),
    # Percent change
    pct_change = (rolling_mean / lag(rolling_mean) - 1) * 100
  )

# Alternative: Use zoo's rollapply with proper alignment
stabilization_analysis <- icer_emp_vs_uc %>%
  arrange(num_patients) %>%
  mutate(
    rolling_mean = rollapply(icer, width = 20, 
                            FUN = mean, fill = NA, align = "right", partial = FALSE),
    rolling_sd = rollapply(icer, width = 20, 
                          FUN = sd, fill = NA, align = "right", partial = FALSE),
    rolling_cv = rolling_sd / abs(rolling_mean),
    pct_change = (rolling_mean / lag(rolling_mean) - 1) * 100
  )
head(stabilization_analysis)

##   num_patients cost_emp dalys_emp  cost_uc dalys_uc incr_cost dalys_averted
## 1         5000 5432.464  7.244207 4871.973 7.374127  560.4906     0.1299196
## 2         5200 5531.712  7.272240 5010.039 7.401226  521.6735     0.1289859
## 3         5400 5464.619  7.197389 4942.220 7.326093  522.3991     0.1287038
## 4         5600 5584.427  7.170733 5013.450 7.291324  570.9770     0.1205910
## 5         5800 5326.346  7.263128 4784.787 7.367333  541.5592     0.1042053
## 6         6000 5541.131  7.210286 4992.048 7.331792  549.0833     0.1215060
##       icer rolling_mean rolling_sd rolling_cv pct_change
## 1 4314.134           NA         NA         NA         NA
## 2 4044.423           NA         NA         NA         NA
## 3 4058.925           NA         NA         NA         NA
## 4 4734.823           NA         NA         NA         NA
## 5 5197.040           NA         NA         NA         NA
## 6 4518.982           NA         NA         NA         NA

15 Find ICER Stabilisation Point

# Find where CV drops below 5% and stays below
cv_threshold <- 0.05

# Get indices where CV is below threshold
cv_below <- which(stabilization_analysis$rolling_cv < cv_threshold & 
                  !is.na(stabilization_analysis$rolling_cv))

if (length(cv_below) > 0) {
  # Find the first index where CV drops below threshold AND remains below for next 20 simulations
  stabilization_idx <- cv_below[1]
  
  # Check if it stays below
  remaining <- cv_below[cv_below >= stabilization_idx]
  if (length(remaining) >= 20) {
    min_patients <- stabilization_analysis$num_patients[stabilization_idx]
    cat(sprintf("✓ ICER stabilizes (CV < %.1f%%) at: %s patients\n", 
                cv_threshold * 100, comma(min_patients)))
  } else {
    cat("⚠ ICER hasn't consistently stabilized within simulated range\n")
    min_patients <- stabilization_analysis$num_patients[cv_below[1]]
    cat(sprintf("First crossing at: %s patients (but may not be stable)\n", comma(min_patients)))
  }
} else {
  cat("✗ ICER hasn't reached CV < 5% within simulated range\n")
  cat("Consider simulating more patients\n")
}

## ✓ ICER stabilizes (CV < 5.0%) at: 36,000 patients

# Visualize stabilization (corrected)
if (exists("min_patients")) {
  p1 <- ggplot(stabilization_analysis, aes(x = num_patients)) +
    geom_line(aes(y = rolling_cv), color = "blue", size = 1) +
    geom_hline(yintercept = cv_threshold, linetype = "dashed", color = "red", size = 0.8) +
    geom_vline(xintercept = min_patients, linetype = "dotted", color = "darkgreen", size = 0.8) +
    scale_x_continuous(labels = comma, breaks = seq(0, max(stabilization_analysis$num_patients), by = 100000)) +
    scale_y_continuous(labels = scales::percent, limits = c(0, max(stabilization_analysis$rolling_cv, na.rm = TRUE) * 1.1)) +
    labs(
      title = "ICER Stabilization Analysis",
      subtitle = paste0("Stabilization threshold: CV < ", cv_threshold * 100, "%"),
      x = "Number of Patients Simulated",
      y = "Rolling Coefficient of Variation (CV)",
      caption = paste0("Stabilization at ", comma(min_patients), " patients")
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(face = "bold"),
      panel.grid.minor = element_blank()
    )
  
  print(p1)
  ggsave("icer_stabilization_analysis.png", width = 10, height = 6, dpi = 300)
  ggsave("icer_stabilization_analysis.pdf", width = 10, height = 6)
}

## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Alternative stabilisation criteria

# Use multiple criteria for stabilization
stabilization_analysis <- stabilization_analysis %>%
  mutate(
    # Criterion 1: CV < 5%
    cv_stable = rolling_cv < 0.05,
    # Criterion 2: Percent change < 1% in absolute value
    pct_stable = abs(pct_change) < 1,
    # Criterion 3: Rolling mean within 2% of final value
    final_mean = tail(rolling_mean, 1),
    final_stable = abs((rolling_mean - final_mean) / final_mean * 100) < 2,
    # Combined stability (all criteria met)
    stable = cv_stable & pct_stable & final_stable
  )

# Find first point where all criteria are met
stable_idx <- which(stabilization_analysis$stable & !is.na(stabilization_analysis$stable))[1]

if (!is.na(stable_idx)) {
  min_patients_robust <- stabilization_analysis$num_patients[stable_idx]
  cat(sprintf("\n=== Robust Stabilization Analysis ===\n"))
  cat(sprintf("Minimum patients (all criteria): %s\n", comma(min_patients_robust)))
  cat(sprintf("  - CV < 5%%: ✓\n"))
  cat(sprintf("  - Change < 1%%: ✓\n"))
  cat(sprintf("  - Within 2%% of final: ✓\n"))
} else {
  cat("\n=== Robust Stabilization Analysis ===\n")
  cat("No stable point found with all criteria\n")
  cat("Current status at maximum patients:\n")
  last_row <- tail(stabilization_analysis, 1)
  cat(sprintf("  - CV: %.1f%%\n", last_row$rolling_cv * 100))
  cat(sprintf("  - Percent change: %.2f%%\n", last_row$pct_change))
  cat(sprintf("  - Difference from final: %.1f%%\n", 
      abs((last_row$rolling_mean - last_row$final_mean) / last_row$final_mean * 100)))
}

## 
## === Robust Stabilization Analysis ===
## Minimum patients (all criteria): 36,000
##   - CV < 5%: ✓
##   - Change < 1%: ✓
##   - Within 2% of final: ✓

16 Current Stabilisation Status

# Find where CV drops below 5% (0.05)
cv_threshold <- 0.05

# Get the stabilization point
stabilization_point <- stabilization_analysis %>%
  filter(!is.na(rolling_cv), rolling_cv < cv_threshold) %>%
  slice(1)

if(nrow(stabilization_point) > 0) {
  min_patients_needed <- stabilization_point$num_patients
  current_cv <- stabilization_point$rolling_cv
  
  cat("=== STABILIZATION ANALYSIS ===\n")
  cat(sprintf("Current CV at %s patients: %.1f%%\n", 
      comma(min_patients_needed), current_cv * 100))
  cat(sprintf("✓ Target CV (< %.1f%%) achieved at: %s patients\n", 
      cv_threshold * 100, comma(min_patients_needed)))
} else {
  cat("CV still above 5% threshold\n")
  cat(sprintf("Latest CV: %.1f%% at %s patients\n",
      tail(stabilization_analysis$rolling_cv, 1) * 100,
      comma(tail(stabilization_analysis$num_patients, 1))))
}

## === STABILIZATION ANALYSIS ===
## Current CV at 36,000 patients: 4.8%
## ✓ Target CV (< 5.0%) achieved at: 36,000 patients

# Check the trend in CV
latest_cv <- tail(stabilization_analysis$rolling_cv, 1)
latest_patients <- tail(stabilization_analysis$num_patients, 1)

cat(sprintf("\nCurrent status at %s patients:\n", comma(latest_patients)))

## 
## Current status at 1,000,000 patients:

cat(sprintf("  - Rolling mean ICER: $%.0f\n", tail(stabilization_analysis$rolling_mean, 1)))

##   - Rolling mean ICER: $4068

cat(sprintf("  - Rolling CV: %.1f%%\n", latest_cv * 100))

##   - Rolling CV: 2.1%

cat(sprintf("  - Recent change: %.2f%%\n", tail(stabilization_analysis$pct_change, 1)))

##   - Recent change: 0.28%

17 ICER Stabilisation curve with Confidence Intervals

p_icer <- ggplot(stabilization_analysis, aes(x = num_patients)) +
  geom_point(aes(y = icer), alpha = 0.3, size = 0.8, color = "gray50") +
  geom_line(aes(y = rolling_mean), color = "#1b9e77", size = 1.2) +
  geom_ribbon(aes(ymin = rolling_mean - rolling_sd, 
                  ymax = rolling_mean + rolling_sd), 
              alpha = 0.2, fill = "#1b9e77") +
  geom_hline(yintercept = tail(stabilization_analysis$rolling_mean, 1), 
             linetype = "dashed", color = "darkgreen", alpha = 0.7) +
  scale_x_continuous(labels = comma, breaks = seq(0, max(stabilization_analysis$num_patients), by = 100000)) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "ICER Stabilization: Empowered Health vs Usual Care",
    subtitle = paste0("Stabilized ICER: $", round(tail(stabilization_analysis$rolling_mean, 1), 0),
                     " (±", round(tail(stabilization_analysis$rolling_sd, 1), 0), ")"),
    x = "Number of Patients Simulated",
    y = "ICER (USD per DALY averted)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    panel.grid.minor = element_blank()
  )

print(p_icer)

## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).

ggsave("icer_stabilization_with_ci.png", width = 10, height = 6, dpi = 300)

## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).

ggsave("icer_stabilization_with_ci.pdf", width = 10, height = 6)

## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).

Minimum patients needed

# Calculate required patients for different CV thresholds
cv_targets <- c(0.10, 0.07, 0.05, 0.03)
required_patients <- sapply(cv_targets, function(target) {
  idx <- which(stabilization_analysis$rolling_cv < target & !is.na(stabilization_analysis$rolling_cv))[1]
  if(!is.na(idx)) stabilization_analysis$num_patients[idx] else NA
})

requirements <- data.frame(
  Target_CV = paste0(cv_targets * 100, "%"),
  Patients_Needed = comma(required_patients),
  Status = ifelse(is.na(required_patients), "Not yet achieved", "Achieved")
)

cat("\n=== MINIMUM PATIENTS REQUIREMENTS ===\n")

## 
## === MINIMUM PATIENTS REQUIREMENTS ===

print(requirements)

##   Target_CV Patients_Needed   Status
## 1       10%           8,800 Achieved
## 2        7%          10,500 Achieved
## 3        5%          36,000 Achieved
## 4        3%          55,000 Achieved

# Final recommendation
latest_cv_value <- tail(stabilization_analysis$rolling_cv, 1)
latest_patients_value <- tail(stabilization_analysis$num_patients, 1)

cat("\n=== FINAL RECOMMENDATION ===\n")

## 
## === FINAL RECOMMENDATION ===

if(latest_cv_value <= 0.05) {
  cat(sprintf("✓ Current simulation (%s patients) is SUFFICIENT\n", comma(latest_patients_value)))
  cat(sprintf("  ICER is stable with CV = %.1f%%\n", latest_cv_value * 100))
  cat(sprintf("  Minimum needed: %s patients\n", 
      comma(stabilization_analysis$num_patients[which(stabilization_analysis$rolling_cv < 0.05 & !is.na(stabilization_analysis$rolling_cv))[1]])))
} else if(latest_cv_value <= 0.07) {
  cat(sprintf("⚠ Current simulation (%s patients) is ADEQUATE but consider more\n", comma(latest_patients_value)))
  cat(sprintf("  Current CV = %.1f%% (target < 5%%)\n", latest_cv_value * 100))
  cat(sprintf("  Recommended minimum: 500,000 patients\n"))
} else {
  cat(sprintf("✗ Current simulation (%s patients) is INSUFFICIENT\n", comma(latest_patients_value)))
  cat(sprintf("  Current CV = %.1f%% (too high)\n", latest_cv_value * 100))
  cat(sprintf("  Recommended minimum: 500,000 - 1,000,000 patients\n"))
}

## ✓ Current simulation (1,000,000 patients) is SUFFICIENT
##   ICER is stable with CV = 2.1%
##   Minimum needed: 36,000 patients

Visualising the convergence

# Create a comprehensive convergence plot


# Plot 1: CV over time
p1 <- ggplot(stabilization_analysis, aes(x = num_patients)) +
  geom_line(aes(y = rolling_cv), color = "blue", size = 1) +
  geom_hline(yintercept = 0.05, linetype = "dashed", color = "red", size = 0.8) +
  geom_hline(yintercept = 0.03, linetype = "dotted", color = "darkgreen", size = 0.8) +
  geom_vline(xintercept = 36000, linetype = "dashed", color = "purple", alpha = 0.5) +
  annotate("text", x = 36000, y = 0.07, label = "Stabilization at 36k", 
           angle = 90, vjust = -0.5, size = 3, color = "purple") +
  scale_x_continuous(labels = comma, breaks = seq(0, 1e6, by = 200000)) +
  scale_y_continuous(labels = scales::percent, limits = c(0, max(stabilization_analysis$rolling_cv, na.rm = TRUE) * 1.1)) +
  labs(
    title = "A. Coefficient of Variation (CV) Convergence",
    subtitle = "Target CV < 5% achieved at 36,000 patients",
    x = "Number of Patients Simulated",
    y = "Coefficient of Variation"
  ) +
  theme_minimal()

# Plot 2: ICER stabilization
p2 <- ggplot(stabilization_analysis, aes(x = num_patients)) +
  geom_point(aes(y = icer), alpha = 0.1, size = 0.5, color = "gray50") +
  geom_line(aes(y = rolling_mean), color = "#1b9e77", size = 1.2) +
  geom_ribbon(aes(ymin = rolling_mean - rolling_sd, 
                  ymax = rolling_mean + rolling_sd), 
              alpha = 0.2, fill = "#1b9e77") +
  geom_hline(yintercept = 4068, linetype = "dashed", color = "darkgreen", alpha = 0.7) +
  annotate("text", x = 1e6, y = 4068, label = "Final ICER: $4,068", 
           hjust = 1, vjust = -0.5, size = 3.5) +
  scale_x_continuous(labels = comma, breaks = seq(0, 1e6, by = 200000)) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "B. ICER Stabilization",
    subtitle = "Stable around $4,068 per DALY averted",
    x = "Number of Patients Simulated",
    y = "ICER (USD per DALY averted)"
  ) +
  theme_minimal()

# Plot 3: Zoom of early stabilization
p3 <- stabilization_analysis %>%
  filter(num_patients <= 200000) %>%
  ggplot(aes(x = num_patients)) +
  geom_line(aes(y = rolling_mean), color = "#1b9e77", size = 1) +
  geom_ribbon(aes(ymin = rolling_mean - rolling_sd, 
                  ymax = rolling_mean + rolling_sd), 
              alpha = 0.3, fill = "#1b9e77") +
  geom_vline(xintercept = 36000, linetype = "dashed", color = "red", alpha = 0.7) +
  annotate("text", x = 36000, y = 4200, label = "Stabilization point\n36,000 patients", 
           size = 3, hjust = -0.1) +
  scale_x_continuous(labels = comma, breaks = seq(0, 200000, by = 50000)) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "C. Early Stabilization (Zoomed)",
    subtitle = "ICER stabilizes rapidly",
    x = "Number of Patients Simulated",
    y = "ICER (USD per DALY averted)"
  ) +
  theme_minimal()

# Combine plots
combined_plot <- (p1 / p2 / p3) + 
  plot_annotation(
    title = "ICER Convergence Analysis: Empowered Health vs Usual Care",
    subtitle = paste0("Final ICER: $", round(tail(stabilization_analysis$rolling_mean, 1), 0), 
                     " per DALY averted | CV: ", round(tail(stabilization_analysis$rolling_cv, 1) * 100, 1), "%"),
    theme = theme(plot.title = element_text(face = "bold", size = 16))
  )

print(combined_plot)

## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).

## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).

## Warning: annotation$theme is not a valid theme.
## Please use `theme()` to construct themes.

ggsave("icer_comprehensive_convergence.png", width = 12, height = 14, dpi = 300)

## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).

## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).

## Warning: annotation$theme is not a valid theme.
## Please use `theme()` to construct themes.

ggsave("icer_comprehensive_convergence.pdf", width = 12, height = 14)

## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).

## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).

## Warning: annotation$theme is not a valid theme.
## Please use `theme()` to construct themes.

Summary Table

# Create summary table of key metrics
summary_table <- stabilization_analysis %>%
  filter(num_patients %in% c(36000, 100000, 250000, 500000, 1000000)) %>%
  select(num_patients, rolling_mean, rolling_sd, rolling_cv, pct_change) %>%
  mutate(
    Patients = comma(num_patients),
    ICER = sprintf("$%.0f", rolling_mean),
    CI_95 = sprintf("$%.0f - $%.0f", rolling_mean - 1.96*rolling_sd, rolling_mean + 1.96*rolling_sd),
    CV = sprintf("%.1f%%", rolling_cv * 100),
    `Recent Change` = sprintf("%.2f%%", pct_change)
  ) %>%
  select(Patients, ICER, CI_95, CV, `Recent Change`)

cat("\n=== CONVERGENCE SUMMARY ===\n")

## 
## === CONVERGENCE SUMMARY ===

print(summary_table)

##    Patients  ICER         CI_95   CV Recent Change
## 1    36,000 $4088 $3703 - $4474 4.8%         0.42%
## 2   100,000 $4063 $3802 - $4324 3.3%         0.11%
## 3   250,000 $4027 $3815 - $4238 2.7%        -0.26%
## 4   500,000 $4050 $3845 - $4255 2.6%        -0.12%
## 5 1,000,000 $4068 $3901 - $4235 2.1%         0.28%

Diagnosing the appropriate number of patients to be simulated–Kenya CVD Microsimulation Model

James Oguta

2026-04-16