0.1 Code Description
1 Clear the workspace
- 1.1 Load Libraries
2 Import PSA Datasets
3 Rename datasets
4 Plot costs by number of patients-No intervention
5 Plot cost by number of patients simulated for all interventions
6 Plot Cost per DALY by number of patients simulated for all interventions
7 Merge the datasets by num_patients and seed_used_for_run
8 Calculate ICER for each intervention vs no intervention
9 Plot ICERs vs number of patients
10 Calculate ICER
11 Plot the ICER vs number of patients (Empower Health vs Usual Care)

0.1 Code Description

The following R script performs analyses of simulation outputs to assess the impact of varied patient numbers on the model results.

1 Clear the workspace

rm(list = ls())

1.1 Load Libraries

The script begins by loading the necessary R libraries:

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(reshape2) # For reshaping data

## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths

library(ggplot2) # For plotting
library(dampack) # For processing PSA results 
library(scales)

## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor

2 Import PSA Datasets

# Import PSA datasets
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/results_df_numpats_no_trt.rda')
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/results_df_numpats_Emp_Health.rda')
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/results_df_numpats_UC.rda')

3 Rename datasets

num_pats_no_int <- results_df_numpats_no_trt
num_pats_Emp_Health <- results_df_numpats_Emp_Health
num_pats_UC <- results_df_numpats_UC
# Compute costs per DALY
num_pats_no_int$cost_daly <- num_pats_no_int$mean_discounted_costs/num_pats_no_int$mean_discounted_dalys
num_pats_Emp_Health$cost_daly <- num_pats_Emp_Health$mean_discounted_costs/num_pats_Emp_Health$mean_discounted_dalys
num_pats_UC$cost_daly <- num_pats_UC$mean_discounted_costs/num_pats_UC$mean_discounted_dalys

head(num_pats_no_int)

##   num_patients mean_discounted_costs mean_discounted_dalys seed_used_for_run
## 1           50              2726.650              3.487271              1235
## 2          100              2061.795              6.312776              1236
## 3          150              2240.179              5.694106              1237
## 4          200              1995.336              6.018664              1238
## 5          250              2018.048              5.774757              1239
## 6          300              2068.937              6.614094              1240
##   cost_daly
## 1  781.8865
## 2  326.6067
## 3  393.4207
## 4  331.5248
## 5  349.4602
## 6  312.8074

head(num_pats_Emp_Health)

##   num_patients mean_discounted_costs mean_discounted_dalys seed_used_for_run
## 1           50              6342.636              2.978003              1235
## 2          100              5340.920              6.081693              1236
## 3          150              5856.239              5.331499              1237
## 4          200              5813.093              5.728547              1238
## 5          250              5773.903              5.459715              1239
## 6          300              5753.368              6.384375              1240
##   cost_daly
## 1 2129.8288
## 2  878.1962
## 3 1098.4226
## 4 1014.7587
## 5 1057.5466
## 6  901.1639

head(num_pats_UC)

##   num_patients mean_discounted_costs mean_discounted_dalys seed_used_for_run
## 1           50              6251.281              3.246685              1235
## 2          100              5570.367              6.188617              1236
## 3          150              5931.939              5.419564              1237
## 4          200              5756.515              5.827624              1238
## 5          250              5690.599              5.582022              1239
## 6          300              5799.277              6.414520              1240
##   cost_daly
## 1 1925.4351
## 2  900.0989
## 3 1094.5417
## 4  987.7979
## 5 1019.4511
## 6  904.0859

4 Plot costs by number of patients-No intervention

ggplot(num_pats_no_int, aes(x = num_patients, y = mean_discounted_costs)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted Costs",
    title = "Mean Discounted Costs by Number of Patients Simulated(No Intervention)"
  ) +
  theme_minimal()

# Plot Dalys by number of patients-No Intervention

ggplot(num_pats_no_int, aes(x = num_patients, y = mean_discounted_dalys)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted DALYs",
    title = "Mean Discounted DALYs by Number of Patients Simulated(No Intervention)"
  ) +
  theme_minimal()

# Plot Cost per Daly by number of patients-No Intervention

ggplot(num_pats_no_int, aes(x = num_patients, y = cost_daly)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted Cost per Daly",
    title = "Mean Discounted Cost per DALY by Number of Patients Simulated(No Intervention)"
  ) +
  theme_minimal()

# Plot costs by number of patients-Empower Health Intervention

ggplot(num_pats_Emp_Health, aes(x = num_patients, y = mean_discounted_costs)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted Costs",
    title = "Mean Discounted Costs by Number of Patients Simulated(Empower Health Intervention)"
  ) +
  theme_minimal()

# Plot Dalys by number of patients-Empower Health Intervention

ggplot(num_pats_Emp_Health, aes(x = num_patients, y = mean_discounted_dalys)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted DALYs",
    title = "Mean Discounted DALYs by Number of Patients Simulated(Empower Health Intervention)"
  ) +
  theme_minimal()

# Plot Cost per Daly by number of patients-Empower Health Intervention

ggplot(num_pats_Emp_Health, aes(x = num_patients, y = cost_daly)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted Cost per Daly",
    title = "Mean Discounted Cost per DALY by Number of Patients Simulated(Empower Health Intervention)"
  ) +
  theme_minimal()

# Plot costs by number of patients-Usual Care Intervention

ggplot(num_pats_UC, aes(x = num_patients, y = mean_discounted_costs)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_UC$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted Costs",
    title = "Mean Discounted Costs by Number of Patients Simulated(Usual Care Intervention)"
  ) +
  theme_minimal()

# Plot Dalys by number of patients-Usual Care Intervention

ggplot(num_pats_UC, aes(x = num_patients, y = mean_discounted_dalys)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_UC$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted DALYs",
    title = "Mean Discounted DALYs by Number of Patients Simulated(Usual Care Intervention)"
  ) +
  theme_minimal()

# Plot Cost per Daly by number of patients-Usual Care Intervention

ggplot(num_pats_UC, aes(x = num_patients, y = cost_daly)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq(0, max(num_pats_UC$num_patients), by = 100000),
    labels = scales::comma
  ) +
  labs(
    x = "Number of Patients Simulated",
    y = "Mean Discounted Cost per Daly",
    title = "Mean Discounted Cost per DALY by Number of Patients Simulated(Usual Care Intervention)"
  ) +
  theme_minimal()

# Combining the datasets

# Add a group identifier to each dataset
num_pats_Emp_Health <- num_pats_Emp_Health %>%
  mutate(intervention = "Emp_Health")

num_pats_UC <- num_pats_UC %>%
  mutate(intervention = "UC")

num_pats_no_int <- num_pats_no_int %>%
  mutate(intervention = "No_Intervention")

# Combine all datasets
combined_data <- bind_rows(num_pats_Emp_Health, num_pats_UC, num_pats_no_int)
head(combined_data)

##   num_patients mean_discounted_costs mean_discounted_dalys seed_used_for_run
## 1           50              6342.636              2.978003              1235
## 2          100              5340.920              6.081693              1236
## 3          150              5856.239              5.331499              1237
## 4          200              5813.093              5.728547              1238
## 5          250              5773.903              5.459715              1239
## 6          300              5753.368              6.384375              1240
##   cost_daly intervention
## 1 2129.8288   Emp_Health
## 2  878.1962   Emp_Health
## 3 1098.4226   Emp_Health
## 4 1014.7587   Emp_Health
## 5 1057.5466   Emp_Health
## 6  901.1639   Emp_Health

5 Plot cost by number of patients simulated for all interventions

ggplot(combined_data, aes(x = num_patients, y = mean_discounted_costs, color = intervention)) +
  geom_line(alpha = 0.8) +
  geom_point(size = 1) +
  scale_x_continuous(
    breaks = seq(0, max(combined_data$num_patients), by = 100000),
    labels = comma
  ) +
  labs(
    x = "Number of Patients",
    y = "Mean Discounted Costs (USD)",
    title = "Mean Discounted Costs by Number of Patients Simulated-All Interventions",
    color = "Scenario"
  ) +
  geom_smooth(se = FALSE, method = "loess", span = 0.2) +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

# Plot DALYs by number of patients simulated for all interventions

ggplot(combined_data, aes(x = num_patients, y = mean_discounted_dalys, color = intervention)) +
  geom_line(alpha = 0.8) +
  geom_point(size = 1) +
  scale_x_continuous(
    breaks = seq(0, max(combined_data$num_patients), by = 100000),
    labels = comma
  ) +
  labs(
    x = "Number of Patients",
    y = "Mean Discounted DALYs ",
    title = "Mean Discounted DALYs by Number of Patients Simulated-All Interventions",
    color = "Scenario"
  ) + 
  geom_smooth(se = FALSE, method = "loess", span = 0.2) +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

6 Plot Cost per DALY by number of patients simulated for all interventions

ggplot(combined_data, aes(x = num_patients, y = cost_daly, color = intervention)) +
  geom_line(alpha = 0.8) +
  geom_point(size = 1) +
  scale_x_continuous(
    breaks = seq(0, max(combined_data$num_patients), by = 100000),
    labels = comma
  ) +
  labs(
    x = "Number of Patients",
    y = "Mean Discounted Cost per DALY",
    title = "Mean Discounted Discounted Cost per DALY by Number of Patients Simulated-All Interventions",
    color = "Scenario"
  ) + 
  geom_smooth(se = FALSE, method = "loess", span = 0.2) +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

# Plotting ICER Stabilization Curves

7 Merge the datasets by num_patients and seed_used_for_run

icer_df <- num_pats_no_int %>%
  rename(cost_no_int = mean_discounted_costs,
         dalys_no_int = mean_discounted_dalys) %>%
  select(num_patients, seed_used_for_run, cost_no_int, dalys_no_int) %>%
  left_join(num_pats_UC %>%
              rename(cost_uc = mean_discounted_costs,
                     dalys_uc = mean_discounted_dalys) %>%
              select(num_patients, seed_used_for_run, cost_uc, dalys_uc),
            by = c("num_patients", "seed_used_for_run")) %>%
  left_join(num_pats_Emp_Health %>%
              rename(cost_emp = mean_discounted_costs,
                     dalys_emp = mean_discounted_dalys) %>%
              select(num_patients, seed_used_for_run, cost_emp, dalys_emp),
            by = c("num_patients", "seed_used_for_run"))
head(icer_df)

##   num_patients seed_used_for_run cost_no_int dalys_no_int  cost_uc dalys_uc
## 1           50              1235    2726.650     3.487271 6251.281 3.246685
## 2          100              1236    2061.795     6.312776 5570.367 6.188617
## 3          150              1237    2240.179     5.694106 5931.939 5.419564
## 4          200              1238    1995.336     6.018664 5756.515 5.827624
## 5          250              1239    2018.048     5.774757 5690.599 5.582022
## 6          300              1240    2068.937     6.614094 5799.277 6.414520
##   cost_emp dalys_emp
## 1 6342.636  2.978003
## 2 5340.920  6.081693
## 3 5856.239  5.331499
## 4 5813.093  5.728547
## 5 5773.903  5.459715
## 6 5753.368  6.384375

8 Calculate ICER for each intervention vs no intervention

icer_df <- icer_df %>%
  mutate(
    incr_cost_uc = cost_uc - cost_no_int,
    incr_dalys_uc = dalys_no_int - dalys_uc,  # DALYs averted
    icer_uc = incr_cost_uc / incr_dalys_uc,

    incr_cost_emp = cost_emp - cost_no_int,
    incr_dalys_emp = dalys_no_int - dalys_emp,
    icer_emp = incr_cost_emp / incr_dalys_emp
  )
head(icer_df)

##   num_patients seed_used_for_run cost_no_int dalys_no_int  cost_uc dalys_uc
## 1           50              1235    2726.650     3.487271 6251.281 3.246685
## 2          100              1236    2061.795     6.312776 5570.367 6.188617
## 3          150              1237    2240.179     5.694106 5931.939 5.419564
## 4          200              1238    1995.336     6.018664 5756.515 5.827624
## 5          250              1239    2018.048     5.774757 5690.599 5.582022
## 6          300              1240    2068.937     6.614094 5799.277 6.414520
##   cost_emp dalys_emp incr_cost_uc incr_dalys_uc  icer_uc incr_cost_emp
## 1 6342.636  2.978003     3524.631     0.2405861 14650.18      3615.985
## 2 5340.920  6.081693     3508.573     0.1241591 28258.69      3279.125
## 3 5856.239  5.331499     3691.760     0.2745423 13446.96      3616.060
## 4 5813.093  5.728547     3761.179     0.1910399 19687.93      3817.757
## 5 5773.903  5.459715     3672.551     0.1927347 19054.95      3755.855
## 6 5753.368  6.384375     3730.340     0.1995742 18691.49      3684.431
##   incr_dalys_emp  icer_emp
## 1      0.5092683  7100.354
## 2      0.2310828 14190.262
## 3      0.3626075  9972.381
## 4      0.2901171 13159.368
## 5      0.3150423 11921.749
## 6      0.2297192 16038.848

9 Plot ICERs vs number of patients

# Reshape for plotting
icer_long <- icer_df %>%
  select(num_patients, seed_used_for_run, icer_uc, icer_emp) %>%
  pivot_longer(cols = starts_with("icer_"), names_to = "strategy", values_to = "icer") %>%
  mutate(strategy = recode(strategy,
                           "icer_uc" = "Usual Care",
                           "icer_emp" = "Empowered Health"))

# Plot
ggplot(icer_long, aes(x = num_patients, y = icer, color = strategy)) +
  geom_point(alpha = 0.5, size = 1) +
  geom_smooth(method = "loess", se = FALSE, span = 0.2) +
  scale_y_continuous(labels = comma, limits = c(0, NA)) +
  scale_x_continuous(breaks = seq(0, max(icer_long$num_patients), by = 100000), labels = comma) +
  labs(
    title = "ICERs by Number of Patients Simulated",
    x = "Number of Patients Simulated",
    y = "ICER (USD per DALY averted)",
    color = "Intervention"
  ) +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

# Plotting ICER for Empower Health vs Usual Care by number of patients # Join the two datasets

icer_emp_vs_uc <- num_pats_Emp_Health %>%
  rename(cost_emp = mean_discounted_costs,
         dalys_emp = mean_discounted_dalys) %>%
  select(num_patients, seed_used_for_run, cost_emp, dalys_emp) %>%
  left_join(
    num_pats_UC %>%
      rename(cost_uc = mean_discounted_costs,
             dalys_uc = mean_discounted_dalys) %>%
      select(num_patients, seed_used_for_run, cost_uc, dalys_uc),
    by = c("num_patients", "seed_used_for_run")
  )
head(icer_emp_vs_uc)

##   num_patients seed_used_for_run cost_emp dalys_emp  cost_uc dalys_uc
## 1           50              1235 6342.636  2.978003 6251.281 3.246685
## 2          100              1236 5340.920  6.081693 5570.367 6.188617
## 3          150              1237 5856.239  5.331499 5931.939 5.419564
## 4          200              1238 5813.093  5.728547 5756.515 5.827624
## 5          250              1239 5773.903  5.459715 5690.599 5.582022
## 6          300              1240 5753.368  6.384375 5799.277 6.414520

10 Calculate ICER

icer_emp_vs_uc <- icer_emp_vs_uc %>%
  mutate(
    incr_cost = cost_emp - cost_uc,
    dalys_averted = dalys_uc - dalys_emp,  # positive means emp is better
    icer = incr_cost / dalys_averted
  )
head(icer_emp_vs_uc)

##   num_patients seed_used_for_run cost_emp dalys_emp  cost_uc dalys_uc
## 1           50              1235 6342.636  2.978003 6251.281 3.246685
## 2          100              1236 5340.920  6.081693 5570.367 6.188617
## 3          150              1237 5856.239  5.331499 5931.939 5.419564
## 4          200              1238 5813.093  5.728547 5756.515 5.827624
## 5          250              1239 5773.903  5.459715 5690.599 5.582022
## 6          300              1240 5753.368  6.384375 5799.277 6.414520
##    incr_cost dalys_averted       icer
## 1   91.35454    0.26868221   340.0096
## 2 -229.44787    0.10692368 -2145.9032
## 3  -75.69986    0.08806512  -859.5895
## 4   56.57835    0.09907720   571.0531
## 5   83.30415    0.12230752   681.1040
## 6  -45.90900    0.03014493 -1522.9430

11 Plot the ICER vs number of patients (Empower Health vs Usual Care)

ggplot(icer_emp_vs_uc, aes(x = num_patients, y = icer)) +
  geom_point(alpha = 0.5, size = 1, color = "#1b9e77") +
  geom_smooth(method = "loess", se = FALSE, span = 0.2, color = "#1b9e77") +
  scale_x_continuous(breaks = seq(0, max(icer_emp_vs_uc$num_patients), by = 100000),
                     labels = comma) +
  labs(
    title = "ICER by number of patients simulated: Empower Health vs Usual Care",
    x = "Number of Patients Simulated",
    y = "ICER (USD per DALY averted)"
  ) +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

Diagnosing the appropriate number of patients to be simulated–Kenya CVD Microsimulation Model

James Oguta

June 10, 2025