The following R script performs analyses of simulation outputs to assess the impact of varied patient numbers on the model results.
The script begins by loading the necessary R libraries:
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## âś” dplyr 1.1.4 âś” readr 2.1.6
## âś” forcats 1.0.1 âś” stringr 1.6.0
## âś” ggplot2 4.0.2 âś” tibble 3.3.0
## âś” lubridate 1.9.4 âś” tidyr 1.3.1
## âś” purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## âś– dplyr::filter() masks stats::filter()
## âś– dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Import PSA datasets
load('/Users/jamesoguta/Documents/James Oguta/PhD Thesis_James Oguta/James Oguta Model/Kenya CVD Model/KenyaCVDModelFinal/data/results_df_numpats_no_trt.rda')
load('/Users/jamesoguta/Documents/James Oguta/PhD Thesis_James Oguta/James Oguta Model/Kenya CVD Model/KenyaCVDModelFinal/data/results_df_numpats_Emp_Health.rda')
load('/Users/jamesoguta/Documents/James Oguta/PhD Thesis_James Oguta/James Oguta Model/Kenya CVD Model/KenyaCVDModelFinal/data/results_df_numpats_UC.rda')num_pats_no_int <- results_df_numpats_no_trt
num_pats_Emp_Health <- results_df_numpats_Emp_Health
num_pats_UC <- results_df_numpats_UC
# Compute costs per DALY
num_pats_no_int$cost_daly <- num_pats_no_int$cost/num_pats_no_int$daly
num_pats_Emp_Health$cost_daly <- num_pats_Emp_Health$cost/num_pats_Emp_Health$daly
num_pats_UC$cost_daly <- num_pats_UC$cost/num_pats_UC$daly
head(num_pats_no_int)## num_patients cost daly cost_daly
## 1 5000 1404.409 7.225561 194.3668
## 2 5200 1572.424 7.290990 215.6668
## 3 5400 1530.815 7.309968 209.4148
## 4 5600 1523.123 7.250114 210.0826
## 5 5800 1515.568 7.259803 208.7615
## 6 6000 1579.671 7.433138 212.5174
## num_patients cost daly cost_daly
## 1 5000 5432.464 7.244207 749.9045
## 2 5200 5531.712 7.272240 760.6614
## 3 5400 5464.619 7.197389 759.2502
## 4 5600 5584.427 7.170733 778.7804
## 5 5800 5326.346 7.263128 733.3406
## 6 6000 5541.131 7.210286 768.5037
## num_patients cost daly cost_daly
## 1 5000 4871.973 7.374127 660.6848
## 2 5200 5010.039 7.401226 676.9201
## 3 5400 4942.220 7.326093 674.6051
## 4 5600 5013.450 7.291324 687.5911
## 5 5800 4784.787 7.367333 649.4599
## 6 6000 4992.048 7.331792 680.8769
ggplot(num_pats_no_int, aes(x = num_patients, y = cost)) +
geom_line() +
scale_x_continuous(
breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
labels = scales::comma
) +
labs(
x = "Number of Patients Simulated",
y = "Mean Discounted Costs",
title = "Mean Discounted Costs by Number of Patients Simulated(No Intervention)"
) +
theme_minimal()
# Plot Dalys by number of patients-No Intervention
ggplot(num_pats_no_int, aes(x = num_patients, y = daly)) +
geom_line() +
scale_x_continuous(
breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
labels = scales::comma
) +
labs(
x = "Number of Patients Simulated",
y = "Mean Discounted DALYs",
title = "Mean Discounted DALYs by Number of Patients Simulated(No Intervention)"
) +
theme_minimal()
# Plot Cost per Daly by number of patients-No Intervention
ggplot(num_pats_no_int, aes(x = num_patients, y = cost_daly)) +
geom_line() +
scale_x_continuous(
breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
labels = scales::comma
) +
labs(
x = "Number of Patients Simulated",
y = "Mean Discounted Cost per Daly",
title = "Mean Discounted Cost per DALY by Number of Patients Simulated(No Intervention)"
) +
theme_minimal()
# Plot costs by number of patients-Empower Health Intervention
ggplot(num_pats_Emp_Health, aes(x = num_patients, y = cost)) +
geom_line() +
scale_x_continuous(
breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
labels = scales::comma
) +
labs(
x = "Number of Patients Simulated",
y = "Mean Discounted Costs",
title = "Mean Discounted Costs by Number of Patients Simulated(Empower Health Intervention)"
) +
theme_minimal()
# Plot Dalys by number of patients-Empower Health Intervention
ggplot(num_pats_Emp_Health, aes(x = num_patients, y = daly)) +
geom_line() +
scale_x_continuous(
breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
labels = scales::comma
) +
labs(
x = "Number of Patients Simulated",
y = "Mean Discounted DALYs",
title = "Mean Discounted DALYs by Number of Patients Simulated(Empower Health Intervention)"
) +
theme_minimal()
# Plot Cost per Daly by number of patients-Empower Health
Intervention
ggplot(num_pats_Emp_Health, aes(x = num_patients, y = cost_daly)) +
geom_line() +
scale_x_continuous(
breaks = seq(0, max(num_pats_no_int$num_patients), by = 100000),
labels = scales::comma
) +
labs(
x = "Number of Patients Simulated",
y = "Mean Discounted Cost per Daly",
title = "Mean Discounted Cost per DALY by Number of Patients Simulated(Empower Health Intervention)"
) +
theme_minimal()
# Plot costs by number of patients-Usual Care Intervention
ggplot(num_pats_UC, aes(x = num_patients, y = cost)) +
geom_line() +
scale_x_continuous(
breaks = seq(0, max(num_pats_UC$num_patients), by = 100000),
labels = scales::comma
) +
labs(
x = "Number of Patients Simulated",
y = "Mean Discounted Costs",
title = "Mean Discounted Costs by Number of Patients Simulated(Usual Care Intervention)"
) +
theme_minimal()
# Plot Dalys by number of patients-Usual Care Intervention
ggplot(num_pats_UC, aes(x = num_patients, y = daly)) +
geom_line() +
scale_x_continuous(
breaks = seq(0, max(num_pats_UC$num_patients), by = 100000),
labels = scales::comma
) +
labs(
x = "Number of Patients Simulated",
y = "Mean Discounted DALYs",
title = "Mean Discounted DALYs by Number of Patients Simulated(Usual Care Intervention)"
) +
theme_minimal()
# Plot Cost per Daly by number of patients-Usual Care Intervention
ggplot(num_pats_UC, aes(x = num_patients, y = cost_daly)) +
geom_line() +
scale_x_continuous(
breaks = seq(0, max(num_pats_UC$num_patients), by = 100000),
labels = scales::comma
) +
labs(
x = "Number of Patients Simulated",
y = "Mean Discounted Cost per Daly",
title = "Mean Discounted Cost per DALY by Number of Patients Simulated(Usual Care Intervention)"
) +
theme_minimal()
# Combining the datasets
# Add a group identifier to each dataset
num_pats_Emp_Health <- num_pats_Emp_Health %>%
mutate(intervention = "Emp_Health")
num_pats_UC <- num_pats_UC %>%
mutate(intervention = "UC")
num_pats_no_int <- num_pats_no_int %>%
mutate(intervention = "No_Intervention")
# Combine all datasets
combined_data <- bind_rows(num_pats_Emp_Health, num_pats_UC, num_pats_no_int)
head(combined_data)## num_patients cost daly cost_daly intervention
## 1 5000 5432.464 7.244207 749.9045 Emp_Health
## 2 5200 5531.712 7.272240 760.6614 Emp_Health
## 3 5400 5464.619 7.197389 759.2502 Emp_Health
## 4 5600 5584.427 7.170733 778.7804 Emp_Health
## 5 5800 5326.346 7.263128 733.3406 Emp_Health
## 6 6000 5541.131 7.210286 768.5037 Emp_Health
ggplot(combined_data, aes(x = num_patients, y = cost, color = intervention)) +
geom_line(alpha = 0.8) +
geom_point(size = 1) +
scale_x_continuous(
breaks = seq(0, max(combined_data$num_patients), by = 100000),
labels = comma
) +
labs(
x = "Number of Patients",
y = "Mean Discounted Costs (USD)",
title = "Mean Discounted Costs by Number of Patients Simulated-All Interventions",
color = "Scenario"
) +
geom_smooth(se = FALSE, method = "loess", span = 0.2) +
theme_minimal()## `geom_smooth()` using formula = 'y ~ x'
# Plot DALYs by number of patients simulated for all interventions
ggplot(combined_data, aes(x = num_patients, y = daly, color = intervention)) +
geom_line(alpha = 0.8) +
geom_point(size = 1) +
scale_x_continuous(
breaks = seq(0, max(combined_data$num_patients), by = 100000),
labels = comma
) +
labs(
x = "Number of Patients",
y = "Mean Discounted DALYs ",
title = "Mean Discounted DALYs by Number of Patients Simulated-All Interventions",
color = "Scenario"
) +
geom_smooth(se = FALSE, method = "loess", span = 0.2) +
theme_minimal()## `geom_smooth()` using formula = 'y ~ x'
ggplot(combined_data, aes(x = num_patients, y = cost_daly, color = intervention)) +
geom_line(alpha = 0.8) +
geom_point(size = 1) +
scale_x_continuous(
breaks = seq(0, max(combined_data$num_patients), by = 100000),
labels = comma
) +
labs(
x = "Number of Patients",
y = "Mean Discounted Cost per DALY",
title = "Mean Discounted Discounted Cost per DALY by Number of Patients Simulated-All Interventions",
color = "Scenario"
) +
geom_smooth(se = FALSE, method = "loess", span = 0.2) +
theme_minimal()## `geom_smooth()` using formula = 'y ~ x'
# Plotting ICER Stabilization Curves
icer_df <- num_pats_no_int %>%
rename(cost_no_int = cost,
dalys_no_int = daly) %>%
select(num_patients, cost_no_int, dalys_no_int) %>%
left_join(num_pats_UC %>%
rename(cost_uc = cost,
dalys_uc = daly) %>%
select(num_patients, cost_uc, dalys_uc),
by = c("num_patients")) %>%
left_join(num_pats_Emp_Health %>%
rename(cost_emp = cost,
dalys_emp = daly) %>%
select(num_patients, cost_emp, dalys_emp),
by = c("num_patients"))
head(icer_df)## num_patients cost_no_int dalys_no_int cost_uc dalys_uc cost_emp dalys_emp
## 1 5000 1404.409 7.225561 4871.973 7.374127 5432.464 7.244207
## 2 5200 1572.424 7.290990 5010.039 7.401226 5531.712 7.272240
## 3 5400 1530.815 7.309968 4942.220 7.326093 5464.619 7.197389
## 4 5600 1523.123 7.250114 5013.450 7.291324 5584.427 7.170733
## 5 5800 1515.568 7.259803 4784.787 7.367333 5326.346 7.263128
## 6 6000 1579.671 7.433138 4992.048 7.331792 5541.131 7.210286
icer_df <- icer_df %>%
mutate(
incr_cost_uc = cost_uc - cost_no_int,
incr_dalys_uc = dalys_no_int - dalys_uc, # DALYs averted
icer_uc = incr_cost_uc / incr_dalys_uc,
incr_cost_emp = cost_emp - cost_no_int,
incr_dalys_emp = dalys_no_int - dalys_emp,
icer_emp = incr_cost_emp / incr_dalys_emp
)
head(icer_df)## num_patients cost_no_int dalys_no_int cost_uc dalys_uc cost_emp dalys_emp
## 1 5000 1404.409 7.225561 4871.973 7.374127 5432.464 7.244207
## 2 5200 1572.424 7.290990 5010.039 7.401226 5531.712 7.272240
## 3 5400 1530.815 7.309968 4942.220 7.326093 5464.619 7.197389
## 4 5600 1523.123 7.250114 5013.450 7.291324 5584.427 7.170733
## 5 5800 1515.568 7.259803 4784.787 7.367333 5326.346 7.263128
## 6 6000 1579.671 7.433138 4992.048 7.331792 5541.131 7.210286
## incr_cost_uc incr_dalys_uc icer_uc incr_cost_emp incr_dalys_emp
## 1 3467.564 -0.14856597 -23340.23 4028.054 -0.018646379
## 2 3437.615 -0.11023620 -31184.08 3959.288 0.018749674
## 3 3411.405 -0.01612480 -211562.60 3933.804 0.112578987
## 4 3490.327 -0.04121003 -84696.05 4061.304 0.079380966
## 5 3269.220 -0.10753059 -30402.69 3810.779 -0.003325285
## 6 3412.376 0.10134662 33670.35 3961.460 0.222852608
## icer_emp
## 1 -216023.41
## 2 211165.71
## 3 34942.61
## 4 51162.19
## 5 -1146000.54
## 6 17776.14
# Reshape for plotting
icer_long <- icer_df %>%
select(num_patients, icer_uc, icer_emp) %>%
pivot_longer(cols = starts_with("icer_"), names_to = "strategy", values_to = "icer") %>%
mutate(strategy = recode(strategy,
"icer_uc" = "Usual Care",
"icer_emp" = "Empowered Health"))
# Plot
icer_stabilization_curve <- ggplot(icer_long, aes(x = num_patients, y = icer, color = strategy)) +
geom_point(alpha = 0.5, size = 1) +
geom_smooth(method = "loess", se = FALSE, span = 0.2) +
scale_y_continuous(labels = comma, limits = c(0, NA)) +
scale_x_continuous(breaks = seq(0, max(icer_long$num_patients), by = 100000), labels = comma) +
labs(
title = "Mean ICER by Number of Patients Simulated",
x = "Number of Patients Simulated",
y = "ICER (USD per DALY averted)",
color = "Intervention"
) +
theme_minimal()
print(icer_stabilization_curve)## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 48 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 48 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 48 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 48 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 48 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 48 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_smooth()`).
icer_emp_vs_uc <- num_pats_Emp_Health %>%
rename(cost_emp = cost,
dalys_emp = daly) %>%
select(num_patients, cost_emp, dalys_emp) %>%
left_join(
num_pats_UC %>%
rename(cost_uc = cost,
dalys_uc = daly) %>%
select(num_patients, cost_uc, dalys_uc),
by = c("num_patients")
)
head(icer_emp_vs_uc)## num_patients cost_emp dalys_emp cost_uc dalys_uc
## 1 5000 5432.464 7.244207 4871.973 7.374127
## 2 5200 5531.712 7.272240 5010.039 7.401226
## 3 5400 5464.619 7.197389 4942.220 7.326093
## 4 5600 5584.427 7.170733 5013.450 7.291324
## 5 5800 5326.346 7.263128 4784.787 7.367333
## 6 6000 5541.131 7.210286 4992.048 7.331792
icer_emp_vs_uc <- icer_emp_vs_uc %>%
mutate(
incr_cost = cost_emp - cost_uc,
dalys_averted = dalys_uc - dalys_emp, # positive means emp is better
icer = incr_cost / dalys_averted
)
head(icer_emp_vs_uc)## num_patients cost_emp dalys_emp cost_uc dalys_uc incr_cost dalys_averted
## 1 5000 5432.464 7.244207 4871.973 7.374127 560.4906 0.1299196
## 2 5200 5531.712 7.272240 5010.039 7.401226 521.6735 0.1289859
## 3 5400 5464.619 7.197389 4942.220 7.326093 522.3991 0.1287038
## 4 5600 5584.427 7.170733 5013.450 7.291324 570.9770 0.1205910
## 5 5800 5326.346 7.263128 4784.787 7.367333 541.5592 0.1042053
## 6 6000 5541.131 7.210286 4992.048 7.331792 549.0833 0.1215060
## icer
## 1 4314.134
## 2 4044.423
## 3 4058.925
## 4 4734.823
## 5 5197.040
## 6 4518.982
icer_emp_vs_uc_stabilization_curve <- ggplot(icer_emp_vs_uc, aes(x = num_patients, y = icer)) +
geom_point(alpha = 0.5, size = 1, color = "#1b9e77") +
geom_smooth(method = "loess", se = FALSE, span = 0.2, color = "#1b9e77") +
scale_x_continuous(breaks = seq(0, max(icer_emp_vs_uc$num_patients), by = 100000),
labels = comma) +
labs(
title = NULL,
x = "Number of Patients Simulated",
y = "ICER (USD per DALY averted)"
) +
theme_minimal()
print(icer_emp_vs_uc_stabilization_curve)## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
ggplot(icer_emp_vs_uc, aes(x = num_patients)) +
geom_line(aes(y = cost_emp, color = "Empower Health"), size = 1) +
geom_line(aes(y = cost_uc, color = "Usual Care"), size = 1) +
scale_x_continuous(breaks = seq(0, max(icer_emp_vs_uc$num_patients), by = 100000),
labels = comma) +
labs(
title = "Mean Discounted Costs by Number of Patients Simulated",
x = "Number of Patients Simulated",
y = "Mean Discounted Costs (USD)",
color = "Intervention"
) +
theme_minimal()## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Creating a dataset for stabilization analysis
# Option 1: Using rollmeanr for simpler syntax
stabilization_analysis <- icer_emp_vs_uc %>%
arrange(num_patients) %>%
mutate(
# Rolling mean (window of 20)
rolling_mean = rollmeanr(icer, k = 20, fill = NA),
# Rolling SD (window of 20)
rolling_sd = rollapplyr(icer, width = 20, FUN = sd, fill = NA),
# Rolling coefficient of variation
rolling_cv = rolling_sd / abs(rolling_mean),
# Percent change
pct_change = (rolling_mean / lag(rolling_mean) - 1) * 100
)
# Alternative: Use zoo's rollapply with proper alignment
stabilization_analysis <- icer_emp_vs_uc %>%
arrange(num_patients) %>%
mutate(
rolling_mean = rollapply(icer, width = 20,
FUN = mean, fill = NA, align = "right", partial = FALSE),
rolling_sd = rollapply(icer, width = 20,
FUN = sd, fill = NA, align = "right", partial = FALSE),
rolling_cv = rolling_sd / abs(rolling_mean),
pct_change = (rolling_mean / lag(rolling_mean) - 1) * 100
)
head(stabilization_analysis)## num_patients cost_emp dalys_emp cost_uc dalys_uc incr_cost dalys_averted
## 1 5000 5432.464 7.244207 4871.973 7.374127 560.4906 0.1299196
## 2 5200 5531.712 7.272240 5010.039 7.401226 521.6735 0.1289859
## 3 5400 5464.619 7.197389 4942.220 7.326093 522.3991 0.1287038
## 4 5600 5584.427 7.170733 5013.450 7.291324 570.9770 0.1205910
## 5 5800 5326.346 7.263128 4784.787 7.367333 541.5592 0.1042053
## 6 6000 5541.131 7.210286 4992.048 7.331792 549.0833 0.1215060
## icer rolling_mean rolling_sd rolling_cv pct_change
## 1 4314.134 NA NA NA NA
## 2 4044.423 NA NA NA NA
## 3 4058.925 NA NA NA NA
## 4 4734.823 NA NA NA NA
## 5 5197.040 NA NA NA NA
## 6 4518.982 NA NA NA NA
# Find where CV drops below 5% and stays below
cv_threshold <- 0.05
# Get indices where CV is below threshold
cv_below <- which(stabilization_analysis$rolling_cv < cv_threshold &
!is.na(stabilization_analysis$rolling_cv))
if (length(cv_below) > 0) {
# Find the first index where CV drops below threshold AND remains below for next 20 simulations
stabilization_idx <- cv_below[1]
# Check if it stays below
remaining <- cv_below[cv_below >= stabilization_idx]
if (length(remaining) >= 20) {
min_patients <- stabilization_analysis$num_patients[stabilization_idx]
cat(sprintf("âś“ ICER stabilizes (CV < %.1f%%) at: %s patients\n",
cv_threshold * 100, comma(min_patients)))
} else {
cat("âš ICER hasn't consistently stabilized within simulated range\n")
min_patients <- stabilization_analysis$num_patients[cv_below[1]]
cat(sprintf("First crossing at: %s patients (but may not be stable)\n", comma(min_patients)))
}
} else {
cat("âś— ICER hasn't reached CV < 5% within simulated range\n")
cat("Consider simulating more patients\n")
}## âś“ ICER stabilizes (CV < 5.0%) at: 36,000 patients
# Visualize stabilization (corrected)
if (exists("min_patients")) {
p1 <- ggplot(stabilization_analysis, aes(x = num_patients)) +
geom_line(aes(y = rolling_cv), color = "blue", size = 1) +
geom_hline(yintercept = cv_threshold, linetype = "dashed", color = "red", size = 0.8) +
geom_vline(xintercept = min_patients, linetype = "dotted", color = "darkgreen", size = 0.8) +
scale_x_continuous(labels = comma, breaks = seq(0, max(stabilization_analysis$num_patients), by = 100000)) +
scale_y_continuous(labels = scales::percent, limits = c(0, max(stabilization_analysis$rolling_cv, na.rm = TRUE) * 1.1)) +
labs(
title = "ICER Stabilization Analysis",
subtitle = paste0("Stabilization threshold: CV < ", cv_threshold * 100, "%"),
x = "Number of Patients Simulated",
y = "Rolling Coefficient of Variation (CV)",
caption = paste0("Stabilization at ", comma(min_patients), " patients")
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold"),
panel.grid.minor = element_blank()
)
print(p1)
ggsave("icer_stabilization_analysis.png", width = 10, height = 6, dpi = 300)
ggsave("icer_stabilization_analysis.pdf", width = 10, height = 6)
}## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Alternative stabilisation criteria
# Use multiple criteria for stabilization
stabilization_analysis <- stabilization_analysis %>%
mutate(
# Criterion 1: CV < 5%
cv_stable = rolling_cv < 0.05,
# Criterion 2: Percent change < 1% in absolute value
pct_stable = abs(pct_change) < 1,
# Criterion 3: Rolling mean within 2% of final value
final_mean = tail(rolling_mean, 1),
final_stable = abs((rolling_mean - final_mean) / final_mean * 100) < 2,
# Combined stability (all criteria met)
stable = cv_stable & pct_stable & final_stable
)
# Find first point where all criteria are met
stable_idx <- which(stabilization_analysis$stable & !is.na(stabilization_analysis$stable))[1]
if (!is.na(stable_idx)) {
min_patients_robust <- stabilization_analysis$num_patients[stable_idx]
cat(sprintf("\n=== Robust Stabilization Analysis ===\n"))
cat(sprintf("Minimum patients (all criteria): %s\n", comma(min_patients_robust)))
cat(sprintf(" - CV < 5%%: âś“\n"))
cat(sprintf(" - Change < 1%%: âś“\n"))
cat(sprintf(" - Within 2%% of final: âś“\n"))
} else {
cat("\n=== Robust Stabilization Analysis ===\n")
cat("No stable point found with all criteria\n")
cat("Current status at maximum patients:\n")
last_row <- tail(stabilization_analysis, 1)
cat(sprintf(" - CV: %.1f%%\n", last_row$rolling_cv * 100))
cat(sprintf(" - Percent change: %.2f%%\n", last_row$pct_change))
cat(sprintf(" - Difference from final: %.1f%%\n",
abs((last_row$rolling_mean - last_row$final_mean) / last_row$final_mean * 100)))
}##
## === Robust Stabilization Analysis ===
## Minimum patients (all criteria): 36,000
## - CV < 5%: âś“
## - Change < 1%: âś“
## - Within 2% of final: âś“
# Find where CV drops below 5% (0.05)
cv_threshold <- 0.05
# Get the stabilization point
stabilization_point <- stabilization_analysis %>%
filter(!is.na(rolling_cv), rolling_cv < cv_threshold) %>%
slice(1)
if(nrow(stabilization_point) > 0) {
min_patients_needed <- stabilization_point$num_patients
current_cv <- stabilization_point$rolling_cv
cat("=== STABILIZATION ANALYSIS ===\n")
cat(sprintf("Current CV at %s patients: %.1f%%\n",
comma(min_patients_needed), current_cv * 100))
cat(sprintf("âś“ Target CV (< %.1f%%) achieved at: %s patients\n",
cv_threshold * 100, comma(min_patients_needed)))
} else {
cat("CV still above 5% threshold\n")
cat(sprintf("Latest CV: %.1f%% at %s patients\n",
tail(stabilization_analysis$rolling_cv, 1) * 100,
comma(tail(stabilization_analysis$num_patients, 1))))
}## === STABILIZATION ANALYSIS ===
## Current CV at 36,000 patients: 4.8%
## âś“ Target CV (< 5.0%) achieved at: 36,000 patients
# Check the trend in CV
latest_cv <- tail(stabilization_analysis$rolling_cv, 1)
latest_patients <- tail(stabilization_analysis$num_patients, 1)
cat(sprintf("\nCurrent status at %s patients:\n", comma(latest_patients)))##
## Current status at 1,000,000 patients:
## - Rolling mean ICER: $4068
## - Rolling CV: 2.1%
## - Recent change: 0.28%
p_icer <- ggplot(stabilization_analysis, aes(x = num_patients)) +
geom_point(aes(y = icer), alpha = 0.3, size = 0.8, color = "gray50") +
geom_line(aes(y = rolling_mean), color = "#1b9e77", size = 1.2) +
geom_ribbon(aes(ymin = rolling_mean - rolling_sd,
ymax = rolling_mean + rolling_sd),
alpha = 0.2, fill = "#1b9e77") +
geom_hline(yintercept = tail(stabilization_analysis$rolling_mean, 1),
linetype = "dashed", color = "darkgreen", alpha = 0.7) +
scale_x_continuous(labels = comma, breaks = seq(0, max(stabilization_analysis$num_patients), by = 100000)) +
scale_y_continuous(labels = comma) +
labs(
title = "ICER Stabilization: Empowered Health vs Usual Care",
subtitle = paste0("Stabilized ICER: $", round(tail(stabilization_analysis$rolling_mean, 1), 0),
" (±", round(tail(stabilization_analysis$rolling_sd, 1), 0), ")"),
x = "Number of Patients Simulated",
y = "ICER (USD per DALY averted)"
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 14),
panel.grid.minor = element_blank()
)
print(p_icer)## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
Minimum patients needed
# Calculate required patients for different CV thresholds
cv_targets <- c(0.10, 0.07, 0.05, 0.03)
required_patients <- sapply(cv_targets, function(target) {
idx <- which(stabilization_analysis$rolling_cv < target & !is.na(stabilization_analysis$rolling_cv))[1]
if(!is.na(idx)) stabilization_analysis$num_patients[idx] else NA
})
requirements <- data.frame(
Target_CV = paste0(cv_targets * 100, "%"),
Patients_Needed = comma(required_patients),
Status = ifelse(is.na(required_patients), "Not yet achieved", "Achieved")
)
cat("\n=== MINIMUM PATIENTS REQUIREMENTS ===\n")##
## === MINIMUM PATIENTS REQUIREMENTS ===
## Target_CV Patients_Needed Status
## 1 10% 8,800 Achieved
## 2 7% 10,500 Achieved
## 3 5% 36,000 Achieved
## 4 3% 55,000 Achieved
# Final recommendation
latest_cv_value <- tail(stabilization_analysis$rolling_cv, 1)
latest_patients_value <- tail(stabilization_analysis$num_patients, 1)
cat("\n=== FINAL RECOMMENDATION ===\n")##
## === FINAL RECOMMENDATION ===
if(latest_cv_value <= 0.05) {
cat(sprintf("âś“ Current simulation (%s patients) is SUFFICIENT\n", comma(latest_patients_value)))
cat(sprintf(" ICER is stable with CV = %.1f%%\n", latest_cv_value * 100))
cat(sprintf(" Minimum needed: %s patients\n",
comma(stabilization_analysis$num_patients[which(stabilization_analysis$rolling_cv < 0.05 & !is.na(stabilization_analysis$rolling_cv))[1]])))
} else if(latest_cv_value <= 0.07) {
cat(sprintf("âš Current simulation (%s patients) is ADEQUATE but consider more\n", comma(latest_patients_value)))
cat(sprintf(" Current CV = %.1f%% (target < 5%%)\n", latest_cv_value * 100))
cat(sprintf(" Recommended minimum: 500,000 patients\n"))
} else {
cat(sprintf("âś— Current simulation (%s patients) is INSUFFICIENT\n", comma(latest_patients_value)))
cat(sprintf(" Current CV = %.1f%% (too high)\n", latest_cv_value * 100))
cat(sprintf(" Recommended minimum: 500,000 - 1,000,000 patients\n"))
}## âś“ Current simulation (1,000,000 patients) is SUFFICIENT
## ICER is stable with CV = 2.1%
## Minimum needed: 36,000 patients
Visualising the convergence
# Create a comprehensive convergence plot
# Plot 1: CV over time
p1 <- ggplot(stabilization_analysis, aes(x = num_patients)) +
geom_line(aes(y = rolling_cv), color = "blue", size = 1) +
geom_hline(yintercept = 0.05, linetype = "dashed", color = "red", size = 0.8) +
geom_hline(yintercept = 0.03, linetype = "dotted", color = "darkgreen", size = 0.8) +
geom_vline(xintercept = 36000, linetype = "dashed", color = "purple", alpha = 0.5) +
annotate("text", x = 36000, y = 0.07, label = "Stabilization at 36k",
angle = 90, vjust = -0.5, size = 3, color = "purple") +
scale_x_continuous(labels = comma, breaks = seq(0, 1e6, by = 200000)) +
scale_y_continuous(labels = scales::percent, limits = c(0, max(stabilization_analysis$rolling_cv, na.rm = TRUE) * 1.1)) +
labs(
title = "A. Coefficient of Variation (CV) Convergence",
subtitle = "Target CV < 5% achieved at 36,000 patients",
x = "Number of Patients Simulated",
y = "Coefficient of Variation"
) +
theme_minimal()
# Plot 2: ICER stabilization
p2 <- ggplot(stabilization_analysis, aes(x = num_patients)) +
geom_point(aes(y = icer), alpha = 0.1, size = 0.5, color = "gray50") +
geom_line(aes(y = rolling_mean), color = "#1b9e77", size = 1.2) +
geom_ribbon(aes(ymin = rolling_mean - rolling_sd,
ymax = rolling_mean + rolling_sd),
alpha = 0.2, fill = "#1b9e77") +
geom_hline(yintercept = 4068, linetype = "dashed", color = "darkgreen", alpha = 0.7) +
annotate("text", x = 1e6, y = 4068, label = "Final ICER: $4,068",
hjust = 1, vjust = -0.5, size = 3.5) +
scale_x_continuous(labels = comma, breaks = seq(0, 1e6, by = 200000)) +
scale_y_continuous(labels = comma) +
labs(
title = "B. ICER Stabilization",
subtitle = "Stable around $4,068 per DALY averted",
x = "Number of Patients Simulated",
y = "ICER (USD per DALY averted)"
) +
theme_minimal()
# Plot 3: Zoom of early stabilization
p3 <- stabilization_analysis %>%
filter(num_patients <= 200000) %>%
ggplot(aes(x = num_patients)) +
geom_line(aes(y = rolling_mean), color = "#1b9e77", size = 1) +
geom_ribbon(aes(ymin = rolling_mean - rolling_sd,
ymax = rolling_mean + rolling_sd),
alpha = 0.3, fill = "#1b9e77") +
geom_vline(xintercept = 36000, linetype = "dashed", color = "red", alpha = 0.7) +
annotate("text", x = 36000, y = 4200, label = "Stabilization point\n36,000 patients",
size = 3, hjust = -0.1) +
scale_x_continuous(labels = comma, breaks = seq(0, 200000, by = 50000)) +
scale_y_continuous(labels = comma) +
labs(
title = "C. Early Stabilization (Zoomed)",
subtitle = "ICER stabilizes rapidly",
x = "Number of Patients Simulated",
y = "ICER (USD per DALY averted)"
) +
theme_minimal()
# Combine plots
combined_plot <- (p1 / p2 / p3) +
plot_annotation(
title = "ICER Convergence Analysis: Empowered Health vs Usual Care",
subtitle = paste0("Final ICER: $", round(tail(stabilization_analysis$rolling_mean, 1), 0),
" per DALY averted | CV: ", round(tail(stabilization_analysis$rolling_cv, 1) * 100, 1), "%"),
theme = theme(plot.title = element_text(face = "bold", size = 16))
)
print(combined_plot)## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: annotation$theme is not a valid theme.
## Please use `theme()` to construct themes.
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: annotation$theme is not a valid theme.
## Please use `theme()` to construct themes.
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: annotation$theme is not a valid theme.
## Please use `theme()` to construct themes.
Summary Table
# Create summary table of key metrics
summary_table <- stabilization_analysis %>%
filter(num_patients %in% c(36000, 100000, 250000, 500000, 1000000)) %>%
select(num_patients, rolling_mean, rolling_sd, rolling_cv, pct_change) %>%
mutate(
Patients = comma(num_patients),
ICER = sprintf("$%.0f", rolling_mean),
CI_95 = sprintf("$%.0f - $%.0f", rolling_mean - 1.96*rolling_sd, rolling_mean + 1.96*rolling_sd),
CV = sprintf("%.1f%%", rolling_cv * 100),
`Recent Change` = sprintf("%.2f%%", pct_change)
) %>%
select(Patients, ICER, CI_95, CV, `Recent Change`)
cat("\n=== CONVERGENCE SUMMARY ===\n")##
## === CONVERGENCE SUMMARY ===
## Patients ICER CI_95 CV Recent Change
## 1 36,000 $4088 $3703 - $4474 4.8% 0.42%
## 2 100,000 $4063 $3802 - $4324 3.3% 0.11%
## 3 250,000 $4027 $3815 - $4238 2.7% -0.26%
## 4 500,000 $4050 $3845 - $4255 2.6% -0.12%
## 5 1,000,000 $4068 $3901 - $4235 2.1% 0.28%