set.seed(123)
pkgs <- c(
"lhs", # For Latin Hypercube Sampling
"ggplot2", # For plotting
"pander", # For nice tables
"gridExtra", # For arranging multiple plots
"tidyr", # For data manipulation
"DiceKriging", # For Gaussian Process emulation
"DiceOptim" # For GP optimization
)
new_pkgs <- pkgs[!(pkgs %in% installed.packages()[, "Package"])]
if (length(new_pkgs)) install.packages(new_pkgs)
invisible(lapply(pkgs, library, character.only = TRUE))
uw_purple <- "#4b2e83"
uw_gold <- "#b7a57a"
uw_theme <- function() {
theme_minimal(base_size = 13) +
theme(
plot.title = element_text(color = uw_purple, face = "bold", size = 14),
axis.title = element_text(color = "black"),
axis.text = element_text(color = "black"),
strip.background = element_rect(fill = uw_purple, color = uw_purple),
strip.text = element_text(color = "white", face = "bold"),
panel.grid.minor = element_blank()
)
}Emulating a Sick-Sicker Markov Model with Linear and GP Emulators in R
Introduction
This document presents an implementation of a Sick-Sicker Markov model and emulation strategy using linear regression and Gaussian Process (GP) meta-models. The goal is to approximate a computationally intensive Markov model with faster surrogates suitable for large-scale analyses (e.g., probabilistic sensitivity analysis).
Steps to build an emulator:
1. Define the original model (The model to be emulated).
2. Generate a Latin Hypercube Sample (LHS) design for training data.
3. Evaluate the Markov model at the training points to obtain outputs (costs and QALYs).
4. Fit linear regression and GP emulators to the training data.
5. Validate the emulators on a separate test set.
Setup and Packages
Sick-Sicker Markov Model
Sick-Sicker model description: This is a four-state Markov model with the following states: - Healthy (H) - Sick (S1) - Sicker (S2) - Dead (D)
We define transition probabilities between these states, along with costs and utilities associated with each state. The model runs for a specified number of cycles (T), applying a discount rate to future costs and utilities.
For illustrative purposes, we are assuming we want to simulate a disiease progression over a long time horizon (3000 cycles that represent days of follow-up ). The transition probabilities, costs, and utilities are adapted accordingly.
The transition diagram is shown below:
Transition probabilities:
pHS: Probability of moving from Healthy to SickpS1H: Probability of moving from Sick to HealthypS1S2: Probability of moving from Sick to SickerpS1D: Probability of moving from Sick to DeadpS2D: Probability of moving from Sicker to Dead
sick_sicker_model <- function(params,
T = 3000,
discount = 8.09e-05,
pHD = 0.00001,
costs = c(H = 10, S1 = 100, S2 = 500, D = 0),
#costs = c(H = 10, S1 = 15, S2 = 20, D = 0), #if cost are similar there is less skewness
utils = c(H = 1.00/365, S1 = 0.70/365,
S2 = 0.40/365, D = 0.00)) {
if (is.null(names(params))) {
stop("params must be a named numeric vector: pHS, pS1H, pS1S2, pS1D, pS2D")
}
pHS <- params["pHS"]
pS1H <- params["pS1H"]
pS1S2 <- params["pS1S2"]
pS1D <- params["pS1D"]
pS2D <- params["pS2D"]
if (any(c(pHS, pS1H, pS1S2, pS1D, pS2D, pHD) < 0)) stop("Negative probability in params")
if (pHS + pHD > 1) stop("H row probabilities exceed 1")
if (pS1H + pS1S2 + pS1D > 1) stop("S1 row probabilities exceed 1")
if (pS2D > 1) stop("S2D cannot exceed 1")
M <- matrix(0, nrow = 4, ncol = 4)
M[1, ] <- c(1 - pHS - pHD, pHS, 0, pHD)
M[2, ] <- c(pS1H, 1 - pS1H - pS1S2 - pS1D, pS1S2, pS1D)
M[3, ] <- c(0, 0, 1 - pS2D, pS2D)
M[4, ] <- c(0, 0, 0, 1)
#Sanity Check (rows sum to 1)
row_sums <- rowSums(M)
if (any(abs(row_sums - 1) > 1e-8)) {
stop("Transition matrix rows do not sum to 1")
}
state <- c(H = 1, S1 = 0, S2 = 0, D = 0)
total_cost <- 0
total_qaly <- 0
for (t in 1:T) {
state <- as.numeric(state %*% M)
names(state) <- c("H", "S1", "S2", "D")
disc <- 1 / ((1 + discount)^(t - 1))
total_cost <- total_cost + sum(state * costs) * disc
total_qaly <- total_qaly + sum(state * utils) * disc
}
list(cost = total_cost, qaly = total_qaly,
final_state = state, M = M)
}Let’s test the model with some example parameters:
example_params <- c(pHS = 0.0001, pS1H = 0.001, pS1S2 = 0.02, pS1D = 0.01, pS2D = 0.03)
result <- sick_sicker_model(example_params)
pander::pander(result, caption = "Sick-Sicker Model Output for Example Parameters")cost: 26084
qaly: 6.303
final_state:
H S1 S2 D 0.7259 0.00235 0.001572 0.2702 M:
0.9999 1e-04 0 1e-05 0.001 0.969 0.02 0.01 0 0 0.97 0.03 0 0 0 1
Design of Experiments (LHS)
A Latin Hypercube Sample is a statistical method for generating a sample of plausible collections of parameter values from a multidimensional distribution.
LHS is a way to:
- Spread parameter samples evenly across each dimension,
- Avoid wasting simulations in duplicate regions,
- Get better coverage of the space with fewer model runs.
The basic idea:
Imagine you have one parameter, say a probability that can vary from 0 to 1.
- If you take 10 random samples, they might all end up between 0.2 and 0.5.
- With LHS, you force the samples to spread out.
In 1D, LHS:
- Divides the range (e.g., 0 to 1) into \(N\) equal intervals.
- In each interval, it selects one value at random.
- This gives you \(N\) samples, each in a different interval.
Let’s code this in R for 100 samples:
set.seed(123)
n_samples <- 100
# Random sampling
random_samples <- runif(n_samples, min = 0, max = 1)
# LHS sampling
lhs_intervals <- seq(0, 1, length.out = n_samples + 1)
lhs_samples <- lhs_intervals[-(n_samples + 1)] +
runif(n_samples, min = 0, max = 1 / n_samples)
df_lhs <- data.frame(
Sample = c(random_samples, lhs_samples),
Method = rep(c("Random", "LHS"), each = n_samples)
)
ggplot(df_lhs, aes(x = Sample, y = Method)) +
geom_point(color = uw_purple, size = 2, alpha = 0.6) +
labs(
title = "Comparison of Random Sampling vs Latin Hypercube Sampling (LHS)",
x = "Parameter Value",
y = "Sampling Method"
) +
uw_theme()What are the advantages of LHS over simple random sampling? How does LHS ensure better coverage of the parameter space?
Let’s now use LHS to sample multiple parameters for our Sick-Sicker model.
Define parameter bounds and generate LHS design
Parameter bounds are usually based on expert opinion, literature review, or empirical data. They define the plausible range for each parameter.
param_bounds <- data.frame(
name = c("pHS", "pS1H", "pS1S2", "pS1D", "pS2D"),
lower = c(0.00001, 0.00, 0.005, 0.001, 0.005),
upper = c(0.0002, 0.0020, 0.050, 0.020, 0.050)
)
scale_lhs <- function(U, lower, upper) {
sweep(sweep(U, 2, (upper - lower), `*`), 2, lower, `+`)
}
d <- nrow(param_bounds)
n_train <- 1000
U_train <- lhs::randomLHS(n_train, d)
X_train <- scale_lhs(U_train,
lower = param_bounds$lower,
upper = param_bounds$upper)
colnames(X_train) <- param_bounds$nameParameter Distributions
Why look at parameter distributions? * Ensure that the sampled parameters cover the entire specified range. * Identify any potential clustering or gaps in the sampling.
df_params <- as.data.frame(X_train)
df_params_long <- tidyr::pivot_longer(
df_params,
cols = dplyr::everything(),
names_to = "Parameter",
values_to = "Value"
)
ggplot(df_params_long, aes(x = Value)) +
geom_histogram(bins = 50,
fill = uw_purple,
color = "white") +
facet_wrap(~ Parameter, scales = "free") +
labs(
title = "Distributions of Sampled Parameters",
x = "Parameter value",
y = "Frequency"
) +
uw_theme()Other way to check the parameter space coverage
pairs(X_train,
pch = 19,
col = rgb(75, 46, 131, maxColorValue = 255, alpha = 50),
main = "Pairwise Scatter Plots of Sampled Parameters")Get the Model Outputs for Training Set
Once we have the training inputs, we need to evaluate the original Sick-Sicker model at these points to get the corresponding outputs (costs and QALYs).
train_cost <- numeric(n_train)
train_qaly <- numeric(n_train)
for (i in 1:n_train) {
res <- sick_sicker_model(X_train[i, ])
train_cost[i] <- res$cost
train_qaly[i] <- res$qaly
}
df_outputs <- data.frame(
Cost = train_cost,
QALY = train_qaly
)Output Distributions
Why look at output distributions? * Understand the range and variability of model outputs. * Identify any skewness or multimodality that may affect emulator fitting.
Why the cost outputs are often right-skewed? * Healthcare costs can vary widely, with a small number of patients incurring very high costs. * This leads to a distribution where most values are low, but there are a few extreme high values (right tail).
p1 <- ggplot(df_outputs, aes(x = Cost)) +
geom_histogram(bins = 50,
fill = uw_gold,
color = "white") +
labs(
title = "Histogram of Total Costs",
x = "Total Cost",
y = "Frequency"
) +
uw_theme()
p2 <- ggplot(df_outputs, aes(x = QALY)) +
geom_histogram(bins = 50,
fill = uw_purple,
color = "white") +
labs(
title = "Histogram of Total QALYs",
x = "Total QALYs",
y = "Frequency"
) +
uw_theme()
gridExtra::grid.arrange(p1, p2, ncol = 2)Linear Regression Emulators
We will fit linear regression models to predict costs and QALYs based on the input parameters.
Why linear regression?
- Simple and interpretable (Not always).
- Fast to fit and predict.
- Provides a baseline for comparison with more complex emulators (e.g., GP).
Let’s fit the simplest linear model considering only main effects of our parameters.
X_train_df <- as.data.frame(X_train)
lm_cost <- lm(
train_cost ~ pHS + pS1H + pS1S2 + pS1D + pS2D,
data = X_train_df
)
lm_qaly <- lm(
train_qaly ~ pHS + pS1H + pS1S2 + pS1D + pS2D,
data = X_train_df
)
# Model with interactions and quadratic terms
# lm_cost <- lm(
# train_cost ~ .^2 +
# I(pHS^2) + I(pS1H^2) + I(pS1S2^2) + I(pS1D^2) + I(pS2D^2),
# data = X_train_df
# )
#
# lm_qaly <- lm(
# train_qaly ~ .^2 +
# I(pHS^2) + I(pS1H^2) + I(pS1S2^2) + I(pS1D^2) + I(pS2D^2),
# data = X_train_df
# )
pander::pander(summary(lm_cost),
caption = "Linear Regression Emulator Summary for Cost")| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 32070 | 295 | 108.7 | 0 |
| pHS | 8607205 | 1211843 | 7.103 | 2.331e-12 |
| pS1H | -3418 | 115651 | -0.02955 | 0.9764 |
| pS1S2 | 25459 | 5133 | 4.96 | 8.296e-07 |
| pS1D | -124197 | 12133 | -10.24 | 1.887e-23 |
| pS2D | -179623 | 5118 | -35.09 | 3.599e-176 |
| Observations | Residual Std. Error | \(R^2\) | Adjusted \(R^2\) |
|---|---|---|---|
| 1000 | 2099 | 0.5856 | 0.5835 |
pander::pander(summary(lm_qaly),
caption = "Linear Regression Emulator Summary for QALYs")| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 7.162 | 0.003328 | 2152 | 0 |
| pHS | -8071 | 13.67 | -590.5 | 0 |
| pS1H | 24 | 1.304 | 18.4 | 2.872e-65 |
| pS1S2 | -0.9649 | 0.0579 | -16.67 | 3.505e-55 |
| pS1D | -1.508 | 0.1368 | -11.02 | 1.037e-26 |
| pS2D | -0.427 | 0.05773 | -7.397 | 2.96e-13 |
| Observations | Residual Std. Error | \(R^2\) | Adjusted \(R^2\) |
|---|---|---|---|
| 1000 | 0.02368 | 0.9972 | 0.9972 |
How do we interpret the coefficients of the linear regression models?
Do the signs of the coefficients make sense in the context of the Sick-Sicker model?
Validation on a Test Set
n_test <- 1000
U_test <- lhs::randomLHS(n_test, d)
X_test <- scale_lhs(U_test,
lower = param_bounds$lower,
upper = param_bounds$upper)
colnames(X_test) <- param_bounds$name
X_test_df <- as.data.frame(X_test)
test_cost <- numeric(n_test)
test_qaly <- numeric(n_test)
for (i in 1:n_test) {
res <- sick_sicker_model(X_test[i, ])
test_cost[i] <- res$cost
test_qaly[i] <- res$qaly
}Metrics and Plots (Linear Emulator)
pred_cost_lm <- predict(lm_cost, newdata = X_test_df, se.fit = TRUE)
pred_qaly_lm <- predict(lm_qaly, newdata = X_test_df, se.fit = TRUE)
sigma_cost_lm <- summary(lm_cost)$sigma
sigma_qaly_lm <- summary(lm_qaly)$sigma
sd_cost_lm <- sqrt(pred_cost_lm$se.fit^2 + sigma_cost_lm^2)
sd_qaly_lm <- sqrt(pred_qaly_lm$se.fit^2 + sigma_qaly_lm^2)
metrics <- function(y_true, y_hat, y_sd = NULL) {
rmse <- sqrt(mean((y_hat - y_true)^2))
mae <- mean(abs(y_hat - y_true))
r2 <- 1 - sum((y_hat - y_true)^2) / sum((y_true - mean(y_true))^2)
cov95 <- NA
if (!is.null(y_sd)) {
cov95 <- mean(y_true >= (y_hat - 1.96 * y_sd) &
y_true <= (y_hat + 1.96 * y_sd))
}
c(RMSE = rmse, MAE = mae, R2 = r2, Coverage_95 = cov95)
}
metrics_cost_lm <- metrics(test_cost, pred_cost_lm$fit, sd_cost_lm)
metrics_qaly_lm <- metrics(test_qaly, pred_qaly_lm$fit, sd_qaly_lm)
pander::pander(as.data.frame(t(metrics_cost_lm)),
caption = "Linear Emulator Validation Metrics for Cost")| RMSE | MAE | R2 | Coverage_95 |
|---|---|---|---|
| 2187 | 1443 | 0.578 | 0.962 |
pander::pander(as.data.frame(t(metrics_qaly_lm)),
caption = "Linear Emulator Validation Metrics for QALYs")| RMSE | MAE | R2 | Coverage_95 |
|---|---|---|---|
| 0.02632 | 0.02005 | 0.9965 | 0.941 |
df_cost <- data.frame(
true = test_cost,
pred = pred_cost_lm$fit,
model = "Linear model"
)
df_qaly <- data.frame(
true = test_qaly,
pred = pred_qaly_lm$fit,
model = "Linear model"
)
p_cost <- ggplot(df_cost, aes(x = true, y = pred, color = model)) +
geom_point(alpha = 0.6) +
geom_abline(slope = 1, intercept = 0, color = "black") +
scale_color_manual(values = c("Linear model" = uw_purple)) +
labs(
title = "Cost: Predicted vs True (Linear Emulator)",
x = "True Cost",
y = "Predicted Cost",
color = "Model"
) +
uw_theme()
p_qaly <- ggplot(df_qaly, aes(x = true, y = pred, color = model)) +
geom_point(alpha = 0.6) +
geom_abline(slope = 1, intercept = 0, color = "black") +
scale_color_manual(values = c("Linear model" = uw_gold)) +
labs(
title = "QALYs: Predicted vs True (Linear Emulator)",
x = "True QALYs",
y = "Predicted QALYs",
color = "Model"
) +
uw_theme()
gridExtra::grid.arrange(p_cost, p_qaly, ncol = 2)- How well did the linear emulators perform?
- How do you interpret the 45-degree line in the predicted vs. true plots?
- Why is it important for the points to be close to this line?
- How we can improve emulator performance if needed?
Single Scenario Prediction
## 4.3 Single scenario comparison ----
example_params <- c(pHS = 1.5e-05, pS1H = 1e-03, pS1S2 = 2e-02, pS1D = 1e-02, pS2D = 3e-02)
true_res <- sick_sicker_model(example_params)
# Linear
emu_cost_lm <- predict(lm_cost, newdata = as.data.frame(t(example_params)), se.fit = TRUE)
emu_qaly_lm <- predict(lm_qaly, newdata = as.data.frame(t(example_params)), se.fit = TRUE)
emu_cost_lm_sd <- sqrt(emu_cost_lm$se.fit^2 + sigma_cost_lm^2)
emu_qaly_lm_sd <- sqrt(emu_qaly_lm$se.fit^2 + sigma_qaly_lm^2)
# Summary table for single scenario
results_df <- data.frame(
Model = c("True", "Linear"),
Cost = c( sprintf("%.0f",true_res$cost),
sprintf("%.0f (%.0f to %.0f)", emu_cost_lm$fit, emu_cost_lm$fit - 1.96 * emu_cost_lm_sd, emu_cost_lm$fit + 1.96 * emu_cost_lm_sd)
),
QALYs = c( sprintf("%.2f", true_res$qaly),
sprintf("%.2f (%.2f to %.2f)", emu_qaly_lm$fit, emu_qaly_lm$fit - 1.96 * emu_qaly_lm_sd, emu_qaly_lm$fit + 1.96 * emu_qaly_lm_sd)
)
)
pander::pander(results_df, caption = "Single Scenario Comparison of True vs Emulated Outputs")| Model | Cost | QALYs |
|---|---|---|
| True | 26248 | 7.05 |
| Linear | 26074 (21951 to 30198) | 7.02 (6.97 to 7.06) |
Speed comparison (original vs Linear )
n_speed <- 10000
U_speed <- lhs::randomLHS(n_speed, d)
X_speed <- scale_lhs(U_speed, lower = param_bounds$lower, upper = param_bounds$upper)
colnames(X_speed) <- param_bounds$name
X_speed_df <- as.data.frame(X_speed)
cat("\nTiming 10000 scenarios:\n")
Timing 10000 scenarios:
t_true <- system.time({
true_cost_vec <- apply(X_speed, 1, function(x) sick_sicker_model(x)$cost)
})
t_lm <- system.time({
emu_cost_vec_lm <- predict(lm_cost, newdata = X_speed_df)
})
# Table of timings
timings_df <- data.frame(
Model = c("Original", "Linear Emulator"),
Time_Seconds = c(t_true[3], t_lm[3]),
Time_per_Scenario_ms = c((t_true[3] / n_speed) * 1000, (t_lm[3] / n_speed) * 1000)
)
pander::pander(timings_df, caption = "Timing Comparison for 5000 Scenarios")| Model | Time_Seconds | Time_per_Scenario_ms |
|---|---|---|
| Original | 21.08 | 2.108 |
| Linear Emulator | 0.001 | 1e-04 |
Emulator application: Probabilistic Sensitivity Analysis (PSA)
What is a PSA? Probabilistic Sensitivity Analysis (PSA) is a technique used in health economics and decision analysis to assess the uncertainty in model outcomes due to uncertainty in input parameters. In PSA, probability distributions are assigned to uncertain parameters, and multiple simulations are run by sampling from these distributions. This allows for the estimation of the distribution of outcomes (e.g., costs, QALYs) and helps decision-makers understand the robustness of their conclusions.
PSA using Linear Emulators
n_psa <- 1000
#Distributions for parameters of probabilities that reflect uncertainty
set.seed(456)
psa_params <- data.frame(
pHS = rnorm(n_psa, mean = 0.0001, sd = 0.00002), # Normal distribution
pS1H = rnorm(n_psa, mean = 0.001, sd = 0.0002), # Normal distribution
pS1S2 = rnorm(n_psa, mean = 0.02, sd = 0.005), # Normal distribution
pS1D = rnorm(n_psa, mean = 0.01, sd = 0.002), # Normal distribution
pS2D = rnorm(n_psa, mean = 0.03, sd = 0.005) # Normal distribution
)
# Evaluate emulators for PSA
#Using the original model (for comparison)
psa_costs_true <- numeric(n_psa)
psa_qalys_true <- numeric(n_psa)
#Using linear emulator
psa_costs_lm <- numeric(n_psa)
psa_qalys_lm <- numeric(n_psa)
# PSA using the original model
for (i in 1:n_psa) {
params_i <- as.numeric(psa_params[i, ])
names(params_i) <- colnames(psa_params)
res <- sick_sicker_model(params_i)
psa_costs_true[i] <- res$cost
psa_qalys_true[i] <- res$qaly
}
# PSA using Linear emulators
for (i in 1:n_psa) {
params_i <- as.numeric(psa_params[i, ])
names(params_i) <- colnames(psa_params)
psa_costs_lm[i] <- predict(lm_cost, newdata = as.data.frame(t(params_i)))
psa_qalys_lm[i] <- predict(lm_qaly, newdata = as.data.frame(t(params_i)))
}
# Summarize mean and CI for PSA results using linear emulators
#Actual model results
psa_results_true <- data.frame(
Outcome = c("Cost", "QALYs"),
Mean = c(mean(psa_costs_true), mean(psa_qalys_true)),
SD = c(sd(psa_costs_true), sd(psa_qalys_true)),
CI_Lower = c(quantile(psa_costs_true, 0.025), quantile(psa_qalys_true, 0.025)),
CI_Upper = c(quantile(psa_costs_true, 0.975), quantile(psa_qalys_true, 0.975))
)
pander::pander(head(psa_results_true), caption = "PSA Results using Original Model")| Outcome | Mean | SD | CI_Lower | CI_Upper |
|---|---|---|---|---|
| Cost | 26122 | 494.6 | 25356 | 27306 |
| QALYs | 6.299 | 0.1591 | 6.005 | 6.605 |
#Linear model results
psa_results_lm <- data.frame(
Outcome = c("Cost", "QALYs"),
Mean = c(mean(psa_costs_lm), mean(psa_qalys_lm)),
SD = c(sd(psa_costs_lm), sd(psa_qalys_lm)),
CI_Lower = c(quantile(psa_costs_lm, 0.025), quantile(psa_qalys_lm, 0.025)),
CI_Upper = c(quantile(psa_costs_lm, 0.975), quantile(psa_qalys_lm, 0.975))
)
pander::pander(head(psa_results_lm), caption = "PSA Results using Linear Emulators")| Outcome | Mean | SD | CI_Lower | CI_Upper |
|---|---|---|---|---|
| Cost | 26779 | 942.6 | 24955 | 28644 |
| QALYs | 6.323 | 0.1587 | 6.017 | 6.623 |
Figure: PSA Distributions
# Figure that compares the QALYs estimation distributions from Linear emulators
df_psa_qalys <- data.frame(
QALY = psa_qalys_lm,
Model = "Linear Emulator")
p_psa_qalys <- ggplot(df_psa_qalys, aes(x = QALY, fill = Model)) +
geom_density(alpha = 0.4) +
labs(title = "PSA on Total QALYs Distributions by emulator type", x = "QALYs", y = "Density") +
theme_minimal()
#Add the distribution from the actual model as a dashed line
df_psa_qalys_true <- data.frame(
QALY = psa_qalys_true,
Model = rep("Original Model", each = n_psa)
)
p_psa_qalys <- p_psa_qalys +
geom_density(data = df_psa_qalys_true, aes(x = QALY), color = "black", linetype = "dashed", linewidth = 1, alpha = 0.01) +
labs(title = "PSA on Total QALYs Distributions \n (with Original Model)")
# Figure that compares the Cost estimation distributions from Linear emulators
df_psa_cost <- data.frame(
Cost = psa_costs_lm,
Model = "Linear Emulator")
p_psa_cost <- ggplot(df_psa_cost, aes(x = Cost, fill = Model)) +
geom_density(alpha = 0.4) +
labs(title = "PSA on Total Costs Distributions \n by emulator type", x = "Costs", y = "Density") +
theme_minimal()
#Add the distribution from the actual model as a dashed line
df_psa_cost_true <- data.frame(
Cost = psa_costs_true,
Model = rep("Original Model", each = n_psa)
)
p_psa_cost <- p_psa_cost +
geom_density(data = df_psa_cost_true, aes(x = Cost), color = "black", linetype = "dashed", linewidth = 1, alpha = 0.01) +
labs(title = "PSA on Total Costs Distributions \n (with Original Model)")
#Combine plots
grid.arrange(p_psa_cost, p_psa_qalys, ncol = 2)- How well do the PSA results from the linear emulators match those from the original model?
- Is there any noticeable difference in the distributions?
- What if QALYs distributions are also not well captured by the linear emulator?
PSA, GP Emulators, and Further Results
The remaining sections (PSA, GP emulators, timing comparisons) can be implemented exactly as in the script version, following the same structure used above for the linear emulators (definition, validation, PSA, and graphical diagnostics).
# Install/load required packages
pkgs_gp <- c("DiceKriging", "DiceOptim")
new_pkgs_gp <- pkgs_gp[!(pkgs_gp %in% installed.packages()[,"Package"])]
if(length(new_pkgs_gp)) install.packages(new_pkgs_gp)
lapply(pkgs_gp, library, character.only = TRUE)[[1]]
[1] "DiceOptim" "DiceKriging" "tidyr" "gridExtra" "pander"
[6] "ggplot2" "lhs" "stats" "graphics" "grDevices"
[11] "utils" "datasets" "methods" "base"
[[2]]
[1] "DiceOptim" "DiceKriging" "tidyr" "gridExtra" "pander"
[6] "ggplot2" "lhs" "stats" "graphics" "grDevices"
[11] "utils" "datasets" "methods" "base"
# Scale inputs to [0,1] for GP fitting
scale_to_unit <- function(X, lower, upper) {
sweep(sweep(X, 2, lower, `-`), 2, (upper - lower), `/`)
}
lower <- param_bounds$lower
upper <- param_bounds$upper
X_train_unit <- scale_to_unit(X_train, lower, upper)
set.seed(2025)
n_train_gp <- 200
idx_gp <- sample(seq_len(n_train), n_train_gp)
X_train_gp_unit <- X_train_unit[idx_gp, ]
y_cost_gp <- train_cost[idx_gp]
y_qaly_gp <- train_qaly[idx_gp]
# Fit GP emulators (Matern 5/2 covariance) for cost and QALYs
gp_cost <- DiceKriging::km(
design = as.data.frame(X_train_gp_unit),
response = y_cost_gp,
covtype = "matern5_2",
nugget.estim = TRUE,
control = list(trace = FALSE)
)
gp_qaly <- DiceKriging::km(
design = as.data.frame(X_train_gp_unit),
response = y_qaly_gp,
covtype = "matern5_2",
nugget.estim = TRUE,
control = list(trace = FALSE)
)
# Scale test inputs to [0,1]
X_test_unit <- scale_to_unit(X_test, lower, upper)
# GP predictions on test set
pred_cost_gp <- predict(
gp_cost,
newdata = as.data.frame(X_test_unit),
type = "UK",
se.compute = TRUE
)
pred_qaly_gp <- predict(
gp_qaly,
newdata = as.data.frame(X_test_unit),
type = "UK",
se.compute = TRUE
)
mean_cost_gp <- pred_cost_gp$mean
sd_cost_gp <- pred_cost_gp$sd
mean_qaly_gp <- pred_qaly_gp$mean
sd_qaly_gp <- pred_qaly_gp$sd
# GP prediction performance metrics
metrics_cost_gp <- metrics(test_cost, mean_cost_gp, sd_cost_gp)
metrics_qaly_gp <- metrics(test_qaly, mean_qaly_gp, sd_qaly_gp)
#Print metrics tables including linear emulator results for comparison
pander::pander(
rbind(
as.data.frame(t(metrics_cost_lm)),
as.data.frame(t(metrics_cost_gp))
),
caption = "Validation Metrics for Cost: Linear vs GP Emulator",
row.names = c("Linear Emulator", "GP Emulator")
)| RMSE | MAE | R2 | Coverage_95 | |
|---|---|---|---|---|
| Linear Emulator | 2187 | 1443 | 0.578 | 0.962 |
| GP Emulator | 232.8 | 115.8 | 0.9952 | 0.941 |
pander::pander(
rbind(
as.data.frame(t(metrics_qaly_lm)),
as.data.frame(t(metrics_qaly_gp))
),
caption = "Validation Metrics for QALYs: Linear vs GP Emulator",
row.names = c("Linear Emulator", "GP Emulator")
)| RMSE | MAE | R2 | Coverage_95 | |
|---|---|---|---|---|
| Linear Emulator | 0.02632 | 0.02005 | 0.9965 | 0.941 |
| GP Emulator | 0.004769 | 0.002154 | 0.9999 | 0.957 |
Prediction performance plots (GP Emulator)
# GP prediction performance plots including linear emulator results for comparison
df_cost_gp <- data.frame(
true = test_cost,
pred_lm = pred_cost_lm$fit,
pred_gp = mean_cost_gp
)
df_cost_gp_long <- tidyr::pivot_longer(
df_cost_gp,
cols = c("pred_lm", "pred_gp"),
names_to = "Model",
values_to = "Predicted"
)
p_cost_gp <- ggplot(df_cost_gp_long, aes(x = true, y = Predicted, color = Model)) +
geom_point(alpha = 0.6) +
geom_abline(slope = 1, intercept = 0, color = "black") +
scale_color_manual(values = c("pred_lm" = uw_purple, "pred_gp" = uw_gold),
labels = c("Linear Emulator", "GP Emulator")) +
labs(
title = "Cost: Predicted vs True (Linear vs GP Emulator)",
x = "True Cost",
y = "Predicted Cost",
color = "Model"
) +
uw_theme()
df_qaly_gp <- data.frame(
true = test_qaly,
pred_lm = pred_qaly_lm$fit,
pred_gp = mean_qaly_gp
)
df_qaly_gp_long <- tidyr::pivot_longer(
df_qaly_gp,
cols = c("pred_lm", "pred_gp"),
names_to = "Model",
values_to = "Predicted"
)
p_qaly_gp <- ggplot(df_qaly_gp_long, aes(x = true, y = Predicted, color = Model)) +
geom_point(alpha = 0.6) +
geom_abline(slope = 1, intercept = 0, color = "black") +
scale_color_manual(values = c("pred_lm" = uw_purple, "pred_gp" = uw_gold),
labels = c("Linear Emulator", "GP Emulator")) +
labs(
title = "QALYs: Predicted vs True (Linear vs GP Emulator)",
x = "True QALYs",
y = "Predicted QALYs",
color = "Model"
) +
uw_theme()
grid.arrange(p_cost_gp, p_qaly_gp, ncol = 2)Speed comparison (Original vs GP Emulator)
# Speed comparison (original vs GP emulator)
X_speed_unit <- scale_to_unit(X_speed, lower, upper)
t_gp <- system.time({
emu_cost_vec_gp <- predict(
gp_cost,
newdata = as.data.frame(X_speed_unit),
type = "UK"
)$mean
})
# Table of timings including GP emulator and linear emulator for comparison
timings_df <- data.frame(
Model = c("Original", "Linear Emulator", "GP Emulator"),
Time_Seconds = c(t_true[3], t_lm[3], t_gp[3]),
Time_per_Scenario_ms = c((t_true[3] / n_speed) * 1000,
(t_lm[3] / n_speed) * 1000,
(t_gp[3] / n_speed) * 1000)
)
pander::pander(timings_df, caption = "Timing Comparison for 10000 Scenarios")| Model | Time_Seconds | Time_per_Scenario_ms |
|---|---|---|
| Original | 21.08 | 2.108 |
| Linear Emulator | 0.001 | 1e-04 |
| GP Emulator | 0.328 | 0.0328 |
Figure: PSA Distributions (GP Emulator)
# Figure that compares the QALYs estimation distributions from GP emulators and linear emulators
# PSA using GP emulators
psa_costs_gp <- numeric(n_psa)
psa_qalys_gp <- numeric(n_psa)
for (i in 1:n_psa) {
params_i <- as.numeric(psa_params[i, ])
names(params_i) <- colnames(psa_params)
psa_costs_gp[i] <- predict(
gp_cost,
newdata = as.data.frame(scale_to_unit(as.matrix(t(params_i)), lower, upper)),
type = "UK"
)$mean
psa_qalys_gp[i] <- predict(
gp_qaly,
newdata = as.data.frame(scale_to_unit(as.matrix(t(params_i)), lower, upper)),
type = "UK"
)$mean
}
# Figure that compares the QALYs estimation distributions from GP emulators
df_psa_qalys_all <- data.frame(
QALY = c(psa_qalys_lm, psa_qalys_gp),
Model = rep(c("Linear Emulator", "GP Emulator"), each = n_psa)
)
p_psa_qalys_all <- ggplot(df_psa_qalys_all, aes(x = QALY, fill = Model)) +
geom_density(alpha = 0.4) +
labs(title = "PSA on Total QALYs Distributions by emulator type", x = "QALYs", y = "Density") +
theme_minimal()
#Add the distribution from the actual model as a dashed line
df_psa_qalys_true <- data.frame(
QALY = psa_qalys_true,
Model = rep("Original Model", each = n_psa)
)
p_psa_qalys_all <- p_psa_qalys_all +
geom_density(data = df_psa_qalys_true, aes(x = QALY), color = "black", linetype = "dashed", linewidth = 1, alpha = 0.01) +
labs(title = "PSA on Total QALYs Distributions \n (with Original Model)")
# Figure that compares the Cost estimation distributions from GP emulators
df_psa_cost_all <- data.frame(
Cost = c(psa_costs_lm, psa_costs_gp),
Model = rep(c("Linear Emulator", "GP Emulator"), each = n_psa)
)
p_psa_cost_all <- ggplot(df_psa_cost_all, aes(x = Cost, fill = Model)) +
geom_density(alpha = 0.4) +
labs(title = "PSA on Total Costs Distributions \n by emulator type", x = "Costs", y = "Density") +
theme_minimal()
#Add the distribution from the actual model as a dashed line
df_psa_cost_true <- data.frame(
Cost = psa_costs_true,
Model = rep("Original Model", each = n_psa)
)
p_psa_cost_all <- p_psa_cost_all +
geom_density(data = df_psa_cost_true, aes(x = Cost), color = "black", linetype = "dashed", linewidth = 1, alpha = 0.01) +
labs(title = "PSA on Total Costs Distributions \n (with Original Model)")
#Combine plots
grid.arrange(p_psa_cost_all, p_psa_qalys_all, ncol = 2)Key Takeaways
- Computationally intensive Markov models can be emulated using linear and GP meta-models.
- Linear emulators offer substantial speed gains with acceptable predictive accuracy.
- GP emulators typically improve fit and uncertainty quantification at the expense of more complex fitting.
- Emulators make PSA and other large-scale analyses feasible by replacing repeated calls to the full model.
- Validation metrics and graphical diagnostics are essential to judge emulator adequacy for decision making.