This code uses the MultiBD package - code example adapted from package vignette
The inference in this code relies on the Metroplis Hasting sampler. This is one of the work horse MCMC samplers used in Bayesian statistics. There are a ton of resources and lectures online that detail MH samplers if you want to learn more about the statistics.
#clear environment
rm(list = ls())
#install.packages("MultiBD")
#install.packages("MCMCpack", repos = 'http://cran.us.r-project.org')
#install.packages("BiocManager")
#load packages
library(MultiBD)
library(BiocManager)
library(MCMCpack)
library(dplyr)
library(ggplot2)
library(deSolve)
First, we will simulate data that we know the true parameter values of. Here is a simple SIR model with \(\beta = 0.02\) and \(\alpha = 2\) with some random noise added.
#Set initial conditions
init <- c(S=200, I=5, R=0) # popn initial values
parameters <- c(b= 0.02,
a=2)
#set time series
times <- seq(0, 4, by = 0.5) #unit=days, seq(start day, end day, step)
#Model function
disease_dynamics <- function(time, state, parameters) {
with(
as.list(c(state, parameters)), {
#Equations
dS <- - b*S*I
dI <- b*S*I - a*I
dR <- a*I
return(list(c(dS, dI, dR)))
}
)
}
set.seed(123)
df <- as.data.frame(ode(y = init, times = times, func = disease_dynamics, parms = parameters))%>%
mutate(S= round(S + rnorm(length(times), 0,1)),
I= round(I + rnorm(length(times), 0,1)),
R= round(R + rnorm(length(times), 0,1)))%>%
mutate(S=ifelse(S < 0, 0, S),
I= ifelse(I < 0, 0, I),
R= ifelse(R < 0, 0, R)
)
Our resulting data that we will now try to fit parameter values is:
knitr::kable(df)
| time | S | I | R |
|---|---|---|---|
| 0.0 | 199 | 5 | 1 |
| 0.5 | 184 | 14 | 8 |
| 1.0 | 154 | 26 | 26 |
| 1.5 | 112 | 35 | 58 |
| 2.0 | 79 | 33 | 92 |
| 2.5 | 61 | 23 | 121 |
| 3.0 | 49 | 17 | 141 |
| 3.5 | 42 | 9 | 151 |
| 4.0 | 40 | 3 | 160 |
Visualized:
#Create color palette
Pal1 = c("S" = "goldenrod",
"I" = "red3",
"R" = "dodgerblue3")
#plot
fig <- ggplot() +
theme_classic()+
geom_point(data=df, aes(x=time, y=S, color="S"), size=3)+
geom_point(data=df, aes(x=time, y=I, color="I"), size=3)+
geom_point(data=df, aes(x=time, y=R, color="R"), size=3)+
geom_line(data=df, aes(x=time, y=S, color="S"), size=1, linetype = "dotted")+
geom_line(data=df, aes(x=time, y=I, color="I"),linetype = "dotted", size=1)+
geom_line(data=df, aes(x=time, y=R, color="R"),linetype = "dotted", size=1)+
scale_color_manual(values=Pal1, name="Disease state")+
theme(text = element_text(size = 20))+
ylab("Number of individuals") + xlab("Time")
fig
To fit the ‘unknown’ parameter values, we will use the metropolis hasting algorithm.
Specify functions for Metropolis hasting
#log liklihood of SIR
loglik_sir <- function(param, data) {
alpha <- exp(param[1]) # Rates must be non-negative
beta <- exp(param[2])
# Set-up SIR model
drates1 <- function(a, b) { 0 }
brates2 <- function(a, b) { 0 }
drates2 <- function(a, b) { alpha * b }
trans12 <- function(a, b) { beta * a * b }
sum(sapply(1:(nrow(data) - 1), # Sum across all time steps k
function(k) {
log(
dbd_prob( # Compute the transition probability matrix
t = data$time[k + 1] - data$time[k], # Time increment
a0 = data$S[k], b0 = data$I[k], # From: S(t_k), I(t_k)
drates1, brates2, drates2, trans12,
a = data$S[k + 1], B = data$S[k] + data$I[k] - data$S[k + 1],
computeMode = 4, nblocks = 80 # Compute using 4 threads
)[1, data$I[k + 1] + 1] # To: S(t_(k+1)), I(t_(k+1))
)
}))
}
#log priors for alpha and beta
logprior <- function(param) {
log_alpha <- param[1]
log_beta <- param[2]
dnorm(log_alpha, mean = 0, sd = 100, log = TRUE) +
dnorm(log_beta, mean = 0, sd = 100, log = TRUE)
}
Simulate posterior distribution via Metropolis hasting
set.seed(1234)
#set initial conditions
alpha0 <- 2
beta0 <- 0.01
#Find posterior sample
post_sample <- MCMCmetrop1R(fun = function(param) { loglik_sir(param, df) + logprior(param) },
theta.init = log(c(alpha0, beta0)),
mcmc = 1000, burnin = 200)
##
##
## @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
## The Metropolis acceptance rate was 0.56000
## @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
#format posterior sample
post_sample <- post_sample%>%
as.data.frame()%>%
rename(log_alpha=V1, log_beta=V2)%>%
mutate(sim = 1:nrow(post_sample))
What does the posterior sample look like?
#create color palette
Pal2 = c("log alpha" = "goldenrod",
"log beta" = "dodgerblue")
Pal3 = c("alpha" = "goldenrod",
"beta" = "dodgerblue")
#visualize metropolis hasting steps
fig3 <- ggplot()+
theme_classic()+
geom_line(data=post_sample, aes(y=log_alpha, x=sim, color="log alpha"))+
geom_line(data=post_sample, aes(y=log_beta, x=sim, color="log beta"))+
scale_color_manual(values=Pal2, name="Parameter")+
theme(text = element_text(size = 20))+
ylab("Parameter estimate") + xlab("Simulation number")+
ggtitle("Metropolis Hasting steps")
#visualize posterior distributions
fig1 <- ggplot()+
theme_classic()+
geom_histogram(data=post_sample, aes(x=exp(log_alpha), fill="alpha"), bins=30, color="black")+
scale_fill_manual(values=Pal3, name="Parameter")+
theme(text = element_text(size = 20))+
xlab("alpha posterior samples")
fig2 <- ggplot()+
theme_classic()+
geom_histogram(data=post_sample, aes(x=exp(log_beta), fill="beta"), bins=30, color="black")+
scale_fill_manual(values=Pal3, name="Parameter")+
theme(text = element_text(size = 20))+
xlab("beta posterior samples")
ggpubr::ggarrange(fig1, fig2, common.legend = TRUE)
Examine fit:
#Set initial conditions
init <- c(S=df$S[1], I=df$I[1], R=df$R[1]) # popn initial values
parameters <- c(b= exp(mean(post_sample$log_beta)),
a=exp(mean(post_sample$log_alpha))
)
#set time series
times <- seq(0, tail(df$time, n=1), by = tail(df$time, n=1)/nrow(df)) #unit=days, seq(start day, end day, step)
#Model function
disease_dynamics <- function(time, state, parameters) {
with(
as.list(c(state, parameters)), {
#Equations
dS <- - b*S*I
dI <- b*S*I - a*I
dR <- a*I
return(list(c(dS, dI, dR)))
}
)
}
#Run model
df.out <- as.data.frame(ode(y = init, times = times, func = disease_dynamics, parms = parameters))
#compare parameter estimates
fit <- data.frame(parameter = c("beta", "alpha"),
true_value=c(0.02,2),
mean_post = c(exp(mean(post_sample$log_beta)), exp(mean(post_sample$log_alpha))),
lower_95CI = c(quantile(exp(post_sample$log_beta), probs = c(0.025))[[1]],
quantile(exp(post_sample$log_alpha), probs = c(0.025))[[1]]),
upper_95CI = c(quantile(exp(post_sample$log_beta), probs = c(0.975))[[1]],
quantile(exp(post_sample$log_alpha), probs = c(0.975))[[1]])
)%>%
mutate(mean_post = round(mean_post, 2),
lower_95CI = round(lower_95CI, 2),
upper_95CI = round(upper_95CI, 2))
knitr::kable(fit)
| parameter | true_value | mean_post | lower_95CI | upper_95CI |
|---|---|---|---|---|
| beta | 0.02 | 0.02 | 0.02 | 0.02 |
| alpha | 2.00 | 2.01 | 1.69 | 2.39 |
Visualize:
#add fit lines to data plot
fig +
geom_line(data=df.out, aes(x=times, y=S, color="S"), size=2, alpha=0.5)+
geom_line(data=df.out, aes(x=times, y=I, color="I"), size=2, alpha=0.5)+
geom_line(data=df.out, aes(x=times, y=R, color="R"), size=2, alpha=0.5)