alpha = 0.90
CHI <- read.csv("C:/Users/Lisa/Desktop/677/CHI.csv", header = TRUE)
CHI <- CHI[seq(1, length(CHI$CASES.i), by = 2), ]
## definition of school indicator
SCH <- c(-1, -1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1, 1, 1, 1, 1, 1, -1)
SCH <- rep(SCH, length(CHI$CASES))[1:length(CHI$CASES)]
CHI$SCH <- SCH
## to analyze a subset of the data
CHI <- CHI[c(1:length(CHI$CASES)), ]
CHI$CASES <- as.numeric(CHI$CASES)
CHI$CASES.i <- as.numeric(CHI$CASES.i)
CHI$TMAX <- as.numeric(CHI$TMAX)
CHI$TMIN <- as.numeric(CHI$TMIN)
CHI$WEEK <- as.numeric(CHI$WEEK)
CHI$BWK <- as.numeric(CHI$BWK)
CHI$SCH <- as.numeric(CHI$SCH)
CHI$B <- as.numeric(CHI$B)
CHI$B.5 <- as.numeric(CHI$B.5)
## to change back and forth from B.5 vs. B CHI$B.5 <- CHI$B
## this is what the data looks like head(CHI)
plot(cumsum(CHI$B.5), cumsum(CHI$CASES.i), type = "l", xlab = "Cumulative sum of births (lagged 5 years)",
ylab = "Cumulative sum of reported cases")
x <- c(1:1e+06)
lines(x, x, col = "red")
legend("bottomright", c("incidence", "expected incidence"), col = c("black",
"red", "gray"), lty = 1)
## fit a smooth spline of cumulative measles on cumulative births with 2.5
## degrees of freedom
cum.reg <- smooth.spline(cumsum(CHI$B.5), cumsum(CHI$CASES.i), df = 2.5)
lm <- lm(cumsum(CHI$CASES.i) ~ cumsum(CHI$B.5))
# abline(lm, col='gray')
## predict points using the smooth spline and calculate residuals, D
D <- predict(cum.reg)$y - cumsum(CHI$CASES.i)
B <- CHI$B.5
# plot(D, type='l')
## under reporting is given by slope of smooth spline
u <- predict(cum.reg, deriv = 1)$y
rho <- mean(u)
legend("topleft", paste("rho", round(rho, 3), sep = " = "))
## Ic are actual cases - reported cases divided by u
Ic = CHI$CASES.i/u
# plot(Ic, type='l')
lIt = log(Ic[2:(length(CHI$CASES) + 1)])
lItm1 = log(Ic[1:length(CHI$CASES)])
Dtm1 = D[1:length(CHI$CASES)]
## remove values of -Inf from I - glm function does not like these!
for (i in 1:(length(lIt) - 1)) {
if (lIt[i] == -Inf) {
lIt[i] <- 0
}
}
for (i in 1:length(lItm1)) {
if (lItm1[i] == -Inf) {
lItm1[i] <- 0
}
}
## mean populaiton estimate
pop = mean(CHI$POP.5)
seas = rep(1:26, length(CHI$CASES))[1:length(CHI$CASES)]
seas <- as.factor(seas)
## test Smeans from 1% to whole population
Smean = seq(0.01, 1, by = 0.001) * pop
## this is a place to store the likelihoods of the data for each setting
## of Smean
llik = rep(NA, length(Smean))
## now perform the log linear regressions at each Smean
for (i in 1:length(Smean)) {
lStm1 = log(Smean[i] + Dtm1)
glmfit = glm(lIt ~ -1 + as.factor(seas) + lItm1 + offset(lStm1))
llik[i] = glmfit$deviance
}
sbar <- Smean[which(llik == min(llik))]
sbar
## [1] 84237
## plot likelihood curve
plot(Smean, llik, type = "l", xlim = c(0, (0.5 * pop)), xlab = "sbar", ylab = "deviance (- log likelihood)")
legend("topright", paste("sbar", round(sbar, 0), sep = " = "))
## The Smean we want to use is the one that minimizes the log likelihood
plot(D + sbar, type = "l", xlab = "Biweek", ylab = "S")
sbar.def <- sbar
D.def <- D
B.def <- CHI$B.5
alpha.def <- 0.9
## TSIR code
## pass B, sbar, and D results from above and guess at coefficients for
## Beta function
runTSIR <- function(alpha = alpha.def, B = B.def, sbar = sbar.def, D = D.def,
Beta = "TMP", guess = c(x1 = 3.8e-05, x2 = 0.4), initial.state = c(S = sbar.def -
Ic[1] * rho, I = Ic[1] * rho, R = CHI$POP[1] - sbar.def - Ic[1] * rho,
CI = Ic[1] * rho)) {
## create empty vectors to store S, I, R, B, Beta estimates
S <- rep(NA, length(CHI$CASES))
I <- rep(NA, length(CHI$CASES))
R <- rep(NA, length(CHI$CASES))
CI <- rep(NA, length(CHI$CASES))
## set time = 1 values to initial states
S[1] <- sbar + D[1]
I[1] <- initial.state["I"]
R[1] <- initial.state["R"]
CI[1] <- initial.state["CI"]
## betas are a function of the normalized climate data - I used tmax here.
## The x1-x2 are parameters to fit the seasonal forcing equation.
tmax <- (CHI$TMAX - 140)/150
Beta.TMP <- guess["x1"] * (1 + (guess["x2"] * (tmax)))
Beta.SCH <- guess["x1"] * (1 + (guess["x2"] * (CHI$SCH)))
Beta.CST <- rep(guess["x1"], length(CHI$CASES))
if (Beta == "TMP") {
for (t in 2:length(CHI$CASES)) {
S[t] <- D[t] + sbar
I[t] <- Beta.TMP[t] * S[t - 1] * (I[t - 1]^alpha)
R[t] <- I[t - 1] + R[t - 1]
CI[t] <- I[t] + CI[t - 1]
}
} else if (Beta == "SCH") {
for (t in 2:length(CHI$CASES)) {
S[t] <- D[t] + sbar
I[t] <- Beta.SCH[t] * S[t - 1] * (I[t - 1]^alpha)
R[t] <- I[t - 1] + R[t - 1]
CI[t] <- I[t] + CI[t - 1]
}
} else if (Beta == "CST") {
for (t in 2:length(CHI$CASES)) {
S[t] <- D[t] + sbar
I[t] <- Beta.CST[t] * S[t - 1] * (I[t - 1]^alpha)
R[t] <- I[t - 1] + R[t - 1]
CI[t] <- I[t] + CI[t - 1]
}
}
tsir.output <- data.frame(S, I, R, CI, Beta.TMP, Beta.SCH, Beta.CST)
}
######################## LS1 is temperature ##
LS1 <- function(x) {
sum((runTSIR(guess = x, Beta = "TMP")$I - CHI$CASES.i/u)^2)
}
g <- c(x1 = 2.5e-05, x2 = -0.4)
p <- optim(g, LS1)
## show optimal values
p$par
## x1 x2
## 0.0000242 -0.8175902
## show MSE
MSE1 <- LS1(p$par)
MSE1
## [1] 5.524e+09
optimal1 <- as.vector(p$par)
out.opt1 <- runTSIR(guess = c(x1 = as.numeric(p$par[1]), x2 = as.numeric(p$par[2])),
Beta = "TMP")
head(out.opt1)
## S I R CI Beta.TMP Beta.SCH Beta.CST
## 1 88955 448.5 1801929 448.5 4.826e-05 4.398e-05 2.42e-05
## 2 88932 1046.8 1802378 1495.3 4.832e-05 4.398e-05 2.42e-05
## 3 88861 1865.3 1803425 3360.6 4.016e-05 4.413e-06 2.42e-05
## 4 88723 3513.4 1805290 6874.0 4.501e-05 4.413e-06 2.42e-05
## 5 88619 5641.2 1808803 12515.2 4.094e-05 4.413e-06 2.42e-05
## 6 88361 6274.2 1814444 18789.4 2.977e-05 4.413e-06 2.42e-05
######################## LS2 is school #######
LS2 <- function(x) {
sum((runTSIR(guess = x, Beta = "SCH")$I - CHI$CASES.i/u)^2)
}
g <- c(x1 = 2.5e-05, x2 = 0.4)
k <- optim(g, LS2)
par(mfrow = c(1, 3), mar = c(4, 4, 4, 2))
plot(CHI$BWK, out.opt1$S, type = "l", xlab = "Biweek", ylab = "Number of people",
main = "S")
plot(CHI$BWK, out.opt1$I, type = "l", xlab = "Biweek", ylab = "", main = "I")
plot(CHI$BWK, out.opt1$R, type = "l", xlab = "Biweek", ylab = "", main = "R")
par(mfrow = c(1, 1))
## show optimal values
k$par
## x1 x2
## 2.388e-05 1.880e-01
## show MSE
MSE2 <- LS2(k$par)
MSE2
## [1] 6.702e+09
optimal2 <- as.vector(k$par)
out.opt2 <- runTSIR(guess = c(x1 = as.numeric(k$par[1]), x2 = as.numeric(k$par[2])),
Beta = "SCH")
head(out.opt2)
## S I R CI Beta.TMP Beta.SCH Beta.CST
## 1 88955 448.5 1801929 448.5 1.842e-05 1.939e-05 2.388e-05
## 2 88932 420.1 1802378 868.6 1.841e-05 1.939e-05 2.388e-05
## 3 88861 579.3 1802798 1447.8 2.026e-05 2.837e-05 2.388e-05
## 4 88723 772.9 1803377 2220.7 1.916e-05 2.837e-05 2.388e-05
## 5 88619 1000.3 1804150 3221.0 2.008e-05 2.837e-05 2.388e-05
## 6 88361 1260.3 1805150 4481.3 2.261e-05 2.837e-05 2.388e-05
########################## LS3 is constant #######
LS3 <- function(x) {
sum((runTSIR(guess = x, Beta = "CST")$I - CHI$CASES.i/u)^2)
}
g <- c(x1 = 2.5e-05, x2 = 0.8)
q <- optim(g, LS3)
## show optimal values
q$par
## x1 x2
## 2.419e-05 8.005e-01
## show MSE
MSE3 <- LS3(q$par)
MSE3
## [1] 7.345e+09
optimal3 <- as.vector(q$par)
out.opt3 <- runTSIR(guess = c(x1 = as.numeric(q$par[1]), x2 = as.numeric(q$par[2])),
Beta = "CST")
head(out.opt3)
## S I R CI Beta.TMP Beta.SCH Beta.CST
## 1 88955 448.5 1801929 448.5 6.294e-07 4.825e-06 2.419e-05
## 2 88932 524.1 1802378 972.6 5.741e-07 4.825e-06 2.419e-05
## 3 88861 602.8 1802902 1575.3 8.560e-06 4.356e-05 2.419e-05
## 4 88723 683.1 1803505 2258.4 3.811e-06 4.356e-05 2.419e-05
## 5 88619 763.3 1804188 3021.7 7.794e-06 4.356e-05 2.419e-05
## 6 88361 842.6 1804951 3864.3 1.873e-05 4.356e-05 2.419e-05
## get average betas for each biweek
Beta.TMP <- rep(NA, 26)
for (i in 1:26) {
Beta.TMP[i] <- mean(out.opt1$Beta.TMP[rep(i, length(out.opt1$Beta.TMP),
by = 26)])
}
Beta.SCH <- rep(NA, 26)
for (i in 1:26) {
Beta.SCH[i] <- mean(out.opt2$Beta.SCH[rep(i, length(out.opt2$Beta.SCH),
by = 26)])
}
Beta.CST <- rep(NA, 26)
for (i in 1:26) {
Beta.CST[i] <- mean(out.opt3$Beta.CST[rep(i, length(out.opt3$Beta.CST),
by = 26)])
}
## Betas.26 is dataframe of average betas for 1 year
Betas.26 <- data.frame(Beta.TMP, Beta.SCH, Beta.CST)
plot(Betas.26$Beta.TMP, col = "magenta", type = "l", ylab = "Beta", xlab = "Biweek",
xlim = c(1, 26), lwd = 2, pch = 16, ylim = c(min(Beta.SCH, Beta.TMP), max(Beta.SCH,
Beta.TMP)))
lines(Betas.26$Beta.SCH, col = "darkgreen", lwd = 2)
lines(Betas.26$Beta.CST, col = "blue", lwd = 2)
legend("topright", c("Constant ", "School", "Temperature "), col = c("blue",
"darkgreen", "magenta"), lwd = 2)
## Betas is all betas
Betas <- data.frame(Beta.TMP = out.opt1$Beta.TMP, Beta.SCH = out.opt2$Beta.SCH,
Beta.CST = out.opt3$Beta.CST)
# plot(CHI$BWK,out.opt1$I, type='l', col='purple',
# ylim=c(0,4000),xlim=c(0,26*10)) lines(CHI$BWK, out.opt2$I, type='l',
# col='orange')
## Fourier analysis - plot spectrum and ID dominant frequencies
freq <- spectrum(CHI$CASES.i)$freq
spec <- spectrum(CHI$CASES.i)$spec
data <- data.frame(freq, spec, period = 1/freq, period.yrs = (1/freq)/26)
data <- data[order(-spec), ]
head(data)
## freq spec period period.yrs
## 29 0.03867 45250385 25.86 0.9947
## 10 0.01333 16794311 75.00 2.8846
## 39 0.05200 12610415 19.23 0.7396
## 11 0.01467 11277672 68.18 2.6224
## 40 0.05333 6865138 18.75 0.7212
## 18 0.02400 6643292 41.67 1.6026
par(mar = c(4, 4, 2, 2))
spectrum(CHI$CASES.i, main = "", xlab = "Frequency (1/Period)", ylab = "Spectral Density",
ylim = c(10, 1e+08), xlim = c(-0.01, 0.5))
text(data[1, 1], data[1, 2], paste(round(data[1, 3], 1), "bwks", sep = "\n"),
pos = 4, cex = 0.7)
text(data[2, 1], data[2, 2], paste(round(data[2, 3], 1), "bwks", sep = "\n"),
pos = 3, cex = 0.7)
## parameters
param <- data.frame(TEMP = p$par, SCHOOL = k$par, CONSTANT = c(q$par[1], NA))
row.names(param) <- c("beta_0", "beta_1")
## plot fits against data
plot(CHI$BWK, out.opt1$I, type = "l", col = "red", lwd = 2)
lines(CHI$BWK, CHI$CASES.i)
legend("topright", c(paste("MSE:", round(MSE1, 0), sep = "")))
plot(CHI$BWK, out.opt2$I, type = "l", col = "red", lwd = 2)
lines(CHI$BWK, CHI$CASES.i)
legend("topright", c(paste("MSE:", round(MSE2, 0), sep = "")))
plot(CHI$BWK, out.opt3$I, type = "l", col = "red", lwd = 2)
lines(CHI$BWK, CHI$CASES.i)
legend("topright", c(paste("MSE:", round(MSE3, 0), sep = "")))
param
## TEMP SCHOOL CONSTANT
## beta_0 0.0000242 2.388e-05 2.419e-05
## beta_1 -0.8175902 1.880e-01 NA