## data mangement stuff
# BAL <- read.csv('C:/Users/Lisa/Desktop/677/Baltimore.BWK.csv', header=T,
# stringsAsFactors=FALSE)
# BAL <- BAL[seq(1,length(BAL$CASES),by=2),]
# BAL <- BAL[c(1:744),]
# SCH <-
# c(-1,-1,1,1,1,1,-1,1,1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,1,1,1,1,1,-1)
# SCH <- rep(SCH, length(BAL$CASES))[1:744]
BAL <- read.csv("C:/Users/Lisa/Desktop/677/BAL.FINAL.csv")
BAL$CASES <- as.numeric(BAL$CASES)
BAL$CASES.i <- as.numeric(BAL$CASES.i)
BAL$TMAX <- as.numeric(BAL$TMAX)
BAL$TMIN <- as.numeric(BAL$TMIN)
BAL$WEEK <- as.numeric(BAL$WEEK)
BAL$BWK <- as.numeric(BAL$BWK)
BAL$BWK.c <- as.numeric(BAL$BWK.c)
BAL$SCH <- as.numeric(BAL$SCH)
BAL$B <- as.numeric(BAL$B)
BAL$B.5 <- as.numeric(BAL$B.5)
## this is what the data looks like head(BAL)
# plot(cumsum(BAL$B.5), cumsum(BAL$CASES.i), type='l')
x <- c(1:1e+06)
# lines(x,x, col='red')
## fit a smooth spline of cumulative measles on cumulative births with 2.5
## degrees of freedom
cum.reg <- smooth.spline(cumsum(BAL$B.5), cumsum(BAL$CASES.i), df = 2.5)
## predict points using the smooth spline and calculate residuals, D
D <- predict(cum.reg)$y - cumsum(BAL$CASES.i)
B <- BAL$B.5
# plot(D, type='l')
## under reporting is given by slope of smooth spline
u <- predict(cum.reg, deriv = 1)$y
## Ic are actual cases - reported cases divided by u
Ic = BAL$CASES.i/u
# plot(Ic, type='l')
lIt = log(Ic[2:745])
lItm1 = log(Ic[1:744])
Dtm1 = D[1:744]
## remove values of -Inf from I - glm function does not like these!
for (i in 1:743) {
if (lIt[i] == -Inf) {
lIt[i] <- 0
}
}
for (i in 1:length(lItm1)) {
if (lItm1[i] == -Inf) {
lItm1[i] <- 0
}
}
## mean populaiton estimate
pop = mean(BAL$POP)
pop.5 = 1735201.498
seas = rep(1:26, length(BAL$CASES))[1:744]
seas <- as.factor(seas)
## test Smeans from 1% to whole population
Smean = seq(0.01, 1, by = 0.001) * pop.5
## this is a place to store the likelihoods of the data for each setting
## of Smean
llik = rep(NA, length(Smean))
## now perform the log linear regressions at each Smean
for (i in 1:length(Smean)) {
lStm1 = log(Smean[i] + Dtm1)
glmfit = glm(lIt ~ -1 + as.factor(seas) + lItm1 + offset(lStm1))
llik[i] = glmfit$deviance
}
## plot likelihood curve plot(Smean, llik, type='l', xlim=c(0,200000))
## The Smean we want to use is the one that minimizes the log likelihood
sbar <- Smean[which(llik == min(llik))]
sbar
## [1] 24293
# plot(D+sbar, type='l')
sbar.def <- sbar
D.def <- D
B.def <- BAL$B.5
alpha.def <- 0.95
## TSIR code
## pass B, sbar, and D results from above and guess at coefficients for
## Beta function
runTSIR <- function(alpha = alpha.def, B = B.def, sbar = sbar.def, D = D.def,
Beta = "TMP", guess = c(x1 = 3.8e-05, x2 = 0.4), initial.state = c(S = sbar.def -
181, I = 181, R = BAL$POP[1] - sbar.def - 181, CI = 181)) {
## create empty vectors to store S, I, R, B, Beta estimates
S <- rep(NA, length(BAL$CASES))
I <- rep(NA, length(BAL$CASES))
R <- rep(NA, length(BAL$CASES))
CI <- rep(NA, length(BAL$CASES))
## set time = 1 values to initial states
S[1] <- D[1] + sbar
I[1] <- initial.state["I"]
R[1] <- initial.state["R"]
CI[1] <- initial.state["CI"]
## betas are a function of the normalized climate data - I used tmax here.
## The x1-x3 are parameters to fit the seasonal forcing equation.
tmax <- (BAL$TMAX - 170)/150
Beta.TMP <- guess["x1"] * (1 + (guess["x2"] * (tmax)))
Beta.SCH <- guess["x1"] * (1 + (guess["x2"] * (BAL$SCH)))
Beta.CST <- rep(guess["x1"], length(BAL$CASES))
if (Beta == "TMP") {
for (t in 2:length(BAL$CASES)) {
S[t] <- D[t] + sbar
I[t] <- Beta.TMP[t] * S[t - 1] * (I[t - 1]^alpha)
R[t] <- I[t - 1] + R[t - 1]
CI[t] <- I[t] + CI[t - 1]
}
} else if (Beta == "SCH") {
for (t in 2:length(BAL$CASES)) {
S[t] <- D[t] + sbar
I[t] <- Beta.SCH[t] * S[t - 1] * (I[t - 1]^alpha)
R[t] <- I[t - 1] + R[t - 1]
CI[t] <- I[t] + CI[t - 1]
}
} else if (Beta == "CST") {
for (t in 2:length(BAL$CASES)) {
S[t] <- D[t] + sbar
I[t] <- Beta.CST[t] * S[t - 1] * (I[t - 1]^alpha)
R[t] <- I[t - 1] + R[t - 1]
CI[t] <- I[t] + CI[t - 1]
}
}
tsir.output <- data.frame(S, I, R, CI, Beta)
}
LS1 <- function(x) {
sum((runTSIR(guess = x, Beta = "CST")$I - BAL$CASES.i/u)^2)
}
g <- c(x1 = 5e-05, x2 = 0.4)
p <- optim(g, LS1)
## show optimal values
p$par[1]
## x1
## 5.078e-05
## show MSE LS1(p$par)
optimal <- as.vector(p$par)
out.opt <- runTSIR(guess = c(x1 = as.numeric(p$par[1]), x2 = as.numeric(p$par[2])),
Beta = "CST")
## plot of our predicted incidences (in red) versus actuall incidences (in
## black)
plot(BAL$BWK, out.opt$I/u, col = "red", type = "l", lwd = 2, ylim = c(0, 4000),
main = "Constant Beta")
lines(BAL$BWK, BAL$CASES.i, col = "black", lwd = 1)
legend("topright", c(paste("MSE:", round(LS1(p$par), 0), sep = "")))
LS1 <- function(x) {
sum((runTSIR(guess = x, Beta = "TMP")$I - BAL$CASES.i/u)^2)
}
g <- c(x1 = 5e-05, x2 = 0.4)
p <- optim(g, LS1)
## show optimal values
p$par
## x1 x2
## 0.0000511 -0.2904173
## show MSE LS1(p$par)
optimal <- as.vector(p$par)
out.opt <- runTSIR(guess = c(x1 = as.numeric(p$par[1]), x2 = as.numeric(p$par[2])),
Beta = "TMP")
## plot of our predicted incidences (in red) versus actuall incidences (in
## black)
plot(BAL$BWK, out.opt$I/u, col = "red", type = "l", lwd = 2, ylim = c(0, 4000),
main = "Beta ~ Temp")
lines(BAL$BWK, BAL$CASES.i, col = "black", lwd = 1)
legend("topright", c(paste("MSE:", round(LS1(p$par), 0), sep = "")))
LS1 <- function(x) {
sum((runTSIR(guess = x, Beta = "SCH")$I - BAL$CASES.i/u)^2)
}
g <- c(x1 = 5e-05, x2 = 0.4)
p <- optim(g, LS1)
## show optimal values
p$par
## x1 x2
## 0.0000521 0.3466298
## show MSE LS1(p$par)
optimal <- as.vector(p$par)
out.opt <- runTSIR(guess = c(x1 = as.numeric(p$par[1]), x2 = as.numeric(p$par[2])),
Beta = "SCH")
## plot of our predicted incidences (in red) versus actuall incidences (in
## black)
plot(BAL$BWK, out.opt$I/u, col = "red", type = "l", lwd = 2, ylim = c(0, 4000),
main = "Beta ~ School")
lines(BAL$BWK, BAL$CASES.i, col = "black", lwd = 1)
legend("topright", c(paste("MSE:", round(LS1(p$par), 0), sep = "")))