## data mangement stuff
BAL <- read.csv("C:/Users/Lisa/Desktop/677/Baltimore.BWK.csv", header = T, stringsAsFactors = FALSE)
BAL <- BAL[seq(1, length(BAL$CASES), by = 2), ]
BAL <- BAL[c(1:744), ]
BAL$CASES <- as.numeric(BAL$CASES)
## Warning: NAs introduced by coercion
BAL$CASES.i <- as.numeric(BAL$CASES.i)
BAL$TMAX <- as.numeric(BAL$TMAX)
BAL$TMIN <- as.numeric(BAL$TMIN)
BAL$WEEK <- as.numeric(BAL$WEEK)
BAL$BWK <- as.numeric(BAL$BWK)
BAL$BWK.c <- as.numeric(BAL$BWK.c)
## this is what the data looks like
head(BAL)
## YEAR WEEK BWK.c CASES CASES.i TMAX TMIN RATE B POP BWK
## 1 1920 1 1 181 181 24.214 -46.14 0.01970 1447 1914686 1
## 3 1920 3 2 274 274 4.429 -61.79 0.01970 1447 1914686 2
## 5 1920 5 3 270 270 47.929 -32.14 0.01977 1454 1914691 3
## 7 1920 7 4 323 323 46.786 -26.57 0.01982 1456 1914666 4
## 9 1920 9 5 308 308 46.786 -48.86 0.01971 1444 1914656 5
## 11 1920 11 6 372 372 133.357 23.00 0.01962 1441 1914647 6
## plot cumulative births (x) and cumulative cases (y). 1 to 1 is plotted
## in red
plot(cumsum(BAL$B), cumsum(BAL$CASES.i), type = "l")
x <- c(1:1e+06)
lines(x, x, col = "red")
## fit a smooth spline of cumulative measles on cumulative births with 2.5
## degrees of freedom
cum.reg <- smooth.spline(cumsum(BAL$B), cumsum(BAL$CASES.i), df = 2.5)
## predict points using the smooth spline and calculate residuals, D
D <- predict(cum.reg)$y - cumsum(BAL$CASES.i)
B <- BAL$B
plot(D, type = "l")
## under reporting is given by slope of smooth spline
u <- predict(cum.reg, deriv = 1)$y
## Ic are actual cases - reported cases divided by u
Ic = BAL$CASES.i/u
plot(Ic, type = "l")
lIt = log(Ic[2:745])
lItm1 = log(Ic[1:744])
Dtm1 = D[1:744]
## remove values of -Inf from I - glm function does not like these!
for (i in 1:743) {
if (lIt[i] == -Inf) {
lIt[i] <- 0
}
}
for (i in 1:length(lItm1)) {
if (lItm1[i] == -Inf) {
lItm1[i] <- 0
}
}
## mean populaiton estimate
pop = mean(BAL$POP)
seas = rep(1:26, length(BAL$CASES))[1:744]
seas <- as.factor(seas)
## test Smeans from 1% to whole population
Smean = seq(0.01, 1, by = 0.001) * pop
## this is a place to store the likelihoods of the data for each setting
## of Smean
llik = rep(NA, length(Smean))
## now perform the log linear regressions at each Smean
for (i in 1:length(Smean)) {
lStm1 = log(Smean[i] + Dtm1)
glmfit = glm(lIt ~ -1 + as.factor(seas) + lItm1 + offset(lStm1))
llik[i] = glmfit$deviance
}
## plot likelihood curve
plot(Smean, llik, type = "l", xlim = c(0, 2e+05))
## The Smean we want to use is the one that minimizes the log likelihood
sbar <- Smean[which(llik == min(llik))]
sbar
## [1] 46576
plot(D + sbar, type = "l")
sbar.def <- sbar
D.def <- D
B.def <- BAL$B
alpha.def <- 0.95
## TSIR code
## pass B, sbar, and D results from above and guess at coefficients for
## Beta function
runTSIR <- function(alpha = alpha.def, B = B.def, sbar = sbar.def, D = D.def,
guess = c(x1 = 3.8e-05, x2 = -0.4), initial.state = c(S = sbar.def - 181,
I = 181, R = BAL$POP[1] - 60000 - 181, CI = 181)) {
## create empty vectors to store S, I, R, B, Beta estimates
S <- rep(NA, length(BAL$CASES))
I <- rep(NA, length(BAL$CASES))
R <- rep(NA, length(BAL$CASES))
CI <- rep(NA, length(BAL$CASES))
Beta <- rep(NA, length(BAL$CASES))
Rt <- rep(NA, length(BAL$CASES))
## set time = 1 values to initial states
S[1] <- D[1] + sbar
I[1] <- initial.state["I"]
R[1] <- initial.state["R"]
CI[1] <- initial.state["CI"]
## betas are a function of the normalized climate data - I used tmax here.
## The x1-x3 are parameters to fit the seasonal forcing equation.
tmax <- (BAL$TMAX - 192.8)/192.8
Beta <- guess["x1"] * (1 + (guess["x2"] * (tmax)))
for (t in 2:length(BAL$CASES)) {
S[t] <- D[t] + sbar
I[t] <- Beta[t] * S[t - 1] * (I[t - 1]^alpha)
R[t] <- I[t - 1] + R[t - 1]
CI[t] <- I[t] + CI[t - 1]
}
tsir.output <- data.frame(S, I, R, CI, Beta, C.t = BAL$CASES.i/u)
}
out <- runTSIR()
head(out, 52)
## S I R CI Beta C.t
## 1 43773 181.0 1854505 181.0 5.129e-05 1043.50
## 2 43750 322.9 1854686 503.9 5.285e-05 1579.67
## 3 43733 523.0 1855008 1026.9 4.942e-05 1556.60
## 4 43662 828.1 1855531 1855.0 4.951e-05 1862.16
## 5 43605 1279.4 1856360 3134.4 4.951e-05 1775.68
## 6 43483 1665.2 1857639 4799.6 4.269e-05 2144.65
## 7 43338 1980.2 1859304 6779.8 3.963e-05 2190.78
## 8 43034 2405.5 1861284 9185.3 4.097e-05 3044.02
## 9 42702 2715.5 1863690 11900.8 3.872e-05 3228.51
## 10 42374 2952.6 1866405 14853.4 3.781e-05 3228.51
## 11 42105 2849.7 1869358 17703.2 3.396e-05 2899.89
## 12 42042 2540.8 1872208 20244.0 3.152e-05 1764.15
## 13 42129 2269.8 1874749 22513.8 3.145e-05 910.90
## 14 42289 1946.3 1877018 24460.1 2.995e-05 576.52
## 15 42507 1739.3 1878965 26199.4 3.086e-05 242.14
## 16 42749 1550.6 1880704 27750.0 3.046e-05 103.77
## 17 42993 1457.3 1882254 29207.3 3.174e-05 92.24
## 18 43233 1426.7 1883712 30634.0 3.278e-05 92.24
## 19 43472 1434.2 1885138 32068.2 3.343e-05 86.48
## 20 43713 1495.5 1886573 33563.7 3.450e-05 34.59
## 21 43942 1517.3 1888068 35081.1 3.345e-05 69.18
## 22 44161 1700.0 1889586 36781.0 3.677e-05 109.54
## 23 44377 2292.6 1891286 39073.6 4.430e-05 86.48
## 24 44582 3107.2 1893578 42180.8 4.497e-05 149.89
## 25 44777 4221.1 1896685 46401.8 4.555e-05 219.07
## 26 44981 6164.0 1900906 52565.9 4.951e-05 167.18
## 27 45172 8244.0 1907070 60809.9 4.600e-05 368.95
## 28 45372 11723.8 1915314 72533.7 4.942e-05 322.83
## 29 45554 15866.9 1927038 88400.6 4.765e-05 426.60
## 30 45666 20779.9 1942905 109180.5 4.663e-05 835.90
## 31 45822 24653.0 1963685 133833.5 4.271e-05 564.95
## 32 45973 26819.5 1988338 160653.0 3.936e-05 593.77
## 33 46108 27719.4 2015158 188372.4 3.743e-05 605.29
## 34 46205 29600.5 2042877 217973.0 3.863e-05 755.17
## 35 46249 30844.9 2072477 248817.8 3.774e-05 1077.97
## 36 46251 31100.0 2103322 279917.8 3.655e-05 1343.13
## 37 46253 28472.7 2134422 308390.5 3.320e-05 1360.41
## 38 46336 25153.5 2162895 333544.0 3.190e-05 951.13
## 39 46499 19955.3 2188048 353499.3 2.842e-05 489.97
## 40 46716 16432.1 2208004 369931.4 2.905e-05 276.69
## 41 46963 13604.3 2224436 383535.7 2.879e-05 103.76
## 42 47207 12102.8 2238040 395638.5 3.049e-05 115.28
## 43 47462 11190.2 2250143 406828.7 3.134e-05 51.88
## 44 47708 10034.1 2261333 416862.8 3.011e-05 86.46
## 45 47962 9556.0 2271367 426418.8 3.164e-05 23.06
## 46 48208 9901.2 2280923 436320.0 3.416e-05 34.58
## 47 48436 11088.8 2290824 447408.8 3.680e-05 92.22
## 48 48665 13256.0 2301913 460664.8 3.932e-05 74.93
## 49 48854 17936.1 2315169 478601.0 4.469e-05 265.12
## 50 48983 22721.4 2333105 501322.4 4.231e-05 616.69
## 51 49080 31700.3 2355827 533022.7 4.703e-05 806.87
## 52 49201 45070.5 2387527 578093.1 4.864e-05 668.54
plot(BAL$BWK, out$S, type = "l")
plot(BAL$BWK, out$I, type = "l")
plot(BAL$BWK, out$R, type = "l")
plot(BAL$BWK, out$CI, type = "l")
plot(BAL$BWK, out$Beta, type = "l")
## optimizing for beta parameters sbar and x1-x3
LS1 <- function(x) {
sum((runTSIR(guess = x)$I - BAL$CASES.i/u)^2)
}
g <- c(x1 = 3.8e-05, x2 = -0.4)
p <- optim(g, LS1)
## show optimal values
p$par
## x1 x2
## 2.717e-05 -3.600e-01
## show MSE
LS1(p$par)
## [1] 5.398e+09
optimal <- as.vector(p$par)
out.opt <- runTSIR(guess = c(x1 = as.numeric(p$par[1]), x2 = as.numeric(p$par[2])))
## plot of our predicted incidences (in red) versus actuall incidences (in
## black) - alpha is 0.95
plot(BAL$BWK, out.opt$I, col = "red", type = "l", lwd = 2, ylim = c(0, 4000))
lines(BAL$BWK, BAL$CASES.i, col = "black", lwd = 1)