## data management stuff
BAL <- read.csv("C:/Users/Lisa/Desktop/677/BAL.BWK.csv", header = T, stringsAsFactors = FALSE)
BAL.BWK <- BAL[c(seq(1, length(BAL$WEEK), 2)), ]
BAL.BWK <- BAL.BWK[, c(1, 2, 4:8)]
names(BAL.BWK) <- c("YEAR", "WEEK", "BWK", "CASES", "CASES_i", "TMAX", "TMIN")
BAL.BWK$CASES <- as.numeric(BAL.BWK$CASES)
BAL.BWK$CASES_i <- as.numeric(BAL.BWK$CASES_i)
BAL.BWK$TMAX <- as.numeric(BAL.BWK$TMAX)
BAL.BWK$TMIN <- as.numeric(BAL.BWK$TMIN)
BAL.BWK$WEEK <- as.numeric(BAL.BWK$WEEK)
BAL.BWK$BWK <- as.numeric(BAL.BWK$BWK)
CASES_c <- rep(NA, length(BAL.BWK$CASES))
CASES_c[1] <- BAL.BWK$CASES_i[1]
for (i in 2:length(BAL.BWK$CASES)) {
CASES_c[i] <- BAL.BWK$CASES_i[i] + CASES_c[i - 1]
}
BAL.BWK <- data.frame(BAL.BWK, CASES_c)
## here is what the data looks like. CASES_i are cases with missing
## values imputed. CASES_c are cumulative cases.
head(BAL.BWK, 26)
## YEAR WEEK BWK CASES CASES_i TMAX TMIN CASES_c
## 1 1920 1 1 181 181 24.214 -46.14 181
## 3 1920 3 2 274 274 4.429 -61.79 455
## 5 1920 5 3 270 270 47.929 -32.14 725
## 7 1920 7 4 323 323 46.786 -26.57 1048
## 9 1920 9 5 308 308 46.786 -48.86 1356
## 11 1920 11 6 372 372 133.357 23.00 1728
## 13 1920 13 7 380 380 172.143 71.86 2108
## 15 1920 15 8 528 528 155.143 52.79 2636
## 17 1920 17 9 560 560 183.714 90.43 3196
## 19 1920 19 10 560 560 195.214 101.07 3756
## 21 1920 21 11 503 503 244.000 146.36 4259
## 23 1920 23 12 306 306 275.000 170.14 4565
## 25 1920 25 13 158 158 275.929 177.71 4723
## 27 1920 27 14 100 100 294.857 205.57 4823
## 29 1920 29 15 42 42 283.357 190.07 4865
## 31 1920 31 16 18 18 288.500 194.43 4883
## 33 1920 33 17 16 16 272.143 205.14 4899
## 35 1920 35 18 16 16 259.071 175.36 4915
## 37 1920 37 19 15 15 250.714 155.86 4930
## 39 1920 39 20 6 6 237.214 144.86 4936
## 41 1920 41 21 12 12 250.500 135.21 4948
## 43 1920 43 22 19 19 208.357 108.71 4967
## 45 1920 45 23 15 15 112.929 44.07 4982
## 47 1920 47 24 26 26 104.429 52.07 5008
## 49 1920 49 25 38 38 97.000 35.29 5046
## 51 1920 51 26 29 29 46.857 -17.93 5075
## Example for how to find rho, D_t's, and estimated S compartment values
pop.i = 734000
pop.f = 9e+05
sbar.def <- pop.i/2
mu.def <- 0.017/365
mu <- mu.def
## population at time t
pop <- seq(pop.i, pop.f, length = length(BAL.BWK$CASES))
## number of births - this is a guess needs real values/estimates!
B <- (mu) * 14 * pop
head(B)
## [1] 478.6 478.7 478.9 479.0 479.1 479.2
## cumulative births
B_c <- rep(NA, length(B))
B_c[1] <- B[1]
for (i in 2:length(B)) {
B_c[i] <- B_c[i - 1] + B[i]
}
head(B_c)
## [1] 478.6 957.3 1436.2 1915.2 2394.3 2873.5
x <- seq(0, 150000, length = 1000)
## find rho - the underreporting rate
fit <- lm(B_c ~ CASES_c)
fit2 <- lm(CASES_c ~ B_c)
summary(fit2)
##
## Call:
## lm(formula = CASES_c ~ B_c)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9778 -2770 -300 2650 9139
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.73e+03 2.81e+02 6.14 1.3e-09 ***
## B_c 4.04e-01 1.27e-03 317.36 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3910 on 742 degrees of freedom
## (143 observations deleted due to missingness)
## Multiple R-squared: 0.993, Adjusted R-squared: 0.993
## F-statistic: 1.01e+05 on 1 and 742 DF, p-value: <2e-16
plot(B_c, CASES_c, type = "l")
abline(fit2)
lines(x, x, col = "red")
rho <- summary(fit2)$coef[2, 1]
rho
## [1] 0.4044
## find D_t - the residuals from the regression of cum births on cum cases
D_t <- fit$residuals
## guess an sbar
sbar <- 350000
## plot D_t + guess for sbar - this is the compartment S
plot(D_t + sbar, type = "l")
## TSIR CODE
runTSIR <- function(pop.i = 734000, pop.f = 9e+05, alpha = 0.97, x1 = 3.754e-06,
x2 = -0.4008, x3 = 0.27627, mu = mu.def, sbar = sbar.def, guess = c(sbar = sbar.def,
x1 = 3.754e-06, x2 = -0.4008, alpha = 0.97), initial.state = c(S = pop.i/2 -
181, I = 181, R = pop.i/2, CI = 181), freq.dependent = TRUE) {
## pop[t] is size of population at time t I am assuming the population
## increases linearly over time from pop.i to pop.f
pop <- seq(pop.i, pop.f, length = length(BAL.BWK$CASES))
## assumption: birth rate is a constant B is number of births per bi-week
## B ranges from ~280-350 births every 2 weeks - does this seem
## reasonable?
B <- (mu) * 14 * pop
B_c <- rep(NA, length(B))
B_c[1] <- B[1]
for (i in 2:length(B)) {
B_c[i] <- B_c[i - 1] + B[i]
}
CASES_c <- rep(NA, length(BAL.BWK$CASES))
CASES_c[1] <- BAL.BWK$CASES_i[1]
for (i in 2:length(BAL.BWK$CASES)) {
CASES_c[i] <- BAL.BWK$CASES_i[i] + CASES_c[i - 1]
}
## find rho - the underreporting rate
fit <- lm(B_c ~ CASES_c)
fit2 <- lm(CASES_c ~ B_c)
# summary(fit2) plot(B_c, CASES_c, type='l') abline(fit2) lines(x,x,
# col='red')
rho <- summary(fit2)$coef[2, 1]
rho
CASES_rho <- CASES_c/rho
fit3 <- lm(B_c ~ CASES_rho)
## find D_t - the residuals from the regression of cum births on cum cases
D_t <- fit3$residuals
## plot D_t + guess for sbar - this is the compartment S plot(D_t+sbar,
## type='l')
## create empty vectors to store S, I, R, B, Beta estimates
S <- rep(NA, length(BAL.BWK$CASES))
I <- rep(NA, length(BAL.BWK$CASES))
R <- rep(NA, length(BAL.BWK$CASES))
CI <- rep(NA, length(BAL.BWK$CASES))
Beta <- rep(NA, length(BAL.BWK$CASES))
Rt <- rep(NA, length(BAL.BWK$CASES))
## set time = 1 values to initial states
S[1] <- D_t[1] + sbar
I[1] <- initial.state["I"]
R[1] <- initial.state["R"]
CI[1] <- initial.state["CI"]
## betas are a function of the normalized climate data - I used tmax here.
## The x1-x3 are parameters to fit the seasonal forcing equation.
tmax <- (BAL.BWK$TMAX - 192.8)/192.8
Beta <- guess["x1"] * (1 + (guess["x2"] * tmax))
for (t in (2:length(BAL.BWK$CASES))) {
S[t] <- D_t[t] + guess["sbar"]
I[t] <- Beta[t] * S[t - 1] * I[t - 1]^guess["alpha"]
R[t] <- I[t - 1] + R[t - 1]
CI[t] <- I[t] + CI[t - 1]
}
tsir.output <- data.frame(S, I, R, CI, Beta)
}
## optimizing for beta parameters sbar and x1, x2
LS1 <- function(x) {
sum((runTSIR(guess = x)$I[1:700] - BAL.BWK$CASES_i[1:700]/rho)^2)
}
z <- c(sbar = 3e+05, x1 = 3.754e-06, x2 = -0.4008, alpha = 0.97)
q <- optim(z, LS1)
## show optimal values
q$par
## sbar x1 x2 alpha
## 3.150e+05 3.803e-06 -4.008e-01 9.700e-01
optimal <- as.vector(q$par)
out.opt <- runTSIR(guess = c(sbar = as.numeric(q$par[1]), x1 = as.numeric(q$par[2]),
x2 = as.numeric(q$par[3]), alpha = as.numeric(q$par[4])))
## here is what the output looks like for the optimized values
head(out.opt, 26)
## S I R CI Beta
## 1 369883 181.0 367000 181.0 5.136e-06
## 2 317689 303.2 367181 484.2 5.293e-06
## 3 317505 401.5 367484 885.7 4.949e-06
## 4 317191 528.0 367886 1413.7 4.958e-06
## 5 316914 688.0 368414 2101.7 4.958e-06
## 6 316480 765.8 369102 2867.5 4.273e-06
## 7 316026 787.7 369867 3655.2 3.967e-06
## 8 315210 835.8 370655 4491.0 4.101e-06
## 9 314315 834.3 371491 5325.3 3.875e-06
## 10 313420 811.0 372325 6136.3 3.784e-06
## 11 312665 706.6 373136 6842.8 3.398e-06
## 12 312393 572.2 373843 7415.0 3.153e-06
## 13 312486 464.8 374415 7879.8 3.146e-06
## 14 312720 362.0 374880 8241.7 2.996e-06
## 15 313097 292.8 375242 8534.6 3.087e-06
## 16 313534 235.6 375535 8770.2 3.047e-06
## 17 313975 199.1 375770 8969.3 3.176e-06
## 18 314416 174.9 375969 9144.2 3.279e-06
## 19 314860 157.6 376144 9301.8 3.345e-06
## 20 315327 147.2 376302 9448.9 3.452e-06
## 21 315778 133.7 376449 9582.6 3.347e-06
## 22 316213 134.2 376583 9716.8 3.680e-06
## 23 316657 162.4 376717 9879.2 4.435e-06
## 24 317075 198.8 376879 10078.0 4.502e-06
## 25 317463 245.2 377078 10323.2 4.561e-06
## 26 317873 327.2 377323 10650.4 4.957e-06
## plot predicted values for each compartment
plot(BAL.BWK$BWK, out.opt$S, type = "l")
plot(BAL.BWK$BWK, out.opt$I, type = "l")
plot(BAL.BWK$BWK, out.opt$R, type = "l")
plot(BAL.BWK$BWK, out.opt$CI, type = "l")
plot(BAL.BWK$BWK, out.opt$Beta, type = "l")
## plot of our predicted incidences (in red) versus actuall incidences (in
## black)
plot(BAL.BWK$BWK, BAL.BWK$CASES_i/rho, col = "black", lwd = 1, type = "l")
lines(BAL.BWK$BWK, out.opt$I, col = "red", lwd = 2)