Code for Data Management stuff:
## read in csv file - call it BAL. I did the compression into biweeks in
## excel even though I hate excel. I imputed some missing case data in
## the excel file as well - I called these CASES_i. I think missing data
## will be a big issue later!
BAL <- read.csv("C:/Users/Lisa/Desktop/677/BAL.BWK.csv", header = T, stringsAsFactors = FALSE)
## this is what the BAL data looks like
head(BAL)
## YEAR WEEK CASES BWK CASES.BWK CASES.BWK_i TMAXAV.BWK TMINAV.BWK DAY1TMAX
## 1 1920 1 93 1 181 181 24.214 -46.14 100
## 2 1920 2 88 1 181 181 24.214 -46.14 50
## 3 1920 3 164 2 274 274 4.429 -61.79 6
## 4 1920 4 110 2 274 274 4.429 -61.79 -11
## 5 1920 5 144 3 270 270 47.929 -32.14 -6
## 6 1920 6 126 3 270 270 47.929 -32.14 0
## DAY2TMAX DAY3TMAX DAY4TMAX DAY5TMAX DAY6TMAX DAY7TMAX DAY1TMIN DAY2TMIN
## 1 -11 -72 -56 -33 22 44 -11 -94
## 2 83 28 56 33 78 17 22 22
## 3 -39 17 -28 -17 -39 33 -78 -89
## 4 11 22 6 -44 67 78 -44 -17
## 5 144 33 -33 50 89 39 -67 -39
## 6 44 50 33 50 100 78 -28 -17
## DAY3TMIN DAY4TMIN DAY5TMIN DAY6TMIN DAY7TMIN TMAXAV TMINAV TMAXSD
## 1 -128 -111 -117 -78 -6 -0.8571 -77.857 60.48
## 2 -11 -17 -17 -28 -72 49.2857 -14.429 25.06
## 3 -83 -89 -94 -83 -39 -9.5714 -79.286 28.54
## 4 6 -83 -94 -50 -28 18.4286 -44.286 42.61
## 5 -117 -133 -33 -11 -28 45.1429 -61.143 58.63
## 6 0 6 0 0 17 50.7143 -3.143 31.90
## TMINSD
## 1 50.06
## 2 32.09
## 3 18.52
## 4 35.45
## 5 46.93
## 6 14.88
BAL.BWK <- BAL[c(seq(1, length(BAL$WEEK), 2)), ]
BAL.BWK <- BAL.BWK[, c(1, 2, 4:8)]
names(BAL.BWK) <- c("YEAR", "WEEK", "BWK", "CASES", "CASES_i", "TMAX", "TMIN")
BAL.BWK$CASES <- as.numeric(BAL.BWK$CASES)
BAL.BWK$CASES_i <- as.numeric(BAL.BWK$CASES_i)
BAL.BWK$TMAX <- as.numeric(BAL.BWK$TMAX)
BAL.BWK$TMIN <- as.numeric(BAL.BWK$TMIN)
BAL.BWK$WEEK <- as.numeric(BAL.BWK$WEEK)
BAL.BWK$BWK <- as.numeric(BAL.BWK$BWK)
## final data set we want to work with is called BAL.BWK - this is what
## the BAL.BWK data looks like
head(BAL.BWK)
## YEAR WEEK BWK CASES CASES_i TMAX TMIN
## 1 1920 1 1 181 181 24.214 -46.14
## 3 1920 3 2 274 274 4.429 -61.79
## 5 1920 5 3 270 270 47.929 -32.14
## 7 1920 7 4 323 323 46.786 -26.57
## 9 1920 9 5 308 308 46.786 -48.86
## 11 1920 11 6 372 372 133.357 23.00
Code for TSIR:
## First thing we need to do is to load some packages. We don't really
## need this to do a TSIR I don't think but well load it anyway
library(deSolve)
library(xtable) ## xtable makes pretty tables
library(lattice) ## lattice makes pretty graphs
library(ggplot2) ## ggplot2 also makes pretty graphs
## TSIR Code
## I just made up a default birth rate here
mu.def <- 0.01/365
runTSIR <- function(pop.i = 734000, pop.f = 9e+05, alpha = 0.97, initial.state = c(S = (pop.i/2) -
181, I = 181, R = (pop.i/2), CI = 181), mu = mu.def, freq.dependent = TRUE) {
## pop[t] is size of population at time t - I am assuming the population
## increases linearly over time from pop.i to pop.f
pop <- seq(pop.i, pop.f, length = length(BAL.BWK$CASES))
## assumption: birth rate is a constant. B is number of births per
## bi-week. B ranges from ~280-350 births every 2 weeks - does this seem
## reasonable?
B <- (mu) * 14 * pop
## create empty vectors to store S, I, R, B, Beta estimates
S <- rep(NA, length(BAL.BWK$CASES))
I <- rep(NA, length(BAL.BWK$CASES))
R <- rep(NA, length(BAL.BWK$CASES))
CI <- rep(NA, length(BAL.BWK$CASES))
Beta <- rep(NA, length(BAL.BWK$CASES))
Rt <- rep(NA, length(BAL.BWK$CASES))
## set time = 1 values to initial states
S[1] <- initial.state["S"]
I[1] <- initial.state["I"]
R[1] <- initial.state["R"]
CI[1] <- initial.state["CI"]
for (t in (2:length(BAL.BWK$CASES))) {
I[t] <- BAL.BWK$CASES_i[t]
S[t] <- S[t - 1] + B[t - 1] - I[t] - (mu * 14 * S[t - 1])
R[t] <- I[t - 1] + R[t - 1] - (mu * 14 * R[t - 1])
CI[t] <- I[t] + CI[t - 1]
Beta[t] <- I[t]/(S[t - 1] * (I[t - 1]^alpha))
Rt[t] <- (Beta[t]/0.5) * pop
}
tsir.output <- data.frame(S, I, R, CI, Beta, Rt)
}
## run the runTSIR function with default vals and store as out
out <- runTSIR()
## this is what the output looks like
head(out)
## S I R CI Beta Rt
## 1 366819 181 367000 181 NA NA
## 2 366686 274 367040 455 4.823e-06 7.081
## 3 366557 270 367173 725 3.180e-06 4.669
## 4 366375 323 367303 1048 3.860e-06 5.667
## 5 366208 308 367485 1356 3.095e-06 4.544
## 6 365977 372 367652 1728 3.917e-06 5.750
## plot comparment S
plot(BAL.BWK$BWK, out$S, type = "l")
## plot compartment I
plot(BAL.BWK$BWK, out$I, type = "l")
## plot compartment R
plot(BAL.BWK$BWK, out$R, type = "l")
## plot compartment CI
plot(BAL.BWK$BWK, out$CI, type = "l")
## plot betas
plot(BAL.BWK$BWK, out$Beta, type = "l")
## plot R0's
plot(BAL.BWK$BWK, out$Rt, type = "l")
## some more plots - plots of betas and max temp for every 5 years
plot(BAL.BWK$BWK[1:26 * 5], out$Beta[1:26 * 5], type = "l", lwd = 2, xlim = c(1,
26 * 5), ylim = c(0, 1e-05))
lines(BAL.BWK$BWK[1:26 * 5], BAL.BWK$TMAX[1:26 * 5]/max(BAL.BWK$TMAX) * 5e-06,
col = "skyblue")
plot(BAL.BWK$BWK[130:260], out$Beta[130:260], type = "l", lwd = 2, xlim = c(130,
260), ylim = c(0, 1e-05))
lines(BAL.BWK$BWK[130:260], BAL.BWK$TMAX[130:260]/max(BAL.BWK$TMAX) * 5e-06,
col = "skyblue")
plot(BAL.BWK$BWK[260:390], out$Beta[260:390], type = "l", lwd = 2, xlim = c(260,
390), ylim = c(0, 1e-05))
lines(BAL.BWK$BWK[260:390], BAL.BWK$TMAX[260:390]/max(BAL.BWK$TMAX) * 5e-06,
col = "skyblue")
plot(BAL.BWK$BWK[390:520], out$Beta[390:520], type = "l", lwd = 2, xlim = c(390,
520), ylim = c(0, 1e-05))
lines(BAL.BWK$BWK[390:520], BAL.BWK$TMAX[390:520]/max(BAL.BWK$TMAX) * 5e-06,
col = "skyblue")
plot(BAL.BWK$BWK[520:650], out$Beta[520:650], type = "l", lwd = 2, xlim = c(520,
650), ylim = c(0, 1e-05))
lines(BAL.BWK$BWK[520:650], BAL.BWK$TMAX[520:650]/max(BAL.BWK$TMAX) * 5e-06,
col = "skyblue")
plot(BAL.BWK$BWK[650:780], out$Beta[650:780], type = "l", lwd = 2, xlim = c(650,
780), ylim = c(0, 1e-05))
lines(BAL.BWK$BWK[650:780], BAL.BWK$TMAX[650:780]/max(BAL.BWK$TMAX) * 5e-06,
col = "skyblue")
plot(BAL.BWK$BWK[780:880], out$Beta[780:880], type = "l", lwd = 2, xlim = c(780,
880), ylim = c(0, 1e-05))
lines(BAL.BWK$BWK[780:880], BAL.BWK$TMAX[780:880]/max(BAL.BWK$TMAX) * 5e-06,
col = "skyblue")
## make a data frame of predicted betas and their week of the year. do a
## linear regression of betas on week of the year to get 26 estimates for
## beta
beta.reg <- data.frame(Beta = as.numeric(out$Beta), WEEK = as.numeric(BAL.BWK$WEEK))
## since some of the empirical betas could not be calculated I removed
## missing data. I also removed values of Nan, Inf, -Inf because lm wont
## run with these values ugh!
beta.reg <- beta.reg[complete.cases(beta.reg) == TRUE, ]
beta.reg <- beta.reg[beta.reg$Beta != Inf, ]
beta.reg <- beta.reg[beta.reg$Beta != -Inf, ]
## this is how you do a simple linear regression in R! Use the lm()
## function, formula = outcome ~ covariates, data = name of dataset. lm()
## will return something called an lm object, store the lm object as
## something - here I called it beta.lm
beta.lm <- lm(formula = Beta ~ as.factor(WEEK), data = beta.reg)
## summary(lm object) - summary(beta.lm) will show you the coefficients -
## we have one beta estimate for each week of the year
summary(beta.lm)
##
## Call:
## lm(formula = Beta ~ as.factor(WEEK), data = beta.reg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.90e-06 -1.38e-06 -3.70e-07 7.80e-07 4.14e-05
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.25e-06 8.81e-07 8.23 9.8e-16 ***
## as.factor(WEEK)2 -1.37e-06 1.35e-06 -1.02 0.30955
## as.factor(WEEK)3 2.63e-07 1.23e-06 0.21 0.83073
## as.factor(WEEK)4 -2.75e-06 1.35e-06 -2.04 0.04133 *
## as.factor(WEEK)5 -2.25e-06 1.23e-06 -1.83 0.06702 .
## as.factor(WEEK)6 -2.76e-06 1.35e-06 -2.05 0.04045 *
## as.factor(WEEK)7 -2.36e-06 1.23e-06 -1.92 0.05506 .
## as.factor(WEEK)8 -3.03e-06 1.35e-06 -2.25 0.02491 *
## as.factor(WEEK)9 -2.71e-06 1.23e-06 -2.21 0.02743 *
## as.factor(WEEK)10 -2.84e-06 1.35e-06 -2.11 0.03543 *
## as.factor(WEEK)11 -2.68e-06 1.23e-06 -2.18 0.02952 *
## as.factor(WEEK)12 -3.64e-06 1.35e-06 -2.70 0.00701 **
## as.factor(WEEK)13 -2.81e-06 1.23e-06 -2.29 0.02250 *
## as.factor(WEEK)14 -3.51e-06 1.35e-06 -2.61 0.00938 **
## as.factor(WEEK)15 -3.41e-06 1.23e-06 -2.77 0.00569 **
## as.factor(WEEK)16 -2.83e-06 1.35e-06 -2.10 0.03589 *
## as.factor(WEEK)17 -2.93e-06 1.23e-06 -2.38 0.01745 *
## as.factor(WEEK)18 -3.80e-06 1.35e-06 -2.83 0.00486 **
## as.factor(WEEK)19 -3.83e-06 1.23e-06 -3.12 0.00187 **
## as.factor(WEEK)20 -1.49e-06 1.35e-06 -1.10 0.26963
## as.factor(WEEK)21 -4.10e-06 1.23e-06 -3.34 0.00089 ***
## as.factor(WEEK)22 -4.14e-06 1.35e-06 -3.07 0.00219 **
## as.factor(WEEK)23 -4.63e-06 1.23e-06 -3.77 0.00018 ***
## as.factor(WEEK)24 -4.61e-06 1.35e-06 -3.43 0.00065 ***
## as.factor(WEEK)25 -5.54e-06 1.23e-06 -4.51 7.6e-06 ***
## as.factor(WEEK)26 -4.32e-06 1.35e-06 -3.21 0.00139 **
## as.factor(WEEK)27 -5.14e-06 1.23e-06 -4.18 3.2e-05 ***
## as.factor(WEEK)28 -5.27e-06 1.38e-06 -3.82 0.00015 ***
## as.factor(WEEK)29 -5.67e-06 1.23e-06 -4.62 4.6e-06 ***
## as.factor(WEEK)30 -5.59e-06 1.38e-06 -4.05 5.7e-05 ***
## as.factor(WEEK)31 -5.18e-06 1.23e-06 -4.21 2.8e-05 ***
## as.factor(WEEK)32 -4.49e-06 1.38e-06 -3.25 0.00122 **
## as.factor(WEEK)33 -5.18e-06 1.25e-06 -4.15 3.7e-05 ***
## as.factor(WEEK)34 -4.62e-06 1.38e-06 -3.34 0.00088 ***
## as.factor(WEEK)35 -3.99e-06 1.25e-06 -3.20 0.00143 **
## as.factor(WEEK)36 -2.87e-06 1.42e-06 -2.02 0.04394 *
## as.factor(WEEK)37 -4.81e-06 1.23e-06 -3.91 0.00010 ***
## as.factor(WEEK)38 -2.26e-06 1.38e-06 -1.64 0.10216
## as.factor(WEEK)39 -5.09e-06 1.25e-06 -4.08 5.0e-05 ***
## as.factor(WEEK)40 -3.04e-06 1.42e-06 -2.14 0.03266 *
## as.factor(WEEK)41 -3.76e-06 1.27e-06 -2.97 0.00308 **
## as.factor(WEEK)42 -4.22e-06 1.38e-06 -3.05 0.00235 **
## as.factor(WEEK)43 9.21e-07 1.27e-06 0.73 0.46744
## as.factor(WEEK)44 -9.18e-07 1.42e-06 -0.65 0.51874
## as.factor(WEEK)45 -2.94e-06 1.23e-06 -2.39 0.01703 *
## as.factor(WEEK)46 -2.34e-06 1.38e-06 -1.70 0.08996 .
## as.factor(WEEK)47 -2.22e-06 1.23e-06 -1.81 0.07130 .
## as.factor(WEEK)48 -3.42e-06 1.38e-06 -2.47 0.01362 *
## as.factor(WEEK)49 -2.45e-06 1.23e-06 -2.00 0.04614 *
## as.factor(WEEK)50 -2.19e-06 1.38e-06 -1.58 0.11388
## as.factor(WEEK)51 -3.28e-06 1.23e-06 -2.67 0.00770 **
## as.factor(WEEK)52 -7.20e-07 1.38e-06 -0.52 0.60213
## as.factor(WEEK)53 -3.84e-06 2.22e-06 -1.73 0.08380 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.53e-06 on 680 degrees of freedom
## Multiple R-squared: 0.168, Adjusted R-squared: 0.104
## F-statistic: 2.63 on 52 and 680 DF, p-value: 1.61e-08
## fitting the empirical betas to a seasonality function - the empirical
## betas we found follow some seasonal forcing function that we want to
## fit
## t is time in consecutive biweeks. I am only using a subset of the data
## for the fit because of missing data :-/ we can decide how to deal with
## missing data later!
t <- BAL.BWK$BWK[2:250]
## seasonal() is the seasonal forcing function we are trying to fit our
## emperical betas to. We want to find an equation to describe our betas
## with form: B_t = B0*(1+(B1*sin(c*t))) - I think we could use either sin
## or cos functions - I picked cos because it lowered MSE
seasonal <- function(x) {
x[1] * (1 + (x[2] * cos(x[3] * t)))
}
## LS() is the least squares function that we use for optimization. we
## want to minimize the sum of squared errors. between our empirical
## betas and the fitted values from our seasonal() function
LS <- function(x) {
sum((out$Beta[2:250] - seasonal(x))^2)
}
## here is a vector of guesses for the parameters - I looked at our
## empirical betas to pick some decent guesses
guess <- c(1.55e-06, 1.55e-06, 0.25)
## optim() does the minimization of the LS function using the guess vector
## for start values
x <- optim(guess, LS)
## these are the optimal parameter values
x$par
## [1] 3.617e-06 3.315e-01 2.408e-01
## this is the MSE for the optimal parameters
LS(x$par)
## [1] 8.779e-10
## plot emipirical betas, fitted betas, and max/min temperatures - beta's
## are in black, tmax in blue, tmin in purple
plot(BAL.BWK$BWK, out$Beta, type = "l", xlim = c(2, 26 * 5), ylab = "Beta",
xlab = "Biweek")
lines(seasonal(x$par), col = "red", lwd = 2)
lines(BAL.BWK$BWK, (BAL.BWK$TMIN/1e+08) + 5e-06, col = "purple", lwd = 2)
lines(BAL.BWK$BWK, BAL.BWK$TMAX/1e+08 + 5e-06, col = "blue", lwd = 2)