2013/05/02 TSIR Model: Measles in Baltimore 1920-1950

Code for Data Management stuff:

## read in csv file - call it BAL.  I did the compression into biweeks in
## excel even though I hate excel.  I imputed some missing case data in
## the excel file as well - I called these CASES_i.  I think missing data
## will be a big issue later!
BAL <- read.csv("C:/Users/Lisa/Desktop/677/BAL.BWK.csv", header = T, stringsAsFactors = FALSE)

## this is what the BAL data looks like
head(BAL)
##   YEAR WEEK CASES BWK CASES.BWK CASES.BWK_i TMAXAV.BWK TMINAV.BWK DAY1TMAX
## 1 1920    1    93   1       181         181     24.214     -46.14      100
## 2 1920    2    88   1       181         181     24.214     -46.14       50
## 3 1920    3   164   2       274         274      4.429     -61.79        6
## 4 1920    4   110   2       274         274      4.429     -61.79      -11
## 5 1920    5   144   3       270         270     47.929     -32.14       -6
## 6 1920    6   126   3       270         270     47.929     -32.14        0
##   DAY2TMAX DAY3TMAX DAY4TMAX DAY5TMAX DAY6TMAX DAY7TMAX DAY1TMIN DAY2TMIN
## 1      -11      -72      -56      -33       22       44      -11      -94
## 2       83       28       56       33       78       17       22       22
## 3      -39       17      -28      -17      -39       33      -78      -89
## 4       11       22        6      -44       67       78      -44      -17
## 5      144       33      -33       50       89       39      -67      -39
## 6       44       50       33       50      100       78      -28      -17
##   DAY3TMIN DAY4TMIN DAY5TMIN DAY6TMIN DAY7TMIN  TMAXAV  TMINAV TMAXSD
## 1     -128     -111     -117      -78       -6 -0.8571 -77.857  60.48
## 2      -11      -17      -17      -28      -72 49.2857 -14.429  25.06
## 3      -83      -89      -94      -83      -39 -9.5714 -79.286  28.54
## 4        6      -83      -94      -50      -28 18.4286 -44.286  42.61
## 5     -117     -133      -33      -11      -28 45.1429 -61.143  58.63
## 6        0        6        0        0       17 50.7143  -3.143  31.90
##   TMINSD
## 1  50.06
## 2  32.09
## 3  18.52
## 4  35.45
## 5  46.93
## 6  14.88

BAL.BWK <- BAL[c(seq(1, length(BAL$WEEK), 2)), ]
BAL.BWK <- BAL.BWK[, c(1, 2, 4:8)]
names(BAL.BWK) <- c("YEAR", "WEEK", "BWK", "CASES", "CASES_i", "TMAX", "TMIN")
BAL.BWK$CASES <- as.numeric(BAL.BWK$CASES)
BAL.BWK$CASES_i <- as.numeric(BAL.BWK$CASES_i)
BAL.BWK$TMAX <- as.numeric(BAL.BWK$TMAX)
BAL.BWK$TMIN <- as.numeric(BAL.BWK$TMIN)
BAL.BWK$WEEK <- as.numeric(BAL.BWK$WEEK)
BAL.BWK$BWK <- as.numeric(BAL.BWK$BWK)

## final data set we want to work with is called BAL.BWK - this is what
## the BAL.BWK data looks like
head(BAL.BWK)
##    YEAR WEEK BWK CASES CASES_i    TMAX   TMIN
## 1  1920    1   1   181     181  24.214 -46.14
## 3  1920    3   2   274     274   4.429 -61.79
## 5  1920    5   3   270     270  47.929 -32.14
## 7  1920    7   4   323     323  46.786 -26.57
## 9  1920    9   5   308     308  46.786 -48.86
## 11 1920   11   6   372     372 133.357  23.00

Code for TSIR:

## First thing we need to do is to load some packages.  We don't really
## need this to do a TSIR I don't think but well load it anyway
library(deSolve)
library(xtable)  ## xtable makes pretty tables
library(lattice)  ## lattice makes pretty graphs
library(ggplot2)  ## ggplot2 also makes pretty graphs

## TSIR Code

## I just made up a default birth rate here
mu.def <- 0.01/365

runTSIR <- function(pop.i = 734000, pop.f = 9e+05, alpha = 0.97, initial.state = c(S = (pop.i/2) - 
    181, I = 181, R = (pop.i/2), CI = 181), mu = mu.def, freq.dependent = TRUE) {

    ## pop[t] is size of population at time t - I am assuming the population
    ## increases linearly over time from pop.i to pop.f
    pop <- seq(pop.i, pop.f, length = length(BAL.BWK$CASES))

    ## assumption: birth rate is a constant.  B is number of births per
    ## bi-week.  B ranges from ~280-350 births every 2 weeks - does this seem
    ## reasonable?
    B <- (mu) * 14 * pop

    ## create empty vectors to store S, I, R, B, Beta estimates
    S <- rep(NA, length(BAL.BWK$CASES))
    I <- rep(NA, length(BAL.BWK$CASES))
    R <- rep(NA, length(BAL.BWK$CASES))
    CI <- rep(NA, length(BAL.BWK$CASES))
    Beta <- rep(NA, length(BAL.BWK$CASES))
    Rt <- rep(NA, length(BAL.BWK$CASES))

    ## set time = 1 values to initial states
    S[1] <- initial.state["S"]
    I[1] <- initial.state["I"]
    R[1] <- initial.state["R"]
    CI[1] <- initial.state["CI"]

    for (t in (2:length(BAL.BWK$CASES))) {
        I[t] <- BAL.BWK$CASES_i[t]
        S[t] <- S[t - 1] + B[t - 1] - I[t] - (mu * 14 * S[t - 1])
        R[t] <- I[t - 1] + R[t - 1] - (mu * 14 * R[t - 1])
        CI[t] <- I[t] + CI[t - 1]

        Beta[t] <- I[t]/(S[t - 1] * (I[t - 1]^alpha))
        Rt[t] <- (Beta[t]/0.5) * pop
    }

    tsir.output <- data.frame(S, I, R, CI, Beta, Rt)
}


## run the runTSIR function with default vals and store as out
out <- runTSIR()

## this is what the output looks like
head(out)
##        S   I      R   CI      Beta    Rt
## 1 366819 181 367000  181        NA    NA
## 2 366686 274 367040  455 4.823e-06 7.081
## 3 366557 270 367173  725 3.180e-06 4.669
## 4 366375 323 367303 1048 3.860e-06 5.667
## 5 366208 308 367485 1356 3.095e-06 4.544
## 6 365977 372 367652 1728 3.917e-06 5.750

## plot comparment S
plot(BAL.BWK$BWK, out$S, type = "l")

plot of chunk unnamed-chunk-2

## plot compartment I
plot(BAL.BWK$BWK, out$I, type = "l")

plot of chunk unnamed-chunk-2

## plot compartment R
plot(BAL.BWK$BWK, out$R, type = "l")

plot of chunk unnamed-chunk-2

## plot compartment CI
plot(BAL.BWK$BWK, out$CI, type = "l")

plot of chunk unnamed-chunk-2

## plot betas
plot(BAL.BWK$BWK, out$Beta, type = "l")

plot of chunk unnamed-chunk-2

## plot R0's
plot(BAL.BWK$BWK, out$Rt, type = "l")

plot of chunk unnamed-chunk-2


## some more plots - plots of betas and max temp for every 5 years
plot(BAL.BWK$BWK[1:26 * 5], out$Beta[1:26 * 5], type = "l", lwd = 2, xlim = c(1, 
    26 * 5), ylim = c(0, 1e-05))
lines(BAL.BWK$BWK[1:26 * 5], BAL.BWK$TMAX[1:26 * 5]/max(BAL.BWK$TMAX) * 5e-06, 
    col = "skyblue")

plot of chunk unnamed-chunk-2


plot(BAL.BWK$BWK[130:260], out$Beta[130:260], type = "l", lwd = 2, xlim = c(130, 
    260), ylim = c(0, 1e-05))
lines(BAL.BWK$BWK[130:260], BAL.BWK$TMAX[130:260]/max(BAL.BWK$TMAX) * 5e-06, 
    col = "skyblue")

plot of chunk unnamed-chunk-2


plot(BAL.BWK$BWK[260:390], out$Beta[260:390], type = "l", lwd = 2, xlim = c(260, 
    390), ylim = c(0, 1e-05))
lines(BAL.BWK$BWK[260:390], BAL.BWK$TMAX[260:390]/max(BAL.BWK$TMAX) * 5e-06, 
    col = "skyblue")

plot of chunk unnamed-chunk-2


plot(BAL.BWK$BWK[390:520], out$Beta[390:520], type = "l", lwd = 2, xlim = c(390, 
    520), ylim = c(0, 1e-05))
lines(BAL.BWK$BWK[390:520], BAL.BWK$TMAX[390:520]/max(BAL.BWK$TMAX) * 5e-06, 
    col = "skyblue")

plot of chunk unnamed-chunk-2


plot(BAL.BWK$BWK[520:650], out$Beta[520:650], type = "l", lwd = 2, xlim = c(520, 
    650), ylim = c(0, 1e-05))
lines(BAL.BWK$BWK[520:650], BAL.BWK$TMAX[520:650]/max(BAL.BWK$TMAX) * 5e-06, 
    col = "skyblue")

plot of chunk unnamed-chunk-2


plot(BAL.BWK$BWK[650:780], out$Beta[650:780], type = "l", lwd = 2, xlim = c(650, 
    780), ylim = c(0, 1e-05))
lines(BAL.BWK$BWK[650:780], BAL.BWK$TMAX[650:780]/max(BAL.BWK$TMAX) * 5e-06, 
    col = "skyblue")

plot of chunk unnamed-chunk-2


plot(BAL.BWK$BWK[780:880], out$Beta[780:880], type = "l", lwd = 2, xlim = c(780, 
    880), ylim = c(0, 1e-05))
lines(BAL.BWK$BWK[780:880], BAL.BWK$TMAX[780:880]/max(BAL.BWK$TMAX) * 5e-06, 
    col = "skyblue")

plot of chunk unnamed-chunk-2


## make a data frame of predicted betas and their week of the year.  do a
## linear regression of betas on week of the year to get 26 estimates for
## beta

beta.reg <- data.frame(Beta = as.numeric(out$Beta), WEEK = as.numeric(BAL.BWK$WEEK))

## since some of the empirical betas could not be calculated I removed
## missing data.  I also removed values of Nan, Inf, -Inf because lm wont
## run with these values ugh!
beta.reg <- beta.reg[complete.cases(beta.reg) == TRUE, ]
beta.reg <- beta.reg[beta.reg$Beta != Inf, ]
beta.reg <- beta.reg[beta.reg$Beta != -Inf, ]

## this is how you do a simple linear regression in R!  Use the lm()
## function, formula = outcome ~ covariates, data = name of dataset.  lm()
## will return something called an lm object, store the lm object as
## something - here I called it beta.lm
beta.lm <- lm(formula = Beta ~ as.factor(WEEK), data = beta.reg)

## summary(lm object) - summary(beta.lm) will show you the coefficients -
## we have one beta estimate for each week of the year
summary(beta.lm)
## 
## Call:
## lm(formula = Beta ~ as.factor(WEEK), data = beta.reg)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -6.90e-06 -1.38e-06 -3.70e-07  7.80e-07  4.14e-05 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        7.25e-06   8.81e-07    8.23  9.8e-16 ***
## as.factor(WEEK)2  -1.37e-06   1.35e-06   -1.02  0.30955    
## as.factor(WEEK)3   2.63e-07   1.23e-06    0.21  0.83073    
## as.factor(WEEK)4  -2.75e-06   1.35e-06   -2.04  0.04133 *  
## as.factor(WEEK)5  -2.25e-06   1.23e-06   -1.83  0.06702 .  
## as.factor(WEEK)6  -2.76e-06   1.35e-06   -2.05  0.04045 *  
## as.factor(WEEK)7  -2.36e-06   1.23e-06   -1.92  0.05506 .  
## as.factor(WEEK)8  -3.03e-06   1.35e-06   -2.25  0.02491 *  
## as.factor(WEEK)9  -2.71e-06   1.23e-06   -2.21  0.02743 *  
## as.factor(WEEK)10 -2.84e-06   1.35e-06   -2.11  0.03543 *  
## as.factor(WEEK)11 -2.68e-06   1.23e-06   -2.18  0.02952 *  
## as.factor(WEEK)12 -3.64e-06   1.35e-06   -2.70  0.00701 ** 
## as.factor(WEEK)13 -2.81e-06   1.23e-06   -2.29  0.02250 *  
## as.factor(WEEK)14 -3.51e-06   1.35e-06   -2.61  0.00938 ** 
## as.factor(WEEK)15 -3.41e-06   1.23e-06   -2.77  0.00569 ** 
## as.factor(WEEK)16 -2.83e-06   1.35e-06   -2.10  0.03589 *  
## as.factor(WEEK)17 -2.93e-06   1.23e-06   -2.38  0.01745 *  
## as.factor(WEEK)18 -3.80e-06   1.35e-06   -2.83  0.00486 ** 
## as.factor(WEEK)19 -3.83e-06   1.23e-06   -3.12  0.00187 ** 
## as.factor(WEEK)20 -1.49e-06   1.35e-06   -1.10  0.26963    
## as.factor(WEEK)21 -4.10e-06   1.23e-06   -3.34  0.00089 ***
## as.factor(WEEK)22 -4.14e-06   1.35e-06   -3.07  0.00219 ** 
## as.factor(WEEK)23 -4.63e-06   1.23e-06   -3.77  0.00018 ***
## as.factor(WEEK)24 -4.61e-06   1.35e-06   -3.43  0.00065 ***
## as.factor(WEEK)25 -5.54e-06   1.23e-06   -4.51  7.6e-06 ***
## as.factor(WEEK)26 -4.32e-06   1.35e-06   -3.21  0.00139 ** 
## as.factor(WEEK)27 -5.14e-06   1.23e-06   -4.18  3.2e-05 ***
## as.factor(WEEK)28 -5.27e-06   1.38e-06   -3.82  0.00015 ***
## as.factor(WEEK)29 -5.67e-06   1.23e-06   -4.62  4.6e-06 ***
## as.factor(WEEK)30 -5.59e-06   1.38e-06   -4.05  5.7e-05 ***
## as.factor(WEEK)31 -5.18e-06   1.23e-06   -4.21  2.8e-05 ***
## as.factor(WEEK)32 -4.49e-06   1.38e-06   -3.25  0.00122 ** 
## as.factor(WEEK)33 -5.18e-06   1.25e-06   -4.15  3.7e-05 ***
## as.factor(WEEK)34 -4.62e-06   1.38e-06   -3.34  0.00088 ***
## as.factor(WEEK)35 -3.99e-06   1.25e-06   -3.20  0.00143 ** 
## as.factor(WEEK)36 -2.87e-06   1.42e-06   -2.02  0.04394 *  
## as.factor(WEEK)37 -4.81e-06   1.23e-06   -3.91  0.00010 ***
## as.factor(WEEK)38 -2.26e-06   1.38e-06   -1.64  0.10216    
## as.factor(WEEK)39 -5.09e-06   1.25e-06   -4.08  5.0e-05 ***
## as.factor(WEEK)40 -3.04e-06   1.42e-06   -2.14  0.03266 *  
## as.factor(WEEK)41 -3.76e-06   1.27e-06   -2.97  0.00308 ** 
## as.factor(WEEK)42 -4.22e-06   1.38e-06   -3.05  0.00235 ** 
## as.factor(WEEK)43  9.21e-07   1.27e-06    0.73  0.46744    
## as.factor(WEEK)44 -9.18e-07   1.42e-06   -0.65  0.51874    
## as.factor(WEEK)45 -2.94e-06   1.23e-06   -2.39  0.01703 *  
## as.factor(WEEK)46 -2.34e-06   1.38e-06   -1.70  0.08996 .  
## as.factor(WEEK)47 -2.22e-06   1.23e-06   -1.81  0.07130 .  
## as.factor(WEEK)48 -3.42e-06   1.38e-06   -2.47  0.01362 *  
## as.factor(WEEK)49 -2.45e-06   1.23e-06   -2.00  0.04614 *  
## as.factor(WEEK)50 -2.19e-06   1.38e-06   -1.58  0.11388    
## as.factor(WEEK)51 -3.28e-06   1.23e-06   -2.67  0.00770 ** 
## as.factor(WEEK)52 -7.20e-07   1.38e-06   -0.52  0.60213    
## as.factor(WEEK)53 -3.84e-06   2.22e-06   -1.73  0.08380 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.53e-06 on 680 degrees of freedom
## Multiple R-squared:  0.168,  Adjusted R-squared:  0.104 
## F-statistic: 2.63 on 52 and 680 DF,  p-value: 1.61e-08

## fitting the empirical betas to a seasonality function - the empirical
## betas we found follow some seasonal forcing function that we want to
## fit

## t is time in consecutive biweeks.  I am only using a subset of the data
## for the fit because of missing data :-/ we can decide how to deal with
## missing data later!
t <- BAL.BWK$BWK[2:250]

## seasonal() is the seasonal forcing function we are trying to fit our
## emperical betas to.  We want to find an equation to describe our betas
## with form: B_t = B0*(1+(B1*sin(c*t))) - I think we could use either sin
## or cos functions - I picked cos because it lowered MSE
seasonal <- function(x) {
    x[1] * (1 + (x[2] * cos(x[3] * t)))
}

## LS() is the least squares function that we use for optimization.  we
## want to minimize the sum of squared errors.  between our empirical
## betas and the fitted values from our seasonal() function
LS <- function(x) {
    sum((out$Beta[2:250] - seasonal(x))^2)
}

## here is a vector of guesses for the parameters - I looked at our
## empirical betas to pick some decent guesses
guess <- c(1.55e-06, 1.55e-06, 0.25)

## optim() does the minimization of the LS function using the guess vector
## for start values
x <- optim(guess, LS)

## these are the optimal parameter values
x$par
## [1] 3.617e-06 3.315e-01 2.408e-01

## this is the MSE for the optimal parameters
LS(x$par)
## [1] 8.779e-10

## plot emipirical betas, fitted betas, and max/min temperatures - beta's
## are in black, tmax in blue, tmin in purple
plot(BAL.BWK$BWK, out$Beta, type = "l", xlim = c(2, 26 * 5), ylab = "Beta", 
    xlab = "Biweek")
lines(seasonal(x$par), col = "red", lwd = 2)
lines(BAL.BWK$BWK, (BAL.BWK$TMIN/1e+08) + 5e-06, col = "purple", lwd = 2)
lines(BAL.BWK$BWK, BAL.BWK$TMAX/1e+08 + 5e-06, col = "blue", lwd = 2)

plot of chunk unnamed-chunk-2