2013/05/03 TSIR Model: Measles in Baltimore 1920-1950


## data management stuff
BAL <- read.csv("C:/Users/Lisa/Desktop/677/BAL.BWK.csv", header = T, stringsAsFactors = FALSE)

BAL.BWK <- BAL[c(seq(1, length(BAL$WEEK), 2)), ]
BAL.BWK <- BAL.BWK[, c(1, 2, 4:8)]
names(BAL.BWK) <- c("YEAR", "WEEK", "BWK", "CASES", "CASES_i", "TMAX", "TMIN")
BAL.BWK$CASES <- as.numeric(BAL.BWK$CASES)
BAL.BWK$CASES_i <- as.numeric(BAL.BWK$CASES_i)
BAL.BWK$TMAX <- as.numeric(BAL.BWK$TMAX)
BAL.BWK$TMIN <- as.numeric(BAL.BWK$TMIN)
BAL.BWK$WEEK <- as.numeric(BAL.BWK$WEEK)
BAL.BWK$BWK <- as.numeric(BAL.BWK$BWK)

CASES_c <- rep(NA, length(BAL.BWK$CASES))
CASES_c[1] <- BAL.BWK$CASES_i[1]

for (i in 2:length(BAL.BWK$CASES)) {
    CASES_c[i] <- BAL.BWK$CASES_i[i] + CASES_c[i - 1]
}

BAL.BWK <- data.frame(BAL.BWK, CASES_c)

## here is what the data looks like.  CASES_i are cases with missing
## values imputed.  CASES_c are cumulative cases.
head(BAL.BWK, 26)

##    YEAR WEEK BWK CASES CASES_i    TMAX   TMIN CASES_c
## 1  1920    1   1   181     181  24.214 -46.14     181
## 3  1920    3   2   274     274   4.429 -61.79     455
## 5  1920    5   3   270     270  47.929 -32.14     725
## 7  1920    7   4   323     323  46.786 -26.57    1048
## 9  1920    9   5   308     308  46.786 -48.86    1356
## 11 1920   11   6   372     372 133.357  23.00    1728
## 13 1920   13   7   380     380 172.143  71.86    2108
## 15 1920   15   8   528     528 155.143  52.79    2636
## 17 1920   17   9   560     560 183.714  90.43    3196
## 19 1920   19  10   560     560 195.214 101.07    3756
## 21 1920   21  11   503     503 244.000 146.36    4259
## 23 1920   23  12   306     306 275.000 170.14    4565
## 25 1920   25  13   158     158 275.929 177.71    4723
## 27 1920   27  14   100     100 294.857 205.57    4823
## 29 1920   29  15    42      42 283.357 190.07    4865
## 31 1920   31  16    18      18 288.500 194.43    4883
## 33 1920   33  17    16      16 272.143 205.14    4899
## 35 1920   35  18    16      16 259.071 175.36    4915
## 37 1920   37  19    15      15 250.714 155.86    4930
## 39 1920   39  20     6       6 237.214 144.86    4936
## 41 1920   41  21    12      12 250.500 135.21    4948
## 43 1920   43  22    19      19 208.357 108.71    4967
## 45 1920   45  23    15      15 112.929  44.07    4982
## 47 1920   47  24    26      26 104.429  52.07    5008
## 49 1920   49  25    38      38  97.000  35.29    5046
## 51 1920   51  26    29      29  46.857 -17.93    5075


## Example for how to find rho, D_t's, and estimated S compartment values

pop.i = 734000
pop.f = 9e+05
sbar.def <- pop.i/2
mu.def <- 0.017/365
mu <- mu.def

## population at time t
pop <- seq(pop.i, pop.f, length = length(BAL.BWK$CASES))

## number of births - this is a guess needs real values/estimates!
B <- (mu) * 14 * pop
head(B)

## [1] 478.6 478.7 478.9 479.0 479.1 479.2


## cumulative births
B_c <- rep(NA, length(B))
B_c[1] <- B[1]
for (i in 2:length(B)) {
    B_c[i] <- B_c[i - 1] + B[i]
}
head(B_c)

## [1]  478.6  957.3 1436.2 1915.2 2394.3 2873.5



x <- seq(0, 150000, length = 1000)

## find rho - the underreporting rate
fit <- lm(B_c ~ CASES_c)
fit2 <- lm(CASES_c ~ B_c)
summary(fit2)

## 
## Call:
## lm(formula = CASES_c ~ B_c)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -9778  -2770   -300   2650   9139 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.73e+03   2.81e+02    6.14  1.3e-09 ***
## B_c         4.04e-01   1.27e-03  317.36  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3910 on 742 degrees of freedom
##   (143 observations deleted due to missingness)
## Multiple R-squared:  0.993,  Adjusted R-squared:  0.993 
## F-statistic: 1.01e+05 on 1 and 742 DF,  p-value: <2e-16

plot(B_c, CASES_c, type = "l")
abline(fit2)
lines(x, x, col = "red")

plot of chunk unnamed-chunk-1

rho <- summary(fit2)$coef[2, 1]
rho

## [1] 0.4044


## find D_t - the residuals from the regression of cum births on cum cases
D_t <- fit$residuals
## guess an sbar
sbar <- 350000
## plot D_t + guess for sbar - this is the compartment S
plot(D_t + sbar, type = "l")

plot of chunk unnamed-chunk-1


## TSIR CODE

runTSIR <- function(pop.i = 734000, pop.f = 9e+05, alpha = 0.97, x1 = 3.754e-06, 
    x2 = -0.4008, x3 = 0.27627, mu = mu.def, sbar = sbar.def, guess = c(sbar = sbar.def, 
        x1 = 3.754e-06, x2 = -0.4008, alpha = 0.97), initial.state = c(S = pop.i/2 - 
        181, I = 181, R = pop.i/2, CI = 181), freq.dependent = TRUE) {

    ## pop[t] is size of population at time t I am assuming the population
    ## increases linearly over time from pop.i to pop.f
    pop <- seq(pop.i, pop.f, length = length(BAL.BWK$CASES))

    ## assumption: birth rate is a constant B is number of births per bi-week
    ## B ranges from ~280-350 births every 2 weeks - does this seem
    ## reasonable?
    B <- (mu) * 14 * pop

    B_c <- rep(NA, length(B))
    B_c[1] <- B[1]
    for (i in 2:length(B)) {
        B_c[i] <- B_c[i - 1] + B[i]
    }

    CASES_c <- rep(NA, length(BAL.BWK$CASES))
    CASES_c[1] <- BAL.BWK$CASES_i[1]

    for (i in 2:length(BAL.BWK$CASES)) {
        CASES_c[i] <- BAL.BWK$CASES_i[i] + CASES_c[i - 1]
    }

    ## find rho - the underreporting rate
    fit <- lm(B_c ~ CASES_c)
    fit2 <- lm(CASES_c ~ B_c)
    # summary(fit2) plot(B_c, CASES_c, type='l') abline(fit2) lines(x,x,
    # col='red')
    rho <- summary(fit2)$coef[2, 1]
    rho

    CASES_rho <- CASES_c/rho
    fit3 <- lm(B_c ~ CASES_rho)

    ## find D_t - the residuals from the regression of cum births on cum cases
    D_t <- fit3$residuals
    ## plot D_t + guess for sbar - this is the compartment S plot(D_t+sbar,
    ## type='l')

    ## create empty vectors to store S, I, R, B, Beta estimates
    S <- rep(NA, length(BAL.BWK$CASES))
    I <- rep(NA, length(BAL.BWK$CASES))
    R <- rep(NA, length(BAL.BWK$CASES))
    CI <- rep(NA, length(BAL.BWK$CASES))
    Beta <- rep(NA, length(BAL.BWK$CASES))
    Rt <- rep(NA, length(BAL.BWK$CASES))

    ## set time = 1 values to initial states
    S[1] <- D_t[1] + sbar
    I[1] <- initial.state["I"]
    R[1] <- initial.state["R"]
    CI[1] <- initial.state["CI"]

    ## betas are a function of the normalized climate data - I used tmax here.
    ## The x1-x3 are parameters to fit the seasonal forcing equation.
    tmax <- (BAL.BWK$TMAX - 192.8)/192.8
    Beta <- guess["x1"] * (1 + (guess["x2"] * tmax))

    for (t in (2:length(BAL.BWK$CASES))) {
        S[t] <- D_t[t] + guess["sbar"]
        I[t] <- Beta[t] * S[t - 1] * I[t - 1]^guess["alpha"]
        R[t] <- I[t - 1] + R[t - 1]
        CI[t] <- I[t] + CI[t - 1]
    }

    tsir.output <- data.frame(S, I, R, CI, Beta)
}


## optimizing for beta parameters sbar and x1, x2

LS1 <- function(x) {
    sum((runTSIR(guess = x)$I[1:700] - BAL.BWK$CASES_i[1:700]/rho)^2)
}

z <- c(sbar = 3e+05, x1 = 3.754e-06, x2 = -0.4008, alpha = 0.97)
q <- optim(z, LS1)

## show optimal values
q$par

##       sbar         x1         x2      alpha 
##  3.150e+05  3.803e-06 -4.008e-01  9.700e-01


optimal <- as.vector(q$par)

out.opt <- runTSIR(guess = c(sbar = as.numeric(q$par[1]), x1 = as.numeric(q$par[2]), 
    x2 = as.numeric(q$par[3]), alpha = as.numeric(q$par[4])))

## here is what the output looks like for the optimized values
head(out.opt, 26)

##         S     I      R      CI      Beta
## 1  369883 181.0 367000   181.0 5.136e-06
## 2  317689 303.2 367181   484.2 5.293e-06
## 3  317505 401.5 367484   885.7 4.949e-06
## 4  317191 528.0 367886  1413.7 4.958e-06
## 5  316914 688.0 368414  2101.7 4.958e-06
## 6  316480 765.8 369102  2867.5 4.273e-06
## 7  316026 787.7 369867  3655.2 3.967e-06
## 8  315210 835.8 370655  4491.0 4.101e-06
## 9  314315 834.3 371491  5325.3 3.875e-06
## 10 313420 811.0 372325  6136.3 3.784e-06
## 11 312665 706.6 373136  6842.8 3.398e-06
## 12 312393 572.2 373843  7415.0 3.153e-06
## 13 312486 464.8 374415  7879.8 3.146e-06
## 14 312720 362.0 374880  8241.7 2.996e-06
## 15 313097 292.8 375242  8534.6 3.087e-06
## 16 313534 235.6 375535  8770.2 3.047e-06
## 17 313975 199.1 375770  8969.3 3.176e-06
## 18 314416 174.9 375969  9144.2 3.279e-06
## 19 314860 157.6 376144  9301.8 3.345e-06
## 20 315327 147.2 376302  9448.9 3.452e-06
## 21 315778 133.7 376449  9582.6 3.347e-06
## 22 316213 134.2 376583  9716.8 3.680e-06
## 23 316657 162.4 376717  9879.2 4.435e-06
## 24 317075 198.8 376879 10078.0 4.502e-06
## 25 317463 245.2 377078 10323.2 4.561e-06
## 26 317873 327.2 377323 10650.4 4.957e-06


## plot predicted values for each compartment
plot(BAL.BWK$BWK, out.opt$S, type = "l")

plot of chunk unnamed-chunk-1

plot(BAL.BWK$BWK, out.opt$I, type = "l")

plot of chunk unnamed-chunk-1

plot(BAL.BWK$BWK, out.opt$R, type = "l")

plot of chunk unnamed-chunk-1

plot(BAL.BWK$BWK, out.opt$CI, type = "l")

plot of chunk unnamed-chunk-1

plot(BAL.BWK$BWK, out.opt$Beta, type = "l")

plot of chunk unnamed-chunk-1


## plot of our predicted incidences (in red) versus actuall incidences (in
## black)

plot(BAL.BWK$BWK, BAL.BWK$CASES_i/rho, col = "black", lwd = 1, type = "l")
lines(BAL.BWK$BWK, out.opt$I, col = "red", lwd = 2)

plot of chunk unnamed-chunk-1