2013/05/05 Measles in Baltimore 1920-1550, SCHOOL BETA FUNCTION


## data mangement stuff

BAL <- read.csv("C:/Users/Lisa/Desktop/677/Baltimore.BWK.csv", header = T, stringsAsFactors = FALSE)

BAL <- BAL[seq(1, length(BAL$CASES), by = 2), ]

BAL <- BAL[c(1:744), ]

SCH <- c(-1, -1, 1, 1, 1, 1, -1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
    1, 1, 1, 1, 1, 1, -1)

SCH <- rep(SCH, length(BAL$CASES))[1:744]

BAL$CASES <- as.numeric(BAL$CASES)
BAL$CASES.i <- as.numeric(BAL$CASES.i)
BAL$TMAX <- as.numeric(BAL$TMAX)
BAL$TMIN <- as.numeric(BAL$TMIN)
BAL$WEEK <- as.numeric(BAL$WEEK)
BAL$BWK <- as.numeric(BAL$BWK)
BAL$BWK.c <- as.numeric(BAL$BWK.c)

## this is what the data looks like
head(BAL)
##    YEAR WEEK BWK.c CASES CASES.i    TMAX   TMIN    RATE    B     POP BWK
## 1  1920    1     1   181     181  24.214 -46.14 0.01970 1447 1914686   1
## 3  1920    3     2   274     274   4.429 -61.79 0.01970 1447 1914686   2
## 5  1920    5     3   270     270  47.929 -32.14 0.01977 1454 1914691   3
## 7  1920    7     4   323     323  46.786 -26.57 0.01982 1456 1914666   4
## 9  1920    9     5   308     308  46.786 -48.86 0.01971 1444 1914656   5
## 11 1920   11     6   372     372 133.357  23.00 0.01962 1441 1914647   6

plot(cumsum(BAL$B), cumsum(BAL$CASES.i), type = "l")
x <- c(1:1e+06)
lines(x, x, col = "red")

plot of chunk unnamed-chunk-1


## fit a smooth spline of cumulative measles on cumulative births with 2.5
## degrees of freedom

cum.reg <- smooth.spline(cumsum(BAL$B), cumsum(BAL$CASES.i), df = 2.5)

## predict points using the smooth spline and calculate residuals, D
D <- predict(cum.reg)$y - cumsum(BAL$CASES.i)

B <- BAL$B

plot(D, type = "l")

plot of chunk unnamed-chunk-1


## under reporting is given by slope of smooth spline
u <- predict(cum.reg, deriv = 1)$y

## Ic are actual cases - reported cases divided by u
Ic = BAL$CASES.i/u

plot(Ic, type = "l")

plot of chunk unnamed-chunk-1


lIt = log(Ic[2:745])
lItm1 = log(Ic[1:744])
Dtm1 = D[1:744]

## remove values of -Inf from I - glm function does not like these!
for (i in 1:743) {
    if (lIt[i] == -Inf) {
        lIt[i] <- 0
    }
}

for (i in 1:length(lItm1)) {
    if (lItm1[i] == -Inf) {
        lItm1[i] <- 0
    }
}

## mean populaiton estimate
pop = mean(BAL$POP)
seas = rep(1:26, length(BAL$CASES))[1:744]

seas <- as.factor(seas)

## test Smeans from 1% to whole population
Smean = seq(0.01, 1, by = 0.001) * pop

## this is a place to store the likelihoods of the data for each setting
## of Smean
llik = rep(NA, length(Smean))

## now perform the log linear regressions at each Smean
for (i in 1:length(Smean)) {
    lStm1 = log(Smean[i] + Dtm1)
    glmfit = glm(lIt ~ -1 + as.factor(seas) + lItm1 + offset(lStm1))
    llik[i] = glmfit$deviance
}

## plot likelihood curve
plot(Smean, llik, type = "l", xlim = c(0, 2e+05))

plot of chunk unnamed-chunk-1


## The Smean we want to use is the one that minimizes the log likelihood
sbar <- Smean[which(llik == min(llik))]

plot(D + sbar, type = "l")

plot of chunk unnamed-chunk-1


sbar.def <- sbar
D.def <- D
B.def <- BAL$B
alpha.def <- 0.954


## TSIR code

## pass B, sbar, and D results from above and guess at coefficients for
## Beta function

runTSIR <- function(alpha = alpha.def, B = B.def, sbar = sbar.def, D = D.def, 
    guess = c(x1 = 3.8e-05, x2 = 0.4), initial.state = c(S = sbar.def - 181, 
        I = 181, R = BAL$POP[1] - sbar.def - 181, CI = 181)) {

    ## create empty vectors to store S, I, R, B, Beta estimates
    S <- rep(NA, length(BAL$CASES))
    I <- rep(NA, length(BAL$CASES))
    R <- rep(NA, length(BAL$CASES))
    CI <- rep(NA, length(BAL$CASES))
    Beta <- rep(NA, length(BAL$CASES))
    Rt <- rep(NA, length(BAL$CASES))

    ## set time = 1 values to initial states
    S[1] <- D[1] + sbar
    I[1] <- initial.state["I"]
    R[1] <- initial.state["R"]
    CI[1] <- initial.state["CI"]

    ## betas are a function of the normalized climate data - I used tmax here.
    ## The x1-x3 are parameters to fit the seasonal forcing equation.  tmax <-
    ## (BAL$TMAX-192.8)/192.8 Beta <- guess['x1]*(1+(guess['x2']*(tmax)))
    Beta <- guess["x1"] * (1 + (guess["x2"] * (SCH)))

    for (t in 2:length(BAL$CASES)) {
        S[t] <- D[t] + sbar
        I[t] <- Beta[t] * S[t - 1] * (I[t - 1]^alpha)
        R[t] <- I[t - 1] + R[t - 1]
        CI[t] <- I[t] + CI[t - 1]
    }

    tsir.output <- data.frame(S, I, R, CI, Beta, C.t = BAL$CASES.i/u)
}

out <- runTSIR()
head(out, 52)
##        S       I       R       CI     Beta     C.t
## 1  43773   181.0 1867929    181.0 2.28e-05 1043.50
## 2  43750   142.2 1868110    323.2 2.28e-05 1579.67
## 3  43733   263.5 1868252    586.8 5.32e-05 1556.60
## 4  43662   474.4 1868515   1061.2 5.32e-05 1862.16
## 5  43605   830.0 1868990   1891.2 5.32e-05 1775.68
## 6  43483  1413.4 1869820   3304.6 5.32e-05 2144.65
## 7  43338  1003.7 1871233   4308.3 2.28e-05 2190.78
## 8  43034  1683.9 1872237   5992.2 5.32e-05 3044.02
## 9  42702  2739.2 1873921   8731.4 5.32e-05 3228.51
## 10 42374  4323.7 1876660  13055.1 5.32e-05 3228.51
## 11 42105  2842.1 1880984  15897.2 2.28e-05 2899.89
## 12 42042  1892.5 1883826  17789.7 2.28e-05 1764.15
## 13 42129  1282.1 1885718  19071.7 2.28e-05  910.90
## 14 42289   886.0 1887000  19957.8 2.28e-05  576.52
## 15 42507   625.2 1887886  20583.0 2.28e-05  242.14
## 16 42749   450.6 1888512  21033.6 2.28e-05  103.77
## 17 42993   331.6 1888962  21365.2 2.28e-05   92.24
## 18 43233   248.9 1889294  21614.1 2.28e-05   92.24
## 19 43472   190.3 1889543  21804.4 2.28e-05   86.48
## 20 43713   345.8 1889733  22150.2 5.32e-05   34.59
## 21 43942   614.5 1890079  22764.7 5.32e-05   69.18
## 22 44161  1069.2 1890693  23833.8 5.32e-05  109.54
## 23 44377  1822.4 1891762  25656.3 5.32e-05   86.48
## 24 44582  3046.0 1893585  28702.3 5.32e-05  149.89
## 25 44777  4995.2 1896631  33697.5 5.32e-05  219.07
## 26 44981  3446.8 1901626  37144.3 2.28e-05  167.18
## 27 45172  2430.3 1905073  39574.6 2.28e-05  368.95
## 28 45372  1748.8 1907503  41323.4 2.28e-05  322.83
## 29 45554  2994.0 1909252  44317.4 5.32e-05  426.60
## 30 45666  5021.0 1912246  49338.4 5.32e-05  835.90
## 31 45822  8242.4 1917267  57580.8 5.32e-05  564.95
## 32 45973 13271.0 1925509  70851.8 5.32e-05  593.77
## 33 46108  8988.6 1938780  79840.4 2.28e-05  605.29
## 34 46205 14504.7 1947769  94345.1 5.32e-05  755.17
## 35 46249 22944.4 1962274 117289.6 5.32e-05 1077.97
## 36 46251 35571.4 1985218 152860.9 5.32e-05 1343.13
## 37 46253 23163.8 2020790 176024.8 2.28e-05 1360.41
## 38 46336 15385.4 2043953 191410.1 2.28e-05  951.13
## 39 46499 10431.6 2059339 201841.8 2.28e-05  489.97
## 40 46716  7225.9 2069770 209067.7 2.28e-05  276.69
## 41 46963  5114.3 2076996 214181.9 2.28e-05  103.76
## 42 47207  3697.1 2082111 217879.1 2.28e-05  115.28
## 43 47462  2727.0 2085808 220606.1 2.28e-05   51.88
## 44 47708  2050.8 2088535 222656.9 2.28e-05   86.46
## 45 47962  1570.7 2090585 224227.6 2.28e-05   23.06
## 46 48208  2856.9 2092156 227084.5 5.32e-05   34.58
## 47 48436  5081.0 2095013 232165.5 5.32e-05   92.22
## 48 48665  8842.1 2100094 241007.6 5.32e-05   74.93
## 49 48854 15071.0 2108936 256078.5 5.32e-05  265.12
## 50 48983 25162.8 2124007 281241.4 5.32e-05  616.69
## 51 49080 41141.2 2149170 322382.6 5.32e-05  806.87
## 52 49201 28239.4 2190311 350622.0 2.28e-05  668.54

plot(BAL$BWK, out$S, type = "l")

plot of chunk unnamed-chunk-1

plot(BAL$BWK, out$I, type = "l")

plot of chunk unnamed-chunk-1

plot(BAL$BWK, out$R, type = "l")

plot of chunk unnamed-chunk-1

plot(BAL$BWK, out$CI, type = "l")

plot of chunk unnamed-chunk-1

plot(BAL$BWK, out$Beta, type = "l")

plot of chunk unnamed-chunk-1


## optimizing for beta parameters sbar and x1-x3

LS1 <- function(x) {
    sum((runTSIR(guess = x)$I - BAL$CASES.i/u)^2)
}

g <- c(x1 = 3.8e-05, x2 = 0.4)
p <- optim(g, LS1)

## show optimal values
p$par
##        x1        x2 
## 2.865e-05 4.400e-01

## show MSE
LS1(p$par)
## [1] 5.861e+09

optimal <- as.vector(p$par)

out.opt <- runTSIR(guess = c(x1 = as.numeric(p$par[1]), x2 = as.numeric(p$par[2])))

## plot of our predicted incidences (in red) versus actuall incidences (in
## black)


plot(BAL$BWK, out.opt$I, col = "red", type = "l", lwd = 2, ylim = c(0, 4000))
lines(BAL$BWK, BAL$CASES.i, col = "black", lwd = 1)

plot of chunk unnamed-chunk-1