2013/05/09 Comparison of temp, school, and constant betas

Here is the output with the revised birthrates. The optimal alpha (fudge factor) is about 0.88 now which is a little weird, but otherwise I think things look OK. The mean underreporting rate is 0.37, and sbar is about 3% of the population which seems reasonable to me.

## data mangement stuff

BAL <- read.csv("C:/Users/Lisa/Desktop/677/BAL.FINAL.REVISED.csv")

SCH <- c(-1, -1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
    1, 1, 1, 1, 1, 1, -1)

SCH <- rep(SCH, length(BAL$CASES))[1:744]

BAL$SCH <- SCH

## to analyze a subset of the data
BAL <- BAL[c(1:744), ]

BAL$CASES <- as.numeric(BAL$CASES)
BAL$CASES.i <- as.numeric(BAL$CASES.i)
BAL$TMAX <- as.numeric(BAL$TMAX)
BAL$TMIN <- as.numeric(BAL$TMIN)
BAL$WEEK <- as.numeric(BAL$WEEK)
BAL$BWK <- as.numeric(BAL$BWK)
BAL$SCH <- as.numeric(BAL$SCH)
BAL$B <- as.numeric(BAL$B)
BAL$B.5 <- as.numeric(BAL$B.5)

# BAL$B.5 <- BAL$B

## this is what the data looks like
head(BAL)

##   YEAR WEEK BWK CASES CASES.i    TMAX   TMIN    RATE SCH   B B.5    POP
## 1 1920    1   1   181     181  24.214 -46.14 0.01970  -1 651 574 733826
## 2 1920    3   2   274     274   4.429 -61.79 0.01970  -1 651 574 733826
## 3 1920    5   3   270     270  47.929 -32.14 0.01977   1 653 576 733826
## 4 1920    7   4   323     323  46.786 -26.57 0.01982   1 655 578 733826
## 5 1920    9   5   308     308  46.786 -48.86 0.01971   1 651 574 733826
## 6 1920   11   6   372     372 133.357  23.00 0.01962   1 648 572 733826


plot(cumsum(BAL$B.5), cumsum(BAL$CASES.i), type = "l")
x <- c(1:1e+06)
lines(x, x, col = "red")

plot of chunk unnamed-chunk-1


## fit a smooth spline of cumulative measles on cumulative births with 2.5
## degrees of freedom

cum.reg <- smooth.spline(cumsum(BAL$B.5), cumsum(BAL$CASES.i), df = 2.5)

## predict points using the smooth spline and calculate residuals, D
D <- predict(cum.reg)$y - cumsum(BAL$CASES.i)

B <- BAL$B.5

plot(D, type = "l")

plot of chunk unnamed-chunk-1


## under reporting is given by slope of smooth spline
u <- predict(cum.reg, deriv = 1)$y
head(u)

## [1] 0.3705 0.3705 0.3705 0.3705 0.3705 0.3705


## Ic are actual cases - reported cases divided by u
Ic = BAL$CASES.i/u

# plot(Ic, type='l')

lIt = log(Ic[2:(length(BAL$CASES) + 1)])
lItm1 = log(Ic[1:length(BAL$CASES)])
Dtm1 = D[1:length(BAL$CASES)]

## remove values of -Inf from I - glm function does not like these!
for (i in 1:(length(lIt) - 1)) {
    if (lIt[i] == -Inf) {
        lIt[i] <- 0
    }
}

for (i in 1:length(lItm1)) {
    if (lItm1[i] == -Inf) {
        lItm1[i] <- 0
    }
}

## mean populaiton estimate
pop = mean(BAL$POP)
seas = rep(1:26, length(BAL$CASES))[1:length(BAL$CASES)]

seas <- as.factor(seas)

## test Smeans from 1% to whole population
Smean = seq(0.01, 1, by = 0.001) * pop

## this is a place to store the likelihoods of the data for each setting
## of Smean
llik = rep(NA, length(Smean))

## now perform the log linear regressions at each Smean
for (i in 1:length(Smean)) {
    lStm1 = log(Smean[i] + Dtm1)
    glmfit = glm(lIt ~ -1 + as.factor(seas) + lItm1 + offset(lStm1))
    llik[i] = glmfit$deviance
}

## plot likelihood curve
plot(Smean, llik, type = "l", xlim = c(0, 2e+05))

plot of chunk unnamed-chunk-1


## The Smean we want to use is the one that minimizes the log likelihood
sbar <- Smean[which(llik == min(llik))]
sbar

## [1] 23715


plot(D + sbar, type = "l")

plot of chunk unnamed-chunk-1


sbar.def <- sbar
D.def <- D
B.def <- BAL$B.5
alpha.def <- 0.88

## TSIR code

## pass B, sbar, and D results from above and guess at coefficients for
## Beta function

runTSIR <- function(alpha = alpha.def, B = B.def, sbar = sbar.def, D = D.def, 
    Beta = "TMP", guess = c(x1 = 3.8e-05, x2 = 0.4), initial.state = c(S = sbar.def - 
        488, I = 488, R = BAL$POP[1] - sbar.def - 488, CI = 488)) {

    ## create empty vectors to store S, I, R, B, Beta estimates
    S <- rep(NA, length(BAL$CASES))
    I <- rep(NA, length(BAL$CASES))
    R <- rep(NA, length(BAL$CASES))
    CI <- rep(NA, length(BAL$CASES))

    ## set time = 1 values to initial states
    S[1] <- sbar + D[1]
    I[1] <- initial.state["I"]
    R[1] <- initial.state["R"]
    CI[1] <- initial.state["CI"]

    ## betas are a function of the normalized climate data - I used tmax here.
    ## The x1-x2 are parameters to fit the seasonal forcing equation.
    tmax <- (BAL$TMAX - 170)/150
    Beta.TMP <- guess["x1"] * (1 + (guess["x2"] * (tmax)))
    Beta.SCH <- guess["x1"] * (1 + (guess["x2"] * (BAL$SCH)))
    Beta.CST <- rep(guess["x1"], length(BAL$CASES))


    if (Beta == "TMP") {
        for (t in 2:length(BAL$CASES)) {
            S[t] <- D[t] + sbar
            I[t] <- Beta.TMP[t] * S[t - 1] * (I[t - 1]^alpha)
            R[t] <- I[t - 1] + R[t - 1]
            CI[t] <- I[t] + CI[t - 1]
        }
    } else if (Beta == "SCH") {
        for (t in 2:length(BAL$CASES)) {
            S[t] <- D[t] + sbar
            I[t] <- Beta.SCH[t] * S[t - 1] * (I[t - 1]^alpha)
            R[t] <- I[t - 1] + R[t - 1]
            CI[t] <- I[t] + CI[t - 1]
        }
    } else if (Beta == "CST") {
        for (t in 2:length(BAL$CASES)) {
            S[t] <- D[t] + sbar
            I[t] <- Beta.CST[t] * S[t - 1] * (I[t - 1]^alpha)
            R[t] <- I[t - 1] + R[t - 1]
            CI[t] <- I[t] + CI[t - 1]
        }
    }

    tsir.output <- data.frame(S, I, R, CI, Beta.TMP, Beta.SCH, Beta.CST)
}

## optimizing for beta parameters sbar and x1-x2

LS1 <- function(x) {
    sum((runTSIR(guess = x, Beta = "TMP")$I - BAL$CASES.i/u)^2)
}

g <- c(x1 = 5e-05, x2 = 0.4)
p <- optim(g, LS1)

## show optimal values
p$par

##         x1         x2 
##  8.075e-05 -3.263e-01


## show MSE
LS1(p$par)

## [1] 803353667


optimal <- as.vector(p$par)

out.opt <- runTSIR(guess = c(x1 = as.numeric(p$par[1]), x2 = as.numeric(p$par[2])), 
    Beta = "TMP")

head(out.opt)

##       S     I      R   CI  Beta.TMP  Beta.SCH  Beta.CST
## 1 21529 488.0 709623  488 1.064e-04 0.0001071 8.075e-05
## 2 21468 549.0 710111 1037 1.098e-04 0.0001071 8.075e-05
## 3 21411 565.1 710660 1602 1.022e-04 0.0000544 8.075e-05
## 4 21303 579.1 711225 2181 1.024e-04 0.0000544 8.075e-05
## 5 21207 588.8 711804 2770 1.024e-04 0.0000544 8.075e-05
## 6 21047 506.4 712393 3276 8.719e-05 0.0000544 8.075e-05

## plot of our predicted incidences (in red) versus actuall incidences (in
## black)

plot(BAL$BWK, out.opt$I, col = "red", type = "l", lwd = 2, main = "Temperature", 
    ylim = c(0, 6500))
lines(BAL$BWK, BAL$CASES.i, col = "black", lwd = 1)
legend("topright", c(paste("MSE:", round(LS1(p$par), 0), sep = "")))

plot of chunk unnamed-chunk-2

## optimizing for beta parameters sbar and x1-x2

LS1 <- function(x) {
    sum((runTSIR(guess = x, Beta = "SCH")$I - BAL$CASES.i/u)^2)
}

g <- c(x1 = 5e-05, x2 = 0.4)
p <- optim(g, LS1)

## show optimal values
p$par

##        x1        x2 
## 7.955e-05 3.227e-01


## show MSE
LS1(p$par)

## [1] 791199318


optimal <- as.vector(p$par)

out.opt <- runTSIR(guess = c(x1 = as.numeric(p$par[1]), x2 = as.numeric(p$par[2])), 
    Beta = "SCH")

head(out.opt)

##       S     I      R     CI  Beta.TMP  Beta.SCH  Beta.CST
## 1 21529 488.0 709623  488.0 5.460e-05 5.388e-05 7.955e-05
## 2 21468 269.3 710111  757.3 5.122e-05 5.388e-05 7.955e-05
## 3 21411 310.9 710380 1068.2 5.866e-05 1.052e-04 7.955e-05
## 4 21303 351.7 710691 1419.9 5.847e-05 1.052e-04 7.955e-05
## 5 21207 390.1 711043 1810.0 5.847e-05 1.052e-04 7.955e-05
## 6 21047 425.4 711433 2235.4 7.328e-05 1.052e-04 7.955e-05

## plot of our predicted incidences (in red) versus actuall incidences (in
## black)

plot(BAL$BWK, out.opt$I, col = "red", type = "l", lwd = 2, main = "School", 
    , ylim = c(0, 6500))
lines(BAL$BWK, BAL$CASES.i, col = "black", lwd = 1)
legend("topright", c(paste("MSE:", round(LS1(p$par), 0), sep = "")))

plot of chunk unnamed-chunk-3

## optimizing for beta parameters sbar and x1-x2

LS1 <- function(x) {
    sum((runTSIR(guess = x, Beta = "CST")$I - BAL$CASES.i/u)^2)
}

g <- c(x1 = 5e-05, x2 = 0.4)
p <- optim(g, LS1)

## show optimal values
p$par

##        x1        x2 
## 0.0000821 0.4018619


## show MSE
LS1(p$par)

## [1] 942511346


optimal <- as.vector(p$par)

out.opt <- runTSIR(guess = c(x1 = as.numeric(p$par[1]), x2 = as.numeric(p$par[2])), 
    Beta = "CST")

head(out.opt)

##       S     I      R     CI  Beta.TMP  Beta.SCH Beta.CST
## 1 21529 488.0 709623  488.0 5.003e-05 4.911e-05 8.21e-05
## 2 21468 410.4 710111  898.4 4.568e-05 4.911e-05 8.21e-05
## 3 21411 351.3 710521 1249.7 5.525e-05 1.151e-04 8.21e-05
## 4 21303 305.7 710872 1555.4 5.500e-05 1.151e-04 8.21e-05
## 5 21207 269.0 711178 1824.4 5.500e-05 1.151e-04 8.21e-05
## 6 21047 239.3 711447 2063.7 7.404e-05 1.151e-04 8.21e-05

## plot of our predicted incidences (in red) versus actuall incidences (in
## black)

plot(BAL$BWK, out.opt$I, col = "red", type = "l", lwd = 2, main = "Constant Beta", 
    ylim = c(0, 6500))
lines(BAL$BWK, BAL$CASES.i, col = "black", lwd = 1)
legend("topright", c(paste("MSE:", round(LS1(p$par), 0), sep = "")))

plot of chunk unnamed-chunk-4