Baltimore Measles: 1920-1948

alpha = 0.88

BAL <- read.csv("C:/Users/Lisa/Desktop/677/BAL.FINAL.REVISED.csv", header = TRUE)

# BAL <- read.csv('C:/Users/Lisa/Desktop/677/BAL.csv', header=TRUE) BAL <-
# BAL[seq(1,length(BAL$CASES.i),by=2),]

## definition of school indicator
SCH <- c(-1, -1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
    1, 1, 1, 1, 1, 1, -1)

SCH <- rep(SCH, length(BAL$CASES))[1:length(BAL$CASES)]

BAL$SCH <- SCH

## to analyze a subset of the data
BAL <- BAL[c(1:length(BAL$CASES)), ]

BAL$CASES <- as.numeric(BAL$CASES)
BAL$CASES.i <- as.numeric(BAL$CASES.i)
BAL$TMAX <- as.numeric(BAL$TMAX)
BAL$TMIN <- as.numeric(BAL$TMIN)
BAL$WEEK <- as.numeric(BAL$WEEK)
BAL$BWK <- as.numeric(BAL$BWK)
BAL$SCH <- as.numeric(BAL$SCH)
BAL$B <- as.numeric(BAL$B)
BAL$B.5 <- as.numeric(BAL$B.5)

## to change back and forth from B.5 vs. B BAL$B.5 <- BAL$B

## this is what the data looks like head(BAL)

plot(cumsum(BAL$B.5), cumsum(BAL$CASES.i), type = "l", xlab = "Cumulative sum of births (lagged 5 years)", 
    ylab = "Cumulative sum of reported cases")
x <- c(1:1e+06)
lines(x, x, col = "red")
legend("bottomright", c("incidence", "expected incidence"), col = c("black", 
    "red", "gray"), lty = 1)


## fit a smooth spline of cumulative measles on cumulative births with 2.5
## degrees of freedom

cum.reg <- smooth.spline(cumsum(BAL$B.5), cumsum(BAL$CASES.i), df = 2.5)

lm <- lm(cumsum(BAL$CASES.i) ~ cumsum(BAL$B.5))
# abline(lm, col='gray')

## predict points using the smooth spline and calculate residuals, D
D <- predict(cum.reg)$y - cumsum(BAL$CASES.i)

B <- BAL$B.5

# plot(D, type='l')

## under reporting is given by slope of smooth spline
u <- predict(cum.reg, deriv = 1)$y
rho <- mean(u)

legend("topleft", paste("rho", round(rho, 3), sep = " = "))

plot of chunk unnamed-chunk-1



## Ic are actual cases - reported cases divided by u
Ic = BAL$CASES.i/u

# plot(Ic, type='l')

lIt = log(Ic[2:(length(BAL$CASES) + 1)])
lItm1 = log(Ic[1:length(BAL$CASES)])
Dtm1 = D[1:length(BAL$CASES)]

## remove values of -Inf from I - glm function does not like these!
for (i in 1:(length(lIt) - 1)) {
    if (lIt[i] == -Inf) {
        lIt[i] <- 0
    }
}

for (i in 1:length(lItm1)) {
    if (lItm1[i] == -Inf) {
        lItm1[i] <- 0
    }
}

## mean populaiton estimate
pop = mean(BAL$POP.5)
seas = rep(1:26, length(BAL$CASES))[1:length(BAL$CASES)]

seas <- as.factor(seas)

## test Smeans from 1% to whole population
Smean = seq(0.01, 1, by = 0.001) * pop

## this is a place to store the likelihoods of the data for each setting
## of Smean
llik = rep(NA, length(Smean))

## now perform the log linear regressions at each Smean
for (i in 1:length(Smean)) {
    lStm1 = log(Smean[i] + Dtm1)
    glmfit = glm(lIt ~ -1 + as.factor(seas) + lItm1 + offset(lStm1))
    llik[i] = glmfit$deviance
}

sbar <- Smean[which(llik == min(llik))]
sbar

## [1] 23598


## plot likelihood curve
plot(Smean, llik, type = "l", xlim = c(0, (0.5 * pop)), xlab = "sbar", ylab = "deviance (- log likelihood)")
legend("topright", paste("sbar", round(sbar, 0), sep = " = "))

plot of chunk unnamed-chunk-1


## The Smean we want to use is the one that minimizes the log likelihood


plot(D + sbar, type = "l", xlab = "Biweek", ylab = "S")

plot of chunk unnamed-chunk-1


sbar.def <- sbar
D.def <- D
B.def <- BAL$B.5
alpha.def <- 0.88

## TSIR code

## pass B, sbar, and D results from above and guess at coefficients for
## Beta function

runTSIR <- function(alpha = alpha.def, B = B.def, sbar = sbar.def, D = D.def, 
    Beta = "TMP", guess = c(x1 = 3.8e-05, x2 = 0.4), initial.state = c(S = sbar.def - 
        Ic[1] * rho, I = Ic[1] * rho, R = BAL$POP[1] - sbar.def - Ic[1] * rho, 
        CI = Ic[1] * rho)) {

    ## create empty vectors to store S, I, R, B, Beta estimates
    S <- rep(NA, length(BAL$CASES))
    I <- rep(NA, length(BAL$CASES))
    R <- rep(NA, length(BAL$CASES))
    CI <- rep(NA, length(BAL$CASES))

    ## set time = 1 values to initial states
    S[1] <- sbar + D[1]
    I[1] <- initial.state["I"]
    R[1] <- initial.state["R"]
    CI[1] <- initial.state["CI"]

    ## betas are a function of the normalized climate data - I used tmax here.
    ## The x1-x2 are parameters to fit the seasonal forcing equation.
    tmax <- (BAL$TMAX - 180)/150
    Beta.TMP <- guess["x1"] * (1 + (guess["x2"] * (tmax)))
    Beta.SCH <- guess["x1"] * (1 + (guess["x2"] * (BAL$SCH)))
    Beta.CST <- rep(guess["x1"], length(BAL$CASES))

    if (Beta == "TMP") {
        for (t in 2:length(BAL$CASES)) {
            S[t] <- D[t] + sbar
            I[t] <- Beta.TMP[t] * S[t - 1] * (I[t - 1]^alpha)
            R[t] <- I[t - 1] + R[t - 1]
            CI[t] <- I[t] + CI[t - 1]
        }
    } else if (Beta == "SCH") {
        for (t in 2:length(BAL$CASES)) {
            S[t] <- D[t] + sbar
            I[t] <- Beta.SCH[t] * S[t - 1] * (I[t - 1]^alpha)
            R[t] <- I[t - 1] + R[t - 1]
            CI[t] <- I[t] + CI[t - 1]
        }
    } else if (Beta == "CST") {
        for (t in 2:length(BAL$CASES)) {
            S[t] <- D[t] + sbar
            I[t] <- Beta.CST[t] * S[t - 1] * (I[t - 1]^alpha)
            R[t] <- I[t - 1] + R[t - 1]
            CI[t] <- I[t] + CI[t - 1]
        }
    }

    tsir.output <- data.frame(S, I, R, CI, Beta.TMP, Beta.SCH, Beta.CST)
}


######################## LS1 is temperature ##
LS1 <- function(x) {
    sum((runTSIR(guess = x, Beta = "TMP")$I - BAL$CASES.i/u)^2)
}

g <- c(x1 = 2.5e-05, x2 = -0.4)
p <- optim(g, LS1)

## show optimal values
p$par

##         x1         x2 
##  7.929e-05 -3.328e-01


## show MSE
MSE1 <- LS1(p$par)
MSE1

## [1] 8.07e+08


optimal1 <- as.vector(p$par)

out.opt1 <- runTSIR(guess = c(x1 = as.numeric(p$par[1]), x2 = as.numeric(p$par[2])), 
    Beta = "TMP")

head(out.opt1)

##       S     I      R     CI  Beta.TMP  Beta.SCH  Beta.CST
## 1 21412 182.5 837593  182.5 0.0001067 0.0001057 7.929e-05
## 2 21351 230.5 837775  413.1 0.0001102 0.0001057 7.929e-05
## 3 21294 262.7 838006  675.8 0.0001025 0.0000529 7.929e-05
## 4 21185 294.5 838269  970.3 0.0001027 0.0000529 7.929e-05
## 5 21090 324.0 838563 1294.3 0.0001027 0.0000529 7.929e-05
## 6 20930 298.8 838887 1593.1 0.0000875 0.0000529 7.929e-05


######################## LS2 is school #######

LS2 <- function(x) {
    sum((runTSIR(guess = x, Beta = "SCH")$I - BAL$CASES.i/u)^2)
}

g <- c(x1 = 2.5e-05, x2 = 0.4)
k <- optim(g, LS2)

par(mfrow = c(1, 3), mar = c(4, 4, 4, 2))
plot(BAL$BWK, out.opt1$S, type = "l", xlab = "Biweek", ylab = "Number of people", 
    main = "S")
plot(BAL$BWK, out.opt1$I, type = "l", xlab = "Biweek", ylab = "", main = "I")
plot(BAL$BWK, out.opt1$R, type = "l", xlab = "Biweek", ylab = "", main = "R")

plot of chunk unnamed-chunk-1

par(mfrow = c(1, 1))

## show optimal values
k$par

##        x1        x2 
## 7.985e-05 3.223e-01


## show MSE
MSE2 <- LS2(k$par)
MSE2

## [1] 794704947


optimal2 <- as.vector(k$par)

out.opt2 <- runTSIR(guess = c(x1 = as.numeric(k$par[1]), x2 = as.numeric(k$par[2])), 
    Beta = "SCH")

head(out.opt2)

##       S     I      R     CI  Beta.TMP  Beta.SCH  Beta.CST
## 1 21412 182.5 837593  182.5 5.313e-05 5.412e-05 7.985e-05
## 2 21351 113.2 837775  295.8 4.973e-05 5.412e-05 7.985e-05
## 3 21294 144.7 837889  440.5 5.720e-05 1.056e-04 7.985e-05
## 4 21185 179.1 838033  619.6 5.700e-05 1.056e-04 7.985e-05
## 5 21090 215.0 838212  834.6 5.700e-05 1.056e-04 7.985e-05
## 6 20930 251.3 838427 1085.9 7.185e-05 1.056e-04 7.985e-05


########################## LS3 is constant #######

LS3 <- function(x) {
    sum((runTSIR(guess = x, Beta = "CST")$I - BAL$CASES.i/u)^2)
}

g <- c(x1 = 2.5e-05, x2 = 0.8)
q <- optim(g, LS3)

## show optimal values
q$par

##        x1        x2 
## 8.241e-05 8.047e-01


## show MSE
MSE3 <- LS3(q$par)
MSE3

## [1] 944901779


optimal3 <- as.vector(q$par)
out.opt3 <- runTSIR(guess = c(x1 = as.numeric(q$par[1]), x2 = as.numeric(q$par[2])), 
    Beta = "CST")

head(out.opt3)

##       S     I      R    CI  Beta.TMP  Beta.SCH  Beta.CST
## 1 21412 182.5 837593 182.5 1.354e-05 1.609e-05 8.241e-05
## 2 21351 172.4 837775 355.0 4.788e-06 1.609e-05 8.241e-05
## 3 21294 163.5 837948 518.5 2.402e-05 1.487e-04 8.241e-05
## 4 21185 155.7 838111 674.2 2.351e-05 1.487e-04 8.241e-05
## 5 21090 148.3 838267 822.5 2.351e-05 1.487e-04 8.241e-05
## 6 20930 141.5 838415 964.0 6.179e-05 1.487e-04 8.241e-05



## get average betas for each biweek
Beta.TMP <- rep(NA, 26)

for (i in 1:26) {
    Beta.TMP[i] <- mean(out.opt1$Beta.TMP[rep(i, length(out.opt1$Beta.TMP), 
        by = 26)])
}

Beta.SCH <- rep(NA, 26)

for (i in 1:26) {
    Beta.SCH[i] <- mean(out.opt2$Beta.SCH[rep(i, length(out.opt2$Beta.SCH), 
        by = 26)])
}

Beta.CST <- rep(NA, 26)

for (i in 1:26) {
    Beta.CST[i] <- mean(out.opt3$Beta.CST[rep(i, length(out.opt3$Beta.CST), 
        by = 26)])
}

## Betas.26 is dataframe of average betas for 1 year
Betas.26 <- data.frame(Beta.TMP, Beta.SCH, Beta.CST)


plot(Betas.26$Beta.TMP, col = "magenta", type = "l", ylab = "Beta", xlab = "Biweek", 
    xlim = c(1, 26), lwd = 2, pch = 16, ylim = c(min(Beta.SCH, Beta.TMP), max(Beta.SCH, 
        Beta.TMP)))
lines(Betas.26$Beta.SCH, col = "darkgreen", lwd = 2)
lines(Betas.26$Beta.CST, col = "blue", lwd = 2)
legend("topright", c("Constant     ", "School", "Temperature   "), col = c("blue", 
    "darkgreen", "magenta"), lwd = 2)

plot of chunk unnamed-chunk-1


## Betas is all betas
Betas <- data.frame(Beta.TMP = out.opt1$Beta.TMP, Beta.SCH = out.opt2$Beta.SCH, 
    Beta.CST = out.opt3$Beta.CST)


# plot(BAL$BWK,out.opt1$I, type='l', col='purple',
# ylim=c(0,4000),xlim=c(0,26*10)) lines(BAL$BWK, out.opt2$I, type='l',
# col='orange')


## Fourier analysis - plot spectrum and ID dominant frequencies
freq <- spectrum(BAL$CASES.i)$freq

plot of chunk unnamed-chunk-1

spec <- spectrum(BAL$CASES.i)$spec

data <- data.frame(freq, spec, period = 1/freq, period.yrs = (1/freq)/26)
data <- data[order(-spec), ]
head(data)

##       freq     spec period period.yrs
## 29 0.03867 11486830  25.86     0.9947
## 11 0.01467  8526286  68.18     2.6224
## 18 0.02400  5032060  41.67     1.6026
## 28 0.03733  3668268  26.79     1.0302
## 10 0.01333  2972409  75.00     2.8846
## 40 0.05333  2830025  18.75     0.7212


par(mar = c(4, 4, 2, 2))
spectrum(BAL$CASES.i, main = "", xlab = "Frequency (1/Period)", ylab = "Spectral Density", 
    ylim = c(10, 1e+08), xlim = c(-0.01, 0.5))
text(data[1, 1], data[1, 2], paste(round(data[1, 3], 1), "bwks", sep = "\n"), 
    pos = 4, cex = 0.7)
text(data[2, 1], data[2, 2], paste(round(data[2, 3], 1), "bwks", sep = "\n"), 
    pos = 3, cex = 0.7)

plot of chunk unnamed-chunk-1


## parameters
param <- data.frame(TEMP = p$par, SCHOOL = k$par, CONSTANT = c(q$par[1], NA))
row.names(param) <- c("beta_0", "beta_1")

## plot fits against data
plot(BAL$BWK, out.opt1$I, type = "l", col = "red", lwd = 2, ylim = c(0, max(BAL$CASES.i)))
lines(BAL$BWK, BAL$CASES.i)
legend("topright", c(paste("MSE:", round(MSE1, 0), sep = "")))

plot of chunk unnamed-chunk-1


plot(BAL$BWK, out.opt2$I, type = "l", col = "red", lwd = 2, ylim = c(0, max(BAL$CASES.i)))
lines(BAL$BWK, BAL$CASES.i)
legend("topright", c(paste("MSE:", round(MSE2, 0), sep = "")))

plot of chunk unnamed-chunk-1


plot(BAL$BWK, out.opt3$I, type = "l", col = "red", lwd = 2, ylim = c(0, max(BAL$CASES.i)))
lines(BAL$BWK, BAL$CASES.i)
legend("topright", c(paste("MSE:", round(MSE3, 0), sep = "")))

plot of chunk unnamed-chunk-1


param

##              TEMP    SCHOOL  CONSTANT
## beta_0  7.929e-05 7.985e-05 8.241e-05
## beta_1 -3.328e-01 3.223e-01        NA