Predicting Spring Tornado Activity in the Central Great Plains By March 1st

James B. Elsner and Holly M. Widen

Code in support of our paper to be submitted to Monthly Weather Review

date()
## [1] "Sat Dec 29 07:40:20 2012"

Set working directory, load packages, and read bibtex file.

setwd("~/Dropbox/Tornadoes/Holly")
require(maptools)
require(maps)
require(mapproj)
require(rgeos)
require(rgdal)
require(ggmap)
require(plyr)
require(INLA)
require(vcd)
require(reshape)
require(ADGofTest)
require(spatstat)

Download the tornado data from the Storm Prediction Center (SPC) http://www.spc.noaa.gov/gis/svrgis/

# tmp =
# download.file('http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip',
# 'tornado.zip', mode='wb')
unzip("tornado.zip")
Torn = readOGR(dsn = ".", layer = "tornado")
## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "tornado"
## with 56221 features and 28 fields
## Feature type: wkbPoint with 2 dimensions
print(proj4string(Torn))
## [1] "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
names(Torn)
##  [1] "OM"         "YEAR"       "MONTH"      "DAY"        "DATE"      
##  [6] "TIME"       "TIMEZONE"   "STATE"      "FIPS"       "STATENUMBE"
## [11] "FSCALE"     "INJURIES"   "FATALITIES" "LOSS"       "CROPLOSS"  
## [16] "SLAT"       "SLON"       "ELAT"       "ELON"       "LENGTH"    
## [21] "WIDTH"      "NS"         "SN"         "SG"         "F1"        
## [26] "F2"         "F3"         "F4"

The number of tornado reports in the data set is 56221.

Obtain a google map centered on Russell KS with a zoom of 7. Subset the tornado reports by map domain.

ourlocation = geocode("Russell KS")
Map = get_map(location = unlist(ourlocation), source = "google", maptype = "hybrid", 
    zoom = 7)
lla = attr(Map, "bb")$ll.lat
llo = attr(Map, "bb")$ll.lon
ula = attr(Map, "bb")$ur.lat
ulo = attr(Map, "bb")$ur.lon
Torn2 = subset(Torn, Torn$SLAT <= ula & Torn$SLAT >= lla & Torn$SLON <= ulo & 
    Torn$SLON >= llo)

The map shows touchdown locations of 6328 tornadoes.

Create a bar chart of tornado frequency by month type

p1 = ggplot(Torn2@data, aes(as.factor(MONTH))) + geom_bar()
p1 + ylab("Tornado Reports (F0-F5)") + xlab("") + theme_bw() + scale_x_discrete(breaks = as.character(1:12), 
    labels = month.abb)

plot of chunk barchartmonthlyFrequency

Total = sum(table(Torn2$MONTH))
Perc = sum(table(Torn2$MONTH)[4:6])/Total * 100

FIGURE 1: Histogram of monthly tornadoes.

Create a map of the tornadoes centered on Russell, KS.

Torn3 = subset(Torn2, Torn2$MONTH >= 4 | Torn2$MONTH <= 6)
Torn3 = subset(Torn3, Torn3$FSCALE >= 0)
Torn3$F.Scale = factor(Torn3$FSCALE, ordered = TRUE)
p2 = ggmap(Map, extent = "normal") + theme_bw() + geom_point(aes(x = SLON, y = SLAT, 
    size = F.Scale), color = "green", alpha = 0.5, data = Torn3@data) + xlab(expression(paste("Longitude (", 
    degree, "E)"))) + ylab(expression(paste("Latitude (", degree, "N)")))
p2

plot of chunk map

Total2 = sum(table(Torn3$MONTH))
Dif = Total - Total2
Perc3 = Dif/Total * 100
Perc2 = sum(table(Torn3$MONTH)[4:6])/Total * 100

FIGURE 2: Tornado reports (F0–F5) over the study region centered on Russell, KS during April–June for the period 1950–2011. Point size is proportion to the F scale.

trpyWide = ddply(Torn3@data, .(YEAR), summarize, F0 = sum(FSCALE == 0), F1 = sum(FSCALE == 
    1), F2 = sum(FSCALE == 2), F3 = sum(FSCALE == 3), F4 = sum(FSCALE == 4), 
    F5 = sum(FSCALE == 5), F0F5 = sum(FSCALE >= 0), F1F5 = sum(FSCALE >= 1), 
    F2F5 = sum(FSCALE >= 2), F3F5 = sum(FSCALE >= 3), F4F5 = sum(FSCALE >= 4))
trpyLong = melt(trpyWide, id = "YEAR", measure.vars = c("F0F5", "F1F5", "F2F5", 
    "F3F5", "F4F5"))
trpyLong$"F0-F5" = trpyLong$F0F5
trpyLong$"F1-F5" = trpyLong$F1F5
trpyLong$"F2-F5" = trpyLong$F2F5
trpyLong$"F3-F5" = trpyLong$F3F5
trpyLong$"F4-F5" = trpyLong$F4F5
p3 = ggplot(trpyLong, aes(x = YEAR, y = value)) + geom_point() + facet_grid(variable ~ 
    ., scale = "free_y")
p3 = p3 + geom_smooth(method = "loess", span = 1, color = "green") + theme_bw()
p3 + ylab("Number of Tornado Reports") + xlab("")

plot of chunk timeSeriesPlot

FIGURE 3: Time series of tornado reports by F scale groups over the study region centered on Russell, KS.

Run this code chunk to create the function multiplot(). The function is used to arrange individual ggplot objects on a single page.

# Multiple plot function
# 
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot
# objects) - cols: Number of columns in layout - layout: A matrix
# specifying the layout. If present, 'cols' is ignored.
# 
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.  Author: winston@stdout.org
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
    require(grid)

    # Make a list from the ... arguments and plotlist
    plots <- c(list(...), plotlist)

    numPlots = length(plots)

    # If layout is NULL, then use 'cols' to determine layout
    if (is.null(layout)) {
        # Make the panel ncol: Number of columns of plots nrow: Number of rows
        # needed, calculated from # of cols
        layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), ncol = cols, 
            nrow = ceiling(numPlots/cols), byrow = TRUE)
    }

    if (numPlots == 1) {
        print(plots[[1]])

    } else {
        # Set up the page
        grid.newpage()
        pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

        # Make each plot, in the correct location
        for (i in 1:numPlots) {
            # Get the i,j matrix positions of the regions that contain this subplot
            matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

            print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, layout.pos.col = matchidx$col))
        }
    }
}
yl = "Number of Years"
p4a = ggplot(trpyWide, aes(x = F0F5)) + geom_histogram(binwidth = 20, color = "white") + 
    scale_y_continuous(limits = c(0, 13)) + theme_bw() + ylab(yl) + xlab("F0-F5 Tornado Reports")
p4b = ggplot(trpyWide, aes(x = F1F5)) + geom_histogram(binwidth = 10, color = "white") + 
    scale_y_continuous(limits = c(0, 20)) + theme_bw() + ylab(yl) + xlab("F1-F5 Tornado Reports")
p4c = ggplot(trpyWide, aes(x = F2F5)) + geom_histogram(binwidth = 5, color = "white") + 
    scale_y_continuous(limits = c(0, 16)) + theme_bw() + ylab(yl) + xlab("F2-F5 Tornado Reports")
p4d = ggplot(trpyWide, aes(x = F3F5)) + geom_histogram(binwidth = 2, color = "white") + 
    scale_y_continuous(limits = c(0, 22)) + theme_bw() + ylab(yl) + xlab("F3-F5 Tornado Reports")
multiplot(p4a, p4b, p4c, p4d, cols = 2)

plot of chunk histograms

FIGURE 4: Histograms of annual tornado reports by F scale grouping.

m = sapply(trpyWide[, 8:12], mean)
v = sapply(trpyWide[, 8:12], var)
ratio = v/m
m
##   F0F5   F1F5   F2F5   F3F5   F4F5 
## 95.677 42.855 16.677  4.871  1.000
v
##     F0F5     F1F5     F2F5     F3F5     F4F5 
## 2660.025  372.913   95.763   19.327    1.672
ratio
##   F0F5   F1F5   F2F5   F3F5   F4F5 
## 27.802  8.702  5.742  3.968  1.672
summary(goodfit(trpyWide$F3F5, type = "poisson"))
## 
##   Goodness-of-fit test for poisson distribution
## 
##                    X^2 df  P(> X^2)
## Likelihood Ratio 100.3 13 1.433e-15

Annual statistics by F-scale grouping.
Group & Mean & Variance & Ratio & \( p \)-value\ \hline
F0–F5 & 95.7 & 2660 & 27.8 & $<$0.001\
F1–F5 & 42.9 & 372.9 & 8.70 & $<$0.001\
F2–F5 & 16.7 & 95.76 & 5.74 & $<$0.001\
F3–F5 & 4.87 & 19.33 & 3.97 & $<$0.001\
F4–F5 & 1.00 & 1.672 & 1.67 & 0.016 \ \hline

StudyDomain = data.frame(lon = c(llo, llo, ulo, ulo, llo), lat = c(lla, ula, 
    ula, lla, lla))
lla2 = 15
llo2 = -90
ula2 = 25
ulo2 = -70
GOM = data.frame(lon = c(llo2, llo2, ulo2, ulo2, llo2), lat = c(lla2, ula2, 
    ula2, lla2, lla2))
lla3 = 50.364
llo3 = -153.662
ula3 = 59.948
ulo3 = -136.084
GOAK = data.frame(lon = c(llo3, llo3, ulo3, ulo3, llo3), lat = c(lla3, ula3, 
    ula3, lla3, llo3))
Map2 = get_map(location = unlist(ourlocation), source = "google", maptype = "satellite", 
    zoom = 3)
p5 = ggmap(Map2, extent = "device") + geom_polygon(data = StudyDomain, aes(x = lon, 
    y = lat), color = "green", fill = "green", alpha = 0.4) + geom_polygon(data = GOAK, 
    aes(x = lon, y = lat), color = "cyan", fill = "cyan", alpha = 0.4) + geom_polygon(data = GOM, 
    aes(x = lon, y = lat), color = "red", fill = "red", alpha = 0.4)
p5

plot of chunk map2

FIGURE 5: Tornado and SST regions.

WCA bbox: 15, 25, -70, -90
GAK bbox: 50.364, -136.084, 59.948, -153.662

dat = trpyWide
KSPDSI = read.table("KSPDSI.txt")
names(KSPDSI) = c("YEAR", paste(month.abb, "KSPDSI", sep = ""))
KSPDSI = KSPDSI[, c(1, 3)]  # YEAR & February
dat = merge(dat, KSPDSI, by = "YEAR")
SOI = read.table("SOIstd.txt")
names(SOI) = c("YEAR", paste(month.abb, "SOI", sep = ""))
SOI = SOI[, c(1, 3)]  # YEAR & February
dat = merge(dat, SOI, by = "YEAR")
TNI = read.table("TNI.txt")
names(TNI) = c("YEAR", paste(month.abb, "TNI", sep = ""))
TNI = TNI[, c(1, 3)]
dat = merge(dat, TNI, by = "YEAR")
NAO = read.table("NAO.txt")
names(NAO) = c("YEAR", paste(month.abb, "NAO", sep = ""))
NAO = NAO[, c(1, 3)]
dat = merge(dat, NAO, by = "YEAR")
PNA = read.table("PNA.txt")
names(PNA) = c("YEAR", paste(month.abb, "PNA", sep = ""))
PNA = PNA[, c(1, 3)]
dat = merge(dat, PNA, by = "YEAR")
GOM = read.table("GOMSST2.txt", na.string = "-9.900", header = FALSE)
names(GOM) = c("YEAR", paste(month.abb, "GOM", sep = ""))
GOM = GOM[, c(1, 3)]
dat = merge(dat, GOM, by = "YEAR")
GAK = read.table("GOAKSST4.txt", na.string = "-9.900", header = FALSE)
names(GAK) = c("YEAR", paste(month.abb, "GAK", sep = ""))
GAK = GAK[, c(1, 3)]
annual = merge(dat, GAK, by = "YEAR")

Time series plot of GOM and GAK SST.

annual$"Gulf of Mexico" = annual$FebGOM
annual$"Gulf of Alaska" = annual$FebGAK
covLong = melt(annual, id = "YEAR", measure.vars = c("Gulf of Mexico", "Gulf of Alaska"))
# covLong$'Gulf of Mexico' = covLong$FebGOM
p6 = ggplot(covLong, aes(x = YEAR, y = value)) + geom_line() + facet_grid(variable ~ 
    ., scale = "free_y") + ylab(expression(paste("February Sea-Surface Temperature (", 
    degree, "C)")))
p6 = p6 + geom_smooth(method = "loess", span = 1, color = "gray") + theme_bw() + 
    xlab("")
p6

plot of chunk timeSeriesCovariates

ccAll = cor(annual$FebGOM, annual$FebGAK)
cc04a = cor(annual$FebGOM[annual$YEAR >= 2004], annual$FebGAK[annual$YEAR >= 
    2004])
cc04b = cor(annual$FebGOM[annual$YEAR < 2004], annual$FebGAK[annual$YEAR < 2004])

FIGURE 6: Time series of February sea-surface temperature in the Gulf of Mexico and the Gulf of Alaska.

source(“http://www.math.ntnu.no/inla/givemeINLA.R”)

formula1 = F0F5 ~ FebGOM + FebGAK + f(YEAR, model = "ar1")
modF0F5 = inla(formula1, family = "nbinomial", data = annual, control.compute = list(dic = TRUE, 
    cpo = TRUE), control.predictor = list(link = 1))
summary(modF0F5)
## 
## Call:
## c("inla(formula = formula1, family = \"nbinomial\", data = annual, ",  "    control.compute = list(dic = TRUE, cpo = TRUE), control.predictor = list(link = 1))" )
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.2016          0.4002          0.1579          0.7597 
## 
## Fixed effects:
##                mean      sd 0.025quant 0.5quant 0.975quant       kld
## (Intercept) -4.3548 4.52454  -13.23917  -4.3606    4.56739 4.337e-07
## FebGOM       0.3740 0.18060    0.01834   0.3741    0.72904 2.923e-06
## FebGAK      -0.1626 0.09514   -0.34958  -0.1627    0.02482 2.188e-05
## 
## Random effects:
## Name   Model     Max KLD 
## YEAR   AR1 model 
## 
## Model hyperparameters:
##                                                      mean     sd      
## size for the nbinomial observations (overdispersion)  8.31353  1.81485
## Precision for YEAR                                    9.07254  5.40301
## Rho for YEAR                                          0.94753  0.03696
##                                                      0.025quant 0.5quant
## size for the nbinomial observations (overdispersion)  5.26488    8.14307
## Precision for YEAR                                    2.30152    7.90552
## Rho for YEAR                                          0.85314    0.95611
##                                                      0.975quant
## size for the nbinomial observations (overdispersion) 12.35273  
## Precision for YEAR                                   22.74742  
## Rho for YEAR                                          0.99175  
## 
## Expected number of effective parameters(std dev): 12.11(3.512)
## Number of equivalent replicates : 5.12 
## 
## Deviance Information Criterion: 600.98
## Effective number of parameters: 13.03
## 
## Marginal Likelihood:  -329.40 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed
ad.test(modF0F5$cpo$pit, distr.fun = punif)$p.value
##    AD 
## 0.159
-mean(log(modF0F5$cpo$cpo))
## [1] 4.75
dfF0F5 = data.frame(Year = annual$YEAR, obs = annual$F0F5, modF0F5$summary.fitted.values)
cor(dfF0F5$obs, dfF0F5$mean)
## [1] 0.8763
limits = aes(ymax = dfF0F5$X0.975, ymin = dfF0F5$X0.025quant)
p8a = ggplot(dfF0F5, aes(x = obs, y = mean)) + geom_point() + geom_smooth(method = "lm") + 
    geom_abline(slope = 1, col = "gray") + theme_bw() + geom_linerange(limits) + 
    scale_y_continuous(limits = c(min(dfF0F5$obs), max(dfF0F5$X0.975))) + scale_x_continuous(limits = c(min(dfF0F5$obs), 
    max(dfF0F5$X0.975))) + xlab("Observed Number of Reports") + ylab("Predicted Number of Reports") + 
    ggtitle("F0-F5 Tornadoes")
mGOM = data.frame(x = modF0F5$marginals.fixed$FebGOM[, 1], y = modF0F5$marginals.fixed$FebGOM[, 
    2])
mGOM$x = (exp(mGOM$x) - 1) * 100
mGAK = data.frame(x = modF0F5$marginals.fixed$FebGAK[, 1], y = modF0F5$marginals.fixed$FebGAK[, 
    2])
mGAK$x = (exp(mGAK$x) - 1) * 100
p7a = ggplot(mGOM, aes(x, y)) + geom_line(col = "red") + theme_bw() + geom_line(aes(x, 
    y), data = mGAK, col = "blue") + xlab(expression(paste("% Change in Number of Reports /", 
    degree, "C"))) + ylab("Posterior Density") + geom_vline(x = 0, col = "gray") + 
    scale_x_continuous(limits = c(-100, 300)) + scale_y_continuous(limits = c(0, 
    4.3)) + ggtitle("F0-F5 Tornadoes")
formula1 = F1F5 ~ FebGOM + FebGAK + FebSOI + FebNAO + FebPNA + f(YEAR, model = "ar1")
modF1F5 = inla(formula1, family = "nbinomial", data = annual, control.compute = list(dic = TRUE, 
    cpo = TRUE), control.predictor = list(link = 1))
summary(modF1F5)
## 
## Call:
## c("inla(formula = formula1, family = \"nbinomial\", data = annual, ",  "    control.compute = list(dic = TRUE, cpo = TRUE), control.predictor = list(link = 1))" )
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##         0.13037         0.34895         0.05453         0.53385 
## 
## Fixed effects:
##                  mean      sd 0.025quant  0.5quant 0.975quant       kld
## (Intercept) -7.882030 5.18492  -18.09141 -7.878107    2.31010 6.337e-06
## FebGOM       0.494197 0.20626    0.08907  0.493975    0.90075 9.157e-06
## FebGAK      -0.232075 0.14416   -0.51459 -0.232465    0.05277 2.808e-05
## FebSOI      -0.012175 0.05091   -0.11293 -0.011967    0.08742 3.985e-07
## FebNAO      -0.005758 0.06491   -0.13406 -0.005539    0.12135 4.662e-05
## FebPNA      -0.030642 0.07819   -0.18503 -0.030426    0.12260 4.912e-05
## 
## Random effects:
## Name   Model     Max KLD 
## YEAR   AR1 model 
## 
## Model hyperparameters:
##                                                      mean       sd        
## size for the nbinomial observations (overdispersion)  5.250e+00  1.050e+00
## Precision for YEAR                                    1.872e+04  1.842e+04
## Rho for YEAR                                          2.050e-03  6.980e-01
##                                                      0.025quant 0.5quant  
## size for the nbinomial observations (overdispersion)  3.462e+00  5.159e+00
## Precision for YEAR                                    1.280e+03  1.330e+04
## Rho for YEAR                                         -9.875e-01  4.333e-03
##                                                      0.975quant
## size for the nbinomial observations (overdispersion)  7.567e+00
## Precision for YEAR                                    6.722e+04
## Rho for YEAR                                          9.876e-01
## 
## Expected number of effective parameters(std dev): 6.03(0.03916)
## Number of equivalent replicates : 10.28 
## 
## Deviance Information Criterion: 537.38
## Effective number of parameters: 6.821
## 
## Marginal Likelihood:  -297.53 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed
ad.test(modF1F5$cpo$pit, distr.fun = punif)$p.value
##     AD 
## 0.8785
-mean(log(modF1F5$cpo$cpo))
## [1] 4.305
dfF1F5 = data.frame(Year = annual$YEAR, obs = annual$F1F5, modF1F5$summary.fitted.values)
cor(dfF1F5$obs, dfF1F5$mean)
## [1] 0.4429
limits = aes(ymax = dfF1F5$X0.975, ymin = dfF1F5$X0.025quant)
p8b = ggplot(dfF1F5, aes(x = obs, y = mean)) + geom_point() + geom_smooth(method = "lm") + 
    geom_abline(slope = 1, col = "gray") + theme_bw() + geom_linerange(limits) + 
    scale_y_continuous(limits = c(min(dfF1F5$obs), max(dfF1F5$obs))) + scale_x_continuous(limits = c(min(dfF1F5$obs), 
    max(dfF1F5$obs))) + xlab("Observed Number of Reports") + ylab("Predicted Number of Reports") + 
    ggtitle("F1-F5 Tornadoes")
mGOM = data.frame(x = modF1F5$marginals.fixed$FebGOM[, 1], y = modF1F5$marginals.fixed$FebGOM[, 
    2])
mGOM$x = (exp(mGOM$x) - 1) * 100
mGAK = data.frame(x = modF1F5$marginals.fixed$FebGAK[, 1], y = modF1F5$marginals.fixed$FebGAK[, 
    2])
mGAK$x = (exp(mGAK$x) - 1) * 100
p7b = ggplot(mGOM, aes(x, y)) + geom_line(col = "red") + theme_bw() + geom_line(aes(x, 
    y), data = mGAK, col = "blue") + xlab(expression(paste("% Change in Number of Reports /", 
    degree, "C"))) + ylab("Posterior Density") + geom_vline(x = 0, col = "gray") + 
    scale_x_continuous(limits = c(-100, 300)) + scale_y_continuous(limits = c(0, 
    4.3)) + ggtitle("F1-F5 Tornadoes")
formula1 = F2F5 ~ FebGOM + FebGAK + f(YEAR, model = "ar1")
modF2F5 = inla(formula1, family = "nbinomial", data = annual, control.compute = list(dic = TRUE, 
    cpo = TRUE), control.predictor = list(link = 1))
summary(modF2F5)
## 
## Call:
## c("inla(formula = formula1, family = \"nbinomial\", data = annual, ",  "    control.compute = list(dic = TRUE, cpo = TRUE), control.predictor = list(link = 1))" )
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##         0.14254         0.26637         0.04715         0.45605 
## 
## Fixed effects:
##                 mean     sd 0.025quant 0.5quant 0.975quant       kld
## (Intercept) -13.4843 6.5270   -26.3468 -13.4752   -0.66734 5.100e-05
## FebGOM        0.6920 0.2613     0.1792   0.6916    1.20758 5.946e-05
## FebGAK       -0.3352 0.1368    -0.6051  -0.3349   -0.06676 8.111e-05
## 
## Random effects:
## Name   Model     Max KLD 
## YEAR   AR1 model 
## 
## Model hyperparameters:
##                                                      mean     sd      
## size for the nbinomial observations (overdispersion)   4.4204   1.1152
## Precision for YEAR                                    74.4362 175.8389
## Rho for YEAR                                           0.8846   0.1280
##                                                      0.025quant 0.5quant
## size for the nbinomial observations (overdispersion)   2.5982     4.2993
## Precision for YEAR                                     4.0123    30.6599
## Rho for YEAR                                           0.5233     0.9269
##                                                      0.975quant
## size for the nbinomial observations (overdispersion)   6.9424  
## Precision for YEAR                                   415.8113  
## Rho for YEAR                                           0.9933  
## 
## Expected number of effective parameters(std dev): 6.19(3.433)
## Number of equivalent replicates : 10.02 
## 
## Deviance Information Criterion: 440.37
## Effective number of parameters: 8.758
## 
## Marginal Likelihood:  -237.65 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed
ad.test(modF2F5$cpo$pit, distr.fun = punif)$p.value
##     AD 
## 0.5259
-mean(log(modF2F5$cpo$cpo))
## [1] 3.531
dfF2F5 = data.frame(Year = annual$YEAR, obs = annual$F2F5, modF2F5$summary.fitted.values)
cor(dfF2F5$obs, dfF2F5$mean)
## [1] 0.6309
limits = aes(ymax = dfF2F5$X0.975, ymin = dfF2F5$X0.025quant)
p8c = ggplot(dfF2F5, aes(x = obs, y = mean)) + geom_point() + geom_smooth(method = "lm") + 
    geom_abline(slope = 1, col = "gray") + theme_bw() + geom_linerange(limits) + 
    scale_y_continuous(limits = c(min(dfF2F5$obs), max(dfF2F5$X0.975))) + scale_x_continuous(limits = c(min(dfF2F5$obs), 
    max(dfF2F5$X0.975))) + xlab("Observed Number of Reports") + ylab("Predicted Number of Reports") + 
    ggtitle("F2-F5 Tornadoes")
mGOM = data.frame(x = modF2F5$marginals.fixed$FebGOM[, 1], y = modF2F5$marginals.fixed$FebGOM[, 
    2])
mGOM$x = (exp(mGOM$x) - 1) * 100
mGAK = data.frame(x = modF2F5$marginals.fixed$FebGAK[, 1], y = modF2F5$marginals.fixed$FebGAK[, 
    2])
mGAK$x = (exp(mGAK$x) - 1) * 100
p7c = ggplot(mGOM, aes(x, y)) + geom_line(col = "red") + theme_bw() + geom_line(aes(x, 
    y), data = mGAK, col = "blue") + xlab(expression(paste("% Change in Number of Reports /", 
    degree, "C"))) + ylab("Posterior Density") + geom_vline(x = 0, col = "gray") + 
    scale_x_continuous(limits = c(-100, 300)) + scale_y_continuous(limits = c(0, 
    4.3)) + ggtitle("F2-F5 Tornadoes")
formula1 = F3F5 ~ FebGOM + FebGAK + f(YEAR, model = "ar1")
modF3F5 = inla(formula1, family = "nbinomial", data = annual, control.compute = list(dic = TRUE, 
    cpo = TRUE), control.predictor = list(link = 1))
summary(modF3F5)
## 
## Call:
## c("inla(formula = formula1, family = \"nbinomial\", data = annual, ",  "    control.compute = list(dic = TRUE, cpo = TRUE), control.predictor = list(link = 1))" )
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##         0.15099         0.29388         0.04352         0.48839 
## 
## Fixed effects:
##                mean     sd 0.025quant 0.5quant 0.975quant       kld
## (Intercept) -7.4455 9.7569   -26.6911  -7.4290    11.7175 2.506e-05
## FebGOM       0.4434 0.3911    -0.3233   0.4423     1.2166 2.526e-05
## FebGAK      -0.6169 0.1986    -1.0117  -0.6155    -0.2299 1.610e-04
## 
## Random effects:
## Name   Model     Max KLD 
## YEAR   AR1 model 
## 
## Model hyperparameters:
##                                                      mean       sd        
## size for the nbinomial observations (overdispersion)  2.171e+00  5.730e-01
## Precision for YEAR                                    1.867e+04  1.839e+04
## Rho for YEAR                                          2.382e-03  6.981e-01
##                                                      0.025quant 0.5quant  
## size for the nbinomial observations (overdispersion)  1.269e+00  2.095e+00
## Precision for YEAR                                    1.266e+03  1.325e+04
## Rho for YEAR                                         -9.876e-01  5.352e-03
##                                                      0.975quant
## size for the nbinomial observations (overdispersion)  3.498e+00
## Precision for YEAR                                    6.707e+04
## Rho for YEAR                                          9.875e-01
## 
## Expected number of effective parameters(std dev): 3.01(0.01292)
## Number of equivalent replicates : 20.60 
## 
## Deviance Information Criterion: 323.15
## Effective number of parameters: 3.831
## 
## Marginal Likelihood:  -170.46 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed
ad.test(modF3F5$cpo$pit, distr.fun = punif)$p.value
##     AD 
## 0.1765
-mean(log(modF3F5$cpo$cpo))
## [1] 2.603
dfF3F5 = data.frame(Year = annual$YEAR, obs = annual$F3F5, modF3F5$summary.fitted.values)
cor(dfF3F5$obs, dfF3F5$mean)
## [1] 0.4098
limits = aes(ymax = dfF3F5$X0.975, ymin = dfF3F5$X0.025quant)
p8d = ggplot(dfF3F5, aes(x = obs, y = mean)) + geom_point() + geom_smooth(method = "lm") + 
    geom_abline(slope = 1, col = "gray") + theme_bw() + geom_linerange(limits) + 
    scale_y_continuous(limits = c(min(dfF3F5$obs), max(dfF3F5$obs))) + scale_x_continuous(limits = c(min(dfF3F5$obs), 
    max(dfF3F5$obs))) + xlab("Observed Number of Reports") + ylab("Predicted Number of Reports") + 
    ggtitle("F3-F5 Tornadoes")
mGOM = data.frame(x = modF3F5$marginals.fixed$FebGOM[, 1], y = modF3F5$marginals.fixed$FebGOM[, 
    2])
mGOM$x = (exp(mGOM$x) - 1) * 100
mGAK = data.frame(x = modF3F5$marginals.fixed$FebGAK[, 1], y = modF3F5$marginals.fixed$FebGAK[, 
    2])
mGAK$x = (exp(mGAK$x) - 1) * 100
p7d = ggplot(mGOM, aes(x, y)) + geom_line(col = "red") + theme_bw() + geom_line(aes(x, 
    y), data = mGAK, col = "blue") + xlab(expression(paste("% Change in Number of Reports /", 
    degree, "C"))) + ylab("Posterior Density") + geom_vline(x = 0, col = "gray") + 
    scale_x_continuous(limits = c(-100, 300)) + scale_y_continuous(limits = c(0, 
    4.3)) + ggtitle("F3-F5 Tornadoes")
      & \multicolumn{2}{c}{F0F5} & \multicolumn{2}{c}{F1F5} & \multicolumn{2}{c}{F2F5} & \multicolumn{2}{c}{F3F5} \\

Statistic & Mean (s.d.) & CI (95\%) & Mean (s.d.) & CI (95\%) & Mean (s.d.) & CI (95\%) & Mean (s.d.) & CI (95\%) \ \hline
\( \beta_{WCA} \) & 0.374 (0.181) & (0.018, 0.729) & 0.492(0.198) & (0.104,0.883) & 0.693(0.261) & (0.180,1.208) & 0.443(0.391) & (-0.323,1.2167)\
\( \beta_{GAK} \) & -0.163(0.095) & (-0.350,0.025) & -0.246(0.107) & (-0.456,-0.036) & -0.335(0.137) & (-0.606,-0.066) & -0.617(0.199) & (-1.012,-0.230)\
\( n \) & 8.314(1.8148) & (5.2649,12.3527) & 5.514(1.0820) & (3.6600,7.9890) & 4.407(1.1088) & (2.5937, 6.9144) & 2.171(0.5730) & (1.2690,3.4980)\
\( \phi \) & 0.9475(0.03696) & (0.8531,0.9918) & -0.001(0.6980) & (-0.9873,0.9877) & 0.885(0.1301) & (0.5153,0.9936) & 0.003(0.6981) & (-0.9876,0.9875)\
ENEP & 12.11(3.512) & & 3.034(0.0435) & & 6.25(3.375) & & 3.01(0.1292)\
NER & 5.12 & & 20.43 & & 9.92 & & 20.60\
DIC & 600.98 & & 530.95 & & 440.21 & & 323.15\
ENP & 13.03 & & 3.85 & & 8.806 & & 3.832\
ML & -329.40 & & -278.97 & & -237.64 & & -170.46\
AD \( p \)-value & 0.4630 & & 0.9285 & & 0.51767 & & 0.1766\
log score & 4.825 & & 4.2723 & & 3.5371 & & 2.6029\
\( r(o, p) \) in sample & 0.88 & & 0.45 & & 0.63 & & 0.41\
\( r(o, p) \) out of sample & 0.78 & & 0.36 & & 0.42 & & 0.31\

multiplot(p7a, p7b, p7c, p7d, cols = 2)

plot of chunk SSTeffects

FIGURE 7: Posterior densities for the covariate parameters. The blue line is the parameter for the Gulf of Alaska (GAK) SST covariate and the red line is the parameter for the western Caribbean (WCA) SST covariate. The densities are plotted for the percent change in the number of reports per $\circ$C.

multiplot(p8a, p8b, p8c, p8d, cols = 2)

plot of chunk obsVsPredPlot

FIGURE 8: Observed versus predicted annual tornado report counts by F-scale group.

formula1 = F4F5 ~ FebGOM + FebGAK + f(YEAR, model = "ar1")
modF4F5 = inla(formula1, family = "nbinomial", data = annual, control.compute = list(dic = TRUE, 
    cpo = TRUE), control.predictor = list(link = 1))
summary(modF4F5)
ad.test(modF4F5$cpo$pit, distr.fun = punif)$p.value
-mean(log(modF4F5$cpo$cpo))
dfF4F5 = data.frame(Year = annual$YEAR, obs = annual$F4F5, modF4F5$summary.fitted.values)
cor(dfF4F5$obs, dfF4F5$mean)
formula1 = F4F5T ~ FebGOM + FebGAK + f(YEAR, model = "iid")
modF4F5T = inla(formula1, family = "binomial", data = annual, control.compute = list(dic = TRUE, 
    cpo = TRUE), control.predictor = list(link = 1))
summary(modF4F5T)
ad.test(modF4F5T$cpo$pit, distr.fun = punif)$p.value
-mean(log(modF4F5T$cpo$cpo))
dfF4F5T = data.frame(Year = annual$YEAR, obs = annual$F4F5T, modF4F5T$summary.fitted.values)
cor(dfF4F5T$obs, dfF4F5T$mean)

Out of sample prediction.

pre = numeric()
for (i in 1:62) {
    annual2 = annual
    annual2$F2F5[i] = NA
    formula1 = F2F5 ~ FebGOM + FebGAK + f(YEAR, model = "ar1")
    modF2F5 = inla(formula1, family = "nbinomial", data = annual2, control.predictor = list(link = 1))
    pre = c(pre, modF2F5$summary.fitted.values$mean[i])
}
cor(pre, annual$F2F5)

Out-of-Sample Skill
F0F5 0.785
F1F5 0.355
F2F5 0.416
F3F5 0.314

PC of the two covariates. No improvement.

xx = princomp(~FebGAK + FebGOM, data = annual)
annual$PC1 = xx$scores[, 1]
annual$PC2 = xx$scores[, 2]
formula1 = F4F5 ~ PC1 + PC2 + f(YEAR, model = "ar1")
modF4F5 = inla(formula1, family = "nbinomial", data = annual, control.compute = list(dic = TRUE, 
    cpo = TRUE), control.predictor = list(link = 1))
summary(modF4F5)
ad.test(modF4F5$cpo$pit, distr.fun = punif)$p.value
-mean(log(modF4F5$cpo$cpo))
dfF4F5 = data.frame(Year = annual$YEAR, obs = annual$F4F5, modF4F5$summary.fitted.values)
cor(dfF4F5$obs, dfF4F5$mean)

Model using near-city tornado density as predictand. Determine near-city tornado density and use a beta likelihood.

Torn3.ppp = as.ppp(Torn3)
unzip("ci08au12.zip")
ll = "+proj=longlat +datum=NAD83"
US = readOGR(dsn = ".", layer = "ci08au12", p4s = ll)
USt = spTransform(US, CRS(proj4string(Torn3)))
USt = subset(USt, USt$POP_1990 > 1000)
US.ppp = as.ppp(USt)
W = as.owin(Torn3.ppp)
CP.ppp = US.ppp[W]
Z = distmap(CP.ppp)
m2km = 0.001
km2 = 1e+08
dist = numeric()
rho = numeric()
Year = numeric()
rhoCity = numeric()
All = numeric()
rhoCountry = numeric()
rhoCountryhi = numeric()
rhoCountrylo = numeric()
rhoCityhi = numeric()
rhoCitylo = numeric()
beginYear = 1950
endYear = 2011
DT = 1
TornA = subset(Torn3, Torn3$FSCALE >= 2)
TornA.ppp = as.ppp(TornA["YEAR"])
for (yr in seq(beginYear, endYear, 1)) {
    Torn4.ppp = TornA.ppp[TornA.ppp$marks > (yr - DT) & TornA.ppp$marks <= yr]
    All = c(All, Torn4.ppp$n)
    rhat = rhohat(unmark(Torn4.ppp), Z, bw = 0.25, method = "transform")
    dist = c(dist, rhat$Z * m2km)
    rho = c(rho, rhat$rho * km2)
    rhoCity = c(rhoCity, rhat$rho[1] * km2)
    rhoCityhi = c(rhoCityhi, rhat$hi[1] * km2)
    rhoCitylo = c(rhoCitylo, rhat$lo[1] * km2)
    rhoCountry = c(rhoCountry, rhat$rho[length(rhat$rho)] * km2)
    rhoCountryhi = c(rhoCountryhi, rhat$hi[length(rhat$rho)] * km2)
    rhoCountrylo = c(rhoCountrylo, rhat$lo[length(rhat$rho)] * km2)
    Year = c(Year, rep(yr, length(rhat$Z)))
}
annual2 = data.frame(annual, rhoCity = rhoCity, rhoCountry = rhoCountry, rhoCountryhi = rhoCountryhi, 
    rhoCountrylo = rhoCountrylo, rhoCityhi = rhoCityhi, rhoCitylo = rhoCitylo)
formula1 = rhoCountry ~ FebGOM + FebGAK + f(YEAR, model = "ar1")
mod = inla(formula1, family = "beta", data = annual2, control.compute = list(dic = TRUE, 
    cpo = TRUE), control.predictor = list(link = 1))
summary(mod)
ad.test(mod$cpo$pit, distr.fun = punif)$p.value
-mean(log(mod$cpo$cpo))
df = data.frame(Year = annual2$YEAR, obs = annual2$rhoCity, mod$summary.fitted.values)
cor(df$obs, df$mean)

Sample from marginals. Not used.

sampleGOM = inla.rmarginal(1000, PD.GOM)
h = inla.hyperpar(results)
summary(h)
plot(h)

Gridded temperature data. Not used.

source("ReadGriddedData.R")
x = finaldata$TempLand
y = subset(x, year >= 1950 & lat > 20 & month == 4)
y = y[!is.na(y$TempLand), ]