Predicting Spring Tornado Activity in the Central Great Plains By March 1st

James B. Elsner and Holly M. Widen

Code in support of our paper submitted to Monthly Weather Review

date()

## [1] "Sat Mar  2 08:42:08 2013"

Set working directory, load packages, and read bibtex file.

setwd("~/Dropbox/Tornadoes/Holly")
require(maptools)
require(maps)
require(mapproj)
require(rgeos)
require(rgdal)
require(ggmap)
require(plyr)
require(INLA)
require(vcd)
require(reshape)
require(ADGofTest)
require(spatstat)
require(RColorBrewer)

Download the tornado data from the Storm Prediction Center (SPC) http://www.spc.noaa.gov/gis/svrgis/

# tmp =
# download.file('http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip',
# 'tornado.zip', mode='wb')
unzip("tornado.zip")
Torn = readOGR(dsn = ".", layer = "tornado")

## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "tornado"
## with 56221 features and 28 fields
## Feature type: wkbPoint with 2 dimensions

print(proj4string(Torn))

## [1] "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"

names(Torn)

##  [1] "OM"         "YEAR"       "MONTH"      "DAY"        "DATE"      
##  [6] "TIME"       "TIMEZONE"   "STATE"      "FIPS"       "STATENUMBE"
## [11] "FSCALE"     "INJURIES"   "FATALITIES" "LOSS"       "CROPLOSS"  
## [16] "SLAT"       "SLON"       "ELAT"       "ELON"       "LENGTH"    
## [21] "WIDTH"      "NS"         "SN"         "SG"         "F1"        
## [26] "F2"         "F3"         "F4"

The number of tornado reports in the data set is 56221.

Obtain a google map centered on Russell KS with a zoom of 7. Subset the tornado reports by map domain.

ourlocation = geocode("Russell KS")
Map = get_map(location = unlist(ourlocation), source = "google", maptype = "roadmap", 
    zoom = 7)
lla = attr(Map, "bb")$ll.lat
llo = attr(Map, "bb")$ll.lon
ula = attr(Map, "bb")$ur.lat
ulo = attr(Map, "bb")$ur.lon
Torn2 = subset(Torn, Torn$SLAT <= ula & Torn$SLAT >= lla & Torn$SLON <= ulo & 
    Torn$SLON >= llo)

The map shows touchdown locations of 6328 tornadoes.

Create a bar chart of tornado frequency by month type

p1 = ggplot(Torn2@data, aes(as.factor(MONTH))) + geom_bar()
p1 + ylab("Tornado Reports (F0-F5)") + xlab("") + theme_bw() + scale_x_discrete(breaks = as.character(1:12), 
    labels = month.abb)

plot of chunk barchartmonthlyFrequency

Total = sum(table(Torn2$MONTH))
Perc = sum(table(Torn2$MONTH)[4:6])/Total * 100

FIGURE 1: Histogram of monthly tornadoes.

Create a map of the tornadoes centered on Russell, KS.

Torn3 = subset(Torn2, Torn2$MONTH >= 4 | Torn2$MONTH <= 6)
Torn3 = subset(Torn3, Torn3$FSCALE >= 0)
Torn3$F.Scale = factor(Torn3$FSCALE, ordered = TRUE)
myColors = brewer.pal(6, "Set3")
names(myColors) = levels(Torn3$F.Scale)
colScale = scale_colour_manual(name = "F.Scale", values = myColors)
p2 = ggmap(Map, extent = "normal") + theme_bw() + geom_point(aes(x = SLON, y = SLAT, 
    color = F.Scale), alpha = 1, size = 0.8, data = Torn3@data) + colScale + 
    xlab(expression(paste("Longitude (", degree, "E)"))) + ylab(expression(paste("Latitude (", 
    degree, "N)")))
p2

plot of chunk map

Total2 = sum(table(Torn3$MONTH))
Dif = Total - Total2
Perc3 = Dif/Total * 100
Perc2 = sum(table(Torn3$MONTH)[4:6])/Total * 100

FIGURE 2: Tornado reports (F0–F5) over the study region centered on Russell, KS during April–June for the period 1950–2011. Point size is proportion to the F scale.

trpyWide = ddply(Torn3@data, .(YEAR), summarize, F0 = sum(FSCALE == 0), F1 = sum(FSCALE == 
    1), F2 = sum(FSCALE == 2), F3 = sum(FSCALE == 3), F4 = sum(FSCALE == 4), 
    F5 = sum(FSCALE == 5), F0F5 = sum(FSCALE >= 0), F1F5 = sum(FSCALE >= 1), 
    F2F5 = sum(FSCALE >= 2), F3F5 = sum(FSCALE >= 3), F4F5 = sum(FSCALE >= 4), 
    Length = mean(LENGTH), Width = mean(WIDTH), Area = mean((LENGTH * 1609.34) * 
        (WIDTH * 0.9144)), Width75 = quantile(WIDTH, prob = 0.75), Width95 = quantile(WIDTH, 
        prob = 0.95))
trpyLong = melt(trpyWide, id = "YEAR", measure.vars = c("F0F5", "F1F5", "F2F5", 
    "F3F5", "F4F5"))
trpyLong$"F0-F5" = trpyLong$F0F5
trpyLong$"F1-F5" = trpyLong$F1F5
trpyLong$"F2-F5" = trpyLong$F2F5
trpyLong$"F3-F5" = trpyLong$F3F5
trpyLong$"F4-F5" = trpyLong$F4F5

p3 = ggplot(trpyLong, aes(x = YEAR, y = value)) + geom_point() + facet_grid(variable ~ 
    ., scale = "free_y")
p3 = p3 + geom_smooth(method = "loess", span = 1, color = "green") + theme_bw()
p3 + ylab("Number of Tornado Reports") + xlab("")

plot of chunk timeSeriesPlot

FIGURE 3: Time series of tornado reports by F scale groups over the study region centered on Russell, KS.

Run this code chunk to create the function multiplot(). The function is used to arrange individual ggplot objects on a single page.

# Multiple plot function
# 
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot
# objects) - cols: Number of columns in layout - layout: A matrix
# specifying the layout. If present, 'cols' is ignored.
# 
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.  Author: winston@stdout.org
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
    require(grid)

    # Make a list from the ... arguments and plotlist
    plots <- c(list(...), plotlist)

    numPlots = length(plots)

    # If layout is NULL, then use 'cols' to determine layout
    if (is.null(layout)) {
        # Make the panel ncol: Number of columns of plots nrow: Number of rows
        # needed, calculated from # of cols
        layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), ncol = cols, 
            nrow = ceiling(numPlots/cols), byrow = TRUE)
    }

    if (numPlots == 1) {
        print(plots[[1]])

    } else {
        # Set up the page
        grid.newpage()
        pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

        # Make each plot, in the correct location
        for (i in 1:numPlots) {
            # Get the i,j matrix positions of the regions that contain this subplot
            matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

            print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, layout.pos.col = matchidx$col))
        }
    }
}

yl = "Number of Years"
p4a = ggplot(trpyWide, aes(x = F0F5)) + geom_histogram(binwidth = 20, color = "white") + 
    scale_y_continuous(limits = c(0, 13)) + theme_bw() + ylab(yl) + xlab("F0-F5 Tornado Reports")
p4b = ggplot(trpyWide, aes(x = F1F5)) + geom_histogram(binwidth = 10, color = "white") + 
    scale_y_continuous(limits = c(0, 20)) + theme_bw() + ylab(yl) + xlab("F1-F5 Tornado Reports")
p4c = ggplot(trpyWide, aes(x = F2F5)) + geom_histogram(binwidth = 5, color = "white") + 
    scale_y_continuous(limits = c(0, 16)) + theme_bw() + ylab(yl) + xlab("F2-F5 Tornado Reports")
p4d = ggplot(trpyWide, aes(x = F3F5)) + geom_histogram(binwidth = 2, color = "white") + 
    scale_y_continuous(limits = c(0, 22)) + theme_bw() + ylab(yl) + xlab("F3-F5 Tornado Reports")
multiplot(p4a, p4b, p4c, p4d, cols = 2)

plot of chunk histograms

FIGURE 4: Histograms of annual tornado reports by F scale grouping.

m = sapply(trpyWide[, 8:12], mean)
v = sapply(trpyWide[, 8:12], var)
ratio = v/m
m

##   F0F5   F1F5   F2F5   F3F5   F4F5 
## 95.677 42.855 16.677  4.871  1.000

##     F0F5     F1F5     F2F5     F3F5     F4F5 
## 2660.025  372.913   95.763   19.327    1.672

ratio

##   F0F5   F1F5   F2F5   F3F5   F4F5 
## 27.802  8.702  5.742  3.968  1.672

summary(goodfit(trpyWide$F3F5, type = "poisson"))

## 
##   Goodness-of-fit test for poisson distribution
## 
##                    X^2 df  P(> X^2)
## Likelihood Ratio 100.3 13 1.433e-15

Annual statistics by F-scale grouping. Group & Mean & Variance & Ratio & $ p $-value\ \hline F0–F5 & 95.7 & 2660 & 27.8 & $<$0.001\ F1–F5 & 42.9 & 372.9 & 8.70 & $<$0.001\ F2–F5 & 16.7 & 95.76 & 5.74 & $<$0.001\ F3–F5 & 4.87 & 19.33 & 3.97 & $<$0.001\ F4–F5 & 1.00 & 1.672 & 1.67 & 0.016 \ \hline

StudyDomain = data.frame(lon = c(llo, llo, ulo, ulo, llo), lat = c(lla, ula, 
    ula, lla, lla))
lla2 = 15
llo2 = -90
ula2 = 25
ulo2 = -70
GOM = data.frame(lon = c(llo2, llo2, ulo2, ulo2, llo2), lat = c(lla2, ula2, 
    ula2, lla2, lla2))
lla3 = 50.364
llo3 = -153.662
ula3 = 59.948
ulo3 = -136.084
GOAK = data.frame(lon = c(llo3, llo3, ulo3, ulo3, llo3), lat = c(lla3, ula3, 
    ula3, lla3, llo3))
Map2 = get_map(location = unlist(ourlocation), source = "google", maptype = "satellite", 
    zoom = 3)
p5 = ggmap(Map2, extent = "device") + geom_polygon(data = StudyDomain, aes(x = lon, 
    y = lat), color = "green", fill = "green", alpha = 0.4) + geom_polygon(data = GOAK, 
    aes(x = lon, y = lat), color = "cyan", fill = "cyan", alpha = 0.4) + geom_polygon(data = GOM, 
    aes(x = lon, y = lat), color = "red", fill = "red", alpha = 0.4)
p5

plot of chunk map2

FIGURE 5: Tornado and SST regions.

WCA bbox: 15, 25, -70, -90 GAK bbox: 50.364, -136.084, 59.948, -153.662

dat = trpyWide
KSPDSI = read.table("KSPDSI.txt")
names(KSPDSI) = c("YEAR", paste(month.abb, "KSPDSI", sep = ""))
KSPDSI = KSPDSI[, c(1, 3)]  # YEAR & February
dat = merge(dat, KSPDSI, by = "YEAR")
SOI = read.table("SOIstd.txt")
names(SOI) = c("YEAR", paste(month.abb, "SOI", sep = ""))
SOI = SOI[, c(1, 3)]  # YEAR & February
dat = merge(dat, SOI, by = "YEAR")
TNI = read.table("TNI.txt")
names(TNI) = c("YEAR", paste(month.abb, "TNI", sep = ""))
TNI = TNI[, c(1, 3)]
dat = merge(dat, TNI, by = "YEAR")
NAO = read.table("NAO.txt")
names(NAO) = c("YEAR", paste(month.abb, "NAO", sep = ""))
NAO = NAO[, c(1, 3)]
dat = merge(dat, NAO, by = "YEAR")
PNA = read.table("PNA.txt")
names(PNA) = c("YEAR", paste(month.abb, "PNA", sep = ""))
PNA = PNA[, c(1, 3)]
dat = merge(dat, PNA, by = "YEAR")
GOM = read.table("GOMSST2.txt", na.string = "-9.900", header = FALSE)
names(GOM) = c("YEAR", paste(month.abb, "GOM", sep = ""))
GOM = GOM[, c(1, 3)]
dat = merge(dat, GOM, by = "YEAR")
GAK = read.table("GOAKSST4.txt", na.string = "-9.900", header = FALSE)
names(GAK) = c("YEAR", paste(month.abb, "GAK", sep = ""))
GAK = GAK[, c(1, 3)]
annual = merge(dat, GAK, by = "YEAR")

Time series plot of GOM and GAK SST.

annual$"Western Caribbean" = annual$FebGOM
annual$"Gulf of Alaska" = annual$FebGAK
covLong = melt(annual, id = "YEAR", measure.vars = c("Western Caribbean", "Gulf of Alaska"))
# covLong$'Gulf of Mexico' = covLong$FebGOM
p6 = ggplot(covLong, aes(x = YEAR, y = value)) + geom_line() + facet_grid(variable ~ 
    ., scale = "free_y") + ylab(expression(paste("February Sea-Surface Temperature (", 
    degree, "C)")))
p6 = p6 + geom_smooth(method = "loess", span = 1, color = "gray") + theme_bw() + 
    xlab("")
p6

plot of chunk timeSeriesCovariates

ccAll = cor.test(annual$FebGOM, annual$FebGAK)
cc04a = cor.test(annual$FebGOM[annual$YEAR >= 2004], annual$FebGAK[annual$YEAR >= 
    2004])
cc04b = cor.test(annual$FebGOM[annual$YEAR < 2004], annual$FebGAK[annual$YEAR < 
    2004])

FIGURE 6: Time series of February sea-surface temperature in the Gulf of Mexico and the Gulf of Alaska.

source(“http://www.math.ntnu.no/inla/givemeINLA.R”)

annual$YEAR1 = annual$YEAR

formula1 = F0F5 ~ FebGOM + FebGAK + YEAR + f(YEAR1, model = "ar1")
modF0F5 = inla(formula1, family = "nbinomial", data = annual, control.compute = list(dic = TRUE, 
    cpo = TRUE), control.predictor = list(link = 1))
summary(modF0F5)

## 
## Call:
## c("inla(formula = formula1, family = \"nbinomial\", data = annual, ",  "    control.compute = list(dic = TRUE, cpo = TRUE), control.predictor = list(link = 1))" )
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.3297          0.4499          0.8640          1.6436 
## 
## Fixed effects:
##                  mean        sd 0.025quant  0.5quant 0.975quant       kld
## (Intercept) -40.34264 11.128126 -62.073407 -40.48788  -17.74601 2.282e-05
## FebGOM        0.37036  0.178718   0.018500   0.37046    0.72182 2.883e-06
## FebGAK       -0.16744  0.094739  -0.353712  -0.16748    0.01912 2.201e-05
## YEAR          0.01822  0.005291   0.007469   0.01828    0.02867 2.834e-05
## 
## Random effects:
## Name   Model     Max KLD 
## YEAR1   AR1 model 
## 
## Model hyperparameters:
##                                                      mean      sd       
## size for the nbinomial observations (overdispersion) 7.765e+00 1.847e+00
## Precision for YEAR1                                  8.131e+01 1.638e+04
## Rho for YEAR1                                        8.108e-01 1.538e-01
##                                                      0.025quant 0.5quant 
## size for the nbinomial observations (overdispersion) 4.632e+00  7.612e+00
## Precision for YEAR1                                  4.652e+00  3.726e+01
## Rho for YEAR1                                        3.915e-01  8.552e-01
##                                                      0.975quant
## size for the nbinomial observations (overdispersion) 1.184e+01 
## Precision for YEAR1                                  2.993e+02 
## Rho for YEAR1                                        9.733e-01 
## 
## Expected number of effective parameters(std dev): 10.94(4.274)
## Number of equivalent replicates : 5.668 
## 
## Deviance Information Criterion: 600.95
## Effective number of parameters: 11.26
## 
## Marginal Likelihood:  -333.40 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed

ad.test(modF0F5$cpo$pit, distr.fun = punif)$p.value

##     AD 
## 0.2696

-mean(log(modF0F5$cpo$cpo))

## [1] 4.763

dfF0F5 = data.frame(Year = annual$YEAR, obs = annual$F0F5, modF0F5$summary.fitted.values)
cor(dfF0F5$obs, dfF0F5$mean)

## [1] 0.8625

limits = aes(ymax = dfF0F5$X0.975, ymin = dfF0F5$X0.025quant)
p8a = ggplot(dfF0F5, aes(x = obs, y = mean)) + geom_point() + geom_smooth(method = "lm") + 
    geom_abline(slope = 1, col = "gray") + theme_bw() + geom_linerange(limits) + 
    scale_y_continuous(limits = c(min(dfF0F5$obs), max(dfF0F5$X0.975))) + scale_x_continuous(limits = c(min(dfF0F5$obs), 
    max(dfF0F5$X0.975))) + xlab("Observed Number of Reports") + ylab("Predicted Number of Reports") + 
    ggtitle("F0-F5 Tornadoes")

mGOM = data.frame(x = modF0F5$marginals.fixed$FebGOM[, 1], y = modF0F5$marginals.fixed$FebGOM[, 
    2])
mGOM$x = (exp(mGOM$x) - 1) * 100
mGAK = data.frame(x = modF0F5$marginals.fixed$FebGAK[, 1], y = modF0F5$marginals.fixed$FebGAK[, 
    2])
mGAK$x = (exp(mGAK$x) - 1) * 100
p7a = ggplot(mGOM, aes(x, y)) + geom_line(col = "red") + theme_bw() + geom_line(aes(x, 
    y), data = mGAK, col = "blue") + xlab(expression(paste("% Change in Number of Reports /", 
    degree, "C"))) + ylab("Posterior Density") + geom_vline(x = 0, col = "gray") + 
    scale_x_continuous(limits = c(-100, 300)) + scale_y_continuous(limits = c(0, 
    4.3)) + ggtitle("F0-F5 Tornadoes")

formula1 = F1F5 ~ FebGOM + FebGAK + YEAR + f(YEAR1, model = "ar1")
modF1F5 = inla(formula1, family = "nbinomial", data = annual, control.compute = list(dic = TRUE, 
    cpo = TRUE), control.predictor = list(link = 1))
summary(modF1F5)

## 
## Call:
## c("inla(formula = formula1, family = \"nbinomial\", data = annual, ",  "    control.compute = list(dic = TRUE, cpo = TRUE), control.predictor = list(link = 1))" )
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##         0.15341         0.31497         0.04839         0.51677 
## 
## Fixed effects:
##                mean       sd 0.025quant  0.5quant 0.975quant       kld
## (Intercept) -1.4703 7.743765 -16.702740 -1.469528  13.764523 4.902e-06
## FebGOM       0.5080 0.198534   0.118228  0.507705   0.899440 1.417e-05
## FebGAK      -0.2416 0.106379  -0.450938 -0.241496  -0.032394 2.405e-05
## YEAR        -0.0034 0.003208  -0.009714 -0.003399   0.002909 2.112e-07
## 
## Random effects:
## Name   Model     Max KLD 
## YEAR1   AR1 model 
## 
## Model hyperparameters:
##                                                      mean       sd        
## size for the nbinomial observations (overdispersion)  5.525e+00  1.093e+00
## Precision for YEAR1                                   1.867e+04  1.839e+04
## Rho for YEAR1                                        -3.795e-04  6.980e-01
##                                                      0.025quant 0.5quant  
## size for the nbinomial observations (overdispersion)  3.656e+00  5.433e+00
## Precision for YEAR1                                   1.283e+03  1.326e+04
## Rho for YEAR1                                        -9.874e-01 -1.317e-03
##                                                      0.975quant
## size for the nbinomial observations (overdispersion)  7.928e+00
## Precision for YEAR1                                   6.714e+04
## Rho for YEAR1                                         9.876e-01
## 
## Expected number of effective parameters(std dev): 4.032(0.04257)
## Number of equivalent replicates : 15.38 
## 
## Deviance Information Criterion: 531.80
## Effective number of parameters: 4.841
## 
## Marginal Likelihood:  -287.60 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed

ad.test(modF1F5$cpo$pit, distr.fun = punif)$p.value

##     AD 
## 0.8328

-mean(log(modF1F5$cpo$cpo))

## [1] 4.271

dfF1F5 = data.frame(Year = annual$YEAR, obs = annual$F1F5, modF1F5$summary.fitted.values)
cor(dfF1F5$obs, dfF1F5$mean)

## [1] 0.4568

limits = aes(ymax = dfF1F5$X0.975, ymin = dfF1F5$X0.025quant)
p8b = ggplot(dfF1F5, aes(x = obs, y = mean)) + geom_point() + geom_smooth(method = "lm") + 
    geom_abline(slope = 1, col = "gray") + theme_bw() + geom_linerange(limits) + 
    scale_y_continuous(limits = c(min(dfF1F5$obs), max(dfF1F5$obs))) + scale_x_continuous(limits = c(min(dfF1F5$obs), 
    max(dfF1F5$obs))) + xlab("Observed Number of Reports") + ylab("Predicted Number of Reports") + 
    ggtitle("F1-F5 Tornadoes")

mGOM = data.frame(x = modF1F5$marginals.fixed$FebGOM[, 1], y = modF1F5$marginals.fixed$FebGOM[, 
    2])
mGOM$x = (exp(mGOM$x) - 1) * 100
mGAK = data.frame(x = modF1F5$marginals.fixed$FebGAK[, 1], y = modF1F5$marginals.fixed$FebGAK[, 
    2])
mGAK$x = (exp(mGAK$x) - 1) * 100
p7b = ggplot(mGOM, aes(x, y)) + geom_line(col = "red") + theme_bw() + geom_line(aes(x, 
    y), data = mGAK, col = "blue") + xlab(expression(paste("% Change in Number of Reports /", 
    degree, "C"))) + ylab("Posterior Density") + geom_vline(x = 0, col = "gray") + 
    scale_x_continuous(limits = c(-100, 300)) + scale_y_continuous(limits = c(0, 
    4.3)) + ggtitle("F1-F5 Tornadoes")

formula1 = F2F5 ~ FebGOM + FebGAK + YEAR + f(YEAR1, model = "ar1")
modF2F5 = inla(formula1, family = "nbinomial", data = annual, control.compute = list(dic = TRUE, 
    cpo = TRUE), control.predictor = list(link = 1))
summary(modF2F5)

## 
## Call:
## c("inla(formula = formula1, family = \"nbinomial\", data = annual, ",  "    control.compute = list(dic = TRUE, cpo = TRUE), control.predictor = list(link = 1))" )
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##         0.16956         0.33202         0.04661         0.54819 
## 
## Fixed effects:
##                 mean       sd 0.025quant 0.5quant 0.975quant       kld
## (Intercept) 13.93702 9.342815   -4.43387 13.93522  32.325801 5.916e-09
## FebGOM       0.77642 0.248784    0.28995  0.77532   1.269238 5.901e-05
## FebGAK      -0.33567 0.127428   -0.58682 -0.33548  -0.085472 4.702e-05
## YEAR        -0.01493 0.003966   -0.02276 -0.01492  -0.007159 4.816e-05
## 
## Random effects:
## Name   Model     Max KLD 
## YEAR1   AR1 model 
## 
## Model hyperparameters:
##                                                      mean       sd        
## size for the nbinomial observations (overdispersion)  4.377e+00  9.838e-01
## Precision for YEAR1                                   1.870e+04  1.841e+04
## Rho for YEAR1                                         4.173e-03  6.980e-01
##                                                      0.025quant 0.5quant  
## size for the nbinomial observations (overdispersion)  2.732e+00  4.283e+00
## Precision for YEAR1                                   1.273e+03  1.327e+04
## Rho for YEAR1                                        -9.876e-01  9.345e-03
##                                                      0.975quant
## size for the nbinomial observations (overdispersion)  6.572e+00
## Precision for YEAR1                                   6.718e+04
## Rho for YEAR1                                         9.875e-01
## 
## Expected number of effective parameters(std dev): 4.022(0.02974)
## Number of equivalent replicates : 15.41 
## 
## Deviance Information Criterion: 432.16
## Effective number of parameters: 4.873
## 
## Marginal Likelihood:  -235.92 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed

ad.test(modF2F5$cpo$pit, distr.fun = punif)$p.value

##     AD 
## 0.5565

-mean(log(modF2F5$cpo$cpo))

## [1] 3.477

dfF2F5 = data.frame(Year = annual$YEAR, obs = annual$F2F5, modF2F5$summary.fitted.values)
cor(dfF2F5$obs, dfF2F5$mean)

## [1] 0.582

limits = aes(ymax = dfF2F5$X0.975, ymin = dfF2F5$X0.025quant)
p8c = ggplot(dfF2F5, aes(x = obs, y = mean)) + geom_point() + geom_smooth(method = "lm") + 
    geom_abline(slope = 1, col = "gray") + theme_bw() + geom_linerange(limits) + 
    scale_y_continuous(limits = c(min(dfF2F5$obs), max(dfF2F5$X0.975))) + scale_x_continuous(limits = c(min(dfF2F5$obs), 
    max(dfF2F5$X0.975))) + xlab("Observed Number of Reports") + ylab("Predicted Number of Reports") + 
    ggtitle("F2-F5 Tornadoes")

mGOM = data.frame(x = modF2F5$marginals.fixed$FebGOM[, 1], y = modF2F5$marginals.fixed$FebGOM[, 
    2])
mGOM$x = (exp(mGOM$x) - 1) * 100
mGAK = data.frame(x = modF2F5$marginals.fixed$FebGAK[, 1], y = modF2F5$marginals.fixed$FebGAK[, 
    2])
mGAK$x = (exp(mGAK$x) - 1) * 100
p7c = ggplot(mGOM, aes(x, y)) + geom_line(col = "red") + theme_bw() + geom_line(aes(x, 
    y), data = mGAK, col = "blue") + xlab(expression(paste("% Change in Number of Reports /", 
    degree, "C"))) + ylab("Posterior Density") + geom_vline(x = 0, col = "gray") + 
    scale_x_continuous(limits = c(-100, 300)) + scale_y_continuous(limits = c(0, 
    4.3)) + ggtitle("F2-F5 Tornadoes")

formula1 = F3F5 ~ FebGOM + FebGAK + YEAR + f(YEAR1, model = "ar1")
modF3F5 = inla(formula1, family = "nbinomial", data = annual, control.compute = list(dic = TRUE, 
    cpo = TRUE), control.predictor = list(link = 1))
summary(modF3F5)

## 
## Call:
## c("inla(formula = formula1, family = \"nbinomial\", data = annual, ",  "    control.compute = list(dic = TRUE, cpo = TRUE), control.predictor = list(link = 1))" )
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##         0.15945         0.33624         0.04802         0.54370 
## 
## Fixed effects:
##                  mean        sd 0.025quant  0.5quant 0.975quant       kld
## (Intercept)  4.186047 14.359594  -24.10209  4.198973   32.41424 6.544e-08
## FebGOM       0.516662  0.396641   -0.25993  0.515142    1.30218 1.111e-04
## FebGAK      -0.604202  0.198760   -0.99898 -0.602922   -0.21667 2.722e-04
## YEAR        -0.006804  0.006197   -0.01904 -0.006788    0.00535 2.884e-05
## 
## Random effects:
## Name   Model     Max KLD 
## YEAR1   AR1 model 
## 
## Model hyperparameters:
##                                                      mean       sd        
## size for the nbinomial observations (overdispersion)  2.175e+00  5.809e-01
## Precision for YEAR1                                   1.855e+04  1.834e+04
## Rho for YEAR1                                         3.354e-03  6.981e-01
##                                                      0.025quant 0.5quant  
## size for the nbinomial observations (overdispersion)  1.269e+00  2.096e+00
## Precision for YEAR1                                   1.263e+03  1.314e+04
## Rho for YEAR1                                        -9.876e-01  7.618e-03
##                                                      0.975quant
## size for the nbinomial observations (overdispersion)  3.532e+00
## Precision for YEAR1                                   6.686e+04
## Rho for YEAR1                                         9.875e-01
## 
## Expected number of effective parameters(std dev): 4.009(0.01258)
## Number of equivalent replicates : 15.46 
## 
## Deviance Information Criterion: 323.91
## Effective number of parameters: 4.796
## 
## Marginal Likelihood:  -178.38 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed

ad.test(modF3F5$cpo$pit, distr.fun = punif)$p.value

##     AD 
## 0.1578

-mean(log(modF3F5$cpo$cpo))

## [1] 2.611

dfF3F5 = data.frame(Year = annual$YEAR, obs = annual$F3F5, modF3F5$summary.fitted.values)
cor(dfF3F5$obs, dfF3F5$mean)

## [1] 0.4198

limits = aes(ymax = dfF3F5$X0.975, ymin = dfF3F5$X0.025quant)
p8d = ggplot(dfF3F5, aes(x = obs, y = mean)) + geom_point() + geom_smooth(method = "lm") + 
    geom_abline(slope = 1, col = "gray") + theme_bw() + geom_linerange(limits) + 
    scale_y_continuous(limits = c(min(dfF3F5$obs), max(dfF3F5$obs))) + scale_x_continuous(limits = c(min(dfF3F5$obs), 
    max(dfF3F5$obs))) + xlab("Observed Number of Reports") + ylab("Predicted Number of Reports") + 
    ggtitle("F3-F5 Tornadoes")

mGOM = data.frame(x = modF3F5$marginals.fixed$FebGOM[, 1], y = modF3F5$marginals.fixed$FebGOM[, 
    2])
mGOM$x = (exp(mGOM$x) - 1) * 100
mGAK = data.frame(x = modF3F5$marginals.fixed$FebGAK[, 1], y = modF3F5$marginals.fixed$FebGAK[, 
    2])
mGAK$x = (exp(mGAK$x) - 1) * 100
p7d = ggplot(mGOM, aes(x, y)) + geom_line(col = "red") + theme_bw() + geom_line(aes(x, 
    y), data = mGAK, col = "blue") + xlab(expression(paste("% Change in Number of Reports /", 
    degree, "C"))) + ylab("Posterior Density") + geom_vline(x = 0, col = "gray") + 
    scale_x_continuous(limits = c(-100, 300)) + scale_y_continuous(limits = c(0, 
    4.3)) + ggtitle("F3-F5 Tornadoes")

multiplot(p7a, p7b, p7c, p7d, cols = 2)

plot of chunk SSTeffects

FIGURE 7: Posterior densities for the covariate parameters. The blue line is the parameter for the Gulf of Alaska (GAK) SST covariate and the red line is the parameter for the western Caribbean (WCA) SST covariate. The densities are plotted for the percent change in the number of reports per $\circ$C.

multiplot(p8a, p8b, p8c, p8d, cols = 2)

plot of chunk obsVsPredPlot

FIGURE 8: Observed versus predicted annual tornado report counts by F-scale group.

formula1 = F4F5 ~ FebGOM + FebGAK + f(YEAR, model = "rw1")
modF4F5 = inla(formula1, family = "nbinomial", data = annual, control.compute = list(dic = TRUE, 
    cpo = TRUE), control.predictor = list(link = 1))
summary(modF4F5)
ad.test(modF4F5$cpo$pit, distr.fun = punif)$p.value
-mean(log(modF4F5$cpo$cpo))
dfF4F5 = data.frame(Year = annual$YEAR, obs = annual$F4F5, modF4F5$summary.fitted.values)
cor(dfF4F5$obs, dfF4F5$mean)

formula1 = F4F5T ~ FebGOM + FebGAK + f(YEAR, model = "iid")
modF4F5T = inla(formula1, family = "binomial", data = annual, control.compute = list(dic = TRUE, 
    cpo = TRUE), control.predictor = list(link = 1))
summary(modF4F5T)
ad.test(modF4F5T$cpo$pit, distr.fun = punif)$p.value
-mean(log(modF4F5T$cpo$cpo))
dfF4F5T = data.frame(Year = annual$YEAR, obs = annual$F4F5T, modF4F5T$summary.fitted.values)
cor(dfF4F5T$obs, dfF4F5T$mean)

Out of sample prediction.

pre = numeric()
for (i in 1:62) {
    annual2 = annual
    annual2$F3F5[i] = NA
    formula1 = F3F5 ~ FebGOM + FebGAK + YEAR + f(YEAR1, model = "ar1")
    modF3F5 = inla(formula1, family = "nbinomial", data = annual2, control.predictor = list(link = 1))
    pre = c(pre, modF3F5$summary.fitted.values$mean[i])
}
cor(pre, annual$F3F5)

Out-of-Sample Skill F0F5 0.765 F1F5 0.331 F2F5 0.517 F3F5 0.292

PC of the two covariates. No improvement.

xx = princomp(~FebGAK + FebGOM, data = annual)
annual$PC1 = xx$scores[, 1]
annual$PC2 = xx$scores[, 2]
formula1 = F4F5 ~ PC1 + PC2 + f(YEAR, model = "ar1")
modF4F5 = inla(formula1, family = "nbinomial", data = annual, control.compute = list(dic = TRUE, 
    cpo = TRUE), control.predictor = list(link = 1))
summary(modF4F5)
ad.test(modF4F5$cpo$pit, distr.fun = punif)$p.value
-mean(log(modF4F5$cpo$cpo))
dfF4F5 = data.frame(Year = annual$YEAR, obs = annual$F4F5, modF4F5$summary.fitted.values)
cor(dfF4F5$obs, dfF4F5$mean)

Model using near-city tornado density as predictand. Determine near-city tornado density and use a beta likelihood.

Torn3.ppp = as.ppp(Torn3)
unzip("ci08au12.zip")
ll = "+proj=longlat +datum=NAD83"
US = readOGR(dsn = ".", layer = "ci08au12", p4s = ll)
USt = spTransform(US, CRS(proj4string(Torn3)))
USt = subset(USt, USt$POP_1990 > 1000)
US.ppp = as.ppp(USt)
W = as.owin(Torn3.ppp)
CP.ppp = US.ppp[W]
Z = distmap(CP.ppp)
m2km = 0.001
km2 = 1e+08
dist = numeric()
rho = numeric()
Year = numeric()
rhoCity = numeric()
All = numeric()
rhoCountry = numeric()
rhoCountryhi = numeric()
rhoCountrylo = numeric()
rhoCityhi = numeric()
rhoCitylo = numeric()
beginYear = 1950
endYear = 2011
DT = 1
TornA = subset(Torn3, Torn3$FSCALE >= 2)
TornA.ppp = as.ppp(TornA["YEAR"])
for (yr in seq(beginYear, endYear, 1)) {
    Torn4.ppp = TornA.ppp[TornA.ppp$marks > (yr - DT) & TornA.ppp$marks <= yr]
    All = c(All, Torn4.ppp$n)
    rhat = rhohat(unmark(Torn4.ppp), Z, bw = 0.25, method = "transform")
    dist = c(dist, rhat$Z * m2km)
    rho = c(rho, rhat$rho * km2)
    rhoCity = c(rhoCity, rhat$rho[1] * km2)
    rhoCityhi = c(rhoCityhi, rhat$hi[1] * km2)
    rhoCitylo = c(rhoCitylo, rhat$lo[1] * km2)
    rhoCountry = c(rhoCountry, rhat$rho[length(rhat$rho)] * km2)
    rhoCountryhi = c(rhoCountryhi, rhat$hi[length(rhat$rho)] * km2)
    rhoCountrylo = c(rhoCountrylo, rhat$lo[length(rhat$rho)] * km2)
    Year = c(Year, rep(yr, length(rhat$Z)))
}
annual2 = data.frame(annual, rhoCity = rhoCity, rhoCountry = rhoCountry, rhoCountryhi = rhoCountryhi, 
    rhoCountrylo = rhoCountrylo, rhoCityhi = rhoCityhi, rhoCitylo = rhoCitylo)
formula1 = rhoCountry ~ FebGOM + FebGAK + f(YEAR, model = "ar1")
mod = inla(formula1, family = "beta", data = annual2, control.compute = list(dic = TRUE, 
    cpo = TRUE), control.predictor = list(link = 1))
summary(mod)
ad.test(mod$cpo$pit, distr.fun = punif)$p.value
-mean(log(mod$cpo$cpo))
df = data.frame(Year = annual2$YEAR, obs = annual2$rhoCity, mod$summary.fitted.values)
cor(df$obs, df$mean)

Sample from marginals. Not used.

sampleGOM = inla.rmarginal(1000, PD.GOM)
h = inla.hyperpar(results)
summary(h)
plot(h)

Gridded temperature data. Not used.

source("ReadGriddedData.R")
x = finaldata$TempLand
y = subset(x, year >= 1950 & lat > 20 & month == 4)
y = y[!is.na(y$TempLand), ]

Annual tornado width and length

summary(lm(Width ~ FebGOM + FebGAK + YEAR, data = annual[annual$YEAR > 1987, 
    ]))

## 
## Call:
## lm(formula = Width ~ FebGOM + FebGAK + YEAR, data = annual[annual$YEAR > 
##     1987, ])
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -28.83 -15.22  -4.73  14.86  42.17 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8230.634   1293.273   -6.36  3.3e-06 ***
## FebGOM         87.754     15.968    5.50  2.2e-05 ***
## FebGAK        -19.819      7.840   -2.53     0.02 *  
## YEAR            3.098      0.628    4.93  8.0e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 21.3 on 20 degrees of freedom
## Multiple R-squared: 0.764,   Adjusted R-squared: 0.729 
## F-statistic: 21.6 on 3 and 20 DF,  p-value: 1.73e-06

INLA model for tornado Width

formula1 = Width ~ FebGOM + FebGAK + YEAR
modWidth = inla(formula1, family = "gamma", data = annual[annual$YEAR > 1987, 
    ], control.compute = list(dic = TRUE, cpo = TRUE), control.predictor = list(link = 1))
summary(modWidth)

## 
## Call:
## c("inla(formula = formula1, family = \"gamma\", data = annual[annual$YEAR > ",  "    1987, ], control.compute = list(dic = TRUE, cpo = TRUE), ",  "    control.predictor = list(link = 1))")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##         0.11487         0.05183         0.03209         0.19879 
## 
## Fixed effects:
##                  mean        sd 0.025quant  0.5quant 0.975quant       kld
## (Intercept) -73.90968 14.916470 -103.39327 -73.88885  -44.52825 1.170e-04
## FebGOM        0.77742  0.181777    0.41781   0.77777    1.13521 1.861e-05
## FebGAK       -0.15518  0.090148   -0.33224  -0.15547    0.02360 1.004e-04
## YEAR          0.02976  0.007153    0.01568   0.02975    0.04391 1.409e-04
## 
## The model has no random effects
## 
## Model hyperparameters:
##                                                mean   sd     0.025quant
## Precision parameter for the Gamma observations 18.128  5.422  9.434    
##                                                0.5quant 0.975quant
## Precision parameter for the Gamma observations 17.507   30.538    
## 
## Expected number of effective parameters(std dev): 4.006(0.001445)
## Number of equivalent replicates : 5.991 
## 
## Deviance Information Criterion: 226.92
## Effective number of parameters: 4.647
## 
## Marginal Likelihood:  -132.60 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed


ad.test(modWidth$cpo$pit, distr.fun = punif)$p.value

##     AD 
## 0.9914

-mean(log(modWidth$cpo$cpo))

## [1] 4.665

dfWidth = data.frame(Year = annual$YEAR[annual$YEAR > 1987], obs = annual$Width[annual$YEAR > 
    1987], modWidth$summary.fitted.values)
cor(dfWidth$obs, dfWidth$mean)

## [1] 0.8681