Code in support of our paper to be submitted to Monthly Weather Review
date()
## [1] "Sat Dec 29 07:40:20 2012"
Set working directory, load packages, and read bibtex file.
setwd("~/Dropbox/Tornadoes/Holly")
require(maptools)
require(maps)
require(mapproj)
require(rgeos)
require(rgdal)
require(ggmap)
require(plyr)
require(INLA)
require(vcd)
require(reshape)
require(ADGofTest)
require(spatstat)
Download the tornado data from the Storm Prediction Center (SPC) http://www.spc.noaa.gov/gis/svrgis/
# tmp =
# download.file('http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip',
# 'tornado.zip', mode='wb')
unzip("tornado.zip")
Torn = readOGR(dsn = ".", layer = "tornado")
## OGR data source with driver: ESRI Shapefile
## Source: ".", layer: "tornado"
## with 56221 features and 28 fields
## Feature type: wkbPoint with 2 dimensions
print(proj4string(Torn))
## [1] "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
names(Torn)
## [1] "OM" "YEAR" "MONTH" "DAY" "DATE"
## [6] "TIME" "TIMEZONE" "STATE" "FIPS" "STATENUMBE"
## [11] "FSCALE" "INJURIES" "FATALITIES" "LOSS" "CROPLOSS"
## [16] "SLAT" "SLON" "ELAT" "ELON" "LENGTH"
## [21] "WIDTH" "NS" "SN" "SG" "F1"
## [26] "F2" "F3" "F4"
The number of tornado reports in the data set is 56221.
Obtain a google map centered on Russell KS with a zoom of 7. Subset the tornado reports by map domain.
ourlocation = geocode("Russell KS")
Map = get_map(location = unlist(ourlocation), source = "google", maptype = "hybrid",
zoom = 7)
lla = attr(Map, "bb")$ll.lat
llo = attr(Map, "bb")$ll.lon
ula = attr(Map, "bb")$ur.lat
ulo = attr(Map, "bb")$ur.lon
Torn2 = subset(Torn, Torn$SLAT <= ula & Torn$SLAT >= lla & Torn$SLON <= ulo &
Torn$SLON >= llo)
The map shows touchdown locations of 6328 tornadoes.
Create a bar chart of tornado frequency by month type
p1 = ggplot(Torn2@data, aes(as.factor(MONTH))) + geom_bar()
p1 + ylab("Tornado Reports (F0-F5)") + xlab("") + theme_bw() + scale_x_discrete(breaks = as.character(1:12),
labels = month.abb)
Total = sum(table(Torn2$MONTH))
Perc = sum(table(Torn2$MONTH)[4:6])/Total * 100
FIGURE 1: Histogram of monthly tornadoes.
Create a map of the tornadoes centered on Russell, KS.
Torn3 = subset(Torn2, Torn2$MONTH >= 4 | Torn2$MONTH <= 6)
Torn3 = subset(Torn3, Torn3$FSCALE >= 0)
Torn3$F.Scale = factor(Torn3$FSCALE, ordered = TRUE)
p2 = ggmap(Map, extent = "normal") + theme_bw() + geom_point(aes(x = SLON, y = SLAT,
size = F.Scale), color = "green", alpha = 0.5, data = Torn3@data) + xlab(expression(paste("Longitude (",
degree, "E)"))) + ylab(expression(paste("Latitude (", degree, "N)")))
p2
Total2 = sum(table(Torn3$MONTH))
Dif = Total - Total2
Perc3 = Dif/Total * 100
Perc2 = sum(table(Torn3$MONTH)[4:6])/Total * 100
FIGURE 2: Tornado reports (F0–F5) over the study region centered on Russell, KS during April–June for the period 1950–2011. Point size is proportion to the F scale.
trpyWide = ddply(Torn3@data, .(YEAR), summarize, F0 = sum(FSCALE == 0), F1 = sum(FSCALE ==
1), F2 = sum(FSCALE == 2), F3 = sum(FSCALE == 3), F4 = sum(FSCALE == 4),
F5 = sum(FSCALE == 5), F0F5 = sum(FSCALE >= 0), F1F5 = sum(FSCALE >= 1),
F2F5 = sum(FSCALE >= 2), F3F5 = sum(FSCALE >= 3), F4F5 = sum(FSCALE >= 4))
trpyLong = melt(trpyWide, id = "YEAR", measure.vars = c("F0F5", "F1F5", "F2F5",
"F3F5", "F4F5"))
trpyLong$"F0-F5" = trpyLong$F0F5
trpyLong$"F1-F5" = trpyLong$F1F5
trpyLong$"F2-F5" = trpyLong$F2F5
trpyLong$"F3-F5" = trpyLong$F3F5
trpyLong$"F4-F5" = trpyLong$F4F5
p3 = ggplot(trpyLong, aes(x = YEAR, y = value)) + geom_point() + facet_grid(variable ~
., scale = "free_y")
p3 = p3 + geom_smooth(method = "loess", span = 1, color = "green") + theme_bw()
p3 + ylab("Number of Tornado Reports") + xlab("")
FIGURE 3: Time series of tornado reports by F scale groups over the study region centered on Russell, KS.
Run this code chunk to create the function multiplot(). The function is used to arrange individual ggplot objects on a single page.
# Multiple plot function
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot
# objects) - cols: Number of columns in layout - layout: A matrix
# specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom. Author: winston@stdout.org
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
require(grid)
# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
numPlots = length(plots)
# If layout is NULL, then use 'cols' to determine layout
if (is.null(layout)) {
# Make the panel ncol: Number of columns of plots nrow: Number of rows
# needed, calculated from # of cols
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), ncol = cols,
nrow = ceiling(numPlots/cols), byrow = TRUE)
}
if (numPlots == 1) {
print(plots[[1]])
} else {
# Set up the page
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
# Make each plot, in the correct location
for (i in 1:numPlots) {
# Get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, layout.pos.col = matchidx$col))
}
}
}
yl = "Number of Years"
p4a = ggplot(trpyWide, aes(x = F0F5)) + geom_histogram(binwidth = 20, color = "white") +
scale_y_continuous(limits = c(0, 13)) + theme_bw() + ylab(yl) + xlab("F0-F5 Tornado Reports")
p4b = ggplot(trpyWide, aes(x = F1F5)) + geom_histogram(binwidth = 10, color = "white") +
scale_y_continuous(limits = c(0, 20)) + theme_bw() + ylab(yl) + xlab("F1-F5 Tornado Reports")
p4c = ggplot(trpyWide, aes(x = F2F5)) + geom_histogram(binwidth = 5, color = "white") +
scale_y_continuous(limits = c(0, 16)) + theme_bw() + ylab(yl) + xlab("F2-F5 Tornado Reports")
p4d = ggplot(trpyWide, aes(x = F3F5)) + geom_histogram(binwidth = 2, color = "white") +
scale_y_continuous(limits = c(0, 22)) + theme_bw() + ylab(yl) + xlab("F3-F5 Tornado Reports")
multiplot(p4a, p4b, p4c, p4d, cols = 2)
FIGURE 4: Histograms of annual tornado reports by F scale grouping.
m = sapply(trpyWide[, 8:12], mean)
v = sapply(trpyWide[, 8:12], var)
ratio = v/m
m
## F0F5 F1F5 F2F5 F3F5 F4F5
## 95.677 42.855 16.677 4.871 1.000
v
## F0F5 F1F5 F2F5 F3F5 F4F5
## 2660.025 372.913 95.763 19.327 1.672
ratio
## F0F5 F1F5 F2F5 F3F5 F4F5
## 27.802 8.702 5.742 3.968 1.672
summary(goodfit(trpyWide$F3F5, type = "poisson"))
##
## Goodness-of-fit test for poisson distribution
##
## X^2 df P(> X^2)
## Likelihood Ratio 100.3 13 1.433e-15
Annual statistics by F-scale grouping.
Group & Mean & Variance & Ratio & \( p \)-value\ \hline
F0–F5 & 95.7 & 2660 & 27.8 & $<$0.001\
F1–F5 & 42.9 & 372.9 & 8.70 & $<$0.001\
F2–F5 & 16.7 & 95.76 & 5.74 & $<$0.001\
F3–F5 & 4.87 & 19.33 & 3.97 & $<$0.001\
F4–F5 & 1.00 & 1.672 & 1.67 & 0.016 \ \hline
StudyDomain = data.frame(lon = c(llo, llo, ulo, ulo, llo), lat = c(lla, ula,
ula, lla, lla))
lla2 = 15
llo2 = -90
ula2 = 25
ulo2 = -70
GOM = data.frame(lon = c(llo2, llo2, ulo2, ulo2, llo2), lat = c(lla2, ula2,
ula2, lla2, lla2))
lla3 = 50.364
llo3 = -153.662
ula3 = 59.948
ulo3 = -136.084
GOAK = data.frame(lon = c(llo3, llo3, ulo3, ulo3, llo3), lat = c(lla3, ula3,
ula3, lla3, llo3))
Map2 = get_map(location = unlist(ourlocation), source = "google", maptype = "satellite",
zoom = 3)
p5 = ggmap(Map2, extent = "device") + geom_polygon(data = StudyDomain, aes(x = lon,
y = lat), color = "green", fill = "green", alpha = 0.4) + geom_polygon(data = GOAK,
aes(x = lon, y = lat), color = "cyan", fill = "cyan", alpha = 0.4) + geom_polygon(data = GOM,
aes(x = lon, y = lat), color = "red", fill = "red", alpha = 0.4)
p5
FIGURE 5: Tornado and SST regions.
WCA bbox: 15, 25, -70, -90
GAK bbox: 50.364, -136.084, 59.948, -153.662
dat = trpyWide
KSPDSI = read.table("KSPDSI.txt")
names(KSPDSI) = c("YEAR", paste(month.abb, "KSPDSI", sep = ""))
KSPDSI = KSPDSI[, c(1, 3)] # YEAR & February
dat = merge(dat, KSPDSI, by = "YEAR")
SOI = read.table("SOIstd.txt")
names(SOI) = c("YEAR", paste(month.abb, "SOI", sep = ""))
SOI = SOI[, c(1, 3)] # YEAR & February
dat = merge(dat, SOI, by = "YEAR")
TNI = read.table("TNI.txt")
names(TNI) = c("YEAR", paste(month.abb, "TNI", sep = ""))
TNI = TNI[, c(1, 3)]
dat = merge(dat, TNI, by = "YEAR")
NAO = read.table("NAO.txt")
names(NAO) = c("YEAR", paste(month.abb, "NAO", sep = ""))
NAO = NAO[, c(1, 3)]
dat = merge(dat, NAO, by = "YEAR")
PNA = read.table("PNA.txt")
names(PNA) = c("YEAR", paste(month.abb, "PNA", sep = ""))
PNA = PNA[, c(1, 3)]
dat = merge(dat, PNA, by = "YEAR")
GOM = read.table("GOMSST2.txt", na.string = "-9.900", header = FALSE)
names(GOM) = c("YEAR", paste(month.abb, "GOM", sep = ""))
GOM = GOM[, c(1, 3)]
dat = merge(dat, GOM, by = "YEAR")
GAK = read.table("GOAKSST4.txt", na.string = "-9.900", header = FALSE)
names(GAK) = c("YEAR", paste(month.abb, "GAK", sep = ""))
GAK = GAK[, c(1, 3)]
annual = merge(dat, GAK, by = "YEAR")
Time series plot of GOM and GAK SST.
annual$"Gulf of Mexico" = annual$FebGOM
annual$"Gulf of Alaska" = annual$FebGAK
covLong = melt(annual, id = "YEAR", measure.vars = c("Gulf of Mexico", "Gulf of Alaska"))
# covLong$'Gulf of Mexico' = covLong$FebGOM
p6 = ggplot(covLong, aes(x = YEAR, y = value)) + geom_line() + facet_grid(variable ~
., scale = "free_y") + ylab(expression(paste("February Sea-Surface Temperature (",
degree, "C)")))
p6 = p6 + geom_smooth(method = "loess", span = 1, color = "gray") + theme_bw() +
xlab("")
p6
ccAll = cor(annual$FebGOM, annual$FebGAK)
cc04a = cor(annual$FebGOM[annual$YEAR >= 2004], annual$FebGAK[annual$YEAR >=
2004])
cc04b = cor(annual$FebGOM[annual$YEAR < 2004], annual$FebGAK[annual$YEAR < 2004])
FIGURE 6: Time series of February sea-surface temperature in the Gulf of Mexico and the Gulf of Alaska.
source(“http://www.math.ntnu.no/inla/givemeINLA.R”)
formula1 = F0F5 ~ FebGOM + FebGAK + f(YEAR, model = "ar1")
modF0F5 = inla(formula1, family = "nbinomial", data = annual, control.compute = list(dic = TRUE,
cpo = TRUE), control.predictor = list(link = 1))
summary(modF0F5)
##
## Call:
## c("inla(formula = formula1, family = \"nbinomial\", data = annual, ", " control.compute = list(dic = TRUE, cpo = TRUE), control.predictor = list(link = 1))" )
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.2016 0.4002 0.1579 0.7597
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant kld
## (Intercept) -4.3548 4.52454 -13.23917 -4.3606 4.56739 4.337e-07
## FebGOM 0.3740 0.18060 0.01834 0.3741 0.72904 2.923e-06
## FebGAK -0.1626 0.09514 -0.34958 -0.1627 0.02482 2.188e-05
##
## Random effects:
## Name Model Max KLD
## YEAR AR1 model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 8.31353 1.81485
## Precision for YEAR 9.07254 5.40301
## Rho for YEAR 0.94753 0.03696
## 0.025quant 0.5quant
## size for the nbinomial observations (overdispersion) 5.26488 8.14307
## Precision for YEAR 2.30152 7.90552
## Rho for YEAR 0.85314 0.95611
## 0.975quant
## size for the nbinomial observations (overdispersion) 12.35273
## Precision for YEAR 22.74742
## Rho for YEAR 0.99175
##
## Expected number of effective parameters(std dev): 12.11(3.512)
## Number of equivalent replicates : 5.12
##
## Deviance Information Criterion: 600.98
## Effective number of parameters: 13.03
##
## Marginal Likelihood: -329.40
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
ad.test(modF0F5$cpo$pit, distr.fun = punif)$p.value
## AD
## 0.159
-mean(log(modF0F5$cpo$cpo))
## [1] 4.75
dfF0F5 = data.frame(Year = annual$YEAR, obs = annual$F0F5, modF0F5$summary.fitted.values)
cor(dfF0F5$obs, dfF0F5$mean)
## [1] 0.8763
limits = aes(ymax = dfF0F5$X0.975, ymin = dfF0F5$X0.025quant)
p8a = ggplot(dfF0F5, aes(x = obs, y = mean)) + geom_point() + geom_smooth(method = "lm") +
geom_abline(slope = 1, col = "gray") + theme_bw() + geom_linerange(limits) +
scale_y_continuous(limits = c(min(dfF0F5$obs), max(dfF0F5$X0.975))) + scale_x_continuous(limits = c(min(dfF0F5$obs),
max(dfF0F5$X0.975))) + xlab("Observed Number of Reports") + ylab("Predicted Number of Reports") +
ggtitle("F0-F5 Tornadoes")
mGOM = data.frame(x = modF0F5$marginals.fixed$FebGOM[, 1], y = modF0F5$marginals.fixed$FebGOM[,
2])
mGOM$x = (exp(mGOM$x) - 1) * 100
mGAK = data.frame(x = modF0F5$marginals.fixed$FebGAK[, 1], y = modF0F5$marginals.fixed$FebGAK[,
2])
mGAK$x = (exp(mGAK$x) - 1) * 100
p7a = ggplot(mGOM, aes(x, y)) + geom_line(col = "red") + theme_bw() + geom_line(aes(x,
y), data = mGAK, col = "blue") + xlab(expression(paste("% Change in Number of Reports /",
degree, "C"))) + ylab("Posterior Density") + geom_vline(x = 0, col = "gray") +
scale_x_continuous(limits = c(-100, 300)) + scale_y_continuous(limits = c(0,
4.3)) + ggtitle("F0-F5 Tornadoes")
formula1 = F1F5 ~ FebGOM + FebGAK + FebSOI + FebNAO + FebPNA + f(YEAR, model = "ar1")
modF1F5 = inla(formula1, family = "nbinomial", data = annual, control.compute = list(dic = TRUE,
cpo = TRUE), control.predictor = list(link = 1))
summary(modF1F5)
##
## Call:
## c("inla(formula = formula1, family = \"nbinomial\", data = annual, ", " control.compute = list(dic = TRUE, cpo = TRUE), control.predictor = list(link = 1))" )
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.13037 0.34895 0.05453 0.53385
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant kld
## (Intercept) -7.882030 5.18492 -18.09141 -7.878107 2.31010 6.337e-06
## FebGOM 0.494197 0.20626 0.08907 0.493975 0.90075 9.157e-06
## FebGAK -0.232075 0.14416 -0.51459 -0.232465 0.05277 2.808e-05
## FebSOI -0.012175 0.05091 -0.11293 -0.011967 0.08742 3.985e-07
## FebNAO -0.005758 0.06491 -0.13406 -0.005539 0.12135 4.662e-05
## FebPNA -0.030642 0.07819 -0.18503 -0.030426 0.12260 4.912e-05
##
## Random effects:
## Name Model Max KLD
## YEAR AR1 model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 5.250e+00 1.050e+00
## Precision for YEAR 1.872e+04 1.842e+04
## Rho for YEAR 2.050e-03 6.980e-01
## 0.025quant 0.5quant
## size for the nbinomial observations (overdispersion) 3.462e+00 5.159e+00
## Precision for YEAR 1.280e+03 1.330e+04
## Rho for YEAR -9.875e-01 4.333e-03
## 0.975quant
## size for the nbinomial observations (overdispersion) 7.567e+00
## Precision for YEAR 6.722e+04
## Rho for YEAR 9.876e-01
##
## Expected number of effective parameters(std dev): 6.03(0.03916)
## Number of equivalent replicates : 10.28
##
## Deviance Information Criterion: 537.38
## Effective number of parameters: 6.821
##
## Marginal Likelihood: -297.53
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
ad.test(modF1F5$cpo$pit, distr.fun = punif)$p.value
## AD
## 0.8785
-mean(log(modF1F5$cpo$cpo))
## [1] 4.305
dfF1F5 = data.frame(Year = annual$YEAR, obs = annual$F1F5, modF1F5$summary.fitted.values)
cor(dfF1F5$obs, dfF1F5$mean)
## [1] 0.4429
limits = aes(ymax = dfF1F5$X0.975, ymin = dfF1F5$X0.025quant)
p8b = ggplot(dfF1F5, aes(x = obs, y = mean)) + geom_point() + geom_smooth(method = "lm") +
geom_abline(slope = 1, col = "gray") + theme_bw() + geom_linerange(limits) +
scale_y_continuous(limits = c(min(dfF1F5$obs), max(dfF1F5$obs))) + scale_x_continuous(limits = c(min(dfF1F5$obs),
max(dfF1F5$obs))) + xlab("Observed Number of Reports") + ylab("Predicted Number of Reports") +
ggtitle("F1-F5 Tornadoes")
mGOM = data.frame(x = modF1F5$marginals.fixed$FebGOM[, 1], y = modF1F5$marginals.fixed$FebGOM[,
2])
mGOM$x = (exp(mGOM$x) - 1) * 100
mGAK = data.frame(x = modF1F5$marginals.fixed$FebGAK[, 1], y = modF1F5$marginals.fixed$FebGAK[,
2])
mGAK$x = (exp(mGAK$x) - 1) * 100
p7b = ggplot(mGOM, aes(x, y)) + geom_line(col = "red") + theme_bw() + geom_line(aes(x,
y), data = mGAK, col = "blue") + xlab(expression(paste("% Change in Number of Reports /",
degree, "C"))) + ylab("Posterior Density") + geom_vline(x = 0, col = "gray") +
scale_x_continuous(limits = c(-100, 300)) + scale_y_continuous(limits = c(0,
4.3)) + ggtitle("F1-F5 Tornadoes")
formula1 = F2F5 ~ FebGOM + FebGAK + f(YEAR, model = "ar1")
modF2F5 = inla(formula1, family = "nbinomial", data = annual, control.compute = list(dic = TRUE,
cpo = TRUE), control.predictor = list(link = 1))
summary(modF2F5)
##
## Call:
## c("inla(formula = formula1, family = \"nbinomial\", data = annual, ", " control.compute = list(dic = TRUE, cpo = TRUE), control.predictor = list(link = 1))" )
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.14254 0.26637 0.04715 0.45605
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant kld
## (Intercept) -13.4843 6.5270 -26.3468 -13.4752 -0.66734 5.100e-05
## FebGOM 0.6920 0.2613 0.1792 0.6916 1.20758 5.946e-05
## FebGAK -0.3352 0.1368 -0.6051 -0.3349 -0.06676 8.111e-05
##
## Random effects:
## Name Model Max KLD
## YEAR AR1 model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 4.4204 1.1152
## Precision for YEAR 74.4362 175.8389
## Rho for YEAR 0.8846 0.1280
## 0.025quant 0.5quant
## size for the nbinomial observations (overdispersion) 2.5982 4.2993
## Precision for YEAR 4.0123 30.6599
## Rho for YEAR 0.5233 0.9269
## 0.975quant
## size for the nbinomial observations (overdispersion) 6.9424
## Precision for YEAR 415.8113
## Rho for YEAR 0.9933
##
## Expected number of effective parameters(std dev): 6.19(3.433)
## Number of equivalent replicates : 10.02
##
## Deviance Information Criterion: 440.37
## Effective number of parameters: 8.758
##
## Marginal Likelihood: -237.65
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
ad.test(modF2F5$cpo$pit, distr.fun = punif)$p.value
## AD
## 0.5259
-mean(log(modF2F5$cpo$cpo))
## [1] 3.531
dfF2F5 = data.frame(Year = annual$YEAR, obs = annual$F2F5, modF2F5$summary.fitted.values)
cor(dfF2F5$obs, dfF2F5$mean)
## [1] 0.6309
limits = aes(ymax = dfF2F5$X0.975, ymin = dfF2F5$X0.025quant)
p8c = ggplot(dfF2F5, aes(x = obs, y = mean)) + geom_point() + geom_smooth(method = "lm") +
geom_abline(slope = 1, col = "gray") + theme_bw() + geom_linerange(limits) +
scale_y_continuous(limits = c(min(dfF2F5$obs), max(dfF2F5$X0.975))) + scale_x_continuous(limits = c(min(dfF2F5$obs),
max(dfF2F5$X0.975))) + xlab("Observed Number of Reports") + ylab("Predicted Number of Reports") +
ggtitle("F2-F5 Tornadoes")
mGOM = data.frame(x = modF2F5$marginals.fixed$FebGOM[, 1], y = modF2F5$marginals.fixed$FebGOM[,
2])
mGOM$x = (exp(mGOM$x) - 1) * 100
mGAK = data.frame(x = modF2F5$marginals.fixed$FebGAK[, 1], y = modF2F5$marginals.fixed$FebGAK[,
2])
mGAK$x = (exp(mGAK$x) - 1) * 100
p7c = ggplot(mGOM, aes(x, y)) + geom_line(col = "red") + theme_bw() + geom_line(aes(x,
y), data = mGAK, col = "blue") + xlab(expression(paste("% Change in Number of Reports /",
degree, "C"))) + ylab("Posterior Density") + geom_vline(x = 0, col = "gray") +
scale_x_continuous(limits = c(-100, 300)) + scale_y_continuous(limits = c(0,
4.3)) + ggtitle("F2-F5 Tornadoes")
formula1 = F3F5 ~ FebGOM + FebGAK + f(YEAR, model = "ar1")
modF3F5 = inla(formula1, family = "nbinomial", data = annual, control.compute = list(dic = TRUE,
cpo = TRUE), control.predictor = list(link = 1))
summary(modF3F5)
##
## Call:
## c("inla(formula = formula1, family = \"nbinomial\", data = annual, ", " control.compute = list(dic = TRUE, cpo = TRUE), control.predictor = list(link = 1))" )
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.15099 0.29388 0.04352 0.48839
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant kld
## (Intercept) -7.4455 9.7569 -26.6911 -7.4290 11.7175 2.506e-05
## FebGOM 0.4434 0.3911 -0.3233 0.4423 1.2166 2.526e-05
## FebGAK -0.6169 0.1986 -1.0117 -0.6155 -0.2299 1.610e-04
##
## Random effects:
## Name Model Max KLD
## YEAR AR1 model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 2.171e+00 5.730e-01
## Precision for YEAR 1.867e+04 1.839e+04
## Rho for YEAR 2.382e-03 6.981e-01
## 0.025quant 0.5quant
## size for the nbinomial observations (overdispersion) 1.269e+00 2.095e+00
## Precision for YEAR 1.266e+03 1.325e+04
## Rho for YEAR -9.876e-01 5.352e-03
## 0.975quant
## size for the nbinomial observations (overdispersion) 3.498e+00
## Precision for YEAR 6.707e+04
## Rho for YEAR 9.875e-01
##
## Expected number of effective parameters(std dev): 3.01(0.01292)
## Number of equivalent replicates : 20.60
##
## Deviance Information Criterion: 323.15
## Effective number of parameters: 3.831
##
## Marginal Likelihood: -170.46
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
ad.test(modF3F5$cpo$pit, distr.fun = punif)$p.value
## AD
## 0.1765
-mean(log(modF3F5$cpo$cpo))
## [1] 2.603
dfF3F5 = data.frame(Year = annual$YEAR, obs = annual$F3F5, modF3F5$summary.fitted.values)
cor(dfF3F5$obs, dfF3F5$mean)
## [1] 0.4098
limits = aes(ymax = dfF3F5$X0.975, ymin = dfF3F5$X0.025quant)
p8d = ggplot(dfF3F5, aes(x = obs, y = mean)) + geom_point() + geom_smooth(method = "lm") +
geom_abline(slope = 1, col = "gray") + theme_bw() + geom_linerange(limits) +
scale_y_continuous(limits = c(min(dfF3F5$obs), max(dfF3F5$obs))) + scale_x_continuous(limits = c(min(dfF3F5$obs),
max(dfF3F5$obs))) + xlab("Observed Number of Reports") + ylab("Predicted Number of Reports") +
ggtitle("F3-F5 Tornadoes")
mGOM = data.frame(x = modF3F5$marginals.fixed$FebGOM[, 1], y = modF3F5$marginals.fixed$FebGOM[,
2])
mGOM$x = (exp(mGOM$x) - 1) * 100
mGAK = data.frame(x = modF3F5$marginals.fixed$FebGAK[, 1], y = modF3F5$marginals.fixed$FebGAK[,
2])
mGAK$x = (exp(mGAK$x) - 1) * 100
p7d = ggplot(mGOM, aes(x, y)) + geom_line(col = "red") + theme_bw() + geom_line(aes(x,
y), data = mGAK, col = "blue") + xlab(expression(paste("% Change in Number of Reports /",
degree, "C"))) + ylab("Posterior Density") + geom_vline(x = 0, col = "gray") +
scale_x_continuous(limits = c(-100, 300)) + scale_y_continuous(limits = c(0,
4.3)) + ggtitle("F3-F5 Tornadoes")
& \multicolumn{2}{c}{F0F5} & \multicolumn{2}{c}{F1F5} & \multicolumn{2}{c}{F2F5} & \multicolumn{2}{c}{F3F5} \\
Statistic & Mean (s.d.) & CI (95\%) & Mean (s.d.) & CI (95\%) & Mean (s.d.) & CI (95\%) & Mean (s.d.) & CI (95\%) \ \hline
\( \beta_{WCA} \) & 0.374 (0.181) & (0.018, 0.729) & 0.492(0.198) & (0.104,0.883) & 0.693(0.261) & (0.180,1.208) & 0.443(0.391) & (-0.323,1.2167)\
\( \beta_{GAK} \) & -0.163(0.095) & (-0.350,0.025) & -0.246(0.107) & (-0.456,-0.036) & -0.335(0.137) & (-0.606,-0.066) & -0.617(0.199) & (-1.012,-0.230)\
\( n \) & 8.314(1.8148) & (5.2649,12.3527) & 5.514(1.0820) & (3.6600,7.9890) & 4.407(1.1088) & (2.5937, 6.9144) & 2.171(0.5730) & (1.2690,3.4980)\
\( \phi \) & 0.9475(0.03696) & (0.8531,0.9918) & -0.001(0.6980) & (-0.9873,0.9877) & 0.885(0.1301) & (0.5153,0.9936) & 0.003(0.6981) & (-0.9876,0.9875)\
ENEP & 12.11(3.512) & & 3.034(0.0435) & & 6.25(3.375) & & 3.01(0.1292)\
NER & 5.12 & & 20.43 & & 9.92 & & 20.60\
DIC & 600.98 & & 530.95 & & 440.21 & & 323.15\
ENP & 13.03 & & 3.85 & & 8.806 & & 3.832\
ML & -329.40 & & -278.97 & & -237.64 & & -170.46\
AD \( p \)-value & 0.4630 & & 0.9285 & & 0.51767 & & 0.1766\
log score & 4.825 & & 4.2723 & & 3.5371 & & 2.6029\
\( r(o, p) \) in sample & 0.88 & & 0.45 & & 0.63 & & 0.41\
\( r(o, p) \) out of sample & 0.78 & & 0.36 & & 0.42 & & 0.31\
multiplot(p7a, p7b, p7c, p7d, cols = 2)
FIGURE 7: Posterior densities for the covariate parameters. The blue line is the parameter for the Gulf of Alaska (GAK) SST covariate and the red line is the parameter for the western Caribbean (WCA) SST covariate. The densities are plotted for the percent change in the number of reports per $\circ$C.
multiplot(p8a, p8b, p8c, p8d, cols = 2)
FIGURE 8: Observed versus predicted annual tornado report counts by F-scale group.
formula1 = F4F5 ~ FebGOM + FebGAK + f(YEAR, model = "ar1")
modF4F5 = inla(formula1, family = "nbinomial", data = annual, control.compute = list(dic = TRUE,
cpo = TRUE), control.predictor = list(link = 1))
summary(modF4F5)
ad.test(modF4F5$cpo$pit, distr.fun = punif)$p.value
-mean(log(modF4F5$cpo$cpo))
dfF4F5 = data.frame(Year = annual$YEAR, obs = annual$F4F5, modF4F5$summary.fitted.values)
cor(dfF4F5$obs, dfF4F5$mean)
formula1 = F4F5T ~ FebGOM + FebGAK + f(YEAR, model = "iid")
modF4F5T = inla(formula1, family = "binomial", data = annual, control.compute = list(dic = TRUE,
cpo = TRUE), control.predictor = list(link = 1))
summary(modF4F5T)
ad.test(modF4F5T$cpo$pit, distr.fun = punif)$p.value
-mean(log(modF4F5T$cpo$cpo))
dfF4F5T = data.frame(Year = annual$YEAR, obs = annual$F4F5T, modF4F5T$summary.fitted.values)
cor(dfF4F5T$obs, dfF4F5T$mean)
Out of sample prediction.
pre = numeric()
for (i in 1:62) {
annual2 = annual
annual2$F2F5[i] = NA
formula1 = F2F5 ~ FebGOM + FebGAK + f(YEAR, model = "ar1")
modF2F5 = inla(formula1, family = "nbinomial", data = annual2, control.predictor = list(link = 1))
pre = c(pre, modF2F5$summary.fitted.values$mean[i])
}
cor(pre, annual$F2F5)
Out-of-Sample Skill
F0F5 0.785
F1F5 0.355
F2F5 0.416
F3F5 0.314
PC of the two covariates. No improvement.
xx = princomp(~FebGAK + FebGOM, data = annual)
annual$PC1 = xx$scores[, 1]
annual$PC2 = xx$scores[, 2]
formula1 = F4F5 ~ PC1 + PC2 + f(YEAR, model = "ar1")
modF4F5 = inla(formula1, family = "nbinomial", data = annual, control.compute = list(dic = TRUE,
cpo = TRUE), control.predictor = list(link = 1))
summary(modF4F5)
ad.test(modF4F5$cpo$pit, distr.fun = punif)$p.value
-mean(log(modF4F5$cpo$cpo))
dfF4F5 = data.frame(Year = annual$YEAR, obs = annual$F4F5, modF4F5$summary.fitted.values)
cor(dfF4F5$obs, dfF4F5$mean)
Model using near-city tornado density as predictand. Determine near-city tornado density and use a beta likelihood.
Torn3.ppp = as.ppp(Torn3)
unzip("ci08au12.zip")
ll = "+proj=longlat +datum=NAD83"
US = readOGR(dsn = ".", layer = "ci08au12", p4s = ll)
USt = spTransform(US, CRS(proj4string(Torn3)))
USt = subset(USt, USt$POP_1990 > 1000)
US.ppp = as.ppp(USt)
W = as.owin(Torn3.ppp)
CP.ppp = US.ppp[W]
Z = distmap(CP.ppp)
m2km = 0.001
km2 = 1e+08
dist = numeric()
rho = numeric()
Year = numeric()
rhoCity = numeric()
All = numeric()
rhoCountry = numeric()
rhoCountryhi = numeric()
rhoCountrylo = numeric()
rhoCityhi = numeric()
rhoCitylo = numeric()
beginYear = 1950
endYear = 2011
DT = 1
TornA = subset(Torn3, Torn3$FSCALE >= 2)
TornA.ppp = as.ppp(TornA["YEAR"])
for (yr in seq(beginYear, endYear, 1)) {
Torn4.ppp = TornA.ppp[TornA.ppp$marks > (yr - DT) & TornA.ppp$marks <= yr]
All = c(All, Torn4.ppp$n)
rhat = rhohat(unmark(Torn4.ppp), Z, bw = 0.25, method = "transform")
dist = c(dist, rhat$Z * m2km)
rho = c(rho, rhat$rho * km2)
rhoCity = c(rhoCity, rhat$rho[1] * km2)
rhoCityhi = c(rhoCityhi, rhat$hi[1] * km2)
rhoCitylo = c(rhoCitylo, rhat$lo[1] * km2)
rhoCountry = c(rhoCountry, rhat$rho[length(rhat$rho)] * km2)
rhoCountryhi = c(rhoCountryhi, rhat$hi[length(rhat$rho)] * km2)
rhoCountrylo = c(rhoCountrylo, rhat$lo[length(rhat$rho)] * km2)
Year = c(Year, rep(yr, length(rhat$Z)))
}
annual2 = data.frame(annual, rhoCity = rhoCity, rhoCountry = rhoCountry, rhoCountryhi = rhoCountryhi,
rhoCountrylo = rhoCountrylo, rhoCityhi = rhoCityhi, rhoCitylo = rhoCitylo)
formula1 = rhoCountry ~ FebGOM + FebGAK + f(YEAR, model = "ar1")
mod = inla(formula1, family = "beta", data = annual2, control.compute = list(dic = TRUE,
cpo = TRUE), control.predictor = list(link = 1))
summary(mod)
ad.test(mod$cpo$pit, distr.fun = punif)$p.value
-mean(log(mod$cpo$cpo))
df = data.frame(Year = annual2$YEAR, obs = annual2$rhoCity, mod$summary.fitted.values)
cor(df$obs, df$mean)
Sample from marginals. Not used.
sampleGOM = inla.rmarginal(1000, PD.GOM)
h = inla.hyperpar(results)
summary(h)
plot(h)
Gridded temperature data. Not used.
source("ReadGriddedData.R")
x = finaldata$TempLand
y = subset(x, year >= 1950 & lat > 20 & month == 4)
y = y[!is.na(y$TempLand), ]