Tyler Fricker, James B. Elsner, and Thomas H. Jagger
Set working directory and load packages.
setwd("~/Dropbox/Tyler")
suppressPackageStartupMessages(library("ggplot2"))
suppressPackageStartupMessages(library("raster"))
suppressPackageStartupMessages(library("ggmap"))
suppressPackageStartupMessages(library("dplyr"))
suppressPackageStartupMessages(library("tidyr"))
suppressPackageStartupMessages(library("rgdal"))
suppressPackageStartupMessages(library("rgeos"))
suppressPackageStartupMessages(library("ggthemes"))
suppressPackageStartupMessages(library("scales"))
suppressPackageStartupMessages(library("broom"))
suppressPackageStartupMessages(library("RColorBrewer"))
suppressPackageStartupMessages(library("scales"))
suppressPackageStartupMessages(library("broom"))
suppressPackageStartupMessages(library("INLA"))
Data from the Storm Prediction Center (SPC) http://www.spc.noaa.gov/gis/svrgis/.
#download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip",
"tornado.zip", mode = "wb")
#unzip("tornado.zip")
Read the tornado data.
TornL = readOGR(dsn = "torn", layer = "torn",
stringsAsFactors = FALSE)
## OGR data source with driver: ESRI Shapefile
## Source: "torn", layer: "torn"
## with 60114 features
## It has 22 fields
TornL$Date = as.Date(TornL$date)
TornL$Year = TornL$yr
TornL$Length = TornL$len * 1609.34
TornL$Width = TornL$wid * .9144
TornL$AreaPath = TornL$Length * TornL$Width
TornL$AreaVortex = pi * (TornL$Width/2)^2
TornL$Ma = factor(month.abb[TornL$mo], levels = month.abb[1:12])
Set study period and remove tornadoes that occurred in Alaska, Hawaii, and Puerto Rico. Add injuries and deaths to make a casualty column.
TornL = TornL[TornL$Year >= 2007, ]
TornL = TornL[TornL$st != "HI" & TornL$st != "AK" & TornL$st != "PR", ]
TornL$cas = TornL$inj + TornL$fat
``Casualty’’ refers to either human death or injury as a direct consequence of tornado activity according to the NWS .
Over the conterminous United States during the period 2007-2015 there were 10,807 tornadoes. Of these, 872 are linked to 12,972 casualties. Only eight percent of all casualties lead to death.
df = as.data.frame(TornL) %>%
filter(cas >0)
dim(df)
## [1] 872 30
sum(df$cas)
## [1] 12972
sum(df$fat)/sum(df$cas) * 100
## [1] 7.940179
Distribution of casualties. Most casualty-producing tornadoes result in only a few casualties, while relatively few casualty-producing tornadoes result in many casualties.
df = data.frame(table(TornL$cas[TornL$cas > 0]))
df$Size = as.integer(df$Var1)
ggplot(df, aes(x = Size, y = Freq)) +
scale_x_log10() + scale_y_log10() +
geom_point() +
xlab(expression(paste("Casualties"))) +
ylab("Number of Tornadoes")
Casualties and EF category. On average a casualty-producing EF0 tornado results in 1.9 casualties, a casualty-producing EF1 tornado results in 2.7 casualties, and a casualty-producing EF2 tornado results in 5.7 casualties. The big jumps in the expected number of casualties occur for the highest rated tornadoes. On average a casualty-producing EF3 tornado results in 18 casualties, a casualty-producing EF4 tornado results in 90 casualties, and a casualty-producing EF5 tornado results in 255 casualties.
df = as.data.frame(TornL[TornL$cas > 0,]) %>%
group_by(mag) %>%
summarize(cas = sum(cas),
nT = n(),
fat = sum(fat),
inj = sum(inj),
avgcas = cas/nT)
df
## # A tibble: 6 x 6
## mag cas nT fat inj avgcas
## <int> <int> <int> <int> <int> <dbl>
## 1 0 128 68 2 126 1.882353
## 2 1 687 253 28 659 2.715415
## 3 2 1767 312 106 1661 5.663462
## 4 3 3248 176 227 3021 18.454545
## 5 4 4844 54 333 4511 89.703704
## 6 5 2298 9 334 1964 255.333333
Casualties by location and size.
df = as.data.frame(TornL) %>%
filter(cas > 0) %>%
mutate(label = paste("EF", mag, sep = ""))
states.df = map_data("state") %>%
filter(region != 'alaska', region != 'district of columbia')
ggplot(states.df, aes(x = long, y = lat, group = group)) +
geom_polygon(fill = "white") +
geom_path(color = "gray85") +
coord_map(project = "polyconic") +
geom_point(aes(x = slon, y = slat, size = cas, group = om),
alpha = .3, data = df) +
xlab("") + ylab("") +
facet_wrap(~ label, ncol = 2) +
theme(panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom") +
scale_size_continuous("Number of\n Casualties")
Tornado casualties during the period (2007–2015) were most common across the Southeast. Alabama had the most casualties (2,749) followed by Missouri (1,521), Oklahoma (1,399), Texas (962), and Mississippi (942). Rounding out the top ten are Arkansas (717), Georgia (618), North Carolina (594), Tennessee (526) and Illinois (408). Eight of the top eleven are states in the Southeast including Kentucky with 404 casualties. Massachusetts, ranked 14th, stands out in New England with 205 casualties from two tornadoes.
df = as.data.frame(TornL) %>%
filter(cas > 0) %>%
group_by(st) %>%
summarize(nT = n(),
nC = sum(cas),
ratio = nC/nT) %>%
arrange(desc(nC))
states.df = map_data("state") %>%
filter(region != 'alaska', region != 'district of columbia') %>%
mutate(st = state.abb[match(region, tolower(state.name))]) %>%
left_join(df, by = "st") %>%
arrange(order)
ggplot(states.df, aes(x = long, y = lat, group = group, fill = log10(nC))) +
geom_polygon() +
geom_path(color = "gray75") +
coord_map(project = "polyconic") +
theme(panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom") +
# labs(title = "Tornado Casualties\n [2007-2015]") +
xlab("") + ylab("") +
scale_fill_continuous(low = "#fee6ce", high = "#e6550d",
"Number of\n Casualties",
breaks = 1:3,
labels = c("10", "100", "1000"))
Create tornado paths and extract the average population density (people per sq. km under the path). The population data are on a raster. With the extract method and weights = TRUE the calculation is done based on percentage of cell under the path. This takes a long time. Multiply the average population density by the path area (L x W) to estimate the number of people under the path.
Begin with The Gridded Population of the World Volume 3 data (2000).
#TornLs = subset(TornL, date == "2011-04-27")
TornP = gBuffer(TornL, byid = TRUE, width = TornL$Width/2, capStyle = "FLAT")
Pop = raster("/Users/tylerfricker/Dropbox/Tyler/usadens/usads00g/w001001.adf")
Popp = projectRaster(Pop, crs = proj4string(TornP))
TornP$popD = as.vector(raster::extract(Popp, TornP, fun = mean, na.rm = TRUE,
weights = FALSE, normalizeWeights = FALSE))
TornP$pop = TornP$popD * TornP$AreaPath/10^6
Torn.df = as.data.frame(TornP)
This can also be done with The Gridded Population of the World Volume 4 data (2010). Because the GPW, v4 is at a finer resolution, we use an extent of the U.S. and crop.
#TornP = gBuffer(TornL, byid = TRUE, width = TornL$Width/2, capStyle = "FLAT")
#Pop = raster("gpw-v4-population-density_2010.tif")
#ext = raster::extent(c(-125, -67, 24, 50))
#Pop2 = crop(Pop, ext)
#Popp = projectRaster(Pop2, crs = proj4string(TornP))
#TornP$popD = as.vector(raster::extract(Popp, TornP, fun = mean, na.rm = TRUE,
# weights = FALSE, normalizeWeights = FALSE))
#TornP$pop = TornP$popD * TornP$AreaPath/10^6
#Torn.df = as.data.frame(TornP)
Missing population density
MissP = Torn.df[is.na(Torn.df$popD), ]
ggplot(states.df, aes(x = long, y = lat, group = group)) +
geom_polygon(fill = "white") +
geom_path(color = "gray85") +
coord_map(project = "polyconic") +
geom_point(aes(x = slon, y = slat, size = cas, group = om),
alpha = .3, data = MissP) +
xlab("") + ylab("") +
theme(panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom") +
scale_size_continuous("Number of\n Casualties")
Population density distribution. For the set of 872 tornadoes with at least one casualty the median population density per tornado is 23.6 people per square kilometer with an inter-quartile range between 10.7 and 70.2 people per square kilometer
Torn.df %>%
filter(cas > 0) %>%
summarize(nT = n(),
avgPopD = mean(popD, na.rm = TRUE),
medianPopD = median(popD, na.rm = TRUE),
q25 = quantile(popD, probs = .25, na.rm = TRUE),
q75 = quantile(popD, probs = .75, na.rm = TRUE),
maxPopD = max(popD, na.rm = TRUE),
minPopD = min(popD, na.rm = TRUE))
Torn.df %>%
filter(cas > 0) %>%
arrange(desc(popD))
Torn.df %>%
filter(cas > 0) %>%
group_by(mag) %>%
summarize(nT = n(),
nP = round(sum(pop, na.rm = TRUE), 0),
avgP = round(mean(pop, na.rm = TRUE), 0),
medP = round(median(pop, na.rm = TRUE), 0),
avgPD = mean(popD, na.rm = TRUE),
medPD = median(popD, na.rm = TRUE))
df = Torn.df %>%
filter(popD > 0 & cas > 0) %>%
group_by(mag) %>%
summarize(popD = sum(popD),
nT = n(),
avgpopD = popD/nT)
df = df[1:6,]
ggplot(Torn.df[Torn.df$popD > 0 & Torn.df$cas > 0,], aes(log(popD))) +
geom_histogram(binwidth = .5, color = "white") +
xlab("Population Density") +
ylab("Number of Tornadoes")
#A = A + ggtitle("A") +
# theme(plot.title=element_text(hjust=0))
Empirical model from Table 3-1 of NRC 2007. Percent area by EF rating for each EF category. Threshold wind speeds (m/s) are a lower bound 3 sec gusts on the operational EF Scale (Table 2-1 of NRC2007). Area * 1000 = volume in cubic meters.
perc = c(1, 0, 0, 0, 0, 0,
.772, .228, 0, 0, 0, 0,
.616, .268, .115, 0, 0, 0,
.529, .271, .133, .067, 0, 0,
.543, .238, .131, .056, .032, 0,
.538, .223, .119, .07, .033, .017)
percM = matrix(perc, ncol = 6, byrow = TRUE)
Compute mass-specific kinetic energy (e), total kinetic energy (E), and atmosphere moment of tornado (AMT) per tornado. Mass-specific kinetic energy (e) is a function of EF rating only (6 values). The equation for energy dissipation (atmosphere moment) is \(E = \frac{1}{2}A_v l \bar{\rho} \sum_{j=0}^{J} w_j v_j^{2},\) where \(A_v\) is the area of the vortex (\(\pi R^{2}\)), \(l\) is the path length, \(\bar{\rho}\) is air density, \(v_j\) is the midpoint wind speed for each rating, and \(w_j\) is the corresponding fraction of path area. With no upper bound on the EF5 wind speeds, the midpoint wind speed is set at 97 m~s\(^{-1}\) (7.5 m~s\(^{-1}\) above the threshold wind speed consistent with the EF4 midpoint speed relative to its threshold).
threshW = c(29.06, 38.45, 49.62, 60.8, 74.21, 89.41)
midptW = c(diff(threshW)/2 + threshW[-length(threshW)], threshW[length(threshW)] + 7.5)
ef = Torn.df$mag + 1
EW2 = numeric()
for(i in 1:length(ef)) EW2[i] = midptW^2 %*% percM[ef[i], ]
height = 1000
Torn.df = Torn.df %>%
mutate(e = .5 * EW2,
E = e * AreaPath * height,
AMT = e * AreaVortex * Length)
Distribution of AMT, atmospheric moment of tornadoes.
ggplot(Torn.df[Torn.df$cas > 0,], aes(AMT)) +
geom_histogram(binwidth = .5, color = "white") +
scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x))) +
xlab("Atmosphere Moment (J)") +
ylab("Number of Tornadoes")
#B = B + ggtitle("B") +
# theme(plot.title=element_text(hjust=0))
For the set of 872 tornadoes with at least one casualty the median energy dissipation is .410 terajoules (TJ) with an inter-quartile range between .035 and 4.58 TJ.
Torn.df %>%
filter(cas > 0) %>%
summarize(nT = n(),
avgAMT = mean(AMT, na.rm = TRUE),
medianAMT = median(AMT, na.rm = TRUE),
q25 = quantile(AMT, probs = .25, na.rm = TRUE),
q75 = quantile(AMT, probs = .75, na.rm = TRUE),
maxAMT = max(AMT, na.rm = TRUE),
minAMT = min(AMT, na.rm = TRUE))
## nT avgAMT medianAMT q25 q75 maxAMT minAMT
## 1 872 1.49478e+13 409513816772 35014876010 4.58442e+12 1.45533e+15 2408332
Torn.df %>%
filter(cas > 0) %>%
group_by(mag) %>%
summarize(nT = n(),
nAMT = round(sum(AMT, na.rm = TRUE), 0),
avgAMT = round(mean(AMT, na.rm = TRUE), 0),
medAMT = round(median(AMT, na.rm = TRUE), 0))
## # A tibble: 6 x 5
## mag nT nAMT avgAMT medAMT
## <int> <int> <dbl> <dbl> <dbl>
## 1 0 68 4.815863e+12 7.082151e+10 1.860813e+09
## 2 1 253 2.420207e+14 9.566034e+11 4.539786e+10
## 3 2 312 1.600533e+15 5.129913e+12 4.892114e+11
## 4 3 176 4.590518e+15 2.608249e+13 7.083210e+12
## 5 4 54 4.891454e+15 9.058249e+13 2.802787e+13
## 6 5 9 1.705142e+15 1.894603e+14 7.199459e+13
Population and casualties.
df = Torn.df[Torn.df$cas>0,]
ggplot(df, aes(x = popD, y = cas)) +
geom_point() +
scale_x_log10() +
scale_y_log10() +
xlab("Population Density") +
ylab("Number of Casualties")
#A = A + ggtitle("A") +
# theme(plot.title=element_text(hjust=0))
Energy dissipation and casualties.
ggplot(df, aes(x = AMT, y = cas)) +
geom_point() +
scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x))) +
scale_y_log10() +
xlab("Atmosphere Moment (J)") +
ylab("Number of Casualties") +
geom_label(aes(label = "Tuscaloosa-Birmingham (2011-04-27)", x = AMT, y = cas),
data = df[511, ],
hjust = 1.05,
vjust = .75)
#B = B + ggtitle("B") +
# theme(plot.title=element_text(hjust=0))
Mean and variance of the casualty counts.
Torn.df %>%
filter(cas > 0) %>%
summarize(meanCas = mean(cas),
varCas = var(cas),
ratio = varCas/meanCas)
## meanCas varCas ratio
## 1 14.87615 5850.359 393.2711
For the set of tornadoes with at least one casualty, the mean and variance of the counts are 14.9 and 5850, respectively suggesting a negative binomial model for counts expressed as \[ C \sim \hbox{NegBin}(\hat \mu, n) \] where \(\hat \mu\) is the per tornado casualty rate conditional on at least one casualty and \(n\) is the size parameter for the negative binomial distribution.
The logarithm of casualty rate (given at least one casualty) is linearly related to the logarithm of the population density (\(P\)) and the logarithm of the energy dissipation (\(E\)). The coefficient \(\hat \alpha\) is the population elasticity and the coefficient \(\hat \beta\) is the energy elasticity and \(\hat \nu\) is the offset parameter. \[ \log(\hat \mu) = \hat \alpha\,\log(P) + \hat \beta\,\log(E) + \hat v \]
A 1% increase in \(P\) leads to an \(\alpha\)% increase in the casualty rate. For \(0 < \hat \alpha < 1\) the relationship is said to be “inelastic.”
df = Torn.df %>%
filter(cas > 0)
formula = log(cas) ~ log(popD) + log(AMT)
model = lm(formula, data = df)
summary(model) # model is not adequate as the residuals are positively skewed.
formula = cas ~ log(popD) + log(AMT)
model2 = glm(formula, data = df, family = poisson)
summary(model2)
pchisq(21614, 869, lower.tail = FALSE) # model is not adequate as the p-value = 0.
library(MASS)
model3 = glm.nb(formula, data = df, link = log)
summary(model3)
pchisq(917.79, 869, lower.tail = FALSE) # model is adequate p-value = .122.
INLA models
control = list(predictor = list(compute = TRUE, link = 1))
df = Torn.df %>%
filter(cas > 0)
formula = cas ~ log(popD) + log(AMT)
modelI = inla(formula, family = "nbinomial", data = df,
control.predictor = control$predictor)
cor(modelI$summary.fitted.values$mean, log(df$cas))^2
## [1] 0.3678431
modelI$summary.fixed$mean[1]
## [1] -7.00155
modelI$summary.fixed$mean[2]
## [1] 0.3505155
modelI$summary.fixed$mean[3]
## [1] 0.2918532
modelI$summary.fixed$"0.025quant"[1]
## [1] -7.59495
modelI$summary.fixed$"0.975quant"[1]
## [1] -6.409765
modelI$summary.fixed$"0.025quant"[2]
## [1] 0.299609
modelI$summary.fixed$"0.975quant"[2]
## [1] 0.4016938
modelI$summary.fixed$"0.025quant"[3]
## [1] 0.2718688
modelI$summary.fixed$"0.975quant"[3]
## [1] 0.3118952
The coefficient on log(AMT) is .2918532 this means that for a 100% increase in tornado energy (doubling) we expect to see a 2^(.2918532) = 1.224 or 22% increase in the number of casualties. The coefficient on log(popD) is .3505154 this means that for a doubling of the number of people exposed to the damaging winds we expect to see a 2^(.3505154) = 1.275 or a 28% increase in the number of injuries.
Observed number of casualties versus predicted rate.
df2 = data.frame(obs = df$cas, pre = modelI$summary.fitted.values$mean)
cor(df2$obs, df2$pre)
ggplot(df2, aes(x = obs, y = pre)) +
geom_point() +
geom_abline() +
geom_smooth(method = lm, color = "red") +
xlab("Observed Number of Casualties") +
ylab("Predicted Rate") +
scale_x_log10() + scale_y_log10()
Model residuals
data.frame(om = df$om, obs = df$cas, pre = modelI$summary.fitted.values$mean) %>%
mutate(differ = obs - pre) %>%
arrange(desc(pre)) %>%
head()
## om obs pre differ
## 1 462224 8 110.37485 -102.37485
## 2 314625 1564 101.77215 1462.22785
## 3 317910 5 75.17520 -70.17520
## 4 366421 38 74.43432 -36.43432
## 5 309488 217 71.39030 145.60970
## 6 323440 203 68.81324 134.18676
df[df$om == 462224,]
## om yr mo dy date time tz st stf stn mag inj fat loss
## 715 462224 2013 5 31 2013-05-31 18:50:00 3 MO 29 0 3 8 0 60
## closs slat slon elat elon len wid fc Date Year Length
## 715 0 38.69 -90.75 38.77 -90.18 31.71 1760 0 2013-05-31 2013 51032.17
## Width AreaPath AreaVortex Ma cas popD pop e
## 715 1609.344 82128319 2034172 May 8 718.4043 59001.34 919.4754
## E AMT
## 715 7.551497e+13 9.544909e+13
df[df$om == 314625,]
## om yr mo dy date time tz st stf stn mag inj fat loss
## 511 314625 2011 4 27 2011-04-27 15:43:00 3 AL 1 103 4 1500 64 2450
## closs slat slon elat elon len wid fc Date Year Length
## 511 0 33.03 -87.94 33.63 -86.74 80.68 2600 0 2011-04-27 2011 129841.6
## Width AreaPath AreaVortex Ma cas popD pop e
## 511 2377.44 308690497 4439244 Apr 1564 130.819 40382.59 974.4311
## E AMT
## 511 3.007976e+14 5.616605e+14
The largest over prediction by the model is the Weldon Spring-Northern St. Louis County, MO EF3 tornado of May 31, 2013. Based on the estimated number of people in harm’s way and the estimated energy dissipation the model predicts upwards of 110 casualties. Only eight were reported. The largest under prediction by the model is the Tuscaloosa AL EF4 tornado of April 27, 2011. Again based on the number of people in harm’s way and the energy dissipation the model predicts 102 casualties. The official report has 1564 casualties.
Removing these two storms from the data set and refitting the model.
df = Torn.df %>%
filter(cas > 0, om != 314625, om != 462224)
formula = cas ~ log(popD) + log(AMT)
modelI = inla(formula, family = "nbinomial", data = df,
control.predictor = control$predictor)
cor(modelI$summary.fitted.values$mean, log(df$cas))
## [1] 0.614171
2^modelI$summary.fixed$mean[2] - 1
## [1] 0.2643165
2^modelI$summary.fixed$mean[3] - 1
## [1] 0.2166747
Removing all tornadoes during 2011 and refitting the model. This drops the population elasticity to 26% and the energy elasticity to 22%.
df = Torn.df %>%
filter(cas > 0, Year != 2011)
formula = cas ~ log(popD) + log(AMT)
modelI = inla(formula, family = "nbinomial", data = df,
control.predictor = control$predictor)
cor(modelI$summary.fitted.values$mean, log(df$cas))
## [1] 0.5634977
2^modelI$summary.fixed$mean[2] - 1
## [1] 0.2267989
2^modelI$summary.fixed$mean[3] - 1
## [1] 0.1920019
Compute the elasticities by year. With the negative binomial model this can be problematic as a few large events can spoil the fit.
alpha = numeric()
beta = numeric()
alphaL = numeric()
alphaH = numeric()
betaL = numeric()
betaH = numeric()
for(yr in 2007:2015){
df1 = Torn.df[Torn.df$cas > 0 & Torn.df$Year == yr, ]
modelI = inla(formula, family = "nbinomial", data = df1)
alpha = c(alpha, modelI$summary.fixed$mean[2])
beta = c(beta, modelI$summary.fixed$mean[3])
alphaL = c(alphaL, modelI$summary.fixed$"0.025quant"[2])
betaL = c(betaL, modelI$summary.fixed$"0.025quant"[3])
alphaH = c(alphaH, modelI$summary.fixed$"0.975quant"[2])
betaH = c(betaH, modelI$summary.fixed$"0.975quant"[3])
}
out = data.frame(Year = 2007:2015,
ElasPop = (2^alpha - 1) * 100,
ElasEng = (2^beta - 1) * 100,
ElasPopL = (2^alphaL - 1) * 100,
ElasPopH = (2^alphaH - 1) * 100,
ElasEngL = (2^betaL - 1) * 100,
ElasEngH = (2^betaH - 1) * 100)
ggplot(out, aes(Year, ElasPop)) +
geom_point() +
geom_errorbar(aes(ymin = ElasPopL, ymax = ElasPopH), width = .1) +
ylab("Population Elasticity of Casualties") +
scale_y_continuous(limits = c(-10, 80)) +
scale_x_continuous(breaks = 2007:2015)
#A = A + ggtitle("A") +
# theme(plot.title=element_text(hjust=0))
ggplot(out, aes(Year, ElasEng)) +
geom_point() +
geom_errorbar(aes(ymin = ElasEngL, ymax = ElasEngH), width = .1) +
ylab("Energy Elasticity of Casualties") +
scale_y_continuous(limits = c(-10, 80)) +
scale_x_continuous(breaks = 2007:2015)
#B = B + ggtitle("B") +
# theme(plot.title=element_text(hjust=0))
Refitting the model to only fatalities. This drops the population elasticity to 20% and the energy elasticity to 22%. Thus, the tornado death rate is more sensitive to changes in energy than to changes in population on average.
df = Torn.df %>%
filter(fat > 0)
formula = fat ~ log(popD) + log(AMT)
modelIf = inla(formula, family = "nbinomial", data = df,
control.predictor = control$predictor)
2^modelIf$summary.fixed$mean[2] - 1
## [1] 0.2011918
2^modelIf$summary.fixed$mean[3] - 1
## [1] 0.2167051
Assess effect around cities. Buffer cities by 30km (within the average distance traveled to work).
map("usa")
data(us.cities)
map.cities(x = us.cities)
coordinates(us.cities) = cbind(us.cities$long, us.cities$lat)
proj4string(us.cities) = CRS("+init=epsg:4326")
cities = spTransform(us.cities, CRS("+init=epsg:3857"))
BC = gBuffer(cities, byid = TRUE, width = 30 * 1000)
BC = BC[BC$country.etc != "AK" & BC$country.etc != "HI",]
spT = spTransform(BC, CRS = CRS("+proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0 "))
TornCities = over(TornP, spT)
TornCities$cas = Torn.df$cas
TornCities$popD = Torn.df$popD
TornCities$AMT = Torn.df$AMT
TornCities.df = TornCities[complete.cases(TornCities),] # Subset by only the cities
Refit the Model. There is no significant difference.
df = TornCities.df %>%
filter(cas >0)
control = list(predictor = list(compute = TRUE, link = 1))
formula = cas ~ log(popD) + log(AMT)
modelI = inla(formula, family = "nbinomial", data = df,
control.predictor = control$predictor)
cor(modelI$summary.fitted.values$mean, log(df$cas))^2
## [1] 0.4032755
modelI$summary.fixed$mean[1]
## [1] -7.442188
modelI$summary.fixed$mean[2]
## [1] 0.2003991
modelI$summary.fixed$mean[3]
## [1] 0.3438552
modelI$summary.fixed$"0.025quant"[1]
## [1] -8.757317
modelI$summary.fixed$"0.975quant"[1]
## [1] -6.113579
modelI$summary.fixed$"0.025quant"[2]
## [1] 0.07960542
modelI$summary.fixed$"0.975quant"[2]
## [1] 0.3216087
modelI$summary.fixed$"0.025quant"[3]
## [1] 0.3061153
modelI$summary.fixed$"0.975quant"[3]
## [1] 0.3817766
2^modelI$summary.fixed$mean[2] - 1
## [1] 0.1490162
2^modelI$summary.fixed$mean[3] - 1
## [1] 0.2691435
Assess daytime and nightime tornadoes and refit the model. Use Lubridate to calculate the hour of the tornado. There is no significant difference (in either).
library("lubridate")
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
df = Torn.df
df$datetime = as.POSIXct(paste(df$date, df$time), tz = "America/New_York")
df$hr = hour(df$datetime)
#df = df %>%
# filter(hr>=12 & hr<= 24 & cas >0)
df = df %>%
filter(hr>=1 & hr<= 12 & cas >0)
control = list(predictor = list(compute = TRUE, link = 1))
formula = cas ~ log(popD) + log(AMT)
modelI = inla(formula, family = "nbinomial", data = df,
control.predictor = control$predictor)
modelI$summary.fixed$mean[1]
## [1] -6.645119
modelI$summary.fixed$mean[2]
## [1] 0.3401922
modelI$summary.fixed$mean[3]
## [1] 0.2674894
modelI$summary.fixed$"0.025quant"[1]
## [1] -7.771679
modelI$summary.fixed$"0.975quant"[1]
## [1] -5.528782
modelI$summary.fixed$"0.025quant"[2]
## [1] 0.2559633
modelI$summary.fixed$"0.975quant"[2]
## [1] 0.4258943
modelI$summary.fixed$"0.025quant"[3]
## [1] 0.2304236
modelI$summary.fixed$"0.975quant"[3]
## [1] 0.3049695
2^modelI$summary.fixed$mean[2] - 1
## [1] 0.2659253
2^modelI$summary.fixed$mean[3] - 1
## [1] 0.2037113
Weighted scatterplot of casualties.
df = Torn.df[Torn.df$cas>0,]
ggplot(df, aes(x = popD, y = AMT, size = cas)) +
geom_point(alpha = 0.75) +
scale_x_log10() +
scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x))) +
xlab("Population Density") +
ylab("Atmosphere Moment (J)") +
labs(size = "Casualties") +
theme(legend.position = "bottom", legend.direction = "horizontal") #+
# facet_wrap(~mag)