Population and energy elasticity of tornado casualties

Tyler Fricker, James B. Elsner, and Thomas H. Jagger

Set working directory and load packages.

setwd("~/Dropbox/Tyler")
suppressPackageStartupMessages(library("ggplot2"))
suppressPackageStartupMessages(library("raster"))
suppressPackageStartupMessages(library("ggmap"))
suppressPackageStartupMessages(library("dplyr"))
suppressPackageStartupMessages(library("tidyr"))
suppressPackageStartupMessages(library("rgdal"))
suppressPackageStartupMessages(library("rgeos"))
suppressPackageStartupMessages(library("ggthemes"))
suppressPackageStartupMessages(library("scales"))
suppressPackageStartupMessages(library("broom"))
suppressPackageStartupMessages(library("RColorBrewer"))
suppressPackageStartupMessages(library("scales"))
suppressPackageStartupMessages(library("broom"))
suppressPackageStartupMessages(library("INLA"))

Tornado data

Data from the Storm Prediction Center (SPC) http://www.spc.noaa.gov/gis/svrgis/.

#download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip",
              "tornado.zip", mode = "wb")
#unzip("tornado.zip")

Read the tornado data.

TornL = readOGR(dsn = "torn", layer = "torn", 
                stringsAsFactors = FALSE)

## OGR data source with driver: ESRI Shapefile 
## Source: "torn", layer: "torn"
## with 60114 features
## It has 22 fields

TornL$Date = as.Date(TornL$date)
TornL$Year = TornL$yr
TornL$Length = TornL$len * 1609.34
TornL$Width = TornL$wid * .9144
TornL$AreaPath = TornL$Length * TornL$Width
TornL$AreaVortex = pi * (TornL$Width/2)^2
TornL$Ma = factor(month.abb[TornL$mo], levels = month.abb[1:12])

Set study period and remove tornadoes that occurred in Alaska, Hawaii, and Puerto Rico. Add injuries and deaths to make a casualty column.

TornL = TornL[TornL$Year >= 2007, ]
TornL = TornL[TornL$st != "HI" & TornL$st != "AK" & TornL$st != "PR", ]
TornL$cas = TornL$inj + TornL$fat

Casualty Statistics

``Casualty’’ refers to either human death or injury as a direct consequence of tornado activity according to the NWS .

Over the conterminous United States during the period 2007-2015 there were 10,807 tornadoes. Of these, 872 are linked to 12,972 casualties. Only eight percent of all casualties lead to death.

df = as.data.frame(TornL) %>%
  filter(cas >0)
dim(df)

## [1] 872  30

sum(df$cas)

## [1] 12972

sum(df$fat)/sum(df$cas) * 100

## [1] 7.940179

Distribution of casualties. Most casualty-producing tornadoes result in only a few casualties, while relatively few casualty-producing tornadoes result in many casualties.

df = data.frame(table(TornL$cas[TornL$cas > 0]))
df$Size = as.integer(df$Var1)
ggplot(df, aes(x = Size, y = Freq)) +
  scale_x_log10() + scale_y_log10() +
  geom_point() + 
  xlab(expression(paste("Casualties"))) +
  ylab("Number of Tornadoes")

Casualties and EF category. On average a casualty-producing EF0 tornado results in 1.9 casualties, a casualty-producing EF1 tornado results in 2.7 casualties, and a casualty-producing EF2 tornado results in 5.7 casualties. The big jumps in the expected number of casualties occur for the highest rated tornadoes. On average a casualty-producing EF3 tornado results in 18 casualties, a casualty-producing EF4 tornado results in 90 casualties, and a casualty-producing EF5 tornado results in 255 casualties.

df = as.data.frame(TornL[TornL$cas > 0,]) %>%
  group_by(mag) %>%
  summarize(cas = sum(cas),
            nT = n(),
            fat = sum(fat),
            inj = sum(inj),
            avgcas = cas/nT)
df

## # A tibble: 6 x 6
##     mag   cas    nT   fat   inj     avgcas
##   <int> <int> <int> <int> <int>      <dbl>
## 1     0   128    68     2   126   1.882353
## 2     1   687   253    28   659   2.715415
## 3     2  1767   312   106  1661   5.663462
## 4     3  3248   176   227  3021  18.454545
## 5     4  4844    54   333  4511  89.703704
## 6     5  2298     9   334  1964 255.333333

Casualties by location and size.

df = as.data.frame(TornL) %>%
  filter(cas > 0) %>%
  mutate(label = paste("EF", mag, sep = ""))

states.df = map_data("state") %>%
  filter(region != 'alaska',  region != 'district of columbia')

ggplot(states.df, aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white") +
  geom_path(color = "gray85") +
  coord_map(project = "polyconic") + 
  geom_point(aes(x = slon, y = slat, size = cas, group = om), 
             alpha = .3, data = df) +
  xlab("") + ylab("") +
  facet_wrap(~ label, ncol = 2) +
  theme(panel.grid.minor = element_blank(), 
        panel.grid.major = element_blank(), 
        panel.background = element_blank(),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        legend.position = "bottom") +
  scale_size_continuous("Number of\n Casualties")

Tornado casualties during the period (2007–2015) were most common across the Southeast. Alabama had the most casualties (2,749) followed by Missouri (1,521), Oklahoma (1,399), Texas (962), and Mississippi (942). Rounding out the top ten are Arkansas (717), Georgia (618), North Carolina (594), Tennessee (526) and Illinois (408). Eight of the top eleven are states in the Southeast including Kentucky with 404 casualties. Massachusetts, ranked 14th, stands out in New England with 205 casualties from two tornadoes.

df = as.data.frame(TornL) %>%
  filter(cas > 0) %>%
  group_by(st) %>%
  summarize(nT = n(),
            nC = sum(cas),
            ratio = nC/nT) %>%
  arrange(desc(nC))

states.df = map_data("state") %>%
  filter(region != 'alaska',  region != 'district of columbia') %>%
  mutate(st = state.abb[match(region, tolower(state.name))]) %>%
  left_join(df, by = "st") %>%
  arrange(order)

ggplot(states.df, aes(x = long, y = lat, group = group, fill = log10(nC))) +
  geom_polygon() +
  geom_path(color = "gray75") +
  coord_map(project = "polyconic") + 
  theme(panel.grid.minor = element_blank(), 
        panel.grid.major = element_blank(), 
        panel.background = element_blank(),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        legend.position = "bottom") +
#        labs(title = "Tornado Casualties\n [2007-2015]") + 
        xlab("") + ylab("") +
  scale_fill_continuous(low = "#fee6ce", high = "#e6550d",
                        "Number of\n Casualties",
                        breaks = 1:3, 
                        labels = c("10", "100", "1000"))

Create tornado paths and extract the average population density (people per sq. km under the path). The population data are on a raster. With the extract method and weights = TRUE the calculation is done based on percentage of cell under the path. This takes a long time. Multiply the average population density by the path area (L x W) to estimate the number of people under the path.

Begin with The Gridded Population of the World Volume 3 data (2000).

#TornLs = subset(TornL, date == "2011-04-27")
TornP = gBuffer(TornL, byid = TRUE, width = TornL$Width/2, capStyle = "FLAT")
Pop = raster("/Users/tylerfricker/Dropbox/Tyler/usadens/usads00g/w001001.adf")
Popp = projectRaster(Pop, crs = proj4string(TornP))
TornP$popD = as.vector(raster::extract(Popp, TornP, fun = mean, na.rm = TRUE,
                                       weights = FALSE, normalizeWeights = FALSE))
TornP$pop = TornP$popD * TornP$AreaPath/10^6
Torn.df = as.data.frame(TornP)

This can also be done with The Gridded Population of the World Volume 4 data (2010). Because the GPW, v4 is at a finer resolution, we use an extent of the U.S. and crop.

#TornP = gBuffer(TornL, byid = TRUE, width = TornL$Width/2, capStyle = "FLAT")
#Pop = raster("gpw-v4-population-density_2010.tif")
#ext = raster::extent(c(-125, -67, 24, 50))
#Pop2 = crop(Pop, ext)
#Popp = projectRaster(Pop2, crs = proj4string(TornP))
#TornP$popD = as.vector(raster::extract(Popp, TornP, fun = mean, na.rm = TRUE,
#                                       weights = FALSE, normalizeWeights = FALSE))
#TornP$pop = TornP$popD * TornP$AreaPath/10^6
#Torn.df = as.data.frame(TornP)

Missing population density

MissP = Torn.df[is.na(Torn.df$popD), ]

ggplot(states.df, aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white") +
  geom_path(color = "gray85") +
  coord_map(project = "polyconic") + 
  geom_point(aes(x = slon, y = slat, size = cas, group = om), 
             alpha = .3, data = MissP) +
  xlab("") + ylab("") +
  theme(panel.grid.minor = element_blank(), 
        panel.grid.major = element_blank(), 
        panel.background = element_blank(),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        legend.position = "bottom") +
  scale_size_continuous("Number of\n Casualties")

Population Statistics

Population density distribution. For the set of 872 tornadoes with at least one casualty the median population density per tornado is 23.6 people per square kilometer with an inter-quartile range between 10.7 and 70.2 people per square kilometer

Torn.df %>%
  filter(cas > 0) %>%
  summarize(nT = n(),
            avgPopD = mean(popD, na.rm = TRUE),
            medianPopD = median(popD, na.rm = TRUE),
            q25 = quantile(popD, probs = .25, na.rm = TRUE),
            q75 = quantile(popD, probs = .75, na.rm = TRUE),
            maxPopD = max(popD, na.rm = TRUE),
            minPopD = min(popD, na.rm = TRUE))
Torn.df %>%
  filter(cas > 0) %>%
  arrange(desc(popD))

Torn.df %>%
  filter(cas > 0) %>%
  group_by(mag) %>%
  summarize(nT = n(),
            nP = round(sum(pop, na.rm = TRUE), 0),
            avgP = round(mean(pop, na.rm = TRUE), 0),
            medP = round(median(pop, na.rm = TRUE), 0),
            avgPD = mean(popD, na.rm = TRUE),
            medPD = median(popD, na.rm = TRUE))

df = Torn.df %>%
  filter(popD > 0 & cas > 0) %>%
  group_by(mag) %>%
  summarize(popD = sum(popD),
            nT = n(),
            avgpopD = popD/nT)
df = df[1:6,]

ggplot(Torn.df[Torn.df$popD > 0 & Torn.df$cas > 0,], aes(log(popD))) +
  geom_histogram(binwidth = .5, color = "white") +
  xlab("Population Density") +
  ylab("Number of Tornadoes")

#A = A + ggtitle("A") + 
#  theme(plot.title=element_text(hjust=0))

Atmosphere moment of tornadoes

Empirical model from Table 3-1 of NRC 2007. Percent area by EF rating for each EF category. Threshold wind speeds (m/s) are a lower bound 3 sec gusts on the operational EF Scale (Table 2-1 of NRC2007). Area * 1000 = volume in cubic meters.

perc = c(1, 0, 0, 0, 0, 0, 
         .772, .228, 0, 0, 0, 0,
         .616, .268, .115, 0, 0, 0,
         .529, .271, .133, .067, 0, 0,
         .543, .238, .131, .056, .032, 0,
         .538, .223, .119, .07, .033, .017)
percM = matrix(perc, ncol = 6, byrow = TRUE)

Compute mass-specific kinetic energy (e), total kinetic energy (E), and atmosphere moment of tornado (AMT) per tornado. Mass-specific kinetic energy (e) is a function of EF rating only (6 values). The equation for energy dissipation (atmosphere moment) is \(E = \frac{1}{2}A_v l \bar{\rho} \sum_{j=0}^{J} w_j v_j^{2},\) where \(A_v\) is the area of the vortex (\(\pi R^{2}\)), \(l\) is the path length, \(\bar{\rho}\) is air density, \(v_j\) is the midpoint wind speed for each rating, and \(w_j\) is the corresponding fraction of path area. With no upper bound on the EF5 wind speeds, the midpoint wind speed is set at 97 m~s\(^{-1}\) (7.5 m~s\(^{-1}\) above the threshold wind speed consistent with the EF4 midpoint speed relative to its threshold).

threshW = c(29.06, 38.45, 49.62, 60.8, 74.21, 89.41)
midptW = c(diff(threshW)/2 + threshW[-length(threshW)], threshW[length(threshW)] + 7.5)
ef = Torn.df$mag + 1
EW2 = numeric()
for(i in 1:length(ef)) EW2[i] = midptW^2 %*% percM[ef[i], ]
height = 1000
Torn.df = Torn.df %>%
  mutate(e = .5 * EW2,
         E = e * AreaPath * height,
         AMT = e * AreaVortex * Length)

Distribution of AMT, atmospheric moment of tornadoes.

ggplot(Torn.df[Torn.df$cas > 0,], aes(AMT)) +
  geom_histogram(binwidth = .5, color = "white") +
  scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),
                     labels = trans_format("log10", math_format(10^.x))) +
  xlab("Atmosphere Moment (J)") +
  ylab("Number of Tornadoes")

#B = B + ggtitle("B") + 
#  theme(plot.title=element_text(hjust=0))

For the set of 872 tornadoes with at least one casualty the median energy dissipation is .410 terajoules (TJ) with an inter-quartile range between .035 and 4.58 TJ.

Torn.df %>%
  filter(cas > 0) %>%
  summarize(nT = n(),
            avgAMT = mean(AMT, na.rm = TRUE),
            medianAMT = median(AMT, na.rm = TRUE),
            q25 = quantile(AMT, probs = .25, na.rm = TRUE),
            q75 = quantile(AMT, probs = .75, na.rm = TRUE),
            maxAMT = max(AMT, na.rm = TRUE),
            minAMT = min(AMT, na.rm = TRUE))

##    nT      avgAMT    medianAMT         q25         q75      maxAMT  minAMT
## 1 872 1.49478e+13 409513816772 35014876010 4.58442e+12 1.45533e+15 2408332

Torn.df %>%
  filter(cas > 0) %>%
  group_by(mag) %>%
  summarize(nT = n(),
            nAMT = round(sum(AMT, na.rm = TRUE), 0),
            avgAMT = round(mean(AMT, na.rm = TRUE), 0),
            medAMT = round(median(AMT, na.rm = TRUE), 0))

## # A tibble: 6 x 5
##     mag    nT         nAMT       avgAMT       medAMT
##   <int> <int>        <dbl>        <dbl>        <dbl>
## 1     0    68 4.815863e+12 7.082151e+10 1.860813e+09
## 2     1   253 2.420207e+14 9.566034e+11 4.539786e+10
## 3     2   312 1.600533e+15 5.129913e+12 4.892114e+11
## 4     3   176 4.590518e+15 2.608249e+13 7.083210e+12
## 5     4    54 4.891454e+15 9.058249e+13 2.802787e+13
## 6     5     9 1.705142e+15 1.894603e+14 7.199459e+13

Bi-variate Relationships

Population and casualties.

df = Torn.df[Torn.df$cas>0,]
ggplot(df, aes(x = popD, y = cas)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10() +
    xlab("Population Density") +
  ylab("Number of Casualties")

#A = A + ggtitle("A") + 
#  theme(plot.title=element_text(hjust=0))

Energy dissipation and casualties.

ggplot(df, aes(x = AMT, y = cas)) +
  geom_point() +
  scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),
                     labels = trans_format("log10", math_format(10^.x))) +
  scale_y_log10() +
  xlab("Atmosphere Moment (J)") +
  ylab("Number of Casualties") +
  geom_label(aes(label = "Tuscaloosa-Birmingham (2011-04-27)", x = AMT, y = cas), 
             data = df[511, ],
             hjust = 1.05,
             vjust = .75)

#B = B + ggtitle("B") + 
#  theme(plot.title=element_text(hjust=0))

Regression Model

Mean and variance of the casualty counts.

Torn.df %>%
  filter(cas > 0) %>%
  summarize(meanCas = mean(cas),
            varCas = var(cas),
            ratio = varCas/meanCas)

##    meanCas   varCas    ratio
## 1 14.87615 5850.359 393.2711

For the set of tornadoes with at least one casualty, the mean and variance of the counts are 14.9 and 5850, respectively suggesting a negative binomial model for counts expressed as \[ C \sim \hbox{NegBin}(\hat \mu, n) \] where \(\hat \mu\) is the per tornado casualty rate conditional on at least one casualty and \(n\) is the size parameter for the negative binomial distribution.

The logarithm of casualty rate (given at least one casualty) is linearly related to the logarithm of the population density (\(P\)) and the logarithm of the energy dissipation (\(E\)). The coefficient \(\hat \alpha\) is the population elasticity and the coefficient \(\hat \beta\) is the energy elasticity and \(\hat \nu\) is the offset parameter. \[ \log(\hat \mu) = \hat \alpha\,\log(P) + \hat \beta\,\log(E) + \hat v \]

A 1% increase in \(P\) leads to an \(\alpha\)% increase in the casualty rate. For \(0 < \hat \alpha < 1\) the relationship is said to be “inelastic.”

df = Torn.df %>%
  filter(cas > 0)

formula = log(cas) ~ log(popD) + log(AMT)
model = lm(formula, data = df)
summary(model)  # model is not adequate as the residuals are positively skewed.

formula = cas ~ log(popD) + log(AMT)
model2 = glm(formula, data = df, family = poisson)
summary(model2)
pchisq(21614, 869, lower.tail = FALSE) # model is not adequate as the p-value = 0.

library(MASS)
model3 = glm.nb(formula, data = df, link = log)
summary(model3)
pchisq(917.79, 869, lower.tail = FALSE) # model is adequate p-value = .122.

INLA models

control = list(predictor = list(compute = TRUE, link = 1))

df = Torn.df %>%
  filter(cas > 0)

formula = cas ~ log(popD) + log(AMT)
modelI = inla(formula, family = "nbinomial", data = df,
              control.predictor = control$predictor)

cor(modelI$summary.fitted.values$mean, log(df$cas))^2

## [1] 0.3678431

modelI$summary.fixed$mean[1]

## [1] -7.00155

modelI$summary.fixed$mean[2]

## [1] 0.3505155

modelI$summary.fixed$mean[3]

## [1] 0.2918532

modelI$summary.fixed$"0.025quant"[1]

## [1] -7.59495

modelI$summary.fixed$"0.975quant"[1]

## [1] -6.409765

modelI$summary.fixed$"0.025quant"[2]

## [1] 0.299609

modelI$summary.fixed$"0.975quant"[2]

## [1] 0.4016938

modelI$summary.fixed$"0.025quant"[3]

## [1] 0.2718688

modelI$summary.fixed$"0.975quant"[3]

## [1] 0.3118952

The coefficient on log(AMT) is .2918532 this means that for a 100% increase in tornado energy (doubling) we expect to see a 2^(.2918532) = 1.224 or 22% increase in the number of casualties. The coefficient on log(popD) is .3505154 this means that for a doubling of the number of people exposed to the damaging winds we expect to see a 2^(.3505154) = 1.275 or a 28% increase in the number of injuries.

Observed number of casualties versus predicted rate.

df2 = data.frame(obs = df$cas, pre = modelI$summary.fitted.values$mean)

cor(df2$obs, df2$pre)

ggplot(df2, aes(x = obs, y = pre)) +
  geom_point() +
  geom_abline() +
  geom_smooth(method = lm, color = "red") +
  xlab("Observed Number of Casualties") +
  ylab("Predicted Rate") +
  scale_x_log10() + scale_y_log10()

Model residuals

data.frame(om = df$om, obs = df$cas, pre = modelI$summary.fitted.values$mean) %>%
  mutate(differ = obs - pre) %>%
  arrange(desc(pre)) %>%
  head()

##       om  obs       pre     differ
## 1 462224    8 110.37485 -102.37485
## 2 314625 1564 101.77215 1462.22785
## 3 317910    5  75.17520  -70.17520
## 4 366421   38  74.43432  -36.43432
## 5 309488  217  71.39030  145.60970
## 6 323440  203  68.81324  134.18676

df[df$om == 462224,]

##         om   yr mo dy       date     time tz st stf stn mag inj fat loss
## 715 462224 2013  5 31 2013-05-31 18:50:00  3 MO  29   0   3   8   0   60
##     closs  slat   slon  elat   elon   len  wid fc       Date Year   Length
## 715     0 38.69 -90.75 38.77 -90.18 31.71 1760  0 2013-05-31 2013 51032.17
##        Width AreaPath AreaVortex  Ma cas     popD      pop        e
## 715 1609.344 82128319    2034172 May   8 718.4043 59001.34 919.4754
##                E          AMT
## 715 7.551497e+13 9.544909e+13

df[df$om == 314625,]

##         om   yr mo dy       date     time tz st stf stn mag  inj fat loss
## 511 314625 2011  4 27 2011-04-27 15:43:00  3 AL   1 103   4 1500  64 2450
##     closs  slat   slon  elat   elon   len  wid fc       Date Year   Length
## 511     0 33.03 -87.94 33.63 -86.74 80.68 2600  0 2011-04-27 2011 129841.6
##       Width  AreaPath AreaVortex  Ma  cas    popD      pop        e
## 511 2377.44 308690497    4439244 Apr 1564 130.819 40382.59 974.4311
##                E          AMT
## 511 3.007976e+14 5.616605e+14

The largest over prediction by the model is the Weldon Spring-Northern St. Louis County, MO EF3 tornado of May 31, 2013. Based on the estimated number of people in harm’s way and the estimated energy dissipation the model predicts upwards of 110 casualties. Only eight were reported. The largest under prediction by the model is the Tuscaloosa AL EF4 tornado of April 27, 2011. Again based on the number of people in harm’s way and the energy dissipation the model predicts 102 casualties. The official report has 1564 casualties.

Removing these two storms from the data set and refitting the model.

df = Torn.df %>%
  filter(cas > 0, om != 314625, om != 462224)

formula = cas ~ log(popD) + log(AMT)
modelI = inla(formula, family = "nbinomial", data = df,
              control.predictor = control$predictor)

cor(modelI$summary.fitted.values$mean, log(df$cas))

## [1] 0.614171

2^modelI$summary.fixed$mean[2] - 1

## [1] 0.2643165

2^modelI$summary.fixed$mean[3] - 1

## [1] 0.2166747

Removing all tornadoes during 2011 and refitting the model. This drops the population elasticity to 26% and the energy elasticity to 22%.

df = Torn.df %>%
  filter(cas > 0, Year != 2011)

formula = cas ~ log(popD) + log(AMT)
modelI = inla(formula, family = "nbinomial", data = df,
              control.predictor = control$predictor)

cor(modelI$summary.fitted.values$mean, log(df$cas))

## [1] 0.5634977

2^modelI$summary.fixed$mean[2] - 1

## [1] 0.2267989

2^modelI$summary.fixed$mean[3] - 1

## [1] 0.1920019

Compute the elasticities by year. With the negative binomial model this can be problematic as a few large events can spoil the fit.

alpha = numeric()
beta = numeric()
alphaL = numeric()
alphaH = numeric()
betaL = numeric()
betaH = numeric()
for(yr in 2007:2015){
  df1 = Torn.df[Torn.df$cas > 0 & Torn.df$Year == yr, ]
  modelI = inla(formula, family = "nbinomial", data = df1)
  alpha = c(alpha, modelI$summary.fixed$mean[2])
  beta = c(beta, modelI$summary.fixed$mean[3])
  alphaL = c(alphaL, modelI$summary.fixed$"0.025quant"[2])
  betaL = c(betaL, modelI$summary.fixed$"0.025quant"[3])
  alphaH = c(alphaH, modelI$summary.fixed$"0.975quant"[2])
  betaH = c(betaH, modelI$summary.fixed$"0.975quant"[3])
}
out = data.frame(Year = 2007:2015, 
                 ElasPop = (2^alpha - 1) * 100, 
                 ElasEng = (2^beta - 1) * 100,
                 ElasPopL = (2^alphaL - 1) * 100, 
                 ElasPopH = (2^alphaH - 1) * 100, 
                 ElasEngL = (2^betaL - 1) * 100, 
                 ElasEngH = (2^betaH - 1) * 100)

ggplot(out, aes(Year, ElasPop)) +
  geom_point() + 
  geom_errorbar(aes(ymin = ElasPopL, ymax = ElasPopH), width = .1) +
  ylab("Population Elasticity of Casualties") +
  scale_y_continuous(limits = c(-10, 80)) +
  scale_x_continuous(breaks = 2007:2015)

#A = A + ggtitle("A") + 
#  theme(plot.title=element_text(hjust=0))

ggplot(out, aes(Year, ElasEng)) +
  geom_point() + 
  geom_errorbar(aes(ymin = ElasEngL, ymax = ElasEngH), width = .1) +
  ylab("Energy Elasticity of Casualties") +
  scale_y_continuous(limits = c(-10, 80)) +
  scale_x_continuous(breaks = 2007:2015)

#B = B + ggtitle("B") + 
#  theme(plot.title=element_text(hjust=0))

Refitting the model to only fatalities. This drops the population elasticity to 20% and the energy elasticity to 22%. Thus, the tornado death rate is more sensitive to changes in energy than to changes in population on average.

df = Torn.df %>%
  filter(fat > 0)
formula = fat ~ log(popD) + log(AMT)

modelIf = inla(formula, family = "nbinomial", data = df,
              control.predictor = control$predictor)
2^modelIf$summary.fixed$mean[2] - 1

## [1] 0.2011918

2^modelIf$summary.fixed$mean[3] - 1

## [1] 0.2167051

Additional analysis for reviewer’s comments

Assess effect around cities. Buffer cities by 30km (within the average distance traveled to work).

map("usa")
data(us.cities)
map.cities(x = us.cities)

coordinates(us.cities) = cbind(us.cities$long, us.cities$lat)
proj4string(us.cities) = CRS("+init=epsg:4326")

cities = spTransform(us.cities, CRS("+init=epsg:3857"))

BC = gBuffer(cities, byid = TRUE, width = 30 * 1000)
BC = BC[BC$country.etc != "AK" & BC$country.etc != "HI",]

spT = spTransform(BC, CRS = CRS("+proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0 "))

TornCities = over(TornP, spT)
TornCities$cas = Torn.df$cas
TornCities$popD = Torn.df$popD
TornCities$AMT = Torn.df$AMT


TornCities.df = TornCities[complete.cases(TornCities),]  # Subset by only the cities

Refit the Model. There is no significant difference.

df = TornCities.df %>%
  filter(cas >0)

control = list(predictor = list(compute = TRUE, link = 1))

formula = cas ~ log(popD) + log(AMT)
modelI = inla(formula, family = "nbinomial", data = df,
              control.predictor = control$predictor)

cor(modelI$summary.fitted.values$mean, log(df$cas))^2

## [1] 0.4032755

modelI$summary.fixed$mean[1]

## [1] -7.442188

modelI$summary.fixed$mean[2]

## [1] 0.2003991

modelI$summary.fixed$mean[3]

## [1] 0.3438552

modelI$summary.fixed$"0.025quant"[1]

## [1] -8.757317

modelI$summary.fixed$"0.975quant"[1]

## [1] -6.113579

modelI$summary.fixed$"0.025quant"[2]

## [1] 0.07960542

modelI$summary.fixed$"0.975quant"[2]

## [1] 0.3216087

modelI$summary.fixed$"0.025quant"[3]

## [1] 0.3061153

modelI$summary.fixed$"0.975quant"[3]

## [1] 0.3817766

2^modelI$summary.fixed$mean[2] - 1

## [1] 0.1490162

2^modelI$summary.fixed$mean[3] - 1

## [1] 0.2691435

Assess daytime and nightime tornadoes and refit the model. Use Lubridate to calculate the hour of the tornado. There is no significant difference (in either).

library("lubridate")

## 
## Attaching package: 'lubridate'

## The following object is masked from 'package:base':
## 
##     date

df = Torn.df

df$datetime = as.POSIXct(paste(df$date, df$time), tz = "America/New_York")
df$hr = hour(df$datetime)

#df = df %>%
#  filter(hr>=12 & hr<= 24 & cas >0)

df = df %>%
  filter(hr>=1 & hr<= 12 & cas >0)

control = list(predictor = list(compute = TRUE, link = 1))

formula = cas ~ log(popD) + log(AMT)
modelI = inla(formula, family = "nbinomial", data = df,
              control.predictor = control$predictor)

modelI$summary.fixed$mean[1]

## [1] -6.645119

modelI$summary.fixed$mean[2]

## [1] 0.3401922

modelI$summary.fixed$mean[3]

## [1] 0.2674894

modelI$summary.fixed$"0.025quant"[1]

## [1] -7.771679

modelI$summary.fixed$"0.975quant"[1]

## [1] -5.528782

modelI$summary.fixed$"0.025quant"[2]

## [1] 0.2559633

modelI$summary.fixed$"0.975quant"[2]

## [1] 0.4258943

modelI$summary.fixed$"0.025quant"[3]

## [1] 0.2304236

modelI$summary.fixed$"0.975quant"[3]

## [1] 0.3049695

2^modelI$summary.fixed$mean[2] - 1

## [1] 0.2659253

2^modelI$summary.fixed$mean[3] - 1

## [1] 0.2037113

Weighted scatterplot of casualties.

df = Torn.df[Torn.df$cas>0,]
ggplot(df, aes(x = popD, y = AMT, size = cas)) +
  geom_point(alpha = 0.75) +
  scale_x_log10() +
  scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),
                     labels = trans_format("log10", math_format(10^.x))) +
  xlab("Population Density") +
  ylab("Atmosphere Moment (J)") +
  labs(size = "Casualties") +
  theme(legend.position = "bottom", legend.direction = "horizontal") #+

#  facet_wrap(~mag)

Elasticity.Rmd