Models for Tornado Casualty Rates

James B. Elsner

Code for the paper: Tornado casualties in the United States: A model involving an interaction between population density and energy dissipation by Elsner, Fricker, Berry.

Load packages and data.

suppressMessages(library("ggplot2"))
suppressMessages(library("raster"))
suppressMessages(library("ggmap"))
suppressMessages(library("dplyr"))
suppressMessages(library("tidyr"))
suppressMessages(library("rgdal"))
suppressMessages(library("rgeos"))
suppressMessages(library("ggthemes"))
suppressMessages(library("RColorBrewer"))
suppressMessages(library("scales"))
suppressMessages(library("broom"))
suppressMessages(library("maps"))
suppressMessages(library("sf"))
suppressMessages(library("xtable"))
suppressMessages(library("lubridate"))
#load(file = "Oct8.RData")

Data and Variables

Tornado casualties

Data from the Storm Prediction Center (SPC) http://www.spc.noaa.gov/gis/svrgis/.

download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip",
              "tornado.zip", mode = "wb")
unzip("tornado.zip")

Read the tornado data.

Torn.sfdf <- st_read(dsn = "torn", 
                   layer = "torn", 
                   stringsAsFactors = FALSE)

## Reading layer `torn' from data source `/Users/jelsner/Dropbox/Tornadoes/Casualties/torn' using driver `ESRI Shapefile'
## Simple feature collection with 61092 features and 22 fields
## geometry type:  LINESTRING
## dimension:      XY
## bbox:           xmin: -6279240 ymin: -1835231 xmax: 3405399 ymax: 3888106
## epsg (SRID):    NA
## proj4string:    +proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs

Add new columns and filter. We only consider tornadoes occurring within the conterminous United States.

Torn.sfdf <- Torn.sfdf %>%
  mutate(Date = as.Date(date),
         Year = yr,
         cas = inj + fat,
         Length = len * 1609.34,
         Width = wid * .9144,
         Width = ifelse(Width == 0, min(Width[Width > 0]), Width),
         AreaPath = Length * Width,
         AreaVortex = pi * (Width/2)^2,
         Ma = factor(month.abb[mo], levels = month.abb[1:12])) %>%
  filter(Year >= 1995, st != "HI", st != "AK", st != "PR")

Torn.sfdf %>%
  filter(cas > 0) %>%
  summarize(nT = n(),
            nC = sum(cas),
            pF = sum(fat)/nC * 100)

## Simple feature collection with 1 feature and 3 fields
## geometry type:  MULTILINESTRING
## dimension:      XY
## bbox:           xmin: -2270178 ymin: -1429838 xmax: 2130657 ymax: 1116407
## epsg (SRID):    NA
## proj4string:    +proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs
##     nT    nC       pF                       geometry
## 1 2208 25968 6.662046 MULTILINESTRING ((-223554.6...

During the 22-year period 1995–2016 there are 26,863 tornadoes recorded in the conterminous United States. Of these, 2,208 are linked to 25,968 casualties. Only 6.7 percent of all casualties lead to death.

df <- as.data.frame(Torn.sfdf) %>%
  arrange(desc(cas)) %>%
  dplyr::select(date, time, st, mag, inj, fat, cas)
head(df)

##         date     time st mag  inj fat  cas
## 1 2011-04-27 15:43:00 AL   4 1500  64 1564
## 2 2011-05-22 16:34:00 MO   5 1150 158 1308
## 3 1999-05-03 17:26:00 OK   5  583  36  619
## 4 2015-12-26 18:46:00 TX   4  468  10  478
## 5 2008-05-10 16:20:00 OK   4  350  21  371
## 6 2011-04-27 18:15:00 GA   4  335  20  355

On average a casualty-producing EF0 tornado results in two casualties, a casualty-producing EF1 tornado results in 3.1 casualties, and a casualty-producing EF2 tornado results in 6.5 casualties.

as.data.frame(Torn.sfdf) %>%
  filter(cas > 0) %>%
  group_by(mag) %>%
  summarize(cas = sum(cas),
            nT = n(),
            fat = sum(fat),
            inj = sum(inj),
            avgcas = cas/nT) %>%
xtable(., digits = 1)

## % latex table generated in R 3.4.3 by xtable 1.8-2 package
## % Wed Dec 27 08:43:31 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrr}
##   \hline
##  & mag & cas & nT & fat & inj & avgcas \\ 
##   \hline
## 1 &  0 & 362 & 183 &  8 & 354 & 2.0 \\ 
##   2 &  1 & 2300 & 750 & 79 & 2221 & 3.1 \\ 
##   3 &  2 & 4888 & 753 & 218 & 4670 & 6.5 \\ 
##   4 &  3 & 7933 & 392 & 536 & 7397 & 20.2 \\ 
##   5 &  4 & 7187 & 116 & 457 & 6730 & 62.0 \\ 
##   6 &  5 & 3298 & 14 & 432 & 2866 & 235.6 \\ 
##    \hline
## \end{tabular}
## \end{table}

Table 2 Casualties by EF rating.

Casualties by location and size.

df <- as.data.frame(Torn.sfdf) %>%
  filter(cas > 0) %>%
  mutate(label = paste("EF", mag, sep = ""))

states.df <- map_data("state") %>%
  filter(region != 'alaska',  region != 'district of columbia')

ggplot(states.df, aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white") +
  geom_path(color = "gray85") +
  coord_map(project = "polyconic") + 
  geom_point(aes(x = slon, y = slat, size = cas, group = om), 
             alpha = .3, data = df) +
  xlab("") + ylab("") +
  facet_wrap(~ label, ncol = 2) +
  theme_minimal() +
  theme(panel.grid.minor = element_blank(), 
        panel.grid.major = element_blank(), 
        panel.background = element_blank(),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        legend.position = "bottom") +
  scale_size_continuous("Number of\n Casualties")

Figure 1 Location of casualty-producing tornadoes (1995–2016) by EF rating.

Alabama had the most casualties (3,937) followed by Oklahoma (2,500), Missouri (2,025), Arkansas (1,759), Texas (1,704), and Tennessee (1,678).

sfdf <- Torn.sfdf %>%
  filter(cas > 0) %>%
  group_by(st) %>%
  summarize(nT = n(),
            nC = sum(cas),
            nF = sum(fat),
            ratio = nC/nT) %>%
  arrange(desc(nC))
st_geometry(sfdf) = NULL #removes the geometry column
sfdf

## # A tibble: 44 x 5
##       st    nT    nC    nF     ratio
##  * <chr> <int> <int> <int>     <dbl>
##  1    AL   159  3937   347 24.761006
##  2    OK   108  2500   132 23.148148
##  3    MO   110  2025   219 18.409091
##  4    AR   126  1759   114 13.960317
##  5    TX   193  1704   100  8.829016
##  6    TN   133  1678   149 12.616541
##  7    GA   125  1610    94 12.880000
##  8    MS   130  1400    98 10.769231
##  9    KY    94  1051    71 11.180851
## 10    NC    88   985    46 11.193182
## # ... with 34 more rows

Make a choropleth map of fatalities and injuries by state using functions from the tmap package. The functions are based on the grammar of graphics, and the syntax resembles the syntax of ggplot2.

library(tmap)
library(albersusa)
us_sf <- usa_sf("aeqd") %>%
  mutate(st = as.character(iso_3166_2))

us_sf <- us_sf %>%
  filter(st != "AK" & st != "HI")


sfdf2 <- Torn.sfdf %>%
  group_by(st) %>%
  summarize(nT = n(),
            nC = sum(cas),
            nF = sum(fat),
            nI = sum(inj))
st_geometry(sfdf2) = NULL

sfdf3 <- left_join(sfdf2, 
                  as.data.frame(us_sf), by = "st") %>%
         sf::st_as_sf() %>%
         select(nT, nC, nF, nI)

tm1 <- tm_shape(sfdf3) +
  tm_polygons("nI", 
              border.col = NULL,
              title = "Injuries",
              palette = "Purples") +
  tm_text("nI", size = .5) +
tm_shape(us_sf) + tm_borders(alpha = .2) +
  tm_compass() + tm_scale_bar(lwd = .5) +
  tm_format_Europe2(legend.position = c("left", "bottom"),
                attr.position = c("left", "bottom")) 

tm2 <- tm_shape(sfdf3) +
  tm_polygons("nF", 
              border.col = NULL,
              title = "Fatalities",
              palette = "Oranges") +
  tm_text("nF", size = .5) +
tm_shape(us_sf) + tm_borders(alpha = .2) +
  tm_compass() + tm_scale_bar(lwd = .5) +
  tm_format_Europe2(legend.position = c("left", "bottom"),
                attr.position = c("left", "bottom")) 

tmap_arrange(tm1, tm2, asp = NA, ncol = 1)

Figure 2 Tornado casualties (top: injuries, bottom: fatalities) by state (1995-2016). All casualties are assigned to the state in which genesis occurred.

Create tornado paths and extract the average population density (people per sq. km under the path). The population data (Gridded Population of the World Volume 4–2010) are on a raster. With the extract method and weights = TRUE the calculation is done based on percentage of cell under the path. This takes more time. Multiply the average population density by the path area (L x W) to estimate the number of people under the path.

First convert the tornado simple feature data frame to a spatial lines file. Then buffer the track to make a path.

TornL.sfdf <- Torn.sfdf %>%
  filter(cas > 0)
TornL = as(TornL.sfdf, "Spatial")
TornP = gBuffer(TornL, byid = TRUE, width = TornL$Width/2, capStyle = "FLAT")
#TornP$AreaPath = gArea(TornP, byid = TRUE)

Population density

Population data are obtained from the Gridded Population of the World, version four (GPW, v4) from the Socioeconomic Data and Applications Center at Columbia University, USA. The database contain decennial census density estimates for 1990, 2000, and 2010 represented as people per square kilometer. Densities are based on residential population. The native cell resolution is .0083\(^{\circ}\) latitude/longitude, which at 36\(^{\circ}\) N latitude means a cell having the dimension of .9 km in the north-south direction and .7 km in the east-west direction.

Load the population raster(s) and crop to defined extent. Use the extract function to obtain the population density. The projectRaster and extract functions take about 10 minutes per raster. Group years by mid-year of the census year. Assign the population density using the first decennial estimate then successively replace with later estimates by grouped years.

stime = proc.time()
PopD2000 = raster("gpw-v4-population-density_2000.tif")
PopD2010 = raster("gpw-v4-population-density_2010.tif")
ext = raster::extent(c(-125, -67, 24, 50))
PopD2000 = crop(PopD2000, ext)
PopD2010 = crop(PopD2010, ext)

PopD2000p = projectRaster(PopD2000, crs = proj4string(TornP))
PopD2010p = projectRaster(PopD2010, crs = proj4string(TornP))
TornP$popD2000 = raster::extract(PopD2000p, TornP, fun = mean, na.rm = TRUE,
                                       weights = TRUE, normalizeWeights = FALSE)[, 1]
TornP$popD2010 = raster::extract(PopD2010p, TornP, fun = mean, na.rm = TRUE,
                                       weights = TRUE, normalizeWeights = FALSE)[, 1]
TornP$popD = TornP$popD2000
yearGroup = TornP$yr > 2005
TornP$popD[yearGroup] = TornP$popD2010[yearGroup]

TornP$pop = TornP$popD * TornP$AreaPath/10^6

Torn.df = as.data.frame(TornP)
proc.time() - stime

##    user  system elapsed 
## 625.091  13.833 640.173

Note: The warning results from tornadoes with paths over areas with no people. It can be safely ignored.

The popD attribute for each tornado corresponds to the population density from the nearest decennial estimate so that a tornado that occurred in 1996 uses the 2000 decennial estimate while a tornado that occurred in 2012 uses the 2010 estimate.

For the set of 2208 tornadoes with at least one casualty the median population density per tornado is 31.3 people per square kilometer with an inter-quartile range between 9.6 and 136 people per square kilometer.

1 mi^2 = 2.589988 km^2 http://mcdc.missouri.edu/TenThings/urbanrural.shtml

Torn.df %>%
  summarize(nT = n(),
            avgPopD = mean(popD),
            medianPopD = median(popD),
            q25 = quantile(popD, probs = .25),
            q75 = quantile(popD, probs = .75),
            maxPopD = max(popD),
            minPopD = min(popD)) %>%
  t()

##                    [,1]
## nT          2208.000000
## avgPopD      220.998721
## medianPopD    31.309198
## q25            9.618402
## q75          135.985054
## maxPopD    13949.001032
## minPopD        0.000000

Torn.df %>%
  dplyr::select(date, st, mag, inj, fat, cas, popD) %>%
  arrange(desc(popD)) %>%
  head(., n = 10) %>%
  xtable(., digits = 0)

## % latex table generated in R 3.4.3 by xtable 1.8-2 package
## % Wed Dec 27 08:54:29 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rllrrrrr}
##   \hline
##  & date & st & mag & inj & fat & cas & popD \\ 
##   \hline
## 1 & 2007-08-08 & NY & 2 & 9 & 0 & 9 & 13949 \\ 
##   2 & 1998-01-09 & CA & 1 & 1 & 0 & 1 & 12249 \\ 
##   3 & 1999-01-18 & PA & 0 & 18 & 0 & 18 & 12220 \\ 
##   4 & 2010-09-16 & NY & 1 & 1 & 1 & 2 & 9524 \\ 
##   5 & 2003-03-27 & FL & 2 & 14 & 1 & 15 & 6588 \\ 
##   6 & 2010-07-25 & NY & 1 & 7 & 0 & 7 & 6094 \\ 
##   7 & 2013-07-19 & FL & 0 & 3 & 0 & 3 & 5224 \\ 
##   8 & 2014-07-28 & MA & 2 & 2 & 0 & 2 & 4440 \\ 
##   9 & 1998-05-05 & CA & 2 & 1 & 0 & 1 & 4340 \\ 
##   10 & 2015-05-24 & TX & 0 & 4 & 0 & 4 & 4047 \\ 
##    \hline
## \end{tabular}
## \end{table}

sum(Torn.df$popD == 0)

## [1] 16

df = Torn.df %>%
  filter(yr >= 1990 & yr <= 2001) %>%
  summarize(totalPop = sum(pop))

Annual average number of people exposed to tornadoes that caused at least one casualty.

df = Torn.df %>%
  filter(cas > 0) %>%
  group_by(Year) %>%
  summarize(nT = n(),
            mPop = mean(pop),
            sdPop = sd(pop),
            sePop = sdPop/sqrt(nT),
            mPopm = mPop - sePop,
            mPopp = mPop + sePop)
A = ggplot(df, aes(x = Year, y = mPop)) +
  geom_point() +
  geom_errorbar(aes(ymin = mPopm, ymax = mPopp), width = .2) +
  scale_y_continuous(limits = c(0, 2500)) +
  scale_x_continuous(breaks = seq(1995, 2016, 4)) +
  xlab("Year") + ylab("Number of People Exposed") +
  theme_minimal()

Distribution of population and population density by EF rating.

PeopleByEF = Torn.df %>%
  group_by(mag) %>%
  summarize(nT = n(),
            nP = round(sum(pop), 0),
            avgP = round(mean(pop), 0),
            medP = round(median(pop), 0),
            avgPD = mean(popD),
            medPD = median(popD))
sum(PeopleByEF$nP)

## [1] 1496394

xtable(PeopleByEF, digits = 0)

## % latex table generated in R 3.4.3 by xtable 1.8-2 package
## % Wed Dec 27 08:54:29 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrrr}
##   \hline
##  & mag & nT & nP & avgP & medP & avgPD & medPD \\ 
##   \hline
## 1 & 0 & 183 & 9386 & 51 & 4 & 440 & 55 \\ 
##   2 & 1 & 750 & 217053 & 289 & 17 & 293 & 37 \\ 
##   3 & 2 & 753 & 397781 & 528 & 72 & 186 & 29 \\ 
##   4 & 3 & 392 & 492471 & 1256 & 261 & 88 & 24 \\ 
##   5 & 4 & 116 & 289722 & 2498 & 824 & 103 & 27 \\ 
##   6 & 5 & 14 & 89981 & 6427 & 3318 & 82 & 25 \\ 
##    \hline
## \end{tabular}
## \end{table}

PeopleByEFc= Torn.df %>%
  filter(cas > 0) %>%
  group_by(mag) %>%
  summarize(nT = n(),
            nP = round(sum(pop), 0),
            avgP = round(mean(pop), 0),
            medP = round(median(pop), 0),
            avgPD = mean(popD),
            medPD = median(popD))
sum(PeopleByEFc$nP)

## [1] 1496394

xtable(PeopleByEFc, digits = 0)

## % latex table generated in R 3.4.3 by xtable 1.8-2 package
## % Wed Dec 27 08:54:29 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrrr}
##   \hline
##  & mag & nT & nP & avgP & medP & avgPD & medPD \\ 
##   \hline
## 1 & 0 & 183 & 9386 & 51 & 4 & 440 & 55 \\ 
##   2 & 1 & 750 & 217053 & 289 & 17 & 293 & 37 \\ 
##   3 & 2 & 753 & 397781 & 528 & 72 & 186 & 29 \\ 
##   4 & 3 & 392 & 492471 & 1256 & 261 & 88 & 24 \\ 
##   5 & 4 & 116 & 289722 & 2498 & 824 & 103 & 27 \\ 
##   6 & 5 & 14 & 89981 & 6427 & 3318 & 82 & 25 \\ 
##    \hline
## \end{tabular}
## \end{table}

Table 3: Population and population density by EF rating

Distribution of population density in the path of casualty-producing tornadoes.

df = Torn.df %>%
  filter(cas > 0) %>%
  filter(popD > 0) %>%
  group_by(mag) %>%
  summarize(pop = sum(pop),
            nT = n(),
            avgpop = pop/nT)

df = Torn.df %>%
  filter(cas > 0)
B = ggplot(df[df$popD > 0, ], aes(popD)) +
  geom_histogram(binwidth = .5, color = "white") +
  scale_x_log10(breaks = c(.01, .1, 1, 10, 100, 1000, 10000), 
                labels = c(".01", ".1", "1", "10", "100", "1000", "10,000")) +
  xlab(expression(paste("Population Density [people/", km^{2}, "]"))) +
  scale_y_continuous(limits = c(0, 600)) +
  ylab("Number of Tornadoes") +
  theme_minimal()

Energy dissipation

Empirical model for tornado winds by EF rating taken from Table 3-1 of NRC 2007. Percent area by EF rating for each EF category. Threshold wind speeds (m/s) are a lower bound 3 sec gusts on the operational EF Scale (Table 2-1 of NRC2007).

perc = c(1, 0, 0, 0, 0, 0, 
         .772, .228, 0, 0, 0, 0,
         .616, .268, .115, 0, 0, 0,
         .529, .271, .133, .067, 0, 0,
         .543, .238, .131, .056, .032, 0,
         .538, .223, .119, .07, .033, .017)
percM = matrix(perc, ncol = 6, byrow = TRUE)

Compute energy dissipation. \(E = A_p \rho \sum_{j=0}^{J} w_j v_j^{3},\) where \(A_p\) is the area of the path, \(\rho\) is area density [1 kg/m^3] \(v_j\) is the midpoint wind speed for each rating, and \(w_j\) is the corresponding fraction of path area by EF rating. With no upper bound on the EF5 wind speeds, the midpoint wind speed is set at 97 m~s\(^{-1}\) (7.5 m~s\(^{-1}\) above the threshold wind speed consistent with the EF4 midpoint speed relative to its threshold).

Torn.df = Torn.df %>%
  filter(cas > 0)
threshW = c(29.06, 38.45, 49.62, 60.8, 74.21, 89.41)
midptW = c(diff(threshW)/2 + threshW[-length(threshW)], threshW[length(threshW)] + 7.5)
ef = Torn.df$mag + 1
EW3 = numeric()
for(i in 1:length(ef)) EW3[i] = midptW^3 %*% percM[ef[i], ]
Torn.df = Torn.df %>%
  mutate(ED = EW3 * AreaPath)

Annual energy dissipation of tornadoes with at least one casualty.

df = Torn.df %>%
  group_by(Year) %>%
  summarize(nT = n(),
            mED = mean(ED),
            sdED = sd(ED),
            seED = sdED/sqrt(nT),
            mEDm = mED - seED,
            mEDp = mED + seED)

C = ggplot(df, aes(x = Year, y = mED/10^9)) +
  geom_point() +
  geom_errorbar(aes(ymin = mEDm/10^9, ymax = mEDp/10^9), width = .2) +
  scale_y_continuous(limits = c(-.15, NA)) +
  scale_x_continuous(breaks = seq(1995, 2016, 4)) +
  xlab("Year") + ylab("Energy Dissipation [GW]") +
  theme_minimal()

Distribution of energy dissipation.

D = ggplot(Torn.df, aes(ED)) +
  geom_histogram(binwidth = .5, color = "white") +
  scale_x_log10(breaks = c(10^8, 10^10, 10^12, 10^14),
                labels = c(".1", "10", "1000", "10,000")) +
  xlab("Energy Dissipation [GW]") +
  scale_y_continuous(limits = c(0, 600)) +
  ylab("Number of Tornadoes") +
  theme_minimal()

Combine figures.

source("multiplot.txt")
mtrx <- matrix(c(1, 2, 3, 4), nrow = 2, byrow = TRUE)
A <- A + ggtitle("A") + 
  theme(plot.title = element_text(hjust = -.03))
B <- B + ggtitle("B") + 
  theme(plot.title = element_text(hjust = 0))
C <- C + ggtitle("C") + 
  theme(plot.title = element_text(hjust = 0))
D <- D + ggtitle("D") + 
  theme(plot.title = element_text(hjust = 0))
multiplot(A, B, C, D, layout = mtrx)

## Loading required package: grid

Figure 3 People exposed and energy dissipation for all casualty-producing tornadoes (1995-2016). (A) Annual average number of people exposed per tornado, (B) distribution of per tornado population density, (C) annual average energy dissipation per tornado, and (D) distribution of per tornado energy dissipation.

For the set of 2208 tornadoes with at least one casualty the median energy dissipation is 95.5 gigawatt with an inter-quartile range between 14 and 511 GW. The Tallulah-Yazoo City-Durant tornado (Louisiana and Mississippi) of 24 April 2010 that killed ten and injured 146 has the highest energy dissipation at 66.2 TW (10^12 W).

Torn.df %>%
  filter(popD > 0) %>%
  summarize(nT = n(),
            avgED = mean(ED)/10^9,
            medianED = median(ED)/10^9,
            q25 = quantile(ED, probs = .25)/10^9,
            q75 = quantile(ED, probs = .75)/10^9,
            maxED = max(ED)/10^9,
            minED = min(ED)/10^9)

##     nT    avgED medianED      q25      q75    maxED       minED
## 1 2192 855.2684 97.19229 14.46852 513.8461 66169.66 0.005659764

Torn.df %>%
  dplyr::select(date, st, mag, inj, fat, cas, popD, ED) %>%
  arrange(desc(ED)) %>%
  head(., n = 10) %>%
  xtable(., digits = 1)

## % latex table generated in R 3.4.3 by xtable 1.8-2 package
## % Wed Dec 27 08:54:30 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rllrrrrrr}
##   \hline
##  & date & st & mag & inj & fat & cas & popD & ED \\ 
##   \hline
## 1 & 2010-04-24 & LA &  4 & 146 & 10 & 156 & 8.5 & 66169658959790.3 \\ 
##   2 & 2011-04-27 & AL &  5 & 145 & 72 & 217 & 52.5 & 49073840644331.3 \\ 
##   3 & 2004-05-22 & NE &  4 & 38 &  1 & 39 & 13.0 & 34265399398044.4 \\ 
##   4 & 2011-04-27 & AL &  4 & 1500 & 64 & 1564 & 132.7 & 30251617428152.3 \\ 
##   5 & 2011-04-27 & AL &  4 & 54 & 13 & 67 & 21.5 & 25950329144119.0 \\ 
##   6 & 2011-04-27 & AL &  4 & 85 & 22 & 107 & 32.1 & 24704083877123.4 \\ 
##   7 & 2008-02-05 & AR &  4 & 139 & 13 & 152 & 10.3 & 23193868125876.3 \\ 
##   8 & 1998-04-16 & TN &  5 & 36 &  3 & 39 & 18.4 & 20640754792221.8 \\ 
##   9 & 2008-05-10 & OK &  4 & 350 & 21 & 371 & 15.1 & 19165780063306.2 \\ 
##   10 & 2011-04-25 & AR &  2 & 16 &  4 & 20 & 37.2 & 19160328444788.8 \\ 
##    \hline
## \end{tabular}
## \end{table}

Here we use logarithms to simplify the number and complexity of “interaction” terms (of the model). cf: Cobb-Douglas production function in economics?

df = Torn.df %>%
  filter(popD > 0) %>%
  arrange(cas)
p = ggplot(df, aes(x = popD, y = ED, color = log10(cas))) + 
  scale_color_continuous(low = 'green', high = 'red', labels = c("1", "10", "100", "1000")) +
  geom_point() + 
  scale_x_log10(breaks = c(.01, .1, 1, 10, 100, 1000, 10000), 
                labels = c(".01", ".1", "1", "10", "100", "1000", "10,000")) +
  scale_y_log10(breaks = c(10^8, 10^10, 10^12, 10^14),
                labels = c(".1", "10", "1000", "100,000")) +
  xlab(expression(paste("Population Density [people/", km^{2}, "]"))) +
  ylab("Energy Dissipation [GW]") +
  labs(color = "Tornado\nCasualties") +
  theme_bw() +
  theme(legend.position = c(.01, .99),
        legend.justification = c(.01, .99))

xx = c(8, 150, 1500, 1500, 10, 1.4, 1.4)
yy = c(10^13, 10^13, 10^11.7, 10^9, 10^9, 10^10, 10^12)
poly.df = data.frame(xx, yy)

p + geom_polygon(data = poly.df, aes(x = xx, y = yy), 
                 col = "grey", fill = "transparent", 
                 size = 1.2) +
  geom_text(aes(x = 1.4, y = 10^6.9, label = "1.4"), col = "grey") +
  geom_text(aes(x = 1500, y = 10^6.9, label = "1500"), col = "grey") +
  geom_text(aes(x = 1.4, y = 10^6.5, label = "C"), col = "grey") +
  geom_text(aes(x = 1500, y = 10^6.5, label = "D"), col = "grey") +
  geom_text(aes(x = 60000, y = 10^9, label = "1 B", hjust = "right"), col = "grey") +
  geom_text(aes(x = 60000, y = 10^13, label =  "10,000 A", hjust = "right"), col = "grey") +
  geom_segment(aes(x = 1.4, xend = 1.4,  y = 10^10, yend = 10^7.1), 
               col = "grey", linetype = 2) +
  geom_segment(aes(x = 1500, xend = 1500,  y = 10^9, yend = 10^7.1), 
               col = "grey", linetype = 2) +
  geom_segment(aes(x = 150, xend = 7000,  y = 10^13, yend = 10^13), 
               col = "grey", linetype = 2) +
  geom_segment(aes(x = 1500, xend = 25000,  y = 10^9, yend = 10^9), 
               col = "grey", linetype = 2)

Figure 4: Tornado casualties as a function of population density and energy dissipation on a log-log plot. The number of casualties is color coded on a logarithmic scale.

Population and casualties and energy dissipation and casualties. There are seven orders of magnitude separating the lowest from the highest per-tornado energy dissipation. There are five orders of magnitude separating the lowest from highest per-tornado population density.

Determine the number and proportion of casualty-producing tornadoes within the polygon area depicted in Figure 4.

Torn.df %>%
  filter(popD >= 1.4 & popD <= 1500) %>%
  summarize(nT = n(),
            pT = nT/2192)

##     nT        pT
## 1 2023 0.9229015

Torn.df %>%
  filter(ED <= 10^13 & ED >= 10^9) %>%
  summarize(nT = n(),
            pT = nT/2192)

##     nT        pT
## 1 2033 0.9274635

Torn.df %>%
  filter(ED <= 10^13 & ED >= 10^9 & popD >= 1.4 & popD <= 1500) %>%
  summarize(nT = n(),
            pT = nT/2192)

##     nT        pT
## 1 1873 0.8544708

library(sp)
library(sf)

points <- data.frame(xT = Torn.df$popD, yT = Torn.df$ED)
sp2 <- SpatialPoints(points)
sp2.sf <- st_as_sf(sp2)

xx <- c(xx, xx[1])
yy <- c(yy, yy[1])
p1 <- cbind(xx, yy)
sp1.sf <- st_polygon(list(p1))

tf <- st_within(sp2.sf, sp1.sf, sparse = FALSE)
sum(tf)

## [1] 1829

sum(tf[, 1])/2192

## [1] 0.8343978

Correlation between ED and popD.

Torn.df %>%
  group_by(mag) %>%
  summarize(nT = n(),
            totalED = sum(ED)/10^12,
            avgED = mean(ED)/10^12,
            medED = median(ED)/10^12)

## # A tibble: 6 x 5
##     mag    nT    totalED       avgED       medED
##   <int> <int>      <dbl>       <dbl>       <dbl>
## 1     0   183   2.047737  0.01118982 0.002122412
## 2     1   750  63.661112  0.08488148 0.020255930
## 3     2   753 342.357147  0.45465757 0.147663274
## 4     3   392 716.181754  1.82699427 0.921135995
## 5     4   116 584.882021  5.04208639 2.285802106
## 6     5    14 165.825408 11.84467201 8.223214252

Torn.df %>%
  filter(popD > 0) %>%
  mutate(lED10 = log10(ED),
         lpopD10 = log10(popD),
         lED = log(ED),
         lpopD = log(popD)) %>%
  summarize(r1 = cor(ED, popD, method = "p"),
            r2 = cor(ED, popD, method = "s"),
            r3 = cor(lED10, lpopD10),
            r4 = cor(lED, lpopD))

##            r1        r2         r3         r4
## 1 -0.06650332 -0.216603 -0.2057972 -0.2057972

Results

Descriptive statistics

Summary statistics. Descriptive statistics for dependent and independent variables used in the regresion models, 1995–2016 (N = 2,192)

Torn.df %>%
  filter(popD > 0) %>%
  summarize(nT = n(),
            casM = mean(cas),
            casMedian = median(cas),
            casMax = max(cas),
            casMin = min(cas),
            casSD = sd(cas),
            popDM = mean(popD),
            popDMedian = median(popD),
            popDQ25 = quantile(popD, prob = .25),
            popDQ75 = quantile(popD, prob = .75),
            popDMax = max(popD),
            popDMin = min(popD),
            popDSD = sd(popD),
            EDM = mean(ED),
            EDMedian = median(popD),
            EDMax = max(ED),
            EDMin = min(ED),
            EDSD = sd(ED)) %>%
  t() %>%
xtable(., digits = 3)

## % latex table generated in R 3.4.3 by xtable 1.8-2 package
## % Wed Dec 27 08:54:41 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rr}
##   \hline
##  & x \\ 
##   \hline
## nT & 2192.000 \\ 
##   casM & 11.829 \\ 
##   casMedian & 3.000 \\ 
##   casMax & 1564.000 \\ 
##   casMin & 1.000 \\ 
##   casSD & 53.164 \\ 
##   popDM & 222.612 \\ 
##   popDMedian & 31.874 \\ 
##   popDQ25 & 9.882 \\ 
##   popDQ75 & 136.872 \\ 
##   popDMax & 13949.001 \\ 
##   popDMin & 0.003 \\ 
##   popDSD & 708.024 \\ 
##   EDM & 855268417141.460 \\ 
##   EDMedian & 31.874 \\ 
##   EDMax & 66169658959790.266 \\ 
##   EDMin & 5659764.491 \\ 
##   EDSD & 2896044608799.418 \\ 
##    \hline
## \end{tabular}
## \end{table}

Table 1 Descriptive statistics for the variables used in the regression model.

Casualty model

Mean and variance of casualty counts.

Torn.df %>%
  filter(popD > 0) %>%
  summarize(meanCas = mean(cas),
            varCas = var(cas),
            ratio = varCas/meanCas)

##    meanCas   varCas    ratio
## 1 11.82938 2826.436 238.9335

For the set of tornadoes with at least one casualty, the mean and variance of the counts are 11.8 and 2826, respectively suggesting a negative binomial model for counts.

Use the glm.nb function from the MASS package to fit the model and get the estimate for theta. Use natural logs for the modeling.

df = Torn.df %>%
  filter(popD > 0) %>%
  mutate(lpopD = log(popD),
         lpop = log(pop),
         lED = log(ED),
         lpopD10 = log10(popD),
         lED10 = log10(ED))

formula0 = cas ~ lpopD + lED 
formula1 = cas ~ lpopD + lED + lpopD:lED

suppressMessages(library(MASS))
model0a = glm.nb(formula0, data = df, link = log, init.theta = 1)
model0 = glm(formula0, data = df, family = neg.bin(theta = model0a$theta))

model1a = glm.nb(formula1, data = df, link = log, init.theta = 1)
model1 = glm(formula1, data = df, family = neg.bin(theta = model1a$theta))

pre0 = predict.glm(model0, type = "response")
pre1 = predict.glm(model1, type = "response")

df$pre0 = pre0
df$pre1 = pre1

cor(df$pre0, df$cas)

## [1] 0.4203877

cor(df$pre1, df$cas)

## [1] 0.4478823

sqrt(mean((df$pre0 - df$cas)^2))

## [1] 50.43017

sqrt(mean((df$pre1 - df$cas)^2))

## [1] 49.66327

mean(abs(df$pre0 - df$cas))

## [1] 10.82663

mean(abs(df$pre1 - df$cas))

## [1] 10.62275

summary(model1)

## 
## Call:
## glm(formula = formula1, family = neg.bin(theta = model1a$theta), 
##     data = df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0779  -0.9946  -0.5365   0.0756   8.2310  
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3.026816   0.960560  -3.151 0.001649 ** 
## lpopD       -0.812117   0.229361  -3.541 0.000407 ***
## lED          0.166102   0.037678   4.408 1.09e-05 ***
## lpopD:lED    0.041503   0.009206   4.508 6.88e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial family taken to be 3.408786)
## 
##     Null deviance: 4275.1  on 2191  degrees of freedom
## Residual deviance: 2286.3  on 2188  degrees of freedom
## AIC: 13223
## 
## Number of Fisher Scoring iterations: 19

summary(model0)

## 
## Call:
## glm(formula = formula0, family = neg.bin(theta = model0a$theta), 
##     data = df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9426  -1.0070  -0.5678   0.0534   8.2118  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.88254    0.53019 -12.981   <2e-16 ***
## lpopD        0.21331    0.02476   8.615   <2e-16 ***
## lED          0.32115    0.01963  16.361   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial family taken to be 3.683118)
## 
##     Null deviance: 4128.2  on 2191  degrees of freedom
## Residual deviance: 2311.7  on 2189  degrees of freedom
## AIC: 13323
## 
## Number of Fisher Scoring iterations: 21

AIC(model0)

## [1] 13323.1

AIC(model1)

## [1] 13223.14

BIC(model0)

## [1] 13340.17

BIC(model1)

## [1] 13245.91

Table 4 Table of coefficients for an interactive and additive models fit to the per-tornado casualty counts.

Marginal effects. Use the interplot package. Find the value of popD that would make predicted energy elasticity from the interactive model equal to the (constant) predicted energy elasticity from the additive model and find the value of ED that would make predicted population elasticity from the interactive model equal to the (constant) predicted population elasticity from the additive model.

suppressMessages(library(interplot))
A5 = interplot(m = model1, var1 = "lED", var2 = "lpopD", hist = TRUE) +
    scale_x_continuous(breaks = log(c(.01, 1, 100, 10000)), 
                       labels = c(".01", "1", "100", "10,000")) +
    scale_y_continuous(breaks = c(-.5, -.25, 0, .25, .5, .75, 1)) +
#    geom_vline(xintercept = -1.5, col = "green") +
    geom_hline(yintercept = 0, linetype = "dashed") +
    geom_segment(aes(y = coef(model0)[3], yend = coef(model0)[3],
                 x = range(df$lpopD)[1], xend = range(df$lpopD)[2]),
                 col = "red") +
    xlab(expression(paste("Population Density [people/", km^{2}, "]"))) +
    ylab("Energy Elasticity") +
    theme_bw() 

out.df = interplot(m = model1, var1 = "lED", var2 = "lpopD", plot = FALSE)
approx(x = out.df$lpopD, y = out.df$coef, xout = log(100))$y

## [1] 0.3572223

approx(x = out.df$lpopD, y = out.df$ub, xout = log(100))$y

## [1] 0.399648

approx(x = out.df$lpopD, y = out.df$lb, xout = log(100))$y

## [1] 0.3153432

approx(x = out.df$lpopD, y = out.df$coef, xout = log(1.4))$y

## [1] 0.1789858

approx(x = out.df$lpopD, y = out.df$ub, xout = log(1.4))$y

## [1] 0.2474098

approx(x = out.df$lpopD, y = out.df$lb, xout = log(1.4))$y

## [1] 0.1102553

approx(x = out.df$lpopD, y = out.df$coef, xout = log(1500))$y

## [1] 0.4702951

approx(x = out.df$lpopD, y = out.df$ub, xout = log(1500))$y

## [1] 0.5489416

approx(x = out.df$lpopD, y = out.df$lb, xout = log(1500))$y

## [1] 0.3931439

(2^approx(x = out.df$lpopD, y = out.df$coef, xout = log(100))$y - 1) * 100

## [1] 28.09573

approx(x = out.df$lpopD, y = out.df$coef, xout = log(1000))$y

## [1] 0.4533652

exp(approx(x = out.df$lb, y = out.df$lpopD, xout = 0)$y) #popD when lb above zero

## [1] 0.2122565

sum(df$lpopD > approx(x = out.df$lb, y = out.df$lpopD, xout = 0)$y)/length(df$lpopD) #% obs

## [1] 0.9899635

exp(approx(x = out.df$coef, y = out.df$lpopD, xout = coef(model0)[3])$y)

## [1] 42.15137

B5 = interplot(m = model1, var1 = "lpopD", var2 = "lED", hist = TRUE) +
    scale_x_continuous(breaks = log(c(10^7, 10^9, 10^11, 10^13)),
                       labels = c(".01", "1", "100", "10,000")) +
    scale_y_continuous(breaks = c(-.5, -.25, 0, .25, .5, .75, 1)) +
#    geom_vline(xintercept = 9.35, col = "green") +
    geom_hline(yintercept = 0, linetype = "dashed") +
    geom_segment(aes(y = coef(model0)[2], yend = coef(model0)[2],
                 x = range(df$lED)[1], xend = range(df$lED)[2]),
                 col = "red") +
    xlab("Energy Dissipation [GW]") +
    ylab("Population Elasticity") +
    theme_bw()

sum(Torn.df$ED > 2.1e9 & tf)/2192

## [1] 0.8070255

sum(Torn.df$ED > 2.1e9)/2192

## [1] 0.9060219

out.df = interplot(m = model1, var1 = "lpopD", var2 = "lED", plot = FALSE)
out.df[1, ]

##        lED       coef          ub         lb
## 1 15.54889 -0.1688119 0.005324622 -0.3443671

approx(x = out.df$lED, y = out.df$coef, xout = log(10^9))$y

## [1] 0.04724048

approx(x = out.df$lED, y = out.df$ub, xout = log(10^9))$y

## [1] 0.1329021

approx(x = out.df$lED, y = out.df$lb, xout = log(10^9))$y

## [1] -0.04079171

approx(x = out.df$lED, y = out.df$coef, xout = log(10^13))$y

## [1] 0.4318118

approx(x = out.df$lED, y = out.df$ub, xout = log(10^13))$y

## [1] 0.5369099

approx(x = out.df$lED, y = out.df$lb, xout = log(10^13))$y

## [1] 0.3300179

exp(approx(x = out.df$lb, y = out.df$lED, xout = 0)$y)/10^9 #ED when lb above zero

## [1] 2.06471

sum(df$lED > approx(x = out.df$lb, y = out.df$lED, xout = 0)$y)/length(df$lED) #% obs

## [1] 0.9032847

approx(x = out.df$lED, y = out.df$coef, xout = log(10^13))$y

## [1] 0.4318118

exp(approx(x = out.df$coef, y = out.df$lED, xout = coef(model0)[2])$y)

## [1] 5.337e+10

mtrx = matrix(c(1, 2), nrow = 2, byrow = TRUE)
A5 = A5 + ggtitle("A") + 
  theme(plot.title = element_text(hjust = 0))
B5 = B5 + ggtitle("B") + 
  theme(plot.title = element_text(hjust = 0))
multiplot(A5, B5, layout = mtrx)

Figure 5 Marginal effects plots. Relative percentage of observations are shown as a histogram along the horizontal axis.

The predicted casualty rate (and the upper and lower boundaries for this point estimate) at each of 9 combinations of values for energy dissipation (ED) and population density (PD). The allEffects() function is needed to get the lower and upper 95% uncertainty points on the prediction not available from the predict.glm() function. Also the value of ED that makes predicted population elasticity from the interactive model equal to the (constant) predicted population elasticity from the additive model and the value PD that makes the predicted energy elasticity from the interactive model equalt to the (constant) predicted population elasticity from the additive model.

library(effects)

## Loading required package: carData

## lattice theme set by effectsTheme()
## See ?effectsTheme for details.

# the minimum value of ED and the minimum value of PD
predict.glm(model1, newdata = data.frame(lED = min(df$lED), lpopD = min(df$lpopD)), type = "response")

##        1 
## 1.676098

eff = allEffects(model1, xlevels = list(lED = seq(min(df$lED), min(df$lED) + 1, 1), 
                                        lpopD = seq(min(df$lpopD), min(df$lpopD) + 1, 1)))
as.data.frame(eff[[1]])[1, ]

##       lpopD      lED      fit        se     lower    upper
## 1 -5.759214 15.54889 1.676098 0.8616381 0.3093612 9.080985

# the minimum value of ED and the maximum value of PD
predict.glm(model1, newdata = data.frame(lED = min(df$lED), lpopD = max(df$lpopD)), type = "response")

##         1 
## 0.1305705

eff = allEffects(model1, xlevels = list(lED = seq(min(df$lED), min(df$lED) + 1, 1), 
                                        lpopD = seq(max(df$lpopD) - 1, max(df$lpopD), 1)))
as.data.frame(eff[[1]])[2, ]

##      lpopD      lED       fit        se      lower     upper
## 2 9.543163 15.54889 0.1305705 0.5410464 0.04519087 0.3772589

# the maximum value of ED and the minimum value of PD
predict.glm(model1, newdata = data.frame(lED = max(df$lED), lpopD = min(df$lpopD)), type = "response")

##         1 
## 0.5115534

eff = allEffects(model1, xlevels = list(lED = seq(max(df$lED) - 1, max(df$lED), 1), 
                                        lpopD = seq(min(df$lpopD), min(df$lpopD) + 1, 1)))
as.data.frame(eff[[1]])[3, ]

##       lpopD      lED       fit        se     lower    upper
## 3 -5.759214 31.82324 0.5115534 0.6378153 0.1464472 1.786903

#the maximum value of ED and the maximum value of PD
predict.glm(model1, newdata = data.frame(lED = max(df$lED), lpopD = max(df$lpopD)), type = "response")

##        1 
## 1227.981

eff = allEffects(model1, xlevels = list(lED = seq(max(df$lED) - 1, max(df$lED), 1), 
                                        lpopD = seq(max(df$lpopD) - 1, max(df$lpopD), 1)))
as.data.frame(eff[[1]])[4, ]

##      lpopD      lED      fit        se    lower    upper
## 4 9.543163 31.82324 1227.981 0.4466586 511.4282 2948.482

#the 10th percentile value of ED and the 10th percentile value of PD
predict.glm(model1, newdata = data.frame(lED = quantile(df$lED, prob = .1), lpopD = quantile(df$lpopD, prob = .1)), type = "response")

##      10% 
## 1.898046

eff = allEffects(model1, xlevels = list(lED = seq(quantile(df$lED, prob = .1), quantile(df$lED, prob = .1) + 1, 1), 
                                        lpopD = seq(quantile(df$lpopD, prob = .1), quantile(df$lpopD, prob = .1) + 1, 1)))
as.data.frame(eff[[1]])[1, ]

##      lpopD      lED      fit        se    lower    upper
## 1 1.207831 21.49812 1.898046 0.1357294 1.454487 2.476874

# the 10th percentile value of ED and the 90th percentile value of PD
predict.glm(model1, newdata = data.frame(lED = quantile(df$lED, prob = .1), lpopD = quantile(df$lpopD, prob = .9)), type = "response")

##      10% 
## 2.870813

eff = allEffects(model1, xlevels = list(lED = seq(quantile(df$lED, prob = .1), quantile(df$lED, prob = .1) + 1, 1), 
                                        lpopD = seq(quantile(df$lpopD, prob = .9), quantile(df$lpopD, prob = .9) + 1, 1)))
as.data.frame(eff[[1]])[1, ]

##      lpopD      lED      fit        se   lower    upper
## 1 6.372288 21.49812 2.870813 0.1226639 2.25702 3.651526

# the 90th percentile value of ED and the 10th percentile value of PD
predict.glm(model1, newdata = data.frame(lED = quantile(df$lED, prob = .9), lpopD = quantile(df$lpopD, prob = .1)), type = "response")

##      90% 
## 8.318838

eff = allEffects(model1, xlevels = list(lED = seq(quantile(df$lED, prob = .9), quantile(df$lED, prob = .9) + 1, 1), 
                                        lpopD = seq(quantile(df$lpopD, prob = .1), quantile(df$lpopD, prob = .1) + 1, 1)))
as.data.frame(eff[[1]])[1, ]

##      lpopD      lED      fit        se    lower    upper
## 1 1.207831 28.33203 8.318838 0.1070887 6.743078 10.26283

# the 90th percentile value of ED and the 90th percentile value of PD
predict.glm(model1, newdata = data.frame(lED = quantile(df$lED, prob = .9), lpopD = quantile(df$lpopD, prob = .9)), type = "response")

##      90% 
## 54.43867

eff = allEffects(model1, xlevels = list(lED = seq(quantile(df$lED, prob = .9),
                                                  quantile(df$lED, prob = .9) + 1, 1), 
                                        lpopD = seq(quantile(df$lpopD, prob = .9), quantile(df$lpopD, prob = .9) + 1, 1)))
as.data.frame(eff[[1]])[1, ]

##      lpopD      lED      fit        se    lower    upper
## 1 6.372288 28.33203 54.43867 0.1441927 41.03009 72.22914

predict.glm(model1, newdata = data.frame(lED = 24.70051, lpopD = 3.741267), 
            type = "response")

##        1 
## 6.507591

eff = allEffects(model1, xlevels = list(lED = seq(24.70051, 24.70051 + 1, 1), 
                                        lpopD = seq(3.741267, 3.741267 + 1, 1)))
as.data.frame(eff[[1]])[1, ]

##      lpopD      lED      fit         se    lower   upper
## 1 3.741267 24.70051 6.507591 0.04719707 5.932309 7.13866

Repeat for the additive model.

newData = data.frame(lpopD = c(min(df$lpopD), max(df$lpopD), min(df$lpopD), max(df$lpopD), 
                               quantile(df$lpopD, prob = .1), quantile(df$lpopD, prob = .9), quantile(df$lpopD, prob = .1), quantile(df$lpopD, prob = .9),   
                               3.741267),
                     lED = c(min(df$lED), min(df$lED), max(df$lED), max(df$lED),
                             quantile(df$lED, prob = .1), quantile(df$lED, prob = .1), quantile(df$lED, prob = .9), quantile(df$lED, prob = .9),
                             24.70051))
predict.glm(model0, newdata = newData, type = "response", se.fit = TRUE)

## $fit
##            1            2            3            4            5 
##   0.04426818   1.15792131   8.24024037 215.53968419   1.32211704 
##            6            7            8            9 
##   3.97829320  11.86923182  35.71490496   6.34765274 
## 
## $se.fit
##           1           2           3           4           5           6 
##  0.01510704  0.24895587  1.97449853 47.79772108  0.15614515  0.39608104 
##           7           8           9 
##  1.02647680  3.94954479  0.31028955 
## 
## $residual.scale
## [1] 1.919145

Absolute value of predicted differences.

pg = data.frame(DeltaC = predict.glm(model1, type = "response") - 
                         predict.glm(model0, type = "response"))
pg$DeltaCA = abs(pg$DeltaC)
mean(pg$DeltaCA)

## [1] 1.370967

median(pg$DeltaCA)

## [1] 0.4795468

sum(pg$DeltaCA > 3)/length(pg$DeltaCA)

## [1] 0.1099453

sum(pg$DeltaCA > 5)/length(pg$DeltaCA)

## [1] 0.05474453

sum(pg$DeltaCA > 10)

## [1] 36

sum(pg$DeltaCA > 10)/length(pg$DeltaCA)

## [1] 0.01642336

pgP = pg %>%
  filter(DeltaC > .5)
pgM = pg %>%
  filter(DeltaC < .5)

ggplot(pg, aes(x = DeltaC)) +
  geom_histogram(binwidth = 2, col ="black") +
  scale_x_continuous(breaks = seq(-15, 40, 5), labels = seq(-15, 40, 5)) +
  ylab("Number of Tornadoes") +
  xlab("Interaction Model Predictions Minus Additive Model Predictions")

Multiply pop density in people/sq km by 2.59 to get pop density in people / sq miles. 373 * 2.59 = 966 ~ population density of Fort Smith, Arkansas. Kent, Ohio at 954 people / sq. mi. Fort Wayne, Indiana at 959 people / sq. mi http://zipatlas.com/us/in/city-comparison/population-density.htm

In each panel what are the 10th and 90th percentile values in the distribution of the X-axis variable? Also, what is the predicted point estimate of the elasticity and the upper and lower boundaries for the confidence band at these 10th and 90th percentile values.

quantile(df$popD, probs = c(.1, .9))

##        10%        90% 
##   3.346219 585.396097

quantile(df$ED, probs = c(.1, .9))

##          10%          90% 
## 2.170278e+09 2.015790e+12

3.346, 585.4 people/sq. km 2.17 x 10^9 and 2.02 x 10^12 Watts

quantile(df$popD, probs = c(.1, .9))

##        10%        90% 
##   3.346219 585.396097

quantile(df$ED, probs = c(.1, .9))

##          10%          90% 
## 2.170278e+09 2.015790e+12

A = interplot(m = model1, var1 = "lED", var2 = "lpopD", plot = FALSE, ci = .95)

round(approx(A$lpopD, A$coef, xout = quantile(df$lpopD, probs = .1))$y, 4)

## [1] 0.2154

round(approx(A$lpopD, A$lb, xout = quantile(df$lpopD, probs = .1))$y, 4)

## [1] 0.1581

round(approx(A$lpopD, A$ub, xout = quantile(df$lpopD, probs = .1))$y, 4)

## [1] 0.2712

round(approx(A$lpopD, A$coef, xout = quantile(df$lpopD, probs = .9))$y, 4)

## [1] 0.431

round(approx(A$lpopD, A$lb, xout = quantile(df$lpopD, probs = .9))$y, 4)

## [1] 0.3676

round(approx(A$lpopD, A$ub, xout = quantile(df$lpopD, probs = .9))$y, 4)

## [1] 0.4947

B = interplot(m = model1, var1 = "lpopD", var2 = "lED", plot = FALSE, ci = .95)

round(approx(B$lED, B$coef, xout = quantile(df$lED, probs = .1))$y, 4)

## [1] 0.0796

round(approx(B$lED, B$lb, xout = quantile(df$lED, probs = .1))$y, 4)

## [1] 0.0028

round(approx(B$lED, B$ub, xout = quantile(df$lED, probs = .1))$y, 4)

## [1] 0.1533

round(approx(B$lED, B$coef, xout = quantile(df$lED, probs = .9))$y, 4)

## [1] 0.3649

round(approx(B$lED, B$lb, xout = quantile(df$lED, probs = .9))$y, 4)

## [1] 0.2871

round(approx(B$lED, B$ub, xout = quantile(df$lED, probs = .9))$y, 4)

## [1] 0.4442

Use the effects package to get marginal predictions witht the effects function. theta in the neg.bin argument is determined by the glm.nb function above.

suppressMessages(library(effects))
eff = allEffects(model1, xlevels = list(lED = log(10^seq(9, 13, .1)), 
                                        lpopD = log(c(1.4, 31.9, 1500))))

results1 = as.data.frame(eff[[1]]) %>%
  mutate(popD = round(exp(lpopD), 1),
         ED = exp(lED),
         lbl = paste("Population Density", popD, 
                                "[people/sq. km]", sep = " "))

fit0 = predict(model0, 
               newdata = data.frame(lpopD = results1$lpopD, lED = results1$lED),
               type = "response")

results1$fit0 = fit0

ggplot(results1, aes(x = ED, y = fit, color = as.factor(popD))) +
  geom_line(size = 2) +
#  geom_line(aes(y = fit0)) +
  scale_x_log10(breaks = c(10^9, 10^10, 10^11, 10^12, 10^13),
                labels = c("1", "10", "100", "1000", "10,000")) +
  scale_y_log10(limits = c(.7, 200), breaks = c(1, 2, 5, 10, 20, 50, 100)) +
  xlab("Energy Dissipation [GW]") +
  ylab("Casualty Rate\n [No. of Casualties Per Casualty-Producing Tornado]") +
  labs(color = expression(paste("Population Density [people/", km^{2}, "]"))) +
  theme_bw() +
  theme(legend.position = c(.01, .99),
        legend.justification = c(.01, .99)) +
  guides(color = guide_legend(reverse = TRUE))

Figure 6 Predicted effect of tornado energy dissipation on casualty rates for given levels of population density (colors).

Compute the common slope in the above figure (model0). Should equal the energy elasticity. Then find the population density at which the energy elasticity of model1 equals the energy elasticity of model0. This is done by interpolation.

commonSlope = diff(predict(model0, 
               newdata = data.frame(lpopD = c(2, 2), lED = c(20, 21)),
               type = "link"))
commonSlope

##         2 
## 0.3211507

slope = numeric()
for(i in seq(3, 4, .1)){
slope = c(slope, diff(predict(model1,
                newdata = data.frame(lpopD = c(i, i), lED = c(20, 21)),
                type = "link")))
}
A = data.frame(popD = exp(seq(3, 4, .1)), slope)

approx(A$slope, A$popD, xout = commonSlope)$y

## [1] 41.97262

Model adequacy

pchisq(model1$deviance, model1$df.residual, lower.tail = FALSE) # model is adequate p-value = .07.

## [1] 0.07027416