Code for the paper: Tornado casualties in the United States: A model involving an interaction between population density and energy dissipation, by Elsner, Fricker, and Berry.
Load packages and data.
suppressMessages(library("ggplot2"))
suppressMessages(library("raster"))
suppressMessages(library("ggmap"))
suppressMessages(library("dplyr"))
suppressMessages(library("tidyr"))
suppressMessages(library("rgdal"))
suppressMessages(library("rgeos"))
suppressMessages(library("ggthemes"))
suppressMessages(library("RColorBrewer"))
suppressMessages(library("scales"))
suppressMessages(library("broom"))
suppressMessages(library("maps"))
suppressMessages(library("sf"))
suppressMessages(library("xtable"))
suppressMessages(library("lubridate"))
#load(file = "Oct8.RData")
Data are from the Storm Prediction Center (SPC): http://www.spc.noaa.gov/gis/svrgis/.
download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip",
"tornado.zip", mode = "wb")
unzip("tornado.zip")
Read the tornado data.
Torn.sfdf <- st_read(dsn = "torn",
layer = "torn",
stringsAsFactors = FALSE)
## Reading layer `torn' from data source `/Users/jelsner/Dropbox/Tornadoes/Casualties/torn' using driver `ESRI Shapefile'
## Simple feature collection with 61092 features and 22 fields
## geometry type: LINESTRING
## dimension: XY
## bbox: xmin: -6279240 ymin: -1835231 xmax: 3405399 ymax: 3888106
## epsg (SRID): NA
## proj4string: +proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs
Add new columns and filter. We only consider tornadoes occurring within the conterminous United States.
Torn.sfdf <- Torn.sfdf %>%
mutate(Date = as.Date(date),
Year = yr,
cas = inj + fat,
Length = len * 1609.34,
Width = wid * .9144,
Width = ifelse(Width == 0, min(Width[Width > 0]), Width),
AreaPath = Length * Width,
AreaVortex = pi * (Width/2)^2,
Ma = factor(month.abb[mo], levels = month.abb[1:12])) %>%
filter(Year >= 1995, st != "HI", st != "AK", st != "PR")
Torn.sfdf %>%
filter(cas > 0) %>%
summarize(nT = n(),
nC = sum(cas),
pF = sum(fat)/nC * 100)
## Simple feature collection with 1 feature and 3 fields
## geometry type: MULTILINESTRING
## dimension: XY
## bbox: xmin: -2270178 ymin: -1429838 xmax: 2130657 ymax: 1116407
## epsg (SRID): NA
## proj4string: +proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs
## nT nC pF geometry
## 1 2208 25968 6.662046 MULTILINESTRING ((-223554.6...
During the 22-year period 1995–2016 there are 26,863 tornadoes recorded in the conterminous United States. Of these, 2,208 are linked to 25,968 casualties. Only 6.7 percent of all casualties are fatalities.
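These counts can be checked directly (a minimal sketch using the objects created above):
nrow(Torn.sfdf) # 26863 tornadoes, 1995-2016
sum(Torn.sfdf$cas > 0) # 2208 casualty-producing tornadoes
sum(Torn.sfdf$cas) # 25968 casualties
round(sum(Torn.sfdf$fat)/sum(Torn.sfdf$cas) * 100, 1) # 6.7 percent of casualties are fatalities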
df <- as.data.frame(Torn.sfdf) %>%
arrange(desc(cas)) %>%
dplyr::select(date, time, st, mag, inj, fat, cas)
head(df)
## date time st mag inj fat cas
## 1 2011-04-27 15:43:00 AL 4 1500 64 1564
## 2 2011-05-22 16:34:00 MO 5 1150 158 1308
## 3 1999-05-03 17:26:00 OK 5 583 36 619
## 4 2015-12-26 18:46:00 TX 4 468 10 478
## 5 2008-05-10 16:20:00 OK 4 350 21 371
## 6 2011-04-27 18:15:00 GA 4 335 20 355
On average a casualty-producing EF0 tornado results in 2.0 casualties, a casualty-producing EF1 tornado in 3.1 casualties, and a casualty-producing EF2 tornado in 6.5 casualties.
as.data.frame(Torn.sfdf) %>%
filter(cas > 0) %>%
group_by(mag) %>%
summarize(cas = sum(cas),
nT = n(),
fat = sum(fat),
inj = sum(inj),
avgcas = cas/nT) %>%
xtable(., digits = 1)
## % latex table generated in R 3.4.3 by xtable 1.8-2 package
## % Wed Dec 27 08:43:31 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrr}
## \hline
## & mag & cas & nT & fat & inj & avgcas \\
## \hline
## 1 & 0 & 362 & 183 & 8 & 354 & 2.0 \\
## 2 & 1 & 2300 & 750 & 79 & 2221 & 3.1 \\
## 3 & 2 & 4888 & 753 & 218 & 4670 & 6.5 \\
## 4 & 3 & 7933 & 392 & 536 & 7397 & 20.2 \\
## 5 & 4 & 7187 & 116 & 457 & 6730 & 62.0 \\
## 6 & 5 & 3298 & 14 & 432 & 2866 & 235.6 \\
## \hline
## \end{tabular}
## \end{table}
Casualties by location and size.
df <- as.data.frame(Torn.sfdf) %>%
filter(cas > 0) %>%
mutate(label = paste("EF", mag, sep = ""))
states.df <- map_data("state") %>%
filter(region != 'alaska', region != 'district of columbia')
ggplot(states.df, aes(x = long, y = lat, group = group)) +
geom_polygon(fill = "white") +
geom_path(color = "gray85") +
coord_map(projection = "polyconic") +
geom_point(aes(x = slon, y = slat, size = cas, group = om),
alpha = .3, data = df) +
xlab("") + ylab("") +
facet_wrap(~ label, ncol = 2) +
theme_minimal() +
theme(panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom") +
scale_size_continuous("Number of\n Casualties")
Alabama had the most casualties (3,937) followed by Oklahoma (2,500), Missouri (2,025), Arkansas (1,759), Texas (1,704), and Tennessee (1,678).
sfdf <- Torn.sfdf %>%
filter(cas > 0) %>%
group_by(st) %>%
summarize(nT = n(),
nC = sum(cas),
nF = sum(fat),
ratio = nC/nT) %>%
arrange(desc(nC))
st_geometry(sfdf) = NULL #removes the geometry column
sfdf
## # A tibble: 44 x 5
## st nT nC nF ratio
## * <chr> <int> <int> <int> <dbl>
## 1 AL 159 3937 347 24.761006
## 2 OK 108 2500 132 23.148148
## 3 MO 110 2025 219 18.409091
## 4 AR 126 1759 114 13.960317
## 5 TX 193 1704 100 8.829016
## 6 TN 133 1678 149 12.616541
## 7 GA 125 1610 94 12.880000
## 8 MS 130 1400 98 10.769231
## 9 KY 94 1051 71 11.180851
## 10 NC 88 985 46 11.193182
## # ... with 34 more rows
Make a choropleth map of fatalities and injuries by state using functions from the tmap package. The functions are based on the grammar of graphics, and the syntax resembles that of ggplot2.
library(tmap)
library(albersusa)
us_sf <- usa_sf("aeqd") %>%
mutate(st = as.character(iso_3166_2))
us_sf <- us_sf %>%
filter(st != "AK" & st != "HI")
sfdf2 <- Torn.sfdf %>%
group_by(st) %>%
summarize(nT = n(),
nC = sum(cas),
nF = sum(fat),
nI = sum(inj))
st_geometry(sfdf2) = NULL
sfdf3 <- left_join(sfdf2,
as.data.frame(us_sf), by = "st") %>%
sf::st_as_sf() %>%
select(nT, nC, nF, nI)
tm1 <- tm_shape(sfdf3) +
tm_polygons("nI",
border.col = NULL,
title = "Injuries",
palette = "Purples") +
tm_text("nI", size = .5) +
tm_shape(us_sf) + tm_borders(alpha = .2) +
tm_compass() + tm_scale_bar(lwd = .5) +
tm_format_Europe2(legend.position = c("left", "bottom"),
attr.position = c("left", "bottom"))
tm2 <- tm_shape(sfdf3) +
tm_polygons("nF",
border.col = NULL,
title = "Fatalities",
palette = "Oranges") +
tm_text("nF", size = .5) +
tm_shape(us_sf) + tm_borders(alpha = .2) +
tm_compass() + tm_scale_bar(lwd = .5) +
tm_format_Europe2(legend.position = c("left", "bottom"),
attr.position = c("left", "bottom"))
tmap_arrange(tm1, tm2, asp = NA, ncol = 1)
Create tornado paths and extract the average population density (people per sq. km) under each path. The population data (Gridded Population of the World, Version 4, 2010) are on a raster. With the extract method and weights = TRUE, the average is weighted by the percentage of each cell under the path; this takes more time. Multiply the average population density by the path area (L x W) to estimate the number of people under the path.
First convert the tornado simple feature data frame to a spatial lines object. Then buffer each track to make a path.
TornL.sfdf <- Torn.sfdf %>%
filter(cas > 0)
TornL = as(TornL.sfdf, "Spatial")
TornP = gBuffer(TornL, byid = TRUE, width = TornL$Width/2, capStyle = "FLAT")
#TornP$AreaPath = gArea(TornP, byid = TRUE)
Population data are obtained from the Gridded Population of the World, version four (GPW, v4), from the Socioeconomic Data and Applications Center at Columbia University, USA. The database contains decennial census density estimates for 1990, 2000, and 2010, represented as people per square kilometer. Densities are based on residential population. The native cell resolution is .0083\(^{\circ}\) latitude/longitude, which at 36\(^{\circ}\) N latitude means a cell roughly .9 km in the north-south direction and .7 km in the east-west direction.
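The cell-size arithmetic can be sketched as follows (assuming roughly 111.32 km per degree of latitude):
res = .0083 # native cell resolution in degrees
kmPerDeg = 111.32 # kilometers per degree of latitude (approximate)
res * kmPerDeg # north-south extent: ~.92 km
res * kmPerDeg * cos(36 * pi/180) # east-west extent at 36 degrees N: ~.75 km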
Load the population rasters and crop them to a defined extent. Use the extract function to obtain the population density. The projectRaster and extract functions take about 10 minutes per raster. Tornado years are split at the midpoint between census years (2005): assign the population density from the 2000 decennial estimate, then replace it with the 2010 estimate for tornadoes occurring after 2005.
stime = proc.time()
PopD2000 = raster("gpw-v4-population-density_2000.tif")
PopD2010 = raster("gpw-v4-population-density_2010.tif")
ext = raster::extent(c(-125, -67, 24, 50))
PopD2000 = crop(PopD2000, ext)
PopD2010 = crop(PopD2010, ext)
PopD2000p = projectRaster(PopD2000, crs = proj4string(TornP))
PopD2010p = projectRaster(PopD2010, crs = proj4string(TornP))
TornP$popD2000 = raster::extract(PopD2000p, TornP, fun = mean, na.rm = TRUE,
weights = TRUE, normalizeWeights = FALSE)[, 1]
TornP$popD2010 = raster::extract(PopD2010p, TornP, fun = mean, na.rm = TRUE,
weights = TRUE, normalizeWeights = FALSE)[, 1]
TornP$popD = TornP$popD2000
yearGroup = TornP$yr > 2005
TornP$popD[yearGroup] = TornP$popD2010[yearGroup]
TornP$pop = TornP$popD * TornP$AreaPath/10^6
Torn.df = as.data.frame(TornP)
proc.time() - stime
## user system elapsed
## 625.091 13.833 640.173
Note: The warning results from tornadoes with paths over areas with no people. It can be safely ignored.
The popD attribute for each tornado corresponds to the population density from the nearest decennial estimate so that a tornado that occurred in 1996 uses the 2000 decennial estimate while a tornado that occurred in 2012 uses the 2010 estimate.
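The rule can be written as a small function (a hypothetical helper for illustration only; the analysis above applies the vectorized replacement directly):
nearestCensus = function(yr) ifelse(yr > 2005, 2010, 2000)
nearestCensus(c(1996, 2012)) # returns 2000 2010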
For the set of 2208 tornadoes with at least one casualty the median population density per tornado is 31.3 people per square kilometer with an inter-quartile range between 9.6 and 136 people per square kilometer.
Note: 1 mi^2 = 2.589988 km^2 (http://mcdc.missouri.edu/TenThings/urbanrural.shtml).
Torn.df %>%
summarize(nT = n(),
avgPopD = mean(popD),
medianPopD = median(popD),
q25 = quantile(popD, probs = .25),
q75 = quantile(popD, probs = .75),
maxPopD = max(popD),
minPopD = min(popD)) %>%
t()
## [,1]
## nT 2208.000000
## avgPopD 220.998721
## medianPopD 31.309198
## q25 9.618402
## q75 135.985054
## maxPopD 13949.001032
## minPopD 0.000000
Torn.df %>%
dplyr::select(date, st, mag, inj, fat, cas, popD) %>%
arrange(desc(popD)) %>%
head(., n = 10) %>%
xtable(., digits = 0)
## % latex table generated in R 3.4.3 by xtable 1.8-2 package
## % Wed Dec 27 08:54:29 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rllrrrrr}
## \hline
## & date & st & mag & inj & fat & cas & popD \\
## \hline
## 1 & 2007-08-08 & NY & 2 & 9 & 0 & 9 & 13949 \\
## 2 & 1998-01-09 & CA & 1 & 1 & 0 & 1 & 12249 \\
## 3 & 1999-01-18 & PA & 0 & 18 & 0 & 18 & 12220 \\
## 4 & 2010-09-16 & NY & 1 & 1 & 1 & 2 & 9524 \\
## 5 & 2003-03-27 & FL & 2 & 14 & 1 & 15 & 6588 \\
## 6 & 2010-07-25 & NY & 1 & 7 & 0 & 7 & 6094 \\
## 7 & 2013-07-19 & FL & 0 & 3 & 0 & 3 & 5224 \\
## 8 & 2014-07-28 & MA & 2 & 2 & 0 & 2 & 4440 \\
## 9 & 1998-05-05 & CA & 2 & 1 & 0 & 1 & 4340 \\
## 10 & 2015-05-24 & TX & 0 & 4 & 0 & 4 & 4047 \\
## \hline
## \end{tabular}
## \end{table}
sum(Torn.df$popD == 0)
## [1] 16
df = Torn.df %>%
filter(yr >= 1990 & yr <= 2001) %>%
summarize(totalPop = sum(pop)) # total population under casualty-producing tornado paths through 2001
Annual average number of people exposed to tornadoes that caused at least one casualty.
df = Torn.df %>%
filter(cas > 0) %>%
group_by(Year) %>%
summarize(nT = n(),
mPop = mean(pop),
sdPop = sd(pop),
sePop = sdPop/sqrt(nT),
mPopm = mPop - sePop,
mPopp = mPop + sePop)
A = ggplot(df, aes(x = Year, y = mPop)) +
geom_point() +
geom_errorbar(aes(ymin = mPopm, ymax = mPopp), width = .2) +
scale_y_continuous(limits = c(0, 2500)) +
scale_x_continuous(breaks = seq(1995, 2016, 4)) +
xlab("Year") + ylab("Number of People Exposed") +
theme_minimal()
Distribution of population and population density by EF rating.
PeopleByEF = Torn.df %>%
group_by(mag) %>%
summarize(nT = n(),
nP = round(sum(pop), 0),
avgP = round(mean(pop), 0),
medP = round(median(pop), 0),
avgPD = mean(popD),
medPD = median(popD))
sum(PeopleByEF$nP)
## [1] 1496394
xtable(PeopleByEF, digits = 0)
## % latex table generated in R 3.4.3 by xtable 1.8-2 package
## % Wed Dec 27 08:54:29 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrrr}
## \hline
## & mag & nT & nP & avgP & medP & avgPD & medPD \\
## \hline
## 1 & 0 & 183 & 9386 & 51 & 4 & 440 & 55 \\
## 2 & 1 & 750 & 217053 & 289 & 17 & 293 & 37 \\
## 3 & 2 & 753 & 397781 & 528 & 72 & 186 & 29 \\
## 4 & 3 & 392 & 492471 & 1256 & 261 & 88 & 24 \\
## 5 & 4 & 116 & 289722 & 2498 & 824 & 103 & 27 \\
## 6 & 5 & 14 & 89981 & 6427 & 3318 & 82 & 25 \\
## \hline
## \end{tabular}
## \end{table}
Repeating the calculation with an explicit cas > 0 filter yields an identical table, since Torn.df already contains only casualty-producing tornadoes.
PeopleByEFc = Torn.df %>%
filter(cas > 0) %>%
group_by(mag) %>%
summarize(nT = n(),
nP = round(sum(pop), 0),
avgP = round(mean(pop), 0),
medP = round(median(pop), 0),
avgPD = mean(popD),
medPD = median(popD))
sum(PeopleByEFc$nP)
## [1] 1496394
xtable(PeopleByEFc, digits = 0)
## % latex table generated in R 3.4.3 by xtable 1.8-2 package
## % Wed Dec 27 08:54:29 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrrr}
## \hline
## & mag & nT & nP & avgP & medP & avgPD & medPD \\
## \hline
## 1 & 0 & 183 & 9386 & 51 & 4 & 440 & 55 \\
## 2 & 1 & 750 & 217053 & 289 & 17 & 293 & 37 \\
## 3 & 2 & 753 & 397781 & 528 & 72 & 186 & 29 \\
## 4 & 3 & 392 & 492471 & 1256 & 261 & 88 & 24 \\
## 5 & 4 & 116 & 289722 & 2498 & 824 & 103 & 27 \\
## 6 & 5 & 14 & 89981 & 6427 & 3318 & 82 & 25 \\
## \hline
## \end{tabular}
## \end{table}
Distribution of population density in the path of casualty-producing tornadoes.
df = Torn.df %>%
filter(cas > 0) %>%
filter(popD > 0) %>%
group_by(mag) %>%
summarize(pop = sum(pop),
nT = n(),
avgpop = pop/nT)
df = Torn.df %>%
filter(cas > 0)
B = ggplot(df[df$popD > 0, ], aes(popD)) +
geom_histogram(binwidth = .5, color = "white") +
scale_x_log10(breaks = c(.01, .1, 1, 10, 100, 1000, 10000),
labels = c(".01", ".1", "1", "10", "100", "1000", "10,000")) +
xlab(expression(paste("Population Density [people/", km^{2}, "]"))) +
scale_y_continuous(limits = c(0, 600)) +
ylab("Number of Tornadoes") +
theme_minimal()
Empirical model for tornado winds by EF rating, taken from Table 3-1 of NRC (2007): the percentage of path area experiencing winds of each EF level, for tornadoes of each EF rating. Threshold wind speeds (m/s) are the lower-bound 3-second gusts on the operational EF Scale (Table 2-1 of NRC 2007).
# rows: tornado EF rating (EF0-EF5); columns: fraction of path area at each EF-level wind
perc = c(1, 0, 0, 0, 0, 0,
.772, .228, 0, 0, 0, 0,
.616, .268, .115, 0, 0, 0,
.529, .271, .133, .067, 0, 0,
.543, .238, .131, .056, .032, 0,
.538, .223, .119, .07, .033, .017)
percM = matrix(perc, ncol = 6, byrow = TRUE)
Compute energy dissipation. \(E = A_p \rho \sum_{j=0}^{J} w_j v_j^{3},\) where \(A_p\) is the area of the path, \(\rho\) is air density [1 kg/m^3], \(v_j\) is the midpoint wind speed for each rating, \(w_j\) is the corresponding fraction of path area by EF rating, and \(J\) is the tornado's EF rating. With no upper bound on the EF5 wind speeds, the midpoint wind speed is set at 97 m~s\(^{-1}\) (7.5 m~s\(^{-1}\) above the threshold wind speed, consistent with the EF4 midpoint speed relative to its threshold).
Torn.df = Torn.df %>%
filter(cas > 0)
threshW = c(29.06, 38.45, 49.62, 60.8, 74.21, 89.41) # EF0-EF5 threshold wind speeds (m/s)
midptW = c(diff(threshW)/2 + threshW[-length(threshW)], threshW[length(threshW)] + 7.5) # midpoint speeds; EF5 midpoint is 7.5 m/s above its threshold
ef = Torn.df$mag + 1 # row index into percM
EW3 = numeric()
for(i in 1:length(ef)) EW3[i] = midptW^3 %*% percM[ef[i], ] # area-weighted average of the cubed midpoint wind speeds
Torn.df = Torn.df %>%
mutate(ED = EW3 * AreaPath)
Annual energy dissipation of tornadoes with at least one casualty.
df = Torn.df %>%
group_by(Year) %>%
summarize(nT = n(),
mED = mean(ED),
sdED = sd(ED),
seED = sdED/sqrt(nT),
mEDm = mED - seED,
mEDp = mED + seED)
C = ggplot(df, aes(x = Year, y = mED/10^9)) +
geom_point() +
geom_errorbar(aes(ymin = mEDm/10^9, ymax = mEDp/10^9), width = .2) +
scale_y_continuous(limits = c(-.15, NA)) +
scale_x_continuous(breaks = seq(1995, 2016, 4)) +
xlab("Year") + ylab("Energy Dissipation [GW]") +
theme_minimal()
Distribution of energy dissipation.
D = ggplot(Torn.df, aes(ED)) +
geom_histogram(binwidth = .5, color = "white") +
scale_x_log10(breaks = c(10^8, 10^10, 10^12, 10^14),
labels = c(".1", "10", "1000", "100,000")) +
xlab("Energy Dissipation [GW]") +
scale_y_continuous(limits = c(0, 600)) +
ylab("Number of Tornadoes") +
theme_minimal()
Combine figures.
source("multiplot.txt")
mtrx <- matrix(c(1, 2, 3, 4), nrow = 2, byrow = TRUE)
A <- A + ggtitle("A") +
theme(plot.title = element_text(hjust = -.03))
B <- B + ggtitle("B") +
theme(plot.title = element_text(hjust = 0))
C <- C + ggtitle("C") +
theme(plot.title = element_text(hjust = 0))
D <- D + ggtitle("D") +
theme(plot.title = element_text(hjust = 0))
multiplot(A, B, C, D, layout = mtrx)
## Loading required package: grid
For the set of 2208 tornadoes with at least one casualty the median energy dissipation is 95.5 GW with an inter-quartile range between 14 and 511 GW. The Tallulah-Yazoo City-Durant tornado (Louisiana and Mississippi) of 24 April 2010, which killed ten and injured 146, has the highest energy dissipation at 66.2 TW (1 TW = 10^12 W).
Torn.df %>%
filter(popD > 0) %>%
summarize(nT = n(),
avgED = mean(ED)/10^9,
medianED = median(ED)/10^9,
q25 = quantile(ED, probs = .25)/10^9,
q75 = quantile(ED, probs = .75)/10^9,
maxED = max(ED)/10^9,
minED = min(ED)/10^9)
## nT avgED medianED q25 q75 maxED minED
## 1 2192 855.2684 97.19229 14.46852 513.8461 66169.66 0.005659764
Torn.df %>%
dplyr::select(date, st, mag, inj, fat, cas, popD, ED) %>%
arrange(desc(ED)) %>%
head(., n = 10) %>%
xtable(., digits = 1)
## % latex table generated in R 3.4.3 by xtable 1.8-2 package
## % Wed Dec 27 08:54:30 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rllrrrrrr}
## \hline
## & date & st & mag & inj & fat & cas & popD & ED \\
## \hline
## 1 & 2010-04-24 & LA & 4 & 146 & 10 & 156 & 8.5 & 66169658959790.3 \\
## 2 & 2011-04-27 & AL & 5 & 145 & 72 & 217 & 52.5 & 49073840644331.3 \\
## 3 & 2004-05-22 & NE & 4 & 38 & 1 & 39 & 13.0 & 34265399398044.4 \\
## 4 & 2011-04-27 & AL & 4 & 1500 & 64 & 1564 & 132.7 & 30251617428152.3 \\
## 5 & 2011-04-27 & AL & 4 & 54 & 13 & 67 & 21.5 & 25950329144119.0 \\
## 6 & 2011-04-27 & AL & 4 & 85 & 22 & 107 & 32.1 & 24704083877123.4 \\
## 7 & 2008-02-05 & AR & 4 & 139 & 13 & 152 & 10.3 & 23193868125876.3 \\
## 8 & 1998-04-16 & TN & 5 & 36 & 3 & 39 & 18.4 & 20640754792221.8 \\
## 9 & 2008-05-10 & OK & 4 & 350 & 21 & 371 & 15.1 & 19165780063306.2 \\
## 10 & 2011-04-25 & AR & 2 & 16 & 4 & 20 & 37.2 & 19160328444788.8 \\
## \hline
## \end{tabular}
## \end{table}
Here we use logarithms to simplify the number and complexity of interaction terms in the model (cf. the Cobb-Douglas production function in economics).
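In log form the interactive model fit below has conditional mean \(\ln \mu = \beta_0 + \beta_1 \ln PD + \beta_2 \ln ED + \beta_3 (\ln PD)(\ln ED),\) which is equivalent to the multiplicative form \(\mu = e^{\beta_0} PD^{\beta_1 + \beta_3 \ln ED} ED^{\beta_2 + \beta_3 \ln PD}.\) The exponents are the elasticities; with the single product term, each elasticity depends linearly on the log of the other variable.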
df = Torn.df %>%
filter(popD > 0) %>%
arrange(cas)
p = ggplot(df, aes(x = popD, y = ED, color = log10(cas))) +
scale_color_continuous(low = 'green', high = 'red', labels = c("1", "10", "100", "1000")) +
geom_point() +
scale_x_log10(breaks = c(.01, .1, 1, 10, 100, 1000, 10000),
labels = c(".01", ".1", "1", "10", "100", "1000", "10,000")) +
scale_y_log10(breaks = c(10^8, 10^10, 10^12, 10^14),
labels = c(".1", "10", "1000", "100,000")) +
xlab(expression(paste("Population Density [people/", km^{2}, "]"))) +
ylab("Energy Dissipation [GW]") +
labs(color = "Tornado\nCasualties") +
theme_bw() +
theme(legend.position = c(.01, .99),
legend.justification = c(.01, .99))
xx = c(8, 150, 1500, 1500, 10, 1.4, 1.4)
yy = c(10^13, 10^13, 10^11.7, 10^9, 10^9, 10^10, 10^12)
poly.df = data.frame(xx, yy)
p + geom_polygon(data = poly.df, aes(x = xx, y = yy),
col = "grey", fill = "transparent",
size = 1.2) +
geom_text(aes(x = 1.4, y = 10^6.9, label = "1.4"), col = "grey") +
geom_text(aes(x = 1500, y = 10^6.9, label = "1500"), col = "grey") +
geom_text(aes(x = 1.4, y = 10^6.5, label = "C"), col = "grey") +
geom_text(aes(x = 1500, y = 10^6.5, label = "D"), col = "grey") +
geom_text(aes(x = 60000, y = 10^9, label = "1 B", hjust = "right"), col = "grey") +
geom_text(aes(x = 60000, y = 10^13, label = "10,000 A", hjust = "right"), col = "grey") +
geom_segment(aes(x = 1.4, xend = 1.4, y = 10^10, yend = 10^7.1),
col = "grey", linetype = 2) +
geom_segment(aes(x = 1500, xend = 1500, y = 10^9, yend = 10^7.1),
col = "grey", linetype = 2) +
geom_segment(aes(x = 150, xend = 7000, y = 10^13, yend = 10^13),
col = "grey", linetype = 2) +
geom_segment(aes(x = 1500, xend = 25000, y = 10^9, yend = 10^9),
col = "grey", linetype = 2)
Casualties as a joint function of population density and energy dissipation. There are seven orders of magnitude separating the lowest from the highest per-tornado energy dissipation, and five orders of magnitude separating the lowest from the highest per-tornado population density.
Determine the number and proportion of casualty-producing tornadoes within the polygon area depicted in Figure 4.
Torn.df %>%
filter(popD >= 1.4 & popD <= 1500) %>%
summarize(nT = n(),
pT = nT/2192)
## nT pT
## 1 2023 0.9229015
Torn.df %>%
filter(ED <= 10^13 & ED >= 10^9) %>%
summarize(nT = n(),
pT = nT/2192)
## nT pT
## 1 2033 0.9274635
Torn.df %>%
filter(ED <= 10^13 & ED >= 10^9 & popD >= 1.4 & popD <= 1500) %>%
summarize(nT = n(),
pT = nT/2192)
## nT pT
## 1 1873 0.8544708
library(sp)
library(sf)
points <- data.frame(xT = Torn.df$popD, yT = Torn.df$ED)
sp2 <- SpatialPoints(points)
sp2.sf <- st_as_sf(sp2)
xx <- c(xx, xx[1])
yy <- c(yy, yy[1])
p1 <- cbind(xx, yy)
sp1.sf <- st_polygon(list(p1))
tf <- st_within(sp2.sf, sp1.sf, sparse = FALSE)
sum(tf)
## [1] 1829
sum(tf[, 1])/2192
## [1] 0.8343978
Energy dissipation by EF rating, and the correlation between ED and popD.
Torn.df %>%
group_by(mag) %>%
summarize(nT = n(),
totalED = sum(ED)/10^12,
avgED = mean(ED)/10^12,
medED = median(ED)/10^12)
## # A tibble: 6 x 5
## mag nT totalED avgED medED
## <int> <int> <dbl> <dbl> <dbl>
## 1 0 183 2.047737 0.01118982 0.002122412
## 2 1 750 63.661112 0.08488148 0.020255930
## 3 2 753 342.357147 0.45465757 0.147663274
## 4 3 392 716.181754 1.82699427 0.921135995
## 5 4 116 584.882021 5.04208639 2.285802106
## 6 5 14 165.825408 11.84467201 8.223214252
Torn.df %>%
filter(popD > 0) %>%
mutate(lED10 = log10(ED),
lpopD10 = log10(popD),
lED = log(ED),
lpopD = log(popD)) %>%
summarize(r1 = cor(ED, popD, method = "p"),
r2 = cor(ED, popD, method = "s"),
r3 = cor(lED10, lpopD10),
r4 = cor(lED, lpopD))
## r1 r2 r3 r4
## 1 -0.06650332 -0.216603 -0.2057972 -0.2057972
Summary statistics. Descriptive statistics for the dependent and independent variables used in the regression models, 1995–2016 (N = 2,192).
Torn.df %>%
filter(popD > 0) %>%
summarize(nT = n(),
casM = mean(cas),
casMedian = median(cas),
casMax = max(cas),
casMin = min(cas),
casSD = sd(cas),
popDM = mean(popD),
popDMedian = median(popD),
popDQ25 = quantile(popD, prob = .25),
popDQ75 = quantile(popD, prob = .75),
popDMax = max(popD),
popDMin = min(popD),
popDSD = sd(popD),
EDM = mean(ED),
EDMedian = median(ED),
EDMax = max(ED),
EDMin = min(ED),
EDSD = sd(ED)) %>%
t() %>%
xtable(., digits = 3)
## % latex table generated in R 3.4.3 by xtable 1.8-2 package
## % Wed Dec 27 08:54:41 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rr}
## \hline
## & x \\
## \hline
## nT & 2192.000 \\
## casM & 11.829 \\
## casMedian & 3.000 \\
## casMax & 1564.000 \\
## casMin & 1.000 \\
## casSD & 53.164 \\
## popDM & 222.612 \\
## popDMedian & 31.874 \\
## popDQ25 & 9.882 \\
## popDQ75 & 136.872 \\
## popDMax & 13949.001 \\
## popDMin & 0.003 \\
## popDSD & 708.024 \\
## EDM & 855268417141.460 \\
## EDMedian & 31.874 \\
## EDMax & 66169658959790.266 \\
## EDMin & 5659764.491 \\
## EDSD & 2896044608799.418 \\
## \hline
## \end{tabular}
## \end{table}
Mean and variance of casualty counts.
Torn.df %>%
filter(popD > 0) %>%
summarize(meanCas = mean(cas),
varCas = var(cas),
ratio = varCas/meanCas)
## meanCas varCas ratio
## 1 11.82938 2826.436 238.9335
For the set of tornadoes with at least one casualty, the mean and variance of the counts are 11.8 and 2826, respectively. The variance greatly exceeds the mean, suggesting a negative binomial model for the counts.
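For reference, a negative binomial with size parameter \(\theta\) has variance \(\mu + \mu^{2}/\theta,\) so its variance-to-mean ratio, \(1 + \mu/\theta,\) can greatly exceed one, whereas a Poisson model forces the ratio to equal one. (The \(\theta\) estimated below is conditional on the covariates, so it is not directly comparable to this marginal ratio of 239.)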
Use the glm.nb function from the MASS package to fit the model and get the estimate for theta. Use natural logs for the modeling.
df = Torn.df %>%
filter(popD > 0) %>%
mutate(lpopD = log(popD),
lpop = log(pop),
lED = log(ED),
lpopD10 = log10(popD),
lED10 = log10(ED))
formula0 = cas ~ lpopD + lED
formula1 = cas ~ lpopD + lED + lpopD:lED
suppressMessages(library(MASS))
model0a = glm.nb(formula0, data = df, link = log, init.theta = 1)
model0 = glm(formula0, data = df, family = neg.bin(theta = model0a$theta))
model1a = glm.nb(formula1, data = df, link = log, init.theta = 1)
model1 = glm(formula1, data = df, family = neg.bin(theta = model1a$theta))
pre0 = predict.glm(model0, type = "response")
pre1 = predict.glm(model1, type = "response")
df$pre0 = pre0
df$pre1 = pre1
cor(df$pre0, df$cas)
## [1] 0.4203877
cor(df$pre1, df$cas)
## [1] 0.4478823
sqrt(mean((df$pre0 - df$cas)^2))
## [1] 50.43017
sqrt(mean((df$pre1 - df$cas)^2))
## [1] 49.66327
mean(abs(df$pre0 - df$cas))
## [1] 10.82663
mean(abs(df$pre1 - df$cas))
## [1] 10.62275
summary(model1)
##
## Call:
## glm(formula = formula1, family = neg.bin(theta = model1a$theta),
## data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0779 -0.9946 -0.5365 0.0756 8.2310
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.026816 0.960560 -3.151 0.001649 **
## lpopD -0.812117 0.229361 -3.541 0.000407 ***
## lED 0.166102 0.037678 4.408 1.09e-05 ***
## lpopD:lED 0.041503 0.009206 4.508 6.88e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial family taken to be 3.408786)
##
## Null deviance: 4275.1 on 2191 degrees of freedom
## Residual deviance: 2286.3 on 2188 degrees of freedom
## AIC: 13223
##
## Number of Fisher Scoring iterations: 19
summary(model0)
##
## Call:
## glm(formula = formula0, family = neg.bin(theta = model0a$theta),
## data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9426 -1.0070 -0.5678 0.0534 8.2118
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.88254 0.53019 -12.981 <2e-16 ***
## lpopD 0.21331 0.02476 8.615 <2e-16 ***
## lED 0.32115 0.01963 16.361 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial family taken to be 3.683118)
##
## Null deviance: 4128.2 on 2191 degrees of freedom
## Residual deviance: 2311.7 on 2189 degrees of freedom
## AIC: 13323
##
## Number of Fisher Scoring iterations: 21
AIC(model0)
## [1] 13323.1
AIC(model1)
## [1] 13223.14
BIC(model0)
## [1] 13340.17
BIC(model1)
## [1] 13245.91
Marginal effects. Use the interplot package. Find the value of popD that makes the predicted energy elasticity from the interactive model equal to the (constant) energy elasticity from the additive model, and the value of ED that makes the predicted population elasticity from the interactive model equal to the (constant) population elasticity from the additive model.
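From the interactive model's linear predictor, the energy elasticity is \(\partial \ln \mu / \partial \ln ED = \beta_2 + \beta_3 \ln PD\) and the population elasticity is \(\partial \ln \mu / \partial \ln PD = \beta_1 + \beta_3 \ln ED.\) Setting each equal to the corresponding constant elasticity from the additive model and solving for \(\ln PD\) or \(\ln ED\) gives the crossover values computed below.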
suppressMessages(library(interplot))
A5 = interplot(m = model1, var1 = "lED", var2 = "lpopD", hist = TRUE) +
scale_x_continuous(breaks = log(c(.01, 1, 100, 10000)),
labels = c(".01", "1", "100", "10,000")) +
scale_y_continuous(breaks = c(-.5, -.25, 0, .25, .5, .75, 1)) +
# geom_vline(xintercept = -1.5, col = "green") +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_segment(aes(y = coef(model0)[3], yend = coef(model0)[3],
x = range(df$lpopD)[1], xend = range(df$lpopD)[2]),
col = "red") +
xlab(expression(paste("Population Density [people/", km^{2}, "]"))) +
ylab("Energy Elasticity") +
theme_bw()
out.df = interplot(m = model1, var1 = "lED", var2 = "lpopD", plot = FALSE)
approx(x = out.df$lpopD, y = out.df$coef, xout = log(100))$y
## [1] 0.3572223
approx(x = out.df$lpopD, y = out.df$ub, xout = log(100))$y
## [1] 0.399648
approx(x = out.df$lpopD, y = out.df$lb, xout = log(100))$y
## [1] 0.3153432
approx(x = out.df$lpopD, y = out.df$coef, xout = log(1.4))$y
## [1] 0.1789858
approx(x = out.df$lpopD, y = out.df$ub, xout = log(1.4))$y
## [1] 0.2474098
approx(x = out.df$lpopD, y = out.df$lb, xout = log(1.4))$y
## [1] 0.1102553
approx(x = out.df$lpopD, y = out.df$coef, xout = log(1500))$y
## [1] 0.4702951
approx(x = out.df$lpopD, y = out.df$ub, xout = log(1500))$y
## [1] 0.5489416
approx(x = out.df$lpopD, y = out.df$lb, xout = log(1500))$y
## [1] 0.3931439
(2^approx(x = out.df$lpopD, y = out.df$coef, xout = log(100))$y - 1) * 100
## [1] 28.09573
approx(x = out.df$lpopD, y = out.df$coef, xout = log(1000))$y
## [1] 0.4533652
exp(approx(x = out.df$lb, y = out.df$lpopD, xout = 0)$y) #popD when lb above zero
## [1] 0.2122565
sum(df$lpopD > approx(x = out.df$lb, y = out.df$lpopD, xout = 0)$y)/length(df$lpopD) #% obs
## [1] 0.9899635
exp(approx(x = out.df$coef, y = out.df$lpopD, xout = coef(model0)[3])$y)
## [1] 42.15137
B5 = interplot(m = model1, var1 = "lpopD", var2 = "lED", hist = TRUE) +
scale_x_continuous(breaks = log(c(10^7, 10^9, 10^11, 10^13)),
labels = c(".01", "1", "100", "10,000")) +
scale_y_continuous(breaks = c(-.5, -.25, 0, .25, .5, .75, 1)) +
# geom_vline(xintercept = 9.35, col = "green") +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_segment(aes(y = coef(model0)[2], yend = coef(model0)[2],
x = range(df$lED)[1], xend = range(df$lED)[2]),
col = "red") +
xlab("Energy Dissipation [GW]") +
ylab("Population Elasticity") +
theme_bw()
sum(Torn.df$ED > 2.1e9 & tf)/2192
## [1] 0.8070255
sum(Torn.df$ED > 2.1e9)/2192
## [1] 0.9060219
out.df = interplot(m = model1, var1 = "lpopD", var2 = "lED", plot = FALSE)
out.df[1, ]
## lED coef ub lb
## 1 15.54889 -0.1688119 0.005324622 -0.3443671
approx(x = out.df$lED, y = out.df$coef, xout = log(10^9))$y
## [1] 0.04724048
approx(x = out.df$lED, y = out.df$ub, xout = log(10^9))$y
## [1] 0.1329021
approx(x = out.df$lED, y = out.df$lb, xout = log(10^9))$y
## [1] -0.04079171
approx(x = out.df$lED, y = out.df$coef, xout = log(10^13))$y
## [1] 0.4318118
approx(x = out.df$lED, y = out.df$ub, xout = log(10^13))$y
## [1] 0.5369099
approx(x = out.df$lED, y = out.df$lb, xout = log(10^13))$y
## [1] 0.3300179
exp(approx(x = out.df$lb, y = out.df$lED, xout = 0)$y)/10^9 #ED when lb above zero
## [1] 2.06471
sum(df$lED > approx(x = out.df$lb, y = out.df$lED, xout = 0)$y)/length(df$lED) #% obs
## [1] 0.9032847
approx(x = out.df$lED, y = out.df$coef, xout = log(10^13))$y
## [1] 0.4318118
exp(approx(x = out.df$coef, y = out.df$lED, xout = coef(model0)[2])$y)
## [1] 5.337e+10
mtrx = matrix(c(1, 2), nrow = 2, byrow = TRUE)
A5 = A5 + ggtitle("A") +
theme(plot.title = element_text(hjust = 0))
B5 = B5 + ggtitle("B") +
theme(plot.title = element_text(hjust = 0))
multiplot(A5, B5, layout = mtrx)
The predicted casualty rate (and the upper and lower boundaries for this point estimate) at each of nine combinations of values for energy dissipation (ED) and population density (PD). The allEffects() function is needed to get the lower and upper 95% uncertainty bounds on the prediction, which are not available from the predict.glm() function. Also compute the value of ED that makes the predicted population elasticity from the interactive model equal to the (constant) population elasticity from the additive model, and the value of PD that makes the predicted energy elasticity from the interactive model equal to the (constant) energy elasticity from the additive model.
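As a cross-check (a minimal sketch, not part of the original analysis), an approximate 95% interval can also be built by hand on the link scale with predict.glm() and then exponentiated; here for the final ED/PD combination used in this section:
pr = predict(model1, newdata = data.frame(lED = 24.70051, lpopD = 3.741267),
type = "link", se.fit = TRUE)
exp(pr$fit + c(fit = 0, lower = -1.96, upper = 1.96) * pr$se.fit) # ~6.51 (5.93, 7.14)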
library(effects)
## Loading required package: carData
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
# the minimum value of ED and the minimum value of PD
predict.glm(model1, newdata = data.frame(lED = min(df$lED), lpopD = min(df$lpopD)), type = "response")
## 1
## 1.676098
eff = allEffects(model1, xlevels = list(lED = seq(min(df$lED), min(df$lED) + 1, 1),
lpopD = seq(min(df$lpopD), min(df$lpopD) + 1, 1)))
as.data.frame(eff[[1]])[1, ]
## lpopD lED fit se lower upper
## 1 -5.759214 15.54889 1.676098 0.8616381 0.3093612 9.080985
# the minimum value of ED and the maximum value of PD
predict.glm(model1, newdata = data.frame(lED = min(df$lED), lpopD = max(df$lpopD)), type = "response")
## 1
## 0.1305705
eff = allEffects(model1, xlevels = list(lED = seq(min(df$lED), min(df$lED) + 1, 1),
lpopD = seq(max(df$lpopD) - 1, max(df$lpopD), 1)))
as.data.frame(eff[[1]])[2, ]
## lpopD lED fit se lower upper
## 2 9.543163 15.54889 0.1305705 0.5410464 0.04519087 0.3772589
# the maximum value of ED and the minimum value of PD
predict.glm(model1, newdata = data.frame(lED = max(df$lED), lpopD = min(df$lpopD)), type = "response")
## 1
## 0.5115534
eff = allEffects(model1, xlevels = list(lED = seq(max(df$lED) - 1, max(df$lED), 1),
lpopD = seq(min(df$lpopD), min(df$lpopD) + 1, 1)))
as.data.frame(eff[[1]])[3, ]
## lpopD lED fit se lower upper
## 3 -5.759214 31.82324 0.5115534 0.6378153 0.1464472 1.786903
#the maximum value of ED and the maximum value of PD
predict.glm(model1, newdata = data.frame(lED = max(df$lED), lpopD = max(df$lpopD)), type = "response")
## 1
## 1227.981
eff = allEffects(model1, xlevels = list(lED = seq(max(df$lED) - 1, max(df$lED), 1),
lpopD = seq(max(df$lpopD) - 1, max(df$lpopD), 1)))
as.data.frame(eff[[1]])[4, ]
## lpopD lED fit se lower upper
## 4 9.543163 31.82324 1227.981 0.4466586 511.4282 2948.482
#the 10th percentile value of ED and the 10th percentile value of PD
predict.glm(model1, newdata = data.frame(lED = quantile(df$lED, prob = .1), lpopD = quantile(df$lpopD, prob = .1)), type = "response")
## 10%
## 1.898046
eff = allEffects(model1, xlevels = list(lED = seq(quantile(df$lED, prob = .1), quantile(df$lED, prob = .1) + 1, 1),
lpopD = seq(quantile(df$lpopD, prob = .1), quantile(df$lpopD, prob = .1) + 1, 1)))
as.data.frame(eff[[1]])[1, ]
## lpopD lED fit se lower upper
## 1 1.207831 21.49812 1.898046 0.1357294 1.454487 2.476874
# the 10th percentile value of ED and the 90th percentile value of PD
predict.glm(model1, newdata = data.frame(lED = quantile(df$lED, prob = .1), lpopD = quantile(df$lpopD, prob = .9)), type = "response")
## 10%
## 2.870813
eff = allEffects(model1, xlevels = list(lED = seq(quantile(df$lED, prob = .1), quantile(df$lED, prob = .1) + 1, 1),
lpopD = seq(quantile(df$lpopD, prob = .9), quantile(df$lpopD, prob = .9) + 1, 1)))
as.data.frame(eff[[1]])[1, ]
## lpopD lED fit se lower upper
## 1 6.372288 21.49812 2.870813 0.1226639 2.25702 3.651526
# the 90th percentile value of ED and the 10th percentile value of PD
predict.glm(model1, newdata = data.frame(lED = quantile(df$lED, prob = .9), lpopD = quantile(df$lpopD, prob = .1)), type = "response")
## 90%
## 8.318838
eff = allEffects(model1, xlevels = list(lED = seq(quantile(df$lED, prob = .9), quantile(df$lED, prob = .9) + 1, 1),
lpopD = seq(quantile(df$lpopD, prob = .1), quantile(df$lpopD, prob = .1) + 1, 1)))
as.data.frame(eff[[1]])[1, ]
## lpopD lED fit se lower upper
## 1 1.207831 28.33203 8.318838 0.1070887 6.743078 10.26283
# the 90th percentile value of ED and the 90th percentile value of PD
predict.glm(model1, newdata = data.frame(lED = quantile(df$lED, prob = .9), lpopD = quantile(df$lpopD, prob = .9)), type = "response")
## 90%
## 54.43867
eff = allEffects(model1, xlevels = list(lED = seq(quantile(df$lED, prob = .9),
quantile(df$lED, prob = .9) + 1, 1),
lpopD = seq(quantile(df$lpopD, prob = .9), quantile(df$lpopD, prob = .9) + 1, 1)))
as.data.frame(eff[[1]])[1, ]
## lpopD lED fit se lower upper
## 1 6.372288 28.33203 54.43867 0.1441927 41.03009 72.22914
predict.glm(model1, newdata = data.frame(lED = 24.70051, lpopD = 3.741267),
type = "response")
## 1
## 6.507591
eff = allEffects(model1, xlevels = list(lED = seq(24.70051, 24.70051 + 1, 1),
lpopD = seq(3.741267, 3.741267 + 1, 1)))
as.data.frame(eff[[1]])[1, ]
## lpopD lED fit se lower upper
## 1 3.741267 24.70051 6.507591 0.04719707 5.932309 7.13866
Repeat for the additive model.
newData = data.frame(lpopD = c(min(df$lpopD), max(df$lpopD), min(df$lpopD), max(df$lpopD),
quantile(df$lpopD, prob = .1), quantile(df$lpopD, prob = .9), quantile(df$lpopD, prob = .1), quantile(df$lpopD, prob = .9),
3.741267),
lED = c(min(df$lED), min(df$lED), max(df$lED), max(df$lED),
quantile(df$lED, prob = .1), quantile(df$lED, prob = .1), quantile(df$lED, prob = .9), quantile(df$lED, prob = .9),
24.70051))
predict.glm(model0, newdata = newData, type = "response", se.fit = TRUE)
## $fit
## 1 2 3 4 5
## 0.04426818 1.15792131 8.24024037 215.53968419 1.32211704
## 6 7 8 9
## 3.97829320 11.86923182 35.71490496 6.34765274
##
## $se.fit
## 1 2 3 4 5 6
## 0.01510704 0.24895587 1.97449853 47.79772108 0.15614515 0.39608104
## 7 8 9
## 1.02647680 3.94954479 0.31028955
##
## $residual.scale
## [1] 1.919145
Absolute value of predicted differences.
pg = data.frame(DeltaC = predict.glm(model1, type = "response") -
predict.glm(model0, type = "response"))
pg$DeltaCA = abs(pg$DeltaC)
mean(pg$DeltaCA)
## [1] 1.370967
median(pg$DeltaCA)
## [1] 0.4795468
sum(pg$DeltaCA > 3)/length(pg$DeltaCA)
## [1] 0.1099453
sum(pg$DeltaCA > 5)/length(pg$DeltaCA)
## [1] 0.05474453
sum(pg$DeltaCA > 10)
## [1] 36
sum(pg$DeltaCA > 10)/length(pg$DeltaCA)
## [1] 0.01642336
pgP = pg %>%
filter(DeltaC > .5)
pgM = pg %>%
filter(DeltaC < -.5)
ggplot(pg, aes(x = DeltaC)) +
geom_histogram(binwidth = 2, col ="black") +
scale_x_continuous(breaks = seq(-15, 40, 5), labels = seq(-15, 40, 5)) +
ylab("Number of Tornadoes") +
xlab("Interaction Model Predictions Minus Additive Model Predictions")
Multiply population density in people per sq. km by 2.59 to get people per sq. mi.: 373 x 2.59 = 966, approximately the population density of Fort Smith, Arkansas. For comparison, Kent, Ohio has 954 people per sq. mi. and Fort Wayne, Indiana has 959 people per sq. mi. (http://zipatlas.com/us/in/city-comparison/population-density.htm).
In each panel, what are the 10th and 90th percentile values in the distribution of the x-axis variable? And what are the predicted point estimate of the elasticity and the upper and lower boundaries of the confidence band at these 10th and 90th percentile values?
quantile(df$popD, probs = c(.1, .9))
## 10% 90%
## 3.346219 585.396097
quantile(df$ED, probs = c(.1, .9))
## 10% 90%
## 2.170278e+09 2.015790e+12
That is, 3.35 and 585.4 people per sq. km for population density, and 2.17 x 10^9 and 2.02 x 10^12 W for energy dissipation.
A = interplot(m = model1, var1 = "lED", var2 = "lpopD", plot = FALSE, ci = .95)
round(approx(A$lpopD, A$coef, xout = quantile(df$lpopD, probs = .1))$y, 4)
## [1] 0.2154
round(approx(A$lpopD, A$lb, xout = quantile(df$lpopD, probs = .1))$y, 4)
## [1] 0.1581
round(approx(A$lpopD, A$ub, xout = quantile(df$lpopD, probs = .1))$y, 4)
## [1] 0.2712
round(approx(A$lpopD, A$coef, xout = quantile(df$lpopD, probs = .9))$y, 4)
## [1] 0.431
round(approx(A$lpopD, A$lb, xout = quantile(df$lpopD, probs = .9))$y, 4)
## [1] 0.3676
round(approx(A$lpopD, A$ub, xout = quantile(df$lpopD, probs = .9))$y, 4)
## [1] 0.4947
B = interplot(m = model1, var1 = "lpopD", var2 = "lED", plot = FALSE, ci = .95)
round(approx(B$lED, B$coef, xout = quantile(df$lED, probs = .1))$y, 4)
## [1] 0.0796
round(approx(B$lED, B$lb, xout = quantile(df$lED, probs = .1))$y, 4)
## [1] 0.0028
round(approx(B$lED, B$ub, xout = quantile(df$lED, probs = .1))$y, 4)
## [1] 0.1533
round(approx(B$lED, B$coef, xout = quantile(df$lED, probs = .9))$y, 4)
## [1] 0.3649
round(approx(B$lED, B$lb, xout = quantile(df$lED, probs = .9))$y, 4)
## [1] 0.2871
round(approx(B$lED, B$ub, xout = quantile(df$lED, probs = .9))$y, 4)
## [1] 0.4442
Use the effects package to get marginal predictions with the effects function. The theta in the neg.bin argument is determined by the glm.nb fit above.
suppressMessages(library(effects))
eff = allEffects(model1, xlevels = list(lED = log(10^seq(9, 13, .1)),
lpopD = log(c(1.4, 31.9, 1500))))
results1 = as.data.frame(eff[[1]]) %>%
mutate(popD = round(exp(lpopD), 1),
ED = exp(lED),
lbl = paste("Population Density", popD,
"[people/sq. km]", sep = " "))
fit0 = predict(model0,
newdata = data.frame(lpopD = results1$lpopD, lED = results1$lED),
type = "response")
results1$fit0 = fit0
ggplot(results1, aes(x = ED, y = fit, color = as.factor(popD))) +
geom_line(size = 2) +
# geom_line(aes(y = fit0)) +
scale_x_log10(breaks = c(10^9, 10^10, 10^11, 10^12, 10^13),
labels = c("1", "10", "100", "1000", "10,000")) +
scale_y_log10(limits = c(.7, 200), breaks = c(1, 2, 5, 10, 20, 50, 100)) +
xlab("Energy Dissipation [GW]") +
ylab("Casualty Rate\n [No. of Casualties Per Casualty-Producing Tornado]") +
labs(color = expression(paste("Population Density [people/", km^{2}, "]"))) +
theme_bw() +
theme(legend.position = c(.01, .99),
legend.justification = c(.01, .99)) +
guides(color = guide_legend(reverse = TRUE))
Compute the common slope in the above figure (model0); it should equal the energy elasticity. Then find the population density at which the energy elasticity of model1 equals the energy elasticity of model0. This is done by interpolation.
commonSlope = diff(predict(model0,
newdata = data.frame(lpopD = c(2, 2), lED = c(20, 21)),
type = "link"))
commonSlope
## 2
## 0.3211507
slope = numeric()
for(i in seq(3, 4, .1)){
slope = c(slope, diff(predict(model1,
newdata = data.frame(lpopD = c(i, i), lED = c(20, 21)),
type = "link")))
}
A = data.frame(popD = exp(seq(3, 4, .1)), slope)
approx(A$slope, A$popD, xout = commonSlope)$y
## [1] 41.97262
Model adequacy.
pchisq(model1$deviance, model1$df.residual, lower.tail = FALSE) # model is adequate p-value = .07.
## [1] 0.07027416