Set working directory and load packages.
suppressMessages(library("ggplot2"))
suppressMessages(library("raster"))
suppressMessages(library("ggmap"))
suppressMessages(library("dplyr"))
suppressMessages(library("tidyr"))
suppressMessages(library("rgdal"))
suppressMessages(library("rgeos"))
suppressMessages(library("ggthemes"))
suppressMessages(library("RColorBrewer"))
suppressMessages(library("scales"))
suppressMessages(library("broom"))
suppressMessages(library("maps"))
suppressMessages(library("sf"))
suppressMessages(library("xtable"))
suppressMessages(library("lubridate"))
Data from the Storm Prediction Center (SPC) http://www.spc.noaa.gov/gis/svrgis/.
download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip",
"tornado.zip", mode = "wb")
unzip("tornado.zip")
Read the tornado data.
Torn.sfdf = st_read(dsn = "torn",
layer = "torn",
stringsAsFactors = FALSE)
## Reading layer `torn' from data source `/Users/jelsner/Dropbox/Tornadoes/Casualties/torn' using driver `ESRI Shapefile'
## converted into: LINESTRING
## Simple feature collection with 61092 features and 22 fields
## geometry type: LINESTRING
## dimension: XY
## bbox: xmin: -6279240 ymin: -1835231 xmax: 3405399 ymax: 3888106
## epsg (SRID): NA
## proj4string: +proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs
as.data.frame(Torn.sfdf) %>%
mutate(cas = fat + inj) %>%
summarize(nT = sum(cas < 20),
nAll = length(fat),
nT/nAll * 100)
## nT nAll nT/nAll * 100
## 1 60225 61092 98.58083
Compare with Table 4 in Merrell et al. (2005).
as.data.frame(Torn.sfdf) %>%
filter(yr >= 1990 & yr <= 2001) %>%
summarize(nF = sum(fat))
## nF
## 1 662
Ages = c("0-9", "10-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80+")
Census2009 = c(6.9 + 6.6, 6.8 + 7.1, 7.0 + 6.9, 6.5 + 6.9, 7.3 + 7.5, 6.9 + 6.0, 4.8 + 3.6, 2.9 + 2.5, 1.9 + 1.7)/100
Table4 = c(66, 51, 60, 91, 98, 83, 75, 81, 50)
sum(Table4)
## [1] 655
Add new columns and filter. We only consider tornadoes occurring within the conterminous United States.
Torn.sfdf = Torn.sfdf %>%
mutate(Date = as.Date(date),
Year = yr,
cas = inj + fat,
Length = len * 1609.34,
Width = wid * .9144,
Width = ifelse(Width == 0, min(Width[Width > 0]), Width),
AreaPath = Length * Width,
AreaVortex = pi * (Width/2)^2,
Ma = factor(month.abb[mo], levels = month.abb[1:12])) %>%
filter(Year >= 1990, st != "HI", st != "AK", st != "PR")
Warnings? Try
assign("last.warning", NULL, envir = baseenv())
Torn.sfdf %>%
filter(cas > 0) %>%
summarize(nT = n(),
nC = sum(cas),
pF = sum(fat)/nC * 100)
## Simple feature collection with 1 feature and 3 fields
## geometry type: MULTILINESTRING
## dimension: XY
## bbox: xmin: -2279065 ymin: -1429838 xmax: 2130657 ymax: 1210879
## epsg (SRID): NA
## proj4string: +proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs
## nT nC pF geometry
## 1 2784 31587 6.214582 MULTILINESTRING((-223554.61...
During the 22-year period 1995–2016 there are 26,863 tornadoes recorded in the conterminous United States. Of these, 2,208 are linked to 25,968 casualties. Only 6.7 percent of all casualties lead to death.
df = as.data.frame(Torn.sfdf) %>%
arrange(desc(cas)) %>%
dplyr::select(date, time, st, mag, inj, fat, cas)
head(df)
## date time st mag inj fat cas
## 1 2011-04-27 15:43:00 AL 4 1500 64 1564
## 2 2011-05-22 16:34:00 MO 5 1150 158 1308
## 3 1999-05-03 17:26:00 OK 5 583 36 619
## 4 2015-12-26 18:46:00 TX 4 468 10 478
## 5 1990-08-28 14:30:00 IL 5 350 29 379
## 6 2008-05-10 16:20:00 OK 4 350 21 371
Distribution of casualties by tornado count.
df = as.data.frame(Torn.sfdf) %>%
filter(cas > 0) %>%
group_by(cas) %>%
summarize(nT = n()) %>%
arrange(desc(cas))
head(df)
## # A tibble: 6 x 2
## cas nT
## <int> <int>
## 1 1564 1
## 2 1308 1
## 3 619 1
## 4 478 1
## 5 379 1
## 6 371 1
ggplot(df, aes(x = cas, y = nT)) +
geom_point() +
scale_y_log10(breaks = c(1, 10, 100, 1000)) +
scale_x_log10(breaks = c(1, 10, 100, 1000)) +
xlab("Number of Casualties") + ylab("Number of Tornadoes") +
theme_minimal() +
ggtitle("Casualty-Producing Tornadoes in the United States\n(1995-2016)")
On average a casualty-producing EF0 tornado results in two casualties, a casualty-producing EF1 tornado results in 3.1 casualties, and a casualty-producing EF2 tornado results in 6.5 casualties.
as.data.frame(Torn.sfdf) %>%
filter(cas > 0) %>%
group_by(mag) %>%
summarize(cas = sum(cas),
nT = n(),
fat = sum(fat),
inj = sum(inj),
avgcas = cas/nT) %>%
xtable(., digits = 1)
## % latex table generated in R 3.4.1 by xtable 1.8-2 package
## % Tue Aug 22 08:51:13 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrr}
## \hline
## & mag & cas & nT & fat & inj & avgcas \\
## \hline
## 1 & 0 & 481 & 237 & 11 & 470 & 2.0 \\
## 2 & 1 & 3016 & 963 & 97 & 2919 & 3.1 \\
## 3 & 2 & 6159 & 923 & 251 & 5908 & 6.7 \\
## 4 & 3 & 9343 & 488 & 591 & 8752 & 19.1 \\
## 5 & 4 & 8571 & 154 & 532 & 8039 & 55.7 \\
## 6 & 5 & 4017 & 19 & 481 & 3536 & 211.4 \\
## \hline
## \end{tabular}
## \end{table}
Casualties by location and size.
df = as.data.frame(Torn.sfdf) %>%
filter(cas > 0) %>%
mutate(label = paste("EF", mag, sep = ""))
states.df = map_data("state") %>%
filter(region != 'alaska', region != 'district of columbia')
ggplot(states.df, aes(x = long, y = lat, group = group)) +
geom_polygon(fill = "white") +
geom_path(color = "gray85") +
coord_map(project = "polyconic") +
geom_point(aes(x = slon, y = slat, size = cas, group = om),
alpha = .3, data = df) +
xlab("") + ylab("") +
facet_wrap(~ label, ncol = 2) +
theme_minimal() +
theme(panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom") +
scale_size_continuous("Number of\n Casualties")
Alabama had the most casualties (3,937) followed by Oklahoma (2,500), Missouri (2,025), Arkansas (1,759), Texas (1,704), and Tennessee (1,678).
sfdf = Torn.sfdf %>%
filter(cas > 0) %>%
group_by(st) %>%
summarize(nT = n(),
nC = sum(cas),
nF = sum(fat),
ratio = nC/nT) %>%
arrange(desc(nC))
st_geometry(sfdf) = NULL #removes the geometry column
sfdf
## # A tibble: 44 x 5
## st nT nC nF ratio
## * <chr> <int> <int> <int> <dbl>
## 1 AL 191 4407 376 23.073298
## 2 OK 142 2794 142 19.676056
## 3 GA 175 2218 126 12.674286
## 4 MO 124 2153 223 17.362903
## 5 TX 245 2063 109 8.420408
## 6 TN 148 1905 161 12.871622
## 7 MS 161 1854 123 11.515528
## 8 AR 138 1804 116 13.072464
## 9 IL 135 1334 67 9.881481
## 10 NC 104 1122 48 10.788462
## # ... with 34 more rows
Make a choropleth map. Use geom_polygon and coord_map from ggplot.
states.df = map_data("state") %>%
filter(region != 'alaska', region != 'district of columbia') %>%
mutate(st = state.abb[match(region, tolower(state.name))]) %>%
left_join(sfdf, by = "st") %>%
arrange(order)
pp = ggplot(states.df, aes(x = long, y = lat, group = group, fill = log10(nC))) +
geom_polygon() +
geom_path(color = "gray75") +
coord_map(project = "polyconic") +
# ggtitle("Tornado Casualties [1995-2016]") +
theme(panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom") +
xlab("") + ylab("") +
scale_fill_continuous(low = "#fee6ce", high = "#e6550d",
"Number of\n Casualties",
breaks = 1:3,
labels = c("10", "100", "1000"))
pp
The tmap package is a flexible, layer-based, and easy to use approach to create thematic maps including choropleths and bubble maps. It is based on the grammar of graphics, and resembles the syntax of ggplot2.
library(tmap)
library(albersusa)
us_sf = usa_sf("aeqd") %>%
mutate(st = as.character(iso_3166_2))
us_sf = us_sf %>%
filter(st != "AK" & st != "HI")
sfdf2 = Torn.sfdf %>%
group_by(st) %>%
summarize(nT = n(),
nC = sum(cas),
nF = sum(fat),
nI = sum(inj))
st_geometry(sfdf2) = NULL
sfdf3 = left_join(sfdf2,
as.data.frame(us_sf), by = "st") %>%
sf::st_as_sf() %>%
select(nT, nC, nF, nI)
tm1 = tm_shape(sfdf3) +
tm_polygons("nI",
border.col = NULL,
title = "Injuries",
palette = "Purples") +
tm_text("nI", size = .5) +
tm_shape(us_sf) + tm_borders(alpha = .2) +
tm_compass() + tm_scale_bar(lwd = .5) +
tm_format_Europe2(legend.position = c("left", "bottom"),
attr.position = c("left", "bottom"))
tm2 = tm_shape(sfdf3) +
tm_polygons("nF",
border.col = NULL,
title = "Fatalities",
palette = "Oranges") +
tm_text("nF", size = .5) +
tm_shape(us_sf) + tm_borders(alpha = .2) +
tm_compass() + tm_scale_bar(lwd = .5) +
tm_format_Europe2(legend.position = c("left", "bottom"),
attr.position = c("left", "bottom"))
tmap_arrange(tm1, tm2, asp = NA, ncol = 1)
Create tornado paths and extract the average population density (people per sq. km under the path). The population data (Gridded Population of the World Volume 4–2010) are on a raster. With the extract method and weights = TRUE the calculation is done based on percentage of cell under the path. This takes more time. Multiply the average population density by the path area (L x W) to estimate the number of people under the path.
First convert the tornado simple feature data frame to a spatial lines file. Then buffer the track to make a path.
TornL.sfdf = Torn.sfdf %>%
filter(cas > 0)
TornL = as(TornL.sfdf, "Spatial")
TornP = gBuffer(TornL, byid = TRUE, width = TornL$Width/2, capStyle = "FLAT")
#TornP$AreaPath = gArea(TornP, byid = TRUE)
Population data are obtained from the Gridded Population of the World, version four (GPW, v4) from the Socioeconomic Data and Applications Center at Columbia University, USA. The database contain decennial census density estimates for 1990, 2000, and 2010 represented as people per square kilometer. Densities are based on residential population. The native cell resolution is .0083\(^{\circ}\) latitude/longitude, which at 36\(^{\circ}\) N latitude means a cell having the dimension of .9 km in the north-south direction and .7 km in the east-west direction.
Load the population raster(s) and crop to defined extent. Use the extract function to obtain the population density. The projectRaster and extract functions take about 10 minutes per raster. Group years by mid-year of the census year. Assign the population density using the first decennial estimate then successively replace with later estimates by grouped years.
stime = proc.time()
PopD2000 = raster("gpw-v4-population-density_2000.tif")
PopD2010 = raster("gpw-v4-population-density_2010.tif")
ext = raster::extent(c(-125, -67, 24, 50))
PopD2000 = crop(PopD2000, ext)
PopD2010 = crop(PopD2010, ext)
PopD2000p = projectRaster(PopD2000, crs = proj4string(TornP))
PopD2010p = projectRaster(PopD2010, crs = proj4string(TornP))
TornP$popD2000 = raster::extract(PopD2000p, TornP, fun = mean, na.rm = TRUE,
weights = TRUE, normalizeWeights = FALSE)[, 1]
TornP$popD2010 = raster::extract(PopD2010p, TornP, fun = mean, na.rm = TRUE,
weights = TRUE, normalizeWeights = FALSE)[, 1]
TornP$popD = TornP$popD2000
yearGroup = TornP$yr > 2005
TornP$popD[yearGroup] = TornP$popD2010[yearGroup]
TornP$pop = TornP$popD * TornP$AreaPath/10^6
Torn.df = as.data.frame(TornP)
proc.time() - stime
## user system elapsed
## 736.443 19.534 757.062
Note: The warning results from tornadoes with paths over areas with no people. It can be safely ignored.
The popD attribute for each tornado corresponds to the population density from the nearest decennial estimate so that a tornado that occurred in 1996 uses the 2000 decennial estimate while a tornado that occurred in 2012 uses the 2010 estimate.
For the set of 2208 tornadoes with at least one casualty the median population density per tornado is 31.3 people per square kilometer with an inter-quartile range between 9.6 and 136 people per square kilometer.
1 mi^2 = 2.589988 km^2 http://mcdc.missouri.edu/TenThings/urbanrural.shtml
Torn.df %>%
summarize(nT = n(),
avgPopD = mean(popD),
medianPopD = median(popD),
q25 = quantile(popD, probs = .25),
q75 = quantile(popD, probs = .75),
maxPopD = max(popD),
minPopD = min(popD)) %>%
t()
## [,1]
## nT 2784.000000
## avgPopD 224.088765
## medianPopD 30.954250
## q25 9.031192
## q75 144.605224
## maxPopD 13949.001032
## minPopD 0.000000
Torn.df %>%
dplyr::select(date, st, mag, inj, fat, cas, popD) %>%
arrange(desc(popD)) %>%
head(., n = 10) %>%
xtable(., digits = 0)
## % latex table generated in R 3.4.1 by xtable 1.8-2 package
## % Tue Aug 22 09:04:08 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rllrrrrr}
## \hline
## & date & st & mag & inj & fat & cas & popD \\
## \hline
## 1 & 2007-08-08 & NY & 2 & 9 & 0 & 9 & 13949 \\
## 2 & 1998-01-09 & CA & 1 & 1 & 0 & 1 & 12249 \\
## 3 & 1999-01-18 & PA & 0 & 18 & 0 & 18 & 12220 \\
## 4 & 2010-09-16 & NY & 1 & 1 & 1 & 2 & 9524 \\
## 5 & 2003-03-27 & FL & 2 & 14 & 1 & 15 & 6588 \\
## 6 & 2010-07-25 & NY & 1 & 7 & 0 & 7 & 6094 \\
## 7 & 2013-07-19 & FL & 0 & 3 & 0 & 3 & 5224 \\
## 8 & 2014-07-28 & MA & 2 & 2 & 0 & 2 & 4440 \\
## 9 & 1998-05-05 & CA & 2 & 1 & 0 & 1 & 4340 \\
## 10 & 2015-05-24 & TX & 0 & 4 & 0 & 4 & 4047 \\
## \hline
## \end{tabular}
## \end{table}
sum(Torn.df$popD == 0)
## [1] 21
df = Torn.df %>%
filter(yr >= 1990 & yr <= 2001) %>%
summarize(totalPop = sum(pop))
Annual average number of people exposed to tornadoes that caused at least one casualty.
df = Torn.df %>%
filter(cas > 0) %>%
group_by(Year) %>%
summarize(nT = n(),
mPop = mean(pop),
sdPop = sd(pop),
sePop = sdPop/sqrt(nT),
mPopm = mPop - sePop,
mPopp = mPop + sePop)
A1 = ggplot(df, aes(x = Year, y = mPop)) +
geom_point() +
# geom_smooth(method = lm) +
geom_errorbar(aes(ymin = mPopm, ymax = mPopp), width = .2) +
scale_y_continuous(limits = c(0, 2500)) +
scale_x_continuous(breaks = seq(1995, 2016, 4)) +
xlab("Year") + ylab("Number of People Exposed") +
theme_minimal()
# ggtitle("Tornadoes Resulting in at Least One Casualty")
Distribution of population and population density by EF rating.
PeopleByEF = Torn.df %>%
group_by(mag) %>%
summarize(nT = n(),
nP = round(sum(pop), 0),
avgP = round(mean(pop), 0),
medP = round(median(pop), 0),
avgPD = mean(popD),
medPD = median(popD))
sum(PeopleByEF$nP)
## [1] 1753011
xtable(PeopleByEF, digits = 0)
## % latex table generated in R 3.4.1 by xtable 1.8-2 package
## % Tue Aug 22 09:04:08 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrrr}
## \hline
## & mag & nT & nP & avgP & medP & avgPD & medPD \\
## \hline
## 1 & 0 & 237 & 10193 & 43 & 2 & 424 & 46 \\
## 2 & 1 & 963 & 246273 & 256 & 14 & 289 & 37 \\
## 3 & 2 & 923 & 442369 & 479 & 67 & 198 & 30 \\
## 4 & 3 & 488 & 552440 & 1132 & 239 & 91 & 24 \\
## 5 & 4 & 154 & 393415 & 2555 & 680 & 104 & 24 \\
## 6 & 5 & 19 & 108321 & 5701 & 3307 & 106 & 27 \\
## \hline
## \end{tabular}
## \end{table}
PeopleByEFc= Torn.df %>%
filter(cas > 0) %>%
group_by(mag) %>%
summarize(nT = n(),
nP = round(sum(pop), 0),
avgP = round(mean(pop), 0),
medP = round(median(pop), 0),
avgPD = mean(popD),
medPD = median(popD))
sum(PeopleByEFc$nP)
## [1] 1753011
xtable(PeopleByEFc, digits = 0)
## % latex table generated in R 3.4.1 by xtable 1.8-2 package
## % Tue Aug 22 09:04:08 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrrr}
## \hline
## & mag & nT & nP & avgP & medP & avgPD & medPD \\
## \hline
## 1 & 0 & 237 & 10193 & 43 & 2 & 424 & 46 \\
## 2 & 1 & 963 & 246273 & 256 & 14 & 289 & 37 \\
## 3 & 2 & 923 & 442369 & 479 & 67 & 198 & 30 \\
## 4 & 3 & 488 & 552440 & 1132 & 239 & 91 & 24 \\
## 5 & 4 & 154 & 393415 & 2555 & 680 & 104 & 24 \\
## 6 & 5 & 19 & 108321 & 5701 & 3307 & 106 & 27 \\
## \hline
## \end{tabular}
## \end{table}
Distribution of population density in the path of casualty-producing tornadoes.
df = Torn.df %>%
filter(cas > 0) %>%
filter(popD > 0) %>%
group_by(mag) %>%
summarize(pop = sum(pop),
nT = n(),
avgpop = pop/nT)
df = Torn.df %>%
filter(cas > 0)
A2 = ggplot(df[df$popD > 0, ], aes(popD)) +
geom_histogram(binwidth = .5, color = "white") +
scale_x_log10(breaks = c(.01, .1, 1, 10, 100, 1000, 10000),
labels = c(".01", ".1", "1", "10", "100", "1000", "10,000")) +
xlab("Population Density [people/sq. km]") +
scale_y_continuous(limits = c(0, 1000)) +
ylab("Number of Tornadoes") +
theme_minimal()
Empirical model for tornado winds by EF rating taken from Table 3-1 of NRC 2007. Percent area by EF rating for each EF category. Threshold wind speeds (m/s) are a lower bound 3 sec gusts on the operational EF Scale (Table 2-1 of NRC2007).
perc = c(1, 0, 0, 0, 0, 0,
.772, .228, 0, 0, 0, 0,
.616, .268, .115, 0, 0, 0,
.529, .271, .133, .067, 0, 0,
.543, .238, .131, .056, .032, 0,
.538, .223, .119, .07, .033, .017)
percM = matrix(perc, ncol = 6, byrow = TRUE)
Compute energy dissipation. \(E = A_p \rho \sum_{j=0}^{J} w_j v_j^{3},\) where \(A_p\) is the area of the path, \(\rho\) is area density [1 kg/m^3] \(v_j\) is the midpoint wind speed for each rating, and \(w_j\) is the corresponding fraction of path area by EF rating. With no upper bound on the EF5 wind speeds, the midpoint wind speed is set at 97 m~s\(^{-1}\) (7.5 m~s\(^{-1}\) above the threshold wind speed consistent with the EF4 midpoint speed relative to its threshold).
Torn.df = Torn.df %>%
filter(cas > 0)
threshW = c(29.06, 38.45, 49.62, 60.8, 74.21, 89.41)
midptW = c(diff(threshW)/2 + threshW[-length(threshW)], threshW[length(threshW)] + 7.5)
ef = Torn.df$mag + 1
EW3 = numeric()
for(i in 1:length(ef)) EW3[i] = midptW^3 %*% percM[ef[i], ]
Torn.df = Torn.df %>%
mutate(ED = EW3 * AreaPath)
Annual energy dissipation of tornadoes with at least one casualty.
df = Torn.df %>%
group_by(Year) %>%
summarize(nT = n(),
mED = mean(ED),
sdED = sd(ED),
seED = sdED/sqrt(nT),
mEDm = mED - seED,
mEDp = mED + seED)
B1 = ggplot(df, aes(x = Year, y = mED/10^12)) +
geom_point() +
geom_smooth(method = lm) +
geom_errorbar(aes(ymin = mEDm/10^12, ymax = mEDp/10^12), width = .2) +
scale_y_continuous(limits = c(-.15, NA)) +
scale_x_continuous(breaks = seq(1995, 2016, 4)) +
xlab("Year") + ylab("Energy Dissipation [PW]") +
theme_minimal()
# ggtitle("Tornadoes Resulting in at Least One Casualty")
Distribution of energy dissipation.
B2 = ggplot(Torn.df[Torn.df$cas > 0,], aes(ED)) +
geom_histogram(binwidth = .5, color = "white") +
scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x))) +
xlab("Energy Dissipation [Watts]") +
scale_y_continuous(limits = c(0, 1000)) +
ylab("Number of Tornadoes") +
theme_minimal()
Combine figures.
source("multiplot.txt")
mtrx = matrix(c(1, 2), nrow = 2, byrow = TRUE)
A1 = A1 + ggtitle("A") +
theme(plot.title = element_text(hjust = -.03))
B1 = B1 + ggtitle("C") +
theme(plot.title = element_text(hjust = 0))
#multiplot(A1, B1, layout = mtrx)
mtrx = matrix(c(1, 2, 3, 4), nrow = 2, byrow = TRUE)
A2 = A2 + ggtitle("B") +
theme(plot.title = element_text(hjust = 0))
B2 = B2 + ggtitle("D") +
theme(plot.title = element_text(hjust = 0))
multiplot(A1, A2, B1, B2, layout = mtrx)
## Loading required package: grid
For the set of 2208 tornadoes with at least one casualty the median energy dissipation is 95.5 gigawatt with an inter-quartile range between 14 and 511 GW. The Tallulah-Yazoo City-Durant tornado (Louisiana and Mississippi) of 24 April 2010 that killed ten and injured 146 has the highest energy dissipation at 66.2 PW.
Torn.df %>%
summarize(nT = n(),
avgED = mean(ED)/10^9,
medianED = median(ED)/10^9,
q25 = quantile(ED, probs = .25)/10^9,
q75 = quantile(ED, probs = .75)/10^9,
maxED = max(ED)/10^9,
minED = min(ED)/10^9)
## nT avgED medianED q25 q75 maxED minED
## 1 2784 791.6291 77.61539 10.85139 445.4082 66169.66 0.005659764
Torn.df %>%
dplyr::select(date, st, mag, inj, fat, cas, popD, ED) %>%
arrange(desc(ED)) %>%
head(., n = 10) %>%
xtable(., digits = 1)
## % latex table generated in R 3.4.1 by xtable 1.8-2 package
## % Tue Aug 22 09:04:09 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rllrrrrrr}
## \hline
## & date & st & mag & inj & fat & cas & popD & ED \\
## \hline
## 1 & 2010-04-24 & LA & 4 & 146 & 10 & 156 & 8.5 & 66169658959790.3 \\
## 2 & 2011-04-27 & AL & 5 & 145 & 72 & 217 & 52.5 & 49073840644331.3 \\
## 3 & 2004-05-22 & NE & 4 & 38 & 1 & 39 & 13.0 & 34265399398044.4 \\
## 4 & 2011-04-27 & AL & 4 & 1500 & 64 & 1564 & 132.7 & 30251617428152.3 \\
## 5 & 2011-04-27 & AL & 4 & 54 & 13 & 67 & 21.5 & 25950329144119.0 \\
## 6 & 2011-04-27 & AL & 4 & 85 & 22 & 107 & 32.1 & 24704083877123.4 \\
## 7 & 2008-02-05 & AR & 4 & 139 & 13 & 152 & 10.3 & 23193868125876.3 \\
## 8 & 1998-04-16 & TN & 5 & 36 & 3 & 39 & 18.4 & 20640754792221.8 \\
## 9 & 2008-05-10 & OK & 4 & 350 & 21 & 371 & 15.1 & 19165780063306.2 \\
## 10 & 2011-04-25 & AR & 2 & 16 & 4 & 20 & 37.2 & 19160328444788.8 \\
## \hline
## \end{tabular}
## \end{table}
Summary statistics. Descriptive statistics for dependent and independent variables used in the regresion models, 1995–2016 (N = 2,192)
Torn.df %>%
filter(popD > 0) %>%
summarize(nT = n(),
casM = mean(cas),
casMax = max(cas),
casMin = min(cas),
casSD = sd(cas),
popDM = mean(popD),
popDMax = max(popD),
popDMin = min(popD),
popDSD = sd(popD),
EDM = mean(ED),
EDMax = max(ED),
EDMin = min(ED),
EDSD = sd(ED)) %>%
t() %>%
xtable(., digits = 0)
## % latex table generated in R 3.4.1 by xtable 1.8-2 package
## % Tue Aug 22 09:04:09 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rr}
## \hline
## & x \\
## \hline
## nT & 2763 \\
## casM & 11 \\
## casMax & 1564 \\
## casMin & 1 \\
## casSD & 49 \\
## popDM & 226 \\
## popDMax & 13949 \\
## popDMin & 0 \\
## popDSD & 667 \\
## EDM & 797564860290 \\
## EDMax & 66169658959790 \\
## EDMin & 5659764 \\
## EDSD & 2720667862347 \\
## \hline
## \end{tabular}
## \end{table}
Population and casualties and energy dissipation and casualties. There are seven orders of magnitude separating the lowest from the highest per-tornado energy dissipation. There are five orders of magnitude separating the lowest from highest per-tornado population density.
A3 = ggplot(Torn.df[Torn.df$popD > 0, ], aes(x = popD, y = cas)) +
geom_point() +
scale_x_log10(breaks = c(.01, 1, 100, 10000),
labels = c(".01", "1", "100", "10,000")) +
scale_y_log10(breaks = c(1, 10, 100, 1000)) +
xlab("Population Density [people/sq. km]") +
ylab("Number of Casualties") +
theme_minimal()
B3 = ggplot(Torn.df, aes(x = ED, y = cas)) +
geom_point() +
scale_x_log10(breaks = c(10^7, 10^9, 10^11, 10^13),
labels = c(".001", ".1", "10", "1,000")) +
scale_y_log10(breaks = c(1, 10, 100, 1000)) +
xlab("Energy Dissipation [GW]") +
ylab("Number of Casualties") +
theme_minimal()
mtrx = matrix(c(1, 2), nrow = 1, byrow = TRUE)
A3 = A3 + ggtitle("A") +
theme(plot.title = element_text(hjust = 0))
B3 = B3 + ggtitle("B") +
theme(plot.title = element_text(hjust = 0))
multiplot(A3, B3, layout = mtrx)
Correlation between ED and popD.
Torn.df %>%
group_by(mag) %>%
summarize(nT = n(),
totalED = sum(ED)/10^12,
avgED = mean(ED)/10^12,
medED = median(ED)/10^12)
## # A tibble: 6 x 5
## mag nT totalED avgED medED
## <int> <int> <dbl> <dbl> <dbl>
## 1 0 237 2.204371 0.009301145 0.001414941
## 2 1 963 72.875709 0.075675710 0.017000513
## 3 2 923 375.868959 0.407225307 0.123699523
## 4 3 488 839.176917 1.719624830 0.841821518
## 5 4 154 726.489552 4.717464627 2.240879245
## 6 5 19 187.280005 9.856842348 6.758806234
Torn.df %>%
filter(popD > 0) %>%
mutate(lED = log10(ED),
lpopD = log10(popD)) %>%
summarize(r1 = cor(ED, popD, method = "p"),
r2 = cor(ED, popD, method = "s"),
r3 = cor(lED, lpopD))
## r1 r2 r3
## 1 -0.07129524 -0.2012558 -0.1828996
Mean and variance of casualty counts.
Torn.df %>%
filter(popD > 0) %>%
summarize(meanCas = mean(cas),
varCas = var(cas),
ratio = varCas/meanCas)
## meanCas varCas ratio
## 1 11.41585 2387.651 209.1523
For the set of tornadoes with at least one casualty, the mean and variance of the counts are 11.8 and 2826, respectively suggesting a negative binomial model for counts.
Use the glm.nb function from the MASS package to fit the model and get the estimate for theta.
df = Torn.df %>%
filter(popD > 0) %>%
mutate(lpopD = log10(popD),
lpop = log10(pop),
lED = log10(ED))
formula = cas ~ lpopD + lED + lpopD:lED
suppressMessages(library(MASS))
model0 = glm.nb(formula, data = df, link = log, init.theta = 1)
model1 = glm(formula, data = df, family = neg.bin(theta = model0$theta))
summary(model1)
##
## Call:
## glm(formula = formula, family = neg.bin(theta = model0$theta),
## data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0763 -1.0004 -0.5362 0.0699 7.8741
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.08017 0.74271 -4.147 3.47e-05 ***
## lpopD -1.59540 0.40936 -3.897 9.96e-05 ***
## lED 0.39146 0.06768 5.784 8.13e-09 ***
## lpopD:lED 0.19533 0.03824 5.109 3.47e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial family taken to be 3.066176)
##
## Null deviance: 5225.3 on 2762 degrees of freedom
## Residual deviance: 2872.1 on 2759 degrees of freedom
## AIC: 16661
##
## Number of Fisher Scoring iterations: 17
xtable(model1)
## % latex table generated in R 3.4.1 by xtable 1.8-2 package
## % Tue Aug 22 09:04:10 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrr}
## \hline
## & Estimate & Std. Error & t value & Pr($>$$|$t$|$) \\
## \hline
## (Intercept) & -3.0802 & 0.7427 & -4.15 & 0.0000 \\
## lpopD & -1.5954 & 0.4094 & -3.90 & 0.0001 \\
## lED & 0.3915 & 0.0677 & 5.78 & 0.0000 \\
## lpopD:lED & 0.1953 & 0.0382 & 5.11 & 0.0000 \\
## \hline
## \end{tabular}
## \end{table}
Use the interplot package.
suppressMessages(library(interplot))
A4 = interplot(m = model1, var1 = "lED", var2 = "lpopD", hist = TRUE) +
scale_x_continuous(breaks = c(-2, 0, 2, 4),
labels = c(".01", "1", "100", "10,000")) +
scale_y_continuous(breaks = pretty_breaks()) +
geom_hline(yintercept = 0, linetype = "dashed") +
xlab("Population Density [people/sq. km]") +
ylab("Energy Elasticity") +
theme_minimal()
B4 = interplot(m = model1, var1 = "lpopD", var2 = "lED", hist = TRUE) +
scale_x_continuous(breaks = c(8, 10, 12, 14),
labels = c(".1", "10", "1000", "100,000")) +
scale_y_continuous(breaks = pretty_breaks()) +
geom_hline(yintercept = 0, linetype = "dashed") +
xlab("Energy Dissipation [GW]") +
ylab("Population Elasticity") +
theme_minimal()
mtrx = matrix(c(1, 2), nrow = 2, byrow = TRUE)
A4 = A4 + ggtitle("A") +
theme(plot.title = element_text(hjust = 0))
B4 = B4 + ggtitle("B") +
theme(plot.title = element_text(hjust = 0))
multiplot(A4, B4, layout = mtrx)
Use the effects package to get marginal predictions. theta in the neg.bin argument is determined by the glm.nb function above.
suppressMessages(library(effects))
eff = allEffects(model1, xlevels = list(lED = seq(8, 13, 1),
lpopD = seq(0, 3, 1)))
results1 = as.data.frame(eff[[1]]) %>%
mutate(popD = 10^lpopD,
ED = 10^lED,
lbl = paste("Population Density", popD, "[people/sq. km]", sep = " "))
ggplot(results1, aes(x = ED, y = fit)) +
geom_point() +
geom_errorbar(aes(ymax = upper, ymin = lower), width = .1) +
scale_x_log10(breaks = c(10^8, 10^10, 10^12),
labels = c(".1", "10", "1000")) +
scale_y_continuous(limits = c(0, NA), breaks = pretty_breaks()) +
facet_wrap(~ lbl, scales = "free_y") +
xlab("Energy Dissipation [GW]") +
ylab("Casualty Rate") +
theme_minimal()
Model adequacy
pchisq(model1$deviance, model1$df.residual, lower.tail = FALSE) # model is adequate p-value = .07.
## [1] 0.06533669
df0 = Torn.df %>%
group_by(st) %>%
summarize(nT = n())
sts = df0$st[df0$nT >= 70]
df = Torn.df %>%
filter(popD > 0) %>%
mutate(lcas = log10(cas),
lpopD = log10(popD),
lpop = log10(pop),
lED = log10(ED))
formula = cas ~ lpopD + lED + lpopD:lED
results0 = df %>%
filter(st %in% sts) %>%
group_by(st) %>%
# do(mod0 = glm.nb(formula, data = ., link = log,
# epsilon = 1e-5, start = c(0, 1, 1, 0))) %>%
do(mod0 = glm.nb(formula, data = ., link = log, init.theta = 1)) %>%
summarize(st = first(st),
theta = mod0$theta,
pEl = coef(mod0)[2],
eEl = coef(mod0)[3],
mEf = coef(mod0)[4],
sig = coef(summary(mod0))[4, 4])
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning in glm.nb(formula, data = ., link = log, init.theta = 1):
## alternation limit reached
pEl = NULL
eEl = NULL
mEf = NULL
sig = NULL
for(i in 1:length(sts)){
df1 = df[df$st == sts[i], ]
mod1 = glm(formula, data = df1,
family = neg.bin(theta = results0$theta[i]),
epsilon = 1e-5)
pEl = c(pEl, coef(mod1)[2])
eEl = c(eEl, coef(mod1)[3])
mEf = c(mEf, coef(mod1)[4])
sig = c(sig, coef(summary(mod1))[4, 4])
}
## Warning: glm.fit: algorithm did not converge
results1 = data.frame(sts, pEl, eEl, mEf, sig)
results1 %>%
arrange(desc(mEf)) %>%
xtable(., digits = 3)
## % latex table generated in R 3.4.1 by xtable 1.8-2 package
## % Tue Aug 22 09:04:13 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rlrrrr}
## \hline
## & sts & pEl & eEl & mEf & sig \\
## \hline
## 1 & AR & -9.510 & -0.075 & 0.872 & 0.000 \\
## 2 & MO & -5.818 & 0.069 & 0.597 & 0.002 \\
## 3 & IL & -4.675 & 0.161 & 0.486 & 0.001 \\
## 4 & KY & -4.875 & 0.193 & 0.478 & 0.042 \\
## 5 & TN & -4.066 & 0.078 & 0.439 & 0.020 \\
## 6 & AL & -3.851 & 0.281 & 0.414 & 0.009 \\
## 7 & OK & -2.288 & 0.375 & 0.293 & 0.030 \\
## 8 & MS & -2.465 & 0.526 & 0.265 & 0.137 \\
## 9 & NC & -2.079 & 0.154 & 0.214 & 0.553 \\
## 10 & KS & -1.742 & 0.788 & 0.214 & 0.233 \\
## 11 & TX & -1.854 & 0.286 & 0.211 & 0.004 \\
## 12 & LA & -0.865 & 0.689 & 0.113 & 0.441 \\
## 13 & GA & -0.323 & 0.581 & 0.059 & 0.779 \\
## 14 & IN & 0.036 & 0.606 & 0.055 & 0.813 \\
## 15 & FL & 1.116 & 0.998 & -0.091 & 0.536 \\
## 16 & SC & 1.703 & 0.380 & -0.112 & 0.637 \\
## \hline
## \end{tabular}
## \end{table}
Compare conditional energy elasticities between states with substantive and significant interaction (AR, TN, MO) and those with insignificant interaction. Leave out states where there is significant, but weaker interactions.
formula = cas ~ lpopD + lED + lpopD:lED
df = Torn.df %>%
filter(popD > 0) %>%
# filter(yr >= 2006 & yr <= 2016) %>%
# filter(st == "IL") %>%
filter(st == "AR" | st == "TN" | st == "MO") %>%
# filter(st == "AR" | st == "TN" | st == "MO" | st == "KY" | st == "IL" | st == "OK" |
# st == "AL" | st == "MS" | st == "TX" | st == "LA") %>%
mutate(lpopD = log10(popD),
lpop = log10(pop),
lED = log10(ED))
range(df$lED)
## [1] 8.167772 13.365373
labs = (2^seq(-.5, 2.5, .5) - 1) * 100
model0A = glm.nb(formula, data = df, link = log, init.theta = 1)
model1A = glm(formula, data = df, family = neg.bin(theta = model0A$theta), epsilon = 1e-5)
A5 = interplot(m = model1A, var1 = "lED", var2 = "lpopD", hist = FALSE) +
scale_x_continuous(limits = c(0, 3.5),
breaks = c(0, 1, 2, 3),
labels = c("1", "10", "100", "1000")) +
# scale_y_continuous(limits = c(-.4, 2.7), breaks = pretty_breaks()) +
scale_y_continuous(limits = c(-.4, 2.7),
breaks = c(0, 1, 2),
labels = c("0", "100 %", "200 %")) +
geom_hline(yintercept = 0, linetype = "dashed") +
xlab("Population Density [people/sq. km]") +
ylab("Increase in Casualties\nPer Doubling of Energy") +
theme_minimal()
df = Torn.df %>%
filter(popD > 0) %>%
# filter(yr >= 2006 & yr <= 2016) %>%
# filter(st == "AL") %>%
# filter(st == "GA" | st == "FL" | st == "LA") %>%
filter(st != "AR" & st != "TN" & st != "MO" & st != "KY" & st != "IL" &
st != "OK" & st != "AL" & st != "MS" & st != "TX" & st != "LA") %>%
mutate(lpopD = log10(popD),
lpop = log10(pop),
lED = log10(ED))
range(df$lED)
## [1] 6.752798 13.534856
model0B = glm.nb(formula, data = df, link = log, init.theta = 1)
model1B = glm(formula, data = df, family = neg.bin(theta = model0B$theta), epsilon = 1e-5)
B5 = interplot(m = model1B, var1 = "lED", var2 = "lpopD", hist = FALSE) +
scale_x_continuous(limits = c(0, 3.5),
breaks = c(0, 1, 2, 3),
labels = c("1", "10", "100", "1000")) +
scale_y_continuous(limits = c(-.4, 2.7),
breaks = c(0, 1, 2),
labels = c("0", "100 %", "200 %")) +
# scale_y_continuous(limits = c(-.4, 2.7), breaks = pretty_breaks()) +
geom_hline(yintercept = 0, linetype = "dashed") +
xlab("Population Density [people/sq. km]") +
ylab("Increase in Casualties\nPer Doubling of Energy") +
theme_minimal()
mtrx = matrix(c(1, 2), nrow = 1, byrow = TRUE)
A5 = A5 + ggtitle("A") +
theme(plot.title = element_text(hjust = 0))
B5 = B5 + ggtitle("B") +
theme(plot.title = element_text(hjust = 0))
multiplot(A5, B5, layout = mtrx)
## Warning: Removed 5 rows containing missing values (geom_path).
## Warning: Removed 48 rows containing missing values (geom_path).
Night vs day. Overall statistics.
suppressMessages(library(maptools))
tzone = "Etc/GMT+6"
p4s = "+proj=longlat +datum=WGS84"
loc = matrix(c(Torn.df$slon, Torn.df$slat), ncol = 2)
genesisTime = ymd_hms(paste(Torn.df$date, Torn.df$time), tz = tzone)
ssetTime = sunriset(crds = loc, genesisTime,
proj4string = CRS("+proj=longlat +datum=WGS84"),
direction = "sunset", POSIXct = TRUE)$time
sriseTime = sunriset(crds = loc, genesisTime,
proj4string = CRS("+proj=longlat +datum=WGS84"),
direction = "sunrise", POSIXct = TRUE)$time
loc2 = matrix(c(Torn.sfdf$slon, Torn.sfdf$slat), ncol = 2)
genesisTime2 = ymd_hms(paste(Torn.sfdf$date, Torn.sfdf$time), tz = tzone)
ssetTime2 = sunriset(crds = loc2, genesisTime2,
proj4string = CRS("+proj=longlat +datum=WGS84"),
direction = "sunset", POSIXct = TRUE)$time
sriseTime2 = sunriset(crds = loc2, genesisTime2,
proj4string = CRS("+proj=longlat +datum=WGS84"),
direction = "sunrise", POSIXct = TRUE)$time
Torn.df %>%
# mutate(DateTime = as.POSIXct(paste(date, time), format = "%Y-%m-%d %H:%M:%S"),
# hr = hour(DateTime),
# Daytime = hr > 10 & hr <= 22) %>%
mutate(genesisTime = genesisTime,
sriseTime = sriseTime,
ssetTime = ssetTime,
Daytime = genesisTime > sriseTime & genesisTime < ssetTime) %>%
filter(yr <= 2007) %>%
group_by(Daytime) %>%
summarize(nT = n(),
nC = sum(cas),
nF = sum(fat),
casRate = nC/nT,
fatRate = nF/nT)
## # A tibble: 2 x 6
## Daytime nT nC nF casRate fatRate
## <lgl> <int> <int> <int> <dbl> <dbl>
## 1 FALSE 767 7884 508 10.279009 0.6623207
## 2 TRUE 1179 11034 484 9.358779 0.4105174
as.data.frame(Torn.sfdf) %>%
# mutate(DateTime = as.POSIXct(paste(date, time), format = "%Y-%m-%d %H:%M:%S"),
# hr = hour(DateTime),
# Daytime = hr > 10 & hr <= 22) %>%
mutate(genesisTime = genesisTime2,
sriseTime = sriseTime2,
ssetTime = ssetTime2,
Daytime = genesisTime > sriseTime & genesisTime < ssetTime) %>%
group_by(Daytime) %>%
summarize(nT = n(),
nTC = sum(cas > 0),
nTF = sum(fat > 0),
nC = sum(cas),
pTC = nTC/nT,
pTF = nTF/nT)
## # A tibble: 2 x 7
## Daytime nT nTC nTF nC pTC pTF
## <lgl> <int> <int> <int> <int> <dbl> <dbl>
## 1 FALSE 8950 1093 259 10954 0.12212291 0.02893855
## 2 TRUE 23729 1691 300 20633 0.07126301 0.01264276
Predicted casualty rates for tornadoes with at least one casualty. Mid South versus all states.
formula = cas ~ lpopD + lED + lpopD:lED
df = Torn.df %>%
mutate(genesisTime = genesisTime,
sriseTime = sriseTime,
ssetTime = ssetTime,
Daytime = genesisTime > sriseTime & genesisTime < ssetTime) %>%
filter(popD > 0) %>%
# filter(cas < 1000) %>%
# filter(st == "AR" | st == "TN" | st == "MO" | st == "KY" | st == "IL" | st == "OK" |
# st == "AL" | st == "MS" | st == "TX" | st == "LA") %>%
filter(st == "AR" | st == "TN" | st == "MO") %>%
filter(Daytime) %>%
mutate(lpopD = log10(popD),
lpop = log10(pop),
lED = log10(ED))
model0Day = glm.nb(formula, data = df, link = log, init.theta = 1, epsilon = 1e-5)
model1Day = glm(formula, data = df,
family = neg.bin(theta = model0Day$theta),
epsilon = 1e-5)
effDay = allEffects(model1Day, xlevels = list(lED = seq(9, 12, 1),
lpopD = c(log10(1), log10(10), log10(100), log10(1000))),
confidence.level = .95)
resultsDay = as.data.frame(effDay[[1]]) %>%
mutate(popD = 10^lpopD,
ED = 10^lED,
lbl = paste(popD, "[people/sq. km]", sep = " "),
Time = "Daytime")
df = Torn.df %>%
mutate(genesisTime = genesisTime,
sriseTime = sriseTime,
ssetTime = ssetTime,
Daytime = genesisTime > sriseTime & genesisTime < ssetTime) %>%
filter(popD > 0) %>%
# filter(cas < 1000) %>%
# filter(st == "AR" | st == "TN" | st == "MO" | st == "KY" | st == "IL" | st == "OK" |
# st == "AL" | st == "MS" | st == "TX" | st == "LA") %>%
filter(st == "AR" | st == "TN" | st == "MO") %>%
filter(!Daytime) %>%
mutate(lpopD = log10(popD),
lpop = log10(pop),
lED = log10(ED))
model0Night = glm.nb(formula, data = df, link = log, init.theta = 1, epsilon = 1e-5)
model1Night = glm(formula, data = df,
family = neg.bin(theta = model0Night$theta),
epsilon = 1e-5)
effNight = allEffects(model1Night, xlevels = list(lED = seq(9, 12, 1),
lpopD = c(log10(1), log10(10), log10(100), log10(1000))),
confidence.level = .95)
resultsNight = as.data.frame(effNight[[1]]) %>%
mutate(popD = 10^lpopD,
ED = 10^lED,
lbl = paste(popD, "[people/sq. km]", sep = " "),
Time = "Nighttime")
results = rbind(resultsDay, resultsNight)
dodge = position_dodge(width = .5)
B6 = ggplot(results, aes(x = ED, y = fit, color = Time)) +
geom_point(position = dodge) +
geom_errorbar(aes(ymax = upper, ymin = lower), width = .1, position = dodge) +
scale_x_log10(breaks = c(10^9, 10^10, 10^11, 10^12),
labels = c("1", "10", "100", "1000")) +
scale_y_continuous(limits = c(0, NA), breaks = pretty_breaks()) +
geom_errorbar(aes(ymax = upper, ymin = lower), width = .1, position = dodge) +
facet_wrap(~ lbl, scales = "free_y") +
xlab("Energy Dissipation [GW]") +
ylab("Casualty Rate") +
scale_color_discrete("") +
theme_minimal() +
theme(legend.position="bottom")
df = Torn.df %>%
mutate(genesisTime = genesisTime,
sriseTime = sriseTime,
ssetTime = ssetTime,
Daytime = genesisTime > sriseTime & genesisTime < ssetTime) %>%
filter(popD > 0) %>%
# filter(cas < 1000) %>%
# filter(st != "AR" & st != "TN" & st != "MO" & st != "KY" & st != "IL" & st != "OK" &
# st != "AL" & st != "MS" & st != "TX" & st != "LA") %>%
filter(Daytime) %>%
mutate(lpopD = log10(popD),
lpop = log10(pop),
lED = log10(ED))
model0Day = glm.nb(formula, data = df, link = log, init.theta = 1, epsilon = 1e-5)
model1Day = glm(formula, data = df,
family = neg.bin(theta = model0Day$theta),
epsilon = 1e-5)
effDay = allEffects(model1Day, xlevels = list(lED = seq(9, 12, 1),
lpopD = c(log10(1), log10(10), log10(100), log10(1000))),
confidence.level = .95)
resultsDay = as.data.frame(effDay[[1]]) %>%
mutate(popD = 10^lpopD,
ED = 10^lED,
lbl = paste(popD, "[people/sq. km]", sep = " "),
Time = "Daytime")
df = Torn.df %>%
mutate(genesisTime = genesisTime,
sriseTime = sriseTime,
ssetTime = ssetTime,
Daytime = genesisTime > sriseTime & genesisTime < ssetTime) %>%
filter(popD > 0) %>%
# filter(cas < 1000) %>%
# filter(st != "AR" & st != "TN" & st != "MO" & st != "KY" & st != "IL" & st != "OK" &
# st != "AL" & st != "MS" & st != "TX" & st != "LA") %>%
filter(!Daytime) %>%
mutate(lpopD = log10(popD),
lpop = log10(pop),
lED = log10(ED))
model0Night = glm.nb(formula, data = df, link = log, init.theta = 1, epsilon = 1e-5)
model1Night = glm(formula, data = df,
family = neg.bin(theta = model0Night$theta),
epsilon = 1e-5)
effNight = allEffects(model1Night, xlevels = list(lED = seq(9, 12, 1),
lpopD = c(log10(1), log10(10), log10(100), log10(1000))),
confidence.level = .95)
resultsNight = as.data.frame(effNight[[1]]) %>%
mutate(popD = 10^lpopD,
ED = 10^lED,
lbl = paste(popD, "[people/sq. km]", sep = " "),
Time = "Nighttime")
results = rbind(resultsDay, resultsNight)
dodge = position_dodge(width = .5)
A6 = ggplot(results, aes(x = ED, y = fit, color = Time)) +
geom_point(position = dodge) +
geom_errorbar(aes(ymax = upper, ymin = lower), width = .1, position = dodge) +
scale_x_log10(breaks = c(10^9, 10^10, 10^11, 10^12),
labels = c("1", "10", "100", "1000")) +
scale_y_continuous(limits = c(0, NA), breaks = pretty_breaks()) +
geom_errorbar(aes(ymax = upper, ymin = lower), width = .1, position = dodge) +
facet_wrap(~ lbl, scales = "free_y") +
xlab("Energy Dissipation [GW]") +
ylab("Casualty Rate") +
scale_color_discrete("") +
theme_minimal() +
theme(legend.position="bottom")
mtrx = matrix(c(1, 2), nrow = 1, byrow = TRUE)
A6 = A6 + ggtitle("B Everywhere") +
theme(plot.title = element_text(hjust = 0))
B6 = B6 + ggtitle("A Mid South") +
theme(plot.title = element_text(hjust = 0))
multiplot(B6, A6, layout = mtrx)
Weekday versus weekend
formula = cas ~ lpopD + lED + lpopD:lED
df = Torn.df %>%
mutate(DateTime = as.POSIXct(paste(date, time), format = "%Y-%m-%d %H:%M:%S"),
dow = wday(DateTime, label = TRUE, abbr = TRUE)) %>%
filter(popD > 0) %>%
filter(st == "AR" | st == "TN" | st == "MO" | st == "KY" | st == "OK" | st == "AL" | st == "MS" | st == "TX" | st == "LA") %>%
# filter(st == "AR" | st == "TN" | st == "MO") %>%
filter(dow == "Sun" | dow == "Sat") %>%
mutate(lpopD = log10(popD),
lpop = log10(pop),
lED = log10(ED))
model0Weekend = glm.nb(formula, data = df, link = log, init.theta = 1, epsilon = 1e-5)
model1Weekend = glm(formula, data = df,
family = neg.bin(theta = model0Weekend$theta),
epsilon = 1e-5)
effWeekend = allEffects(model1Weekend, xlevels = list(lED = seq(9, 12, 1),
lpopD = c(log10(1), log10(10), log10(100), log10(1000))),
confidence.level = .95)
resultsWeekend = as.data.frame(effWeekend[[1]]) %>%
mutate(popD = round(10^lpopD),
ED = 10^lED,
Density = paste(popD, "[people/sq. km]", sep = " "),
Day = "Weekend")
df = Torn.df %>%
mutate(DateTime = as.POSIXct(paste(date, time), format = "%Y-%m-%d %H:%M:%S"),
dow = wday(DateTime, label = TRUE, abbr = TRUE)) %>%
filter(popD > 0) %>%
filter(st == "AR" | st == "TN" | st == "MO" | st == "KY" | st == "OK" |
st == "AL" | st == "MS" | st == "TX" | st == "LA") %>%
# filter(st == "AR" | st == "TN" | st == "MO") %>%
filter(dow != "Sun" & dow != "Sat") %>%
mutate(lpopD = log10(popD),
lpop = log10(pop),
lED = log10(ED))
model0Weekday = glm.nb(formula, data = df, link = log, init.theta = 1, epsilon = 1e-5)
model1Weekday = glm(formula, data = df,
family = neg.bin(theta = model0Weekday$theta),
epsilon = 1e-5)
effWeekday = allEffects(model1Weekday, xlevels = list(lED = seq(9, 12, 1),
lpopD = c(log10(1), log10(10), log10(100), log10(1000))),
confidence.level = .95)
resultsWeekday = as.data.frame(effWeekday[[1]]) %>%
mutate(popD = round(10^lpopD),
ED = 10^lED,
Density = paste(popD, "[people/sq. km]", sep = " "),
Day = "Weekday")
results = rbind(resultsWeekend, resultsWeekday)
A7 = ggplot(results, aes(x = ED, y = fit, color = Day)) +
geom_point(position = dodge) +
scale_x_log10(breaks = c(10^9, 10^10, 10^11, 10^12),
labels = c("1", "10", "100", "1000")) +
scale_y_continuous(limits = c(0, NA), breaks = pretty_breaks()) +
facet_wrap(~ Density, scales = "free_y", nrow = 2) +
geom_errorbar(aes(ymax = upper, ymin = lower), width = 0, position = dodge) +
xlab("Energy Dissipation [GW]") +
ylab("Casualty Rate") +
scale_color_discrete("") +
theme_minimal() +
theme(legend.position="bottom")
df = Torn.df %>%
mutate(DateTime = as.POSIXct(paste(date, time), format = "%Y-%m-%d %H:%M:%S"),
dow = wday(DateTime, label = TRUE, abbr = TRUE)) %>%
filter(popD > 0) %>%
filter(st != "AR" & st != "TN" & st != "MO" & st != "KY" & st != "OK" &
st != "AL" & st != "MS" & st != "TX" & st != "LA") %>%
filter(dow == "Sun" | dow == "Sat") %>%
mutate(lpopD = log10(popD),
lpop = log10(pop),
lED = log10(ED))
model0Weekend = glm.nb(formula, data = df, link = log, init.theta = 1, epsilon = 1e-5)
model1Weekend = glm(formula, data = df,
family = neg.bin(theta = model0Weekend$theta),
epsilon = 1e-5)
effWeekend = allEffects(model1Weekend, xlevels = list(lED = seq(9, 12, 1),
lpopD = c(log10(1), log10(10), log10(100), log10(1000))),
confidence.level = .95)
resultsWeekend = as.data.frame(effWeekend[[1]]) %>%
mutate(popD = round(10^lpopD),
ED = 10^lED,
Density = paste(popD, "[people/sq. km]", sep = " "),
Day = "Weekend")
df = Torn.df %>%
mutate(DateTime = as.POSIXct(paste(date, time), format = "%Y-%m-%d %H:%M:%S"),
dow = wday(DateTime, label = TRUE, abbr = TRUE)) %>%
filter(popD > 0) %>%
filter(st != "AR" & st != "TN" & st != "MO" & st != "KY" & st != "OK" &
st != "AL" & st != "MS" & st != "TX" & st != "LA") %>%
filter(dow != "Sun" & dow != "Sat") %>%
mutate(lpopD = log10(popD),
lpop = log10(pop),
lED = log10(ED))
model0Weekday = glm.nb(formula, data = df, link = log, init.theta = 1, epsilon = 1e-5)
model1Weekday = glm(formula, data = df,
family = neg.bin(theta = model0Weekday$theta),
epsilon = 1e-5)
effWeekday = allEffects(model1Weekday, xlevels = list(lED = seq(9, 12, 1),
lpopD = c(log10(1), log10(10), log10(100), log10(1000))),
confidence.level = .95)
resultsWeekday = as.data.frame(effWeekday[[1]]) %>%
mutate(popD = round(10^lpopD),
ED = 10^lED,
Density = paste(popD, "[people/sq. km]", sep = " "),
Day = "Weekday")
results = rbind(resultsWeekend, resultsWeekday)
B7 = ggplot(results, aes(x = ED, y = fit, color = Day)) +
geom_point(position = dodge) +
scale_x_log10(breaks = c(10^9, 10^10, 10^11, 10^12),
labels = c("1", "10", "100", "1000")) +
scale_y_continuous(limits = c(0, NA), breaks = pretty_breaks()) +
facet_wrap(~ Density, scales = "free_y", nrow = 2) +
geom_errorbar(aes(ymax = upper, ymin = lower), width = 0, position = dodge) +
xlab("Energy Dissipation [GW]") +
ylab("Casualty Rate") +
scale_color_discrete("") +
theme_minimal() +
theme(legend.position="bottom")
mtrx = matrix(c(1, 2), nrow = 1, byrow = TRUE)
A7 = A7 + ggtitle("A Greater Mid South") +
theme(plot.title = element_text(hjust = 0))
B7 = B7 + ggtitle("B Elsewhere") +
theme(plot.title = element_text(hjust = 0))
multiplot(A7, B7, layout = mtrx)
From FEMA: Safer, Stronger, Smarter: A Guide to Improving School Natural Hazard Safety June 2017. In May 2013, an EF5 tornado struck Moore, Oklahoma and resulted in 24 fatalities, including seven children at the Plaza Towers Elementary School. In April 2014, an EF4 tornado leveled a brand-new school still under construction in Vilonia, asuburb of Little Rock, Arkansas. While schools generally have some shortterm notification of a tornado warning, and tornado safe rooms are becoming an accepted standard of care, and are now a requirement for new schools in certain locations under the 2015 International Building Code, many schools remain vulnerable to tornadoes with no safe haven for students or staff.
Percent elderly: State Level https://www.census.gov/quickfacts/fact/table/
state = c("Arkansas", "Tennessee", "Missouri", "Kentucky", "Illinois", "Oklahoma",
"Alabama", "Mississippi", "Texas", "North Carolina", "Louisiana",
"Georgia")
Interaction = c(1.023, .717, .607, .493, .417, .404, .347, .329,
.308, .286, .246, .128)
Elderly2010 = c(14.4, 13.4, 14, 13.3, 12.5, 13.5, 13.8, 12.8,
10.3, 12.9, 12.3, 10.7)
Elderly2016 = c(16.3, 15.7, 16.1, 15.6, 16.1, 15, 16.1, 15.1,
12, 15.5, 14.4, 13.1)
BachDegrees = c(21.1, 24.9, 27.1, 22.3, 32.3, 24.1, 23.5,
20.7, 27.6, 28.4, 22.5, 28.8)
Disability = c(12.3, 11.2, 10.4, 12.9, 7.1, 11.3, 11.8, 11.9, 8.1,
9.6, 11.0, 8.8)
Rent = c(677, 764, 746, 675, 907, 727, 717, 717, 882, 797, 788, 879)
Income = c(41371, 45219, 48173, 43740, 57574, 46879, 43623,
39665, 53207, 46868, 45047, 49620)
Black = c(15.4, 16.7, 11.6, 7.8, 14.5, 7.4, 26.2, 37, 11.8, 21.5,
32, 30.5)
White = c(74.5, 75.6, 81, 86.3, 63.7, 68.7, 67, 58, 45.3, 65.3,
60.3, 55.9)
cor.test(Interaction, Elderly2010)
##
## Pearson's product-moment correlation
##
## data: Interaction and Elderly2010
## t = 2.9821, df = 10, p-value = 0.01376
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1850385 0.9040284
## sample estimates:
## cor
## 0.6860824
dg = data.frame(state, Interaction, Elderly2010, Elderly2016)
xtable(dg)
## % latex table generated in R 3.4.1 by xtable 1.8-2 package
## % Tue Aug 22 09:04:20 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rlrrr}
## \hline
## & state & Interaction & Elderly2010 & Elderly2016 \\
## \hline
## 1 & Arkansas & 1.02 & 14.40 & 16.30 \\
## 2 & Tennessee & 0.72 & 13.40 & 15.70 \\
## 3 & Missouri & 0.61 & 14.00 & 16.10 \\
## 4 & Kentucky & 0.49 & 13.30 & 15.60 \\
## 5 & Illinois & 0.42 & 12.50 & 16.10 \\
## 6 & Oklahoma & 0.40 & 13.50 & 15.00 \\
## 7 & Alabama & 0.35 & 13.80 & 16.10 \\
## 8 & Mississippi & 0.33 & 12.80 & 15.10 \\
## 9 & Texas & 0.31 & 10.30 & 12.00 \\
## 10 & North Carolina & 0.29 & 12.90 & 15.50 \\
## 11 & Louisiana & 0.25 & 12.30 & 14.40 \\
## 12 & Georgia & 0.13 & 10.70 & 13.10 \\
## \hline
## \end{tabular}
## \end{table}
r(Elderly2010, Interaction) = .69 Variable r Elderly2010: %65+ in 2010 +.69 Percent White alone +.62 Percent Disabled <65 +.42 Median Gross Rent 2011-2015 -.53 Percent Black alone -.46 Percent Bachelor Degrees -.33 Median Household Income -.31
Speculation: Elderly whites: Greater degree of fatalism, passivity (physically harder to do something), and inattention to warnings.
Percent elderly: City Level Wikipedia: city name %65+ popD Memphis, TN 15.4 770
Hot Springs, AR 23.2 392 Little Rock, AR 11.6 640 Fort Smith, AR 13.7 537 Joplin, MO 13.2 540 Bella Vista, AR 41.9 234
Gainesville, GA 10.5 468 Albany, GA 11.3 535 Alexandria, LA 15.1 680 Birmingham, AL 13.5 551
The elderly are moving to the suburbs and exurbs. Hot Springs, Arkansas is a hotspot for old people.
Predicted casualty rates using the Mid-South and Elsewhere models fit to data in overlapping windows of W consecutive years.
formula = cas ~ lpopD + lED + lpopD:lED
W = 10 # number of years in the window
S = 13 # tornado strength log energy dissipation
P = log10(500) # population density log scale
df = Torn.df %>%
filter(popD > 0) %>%
filter(st == "AR" | st == "TN" | st == "MO" | st == "KY" | st == "OK" |
st == "AL" | st == "MS" | st == "TX" | st == "LA") %>%
mutate(lpopD = log10(popD),
lpop = log10(pop),
lED = log10(ED))
fit = numeric()
UL = numeric(); UU = numeric()
for(decadeEndYr in 2011:2016){
df2 = df %>%
filter(yr > decadeEndYr - W & yr <= decadeEndYr)
model0A = glm.nb(formula, data = df2, link = log, init.theta = 1)
model1A = glm(formula, data = df2, family = neg.bin(theta = model0A$theta), epsilon = 1e-5)
pre = predict.glm(model1A, newdata = data.frame(lED = S, lpopD = P), type = "response", se.fit = TRUE)
fit = c(fit, pre$fit)
UL = c(UL, pre$fit - pre$se.fit)
UU = c(UU, pre$fit + pre$se.fit)
}
resultsMS = data.frame(Year = 2011:2016, fit, UL, UU, Region = "Greater Mid South")
df = Torn.df %>%
filter(popD > 0) %>%
filter(st != "AR" & st != "TN" & st != "MO" & st != "KY" &
st != "OK" & st != "AL" & st != "MS" & st != "TX" & st != "LA") %>%
mutate(lpopD = log10(popD),
lpop = log10(pop),
lED = log10(ED))
fit = numeric()
UL = numeric(); UU = numeric()
for(decadeEndYr in 2011:2016){
df2 = df %>%
filter(yr > decadeEndYr - W & yr <= decadeEndYr)
model0B = glm.nb(formula, data = df2, link = log, init.theta = 1, epsilon = 1e-5)
model1B = glm(formula, data = df2, family = neg.bin(theta = model0B$theta), epsilon = 1e-5)
pre = predict.glm(model1B, newdata = data.frame(lED = S, lpopD = P), type = "response", se.fit = TRUE)
fit = c(fit, pre$fit)
UL = c(UL, pre$fit - pre$se.fit)
UU = c(UU, pre$fit + pre$se.fit)
}
## Warning: glm.fit: algorithm did not converge
## Warning in glm.nb(formula, data = df2, link = log, init.theta = 1, epsilon
## = 1e-05): alternation limit reached
resultsElsewhere = data.frame(Year = 2011:2016, fit, UL, UU, Region = "Elsewhere")
results = rbind(resultsMS, resultsElsewhere)
ggplot(results, aes(x = Year, y = fit, color = Region)) +
geom_point() +
geom_errorbar(aes(ymin = UL, ymax = UU), width = .1) +
scale_y_continuous(limits = c(0, NA)) +
scale_x_continuous(breaks = 2011:2016) +
ylab("Casualty Rate") +
theme_minimal()
https://www.census.gov/population/socdemo/statbriefs/agebrief.html
April 2011 outbreak. https://www.cdc.gov/mmwr/preview/mmwrhtml/mm6128a3.htm
To check the model we use some Bayesian formulations.
library(brms)
options(mc.cores = parallel::detectCores())
Model
df = Torn.df %>%
filter(popD > 0) %>%
mutate(lpopD = log10(popD),
lED = log10(ED),
cas = cas - 1)
formula = cas ~ lpopD + lED + lpopD:lED
#formula = fat ~ lpopD + lED + lpopD:lED
family = negbinomial
betas = set_prior("normal(0,5)", class = "b")
model1 = brm(family = family, # likelihood
formula = formula, # predictors
data = df, # data
prior = betas, # prior
warmup = 1000, iter = 2000, chains = 2,
control = list(adapt_delta = .9))
fixef(model1)
pp_check(model1, type = "dens") + scale_x_log10()
yrep = posterior_predict(model1)
df.yrep = as.data.frame(t(yrep))
df.out = reshape2::melt(df.yrep) %>%
group_by(variable) %>%
summarize(mx = max(value),
mn = mean(value))
ggplot(df.out, aes(mx)) +
geom_histogram() +
geom_vline(xintercept = max(df$cas), color = "red") +
ylab("Posterior Frequency") +
xlab("Worst Case Per Tornado Loss (Number of Casaulties)") +
theme_minimal()
max(yrep)
Another view of the posterior predictive samples is the distribution of maximum losses. Maximum of the max compared to the data max. Looks good.
set.seed(344562)
mx = numeric()
for(i in 1:200){
yrep = posterior_predict(model1)
df.yrep = as.data.frame(t(yrep))
df.out = reshape2::melt(df.yrep) %>%
group_by(variable) %>%
summarize(mx = max(value),
mn = mean(value))
mx = c(mx, max(df.out$mx))
}
ggplot(data.frame(mx), aes(mx)) +
geom_density() +
geom_vline(xintercept = max(df$cas), color = "red") +
ylab("Posterior Frequency") +
xlab("Worst Case Per Tornado Loss (Number of Casaulties)") +
theme_minimal()
Need 3.4.0 or higher R for this.
library(UScensus2010)
library(UScensus2000cdp)
citation("UScensus2010")
data(arkansas.cdp)
df = as.data.frame(arkansas.cdp)
df = df[!duplicated(df), ]
dfAR = df %>%
filter(pop2000 > 10000) %>%
dplyr::select(name, pop2000, age.65.up) %>%
mutate(percElderly = age.65.up/pop2000 * 100) %>%
filter(percElderly > 18) %>%
arrange(desc(percElderly))
dfAR$State = "Arkansas"
data(tennessee.cdp)
df = as.data.frame(tennessee.cdp)
df = df[!duplicated(df), ]
dfTN = df %>%
filter(pop2000 > 10000) %>%
dplyr::select(name, pop2000, age.65.up) %>%
mutate(percElderly = age.65.up/pop2000 * 100) %>%
filter(percElderly > 18) %>%
arrange(desc(percElderly))
dfTN$State = "Tennessee"
data(missouri.cdp)
df = as.data.frame(missouri.cdp)
df = df[!duplicated(df), ]
dfMO = df %>%
filter(pop2000 > 10000) %>%
dplyr::select(name, pop2000, age.65.up) %>%
mutate(percElderly = age.65.up/pop2000 * 100) %>%
filter(percElderly > 18) %>%
arrange(desc(percElderly))
dfMO$State = "Missouri"
data(kentucky.cdp)
df = as.data.frame(kentucky.cdp)
df = df[!duplicated(df), ]
dfKY = df %>%
filter(pop2000 > 10000) %>%
dplyr::select(name, pop2000, age.65.up) %>%
mutate(percElderly = age.65.up/pop2000 * 100) %>%
filter(percElderly > 18) %>%
arrange(desc(percElderly))
dfKY$State = "Kentucky"
dfMidSouth = rbind(dfAR, dfTN, dfMO, dfKY)
dfMidSouth$Region = "MidSouth"
#data(illinois.cdp)
#df = as.data.frame(illinois.cdp)
#df = df[!duplicated(df), ]
#df %>%
# filter(pop2000 > 10000) %>%
# dplyr::select(name, pop2000, age.65.up) %>%
# mutate(percElderly = age.65.up/pop2000 * 100) %>%
# arrange(desc(percElderly))
data(georgia.cdp)
df = as.data.frame(georgia.cdp)
df = df[!duplicated(df), ]
dfGA = df %>%
filter(pop2000 > 10000) %>%
dplyr::select(name, pop2000, age.65.up) %>%
mutate(percElderly = age.65.up/pop2000 * 100) %>%
filter(percElderly > 18) %>%
arrange(desc(percElderly))
dfGA$State = "Georgia"
data(alabama.cdp)
df = as.data.frame(alabama.cdp)
df = df[!duplicated(df), ]
dfAL = df %>%
filter(pop2000 > 10000) %>%
dplyr::select(name, pop2000, age.65.up) %>%
mutate(percElderly = age.65.up/pop2000 * 100) %>%
filter(percElderly > 18) %>%
arrange(desc(percElderly))
dfAL$State = "Alabama"
data(mississippi.cdp)
df = as.data.frame(mississippi.cdp)
df = df[!duplicated(df), ]
dfMS = df %>%
filter(pop2000 > 10000) %>%
dplyr::select(name, pop2000, age.65.up) %>%
mutate(percElderly = age.65.up/pop2000 * 100) %>%
filter(percElderly > 18) %>%
arrange(desc(percElderly))
dfMS$State = "Mississippi"
data(louisiana.cdp)
df = as.data.frame(louisiana.cdp)
df = df[!duplicated(df), ]
dfLA = df %>%
filter(pop2000 > 10000) %>%
dplyr::select(name, pop2000, age.65.up) %>%
mutate(percElderly = age.65.up/pop2000 * 100) %>%
filter(percElderly > 18) %>%
arrange(desc(percElderly))
dfLA$State = "Louisiana"
dfSouth = rbind(dfGA, dfAL, dfMS, dfLA)
dfSouth$Region = "South"
dfAll = rbind(dfMidSouth, dfSouth)
dfAll %>%
group_by(Region) %>%
summarize(nCities = n(),
Pop = sum(pop2000),
PopElderly = sum(age.65.up),
percElderly = PopElderly/Pop * 100)