A Method For Making Tornado Casualty Maps

Fricker, Elsner, Mesev, Jagger, Widen

Estimating tornado casualties spatially.

0. Get tornado data and set domain. Subset storms by domain

# setwd("~/Dropbox/Tornadoes")
# download.file(url = "http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip",
#              destfile = "tornado.zip")
unzip("tornado.zip")
library("rgdal")

## Loading required package: sp
## rgdal: version: 1.0-4, (SVN revision 548)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 1.11.2, released 2015/02/10
##  Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/gdal
##  Loaded PROJ.4 runtime: Rel. 4.9.1, 04 March 2015, [PJ_VERSION: 491]
##  Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/proj
##  Linking to sp version: 1.1-1

TornL = readOGR(dsn = "torn", layer = "torn", 
                stringsAsFactors = FALSE)

## OGR data source with driver: ESRI Shapefile 
## Source: "torn", layer: "torn"
## with 58959 features
## It has 21 fields

library("raster")

## Warning: no function found corresponding to methods exports from 'raster'
## for: 'overlay'

xmn = -104
xmx = -82
ymn = 31
ymx = 41
res = .5
syr = 1955
eyr = 2014
r = raster(xmn = xmn, xmx = xmx,
           ymn = ymn, ymx = ymx,
           resolution = res)
sp = as(r, 'SpatialPolygons')
spT = spTransform(sp, CRS(proj4string(TornL)))

Percent of all tornadoes in the SPC dataset with touchdown locations within the domain.

df = TornL@data
pAll = dim(subset(df, yr >= syr & slat >= ymn & slat <= ymx &
             slon >= xmn & slon <= xmx))[1]/dim(subset(df, yr >= syr))[1]
pInt = dim(subset(df, yr >= syr & mag >= 3 & slat >= ymn & slat <= ymx &
             slon >= xmn & slon <= xmx))[1]/dim(subset(df, yr >= syr & mag >= 3))[1]
print(pAll); print(pInt)

## [1] 0.5204322

## [1] 0.6660702

The study domain is chosen large enough to capture the most prolific tornado area of the country. As chosen it contains the majority (52%) of all tornadoes in the United States and two-thirds (67%) of all intense (EF3+) tornadoes. A grid of equal lat-lon cells is overlaid at a spatial resolution of 0.5 degrees latitude/longitude resulting in cells north-south and 44 cells east-west for a total of 880 cells.

Create tornado paths by buffering the tracks

Add a buffer to the tracks based on the width specified in the attribute table to create a spatial polygons data frame of tornado paths. Overlay paths onto the domain extent and return a vector indicating either NA (tornado is not inside extent) or the cell number. Subset the tornadoes by the raster extent (domain). Remove duplicate paths from the data frame.

library("rgeos")

## rgeos version: 0.3-11, (SVN revision 479)
##  GEOS runtime version: 3.4.2-CAPI-1.8.2 r3921 
##  Linking to sp version: 1.1-0 
##  Polygon checking: TRUE

Width = TornL$wid * .9144
sum(Width[Width == 0])

## [1] 0

TornP = gBuffer(TornL, byid = TRUE, width = Width/2, capStyle = "FLAT")
tc = over(TornP, spT)
TornP2 = subset(TornP, !is.na(tc) & yr >= syr)

df = data.frame(SLAT = TornP2$slat, 
                SLON = TornP2$slon, 
                ELAT = TornP2$elat, 
                ELON = TornP2$elon,
                DATE = TornP2$date)
dup = duplicated(df)
sum(dup)

## [1] 354

sum(dup)/dim(TornP2@data)[1] * 100

## [1] 1.194453

TornP3 = subset(TornP2, !dup)
dim(TornP3@data)

## [1] 29283    21

Create a latex table

library("xtable")
tA = table(TornP3$fat, TornP3$mag)[-1, ]
tA = rbind(tA[1:5,], colSums(tA[6:dim(tA)[1],]))
rownames(tA)[6] = "6+"
xtable(tA, digits = 0)

## % latex table generated in R 3.2.2 by xtable 1.8-0 package
## % Fri Nov 13 11:25:10 2015
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrr}
##   \hline
##  & 0 & 1 & 2 & 3 & 4 & 5 \\ 
##   \hline
## 1 & 5 & 68 & 145 & 152 & 37 & 3 \\ 
##   2 & 1 & 15 & 34 & 64 & 34 & 1 \\ 
##   3 & 1 & 3 & 11 & 39 & 29 & 3 \\ 
##   4 & 0 & 1 & 4 & 17 & 20 & 1 \\ 
##   5 & 0 & 1 & 1 & 11 & 10 & 1 \\ 
##   6+ & 0 & 1 & 3 & 28 & 79 & 26 \\ 
##    \hline
## \end{tabular}
## \end{table}

tB = table(TornP3$inj, TornP3$mag)[-1, ]
tB = rbind(tB[1:5,], colSums(tB[6:dim(tB)[1],]))
rownames(tB)[6] = "6+"
xtable(tB, digits = 0)

## % latex table generated in R 3.2.2 by xtable 1.8-0 package
## % Fri Nov 13 11:25:10 2015
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrr}
##   \hline
##  & 0 & 1 & 2 & 3 & 4 & 5 \\ 
##   \hline
## 1 & 80 & 457 & 434 & 140 & 12 & 0 \\ 
##   2 & 32 & 252 & 277 & 104 & 15 & 0 \\ 
##   3 & 14 & 113 & 178 & 74 & 9 & 0 \\ 
##   4 & 7 & 70 & 125 & 56 & 4 & 0 \\ 
##   5 & 2 & 48 & 93 & 46 & 11 & 0 \\ 
##   6+ & 8 & 120 & 397 & 477 & 242 & 35 \\ 
##    \hline
## \end{tabular}
## \end{table}

Table 1

There are 29283 tornado paths that intersect the domain. Table 1 shows the distributon of fatalities by EF rating.

Map showing the paths of all killer tornadoes within the domain

Use ggmap. This requires the spatial polygons data frame be converted to the geographic projection of the raster.

TornP3T = spTransform(TornP3, CRS(projection(r)))
library("ggmap")

## Loading required package: ggplot2

mapdata = fortify(TornP3T[TornP3T$fat > 0, ])

## Regions defined for each Polygons

Map = get_map(location = c(-((xmx - xmn)/2 - xmx) , (ymx - ymn)/2 + ymn), 
              source = "google",
              zoom = 5,
              color = "bw",
              maptype = "terrain")

## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=36,-93&zoom=5&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false

ggmap(Map, dev = "extent") + 
  geom_polygon(aes(x = long, y = lat, group = group), 
               color = "black", fill = "black",
               data = mapdata) +
  geom_segment(aes(x = xmn, xend = xmx, y = ymn, yend = ymn), 
             color = "red") +
  geom_segment(aes(x = xmn, xend = xmx, y = ymx, yend = ymx), 
             color = "red") +
  geom_segment(aes(x = xmn, xend = xmn, y = ymn, yend = ymx), 
             color = "red") +
  geom_segment(aes(x = xmx, xend = xmx, y = ymn, yend = ymx), 
             color = "red") +
  labs(x = expression(paste("Longitude (", degree, "W)")), 
       y = expression(paste("Latitude (", degree, "N)"))) +
  theme(legend.position = "none")

Figure 1: Extent of the study domain and the paths of all killer tornado that have at least some part of their path inside the domain

1. Exploratory analysis on tornado casualties at the level of individual tornadoes. Distribution (monthly, annual), trend, etc.

library("dplyr")

## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:rgeos':
## 
##     intersect, setdiff, union
## 
## The following objects are masked from 'package:raster':
## 
##     intersect, select, union
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

df = TornP3@data
dfA = df %>%
  group_by(yr) %>%
  summarize(nD = sum(fat),
            nI = sum(inj),
            nkT = sum(fat > 0),
            niT = sum(inj > 0))
A = ggplot(dfA, aes(x = yr, y = nD)) +
    geom_line() +
    geom_smooth() +
  scale_x_continuous(breaks = seq(1955, 2015, 10)) +
  xlab("Year") + ylab("Number of Tornado Deaths")
B = ggplot(dfA, aes(x = yr, y = nkT)) +
    geom_line() +
    geom_smooth() +
    scale_x_continuous(breaks = seq(1955, 2015, 10)) +
    xlab("Year") + ylab("Number of Killer Tornadoes")
C = df %>%
  group_by(mo) %>%
  summarize(nD = sum(fat)) %>%
  mutate(MonthF = factor(month.abb[mo], levels = month.abb)) %>%
  ggplot(., aes(x = MonthF, y = nD)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(limits = c(0, 1500)) +
  xlab("") + ylab("Number of Tornado Deaths")
library("VGAM")

## Loading required package: stats4
## Loading required package: splines

dfD = data.frame(table(df$fat[df$fat > 0]))
dfD$Size = as.integer(dfD$Var1)
#dfD$Size = as.integer(rownames(dfD))
D = ggplot(dfD, aes(x = Size, y = Freq)) +
  scale_x_log10() + scale_y_log10() +
  geom_point() + 
  xlab(expression(paste("Number of Deaths"))) +
  ylab("Number of Tornadoes") 

source("multiplot.txt")
mat = matrix(c(1, 2, 3, 4), nrow = 2, byrow = TRUE)
A = A + ggtitle("A") + 
  theme(plot.title=element_text(hjust=0))
B = B + ggtitle("B") + 
  theme(plot.title=element_text(hjust=0))
C = C + ggtitle("C") + 
  theme(plot.title=element_text(hjust=0))
D = D + ggtitle("D") + 
  theme(plot.title=element_text(hjust=0))
multiplot(A, B, C, D, layout = mat)

## Loading required package: grid
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

Figure 2: Tornado deaths.

Repeat for injuries.

A = ggplot(dfA, aes(x = yr, y = nI)) +
    geom_line() +
    geom_smooth() +
  scale_x_continuous(breaks = seq(1955, 2015, 10)) +
  xlab("Year") + ylab("Number of Tornado Injuries")
B = ggplot(dfA, aes(x = yr, y = niT)) +
    geom_line() +
    geom_smooth() +
    scale_x_continuous(breaks = seq(1955, 2015, 10)) +
    xlab("Year") + ylab("Number of Tornadoes with Injuries")
C = df %>%
  group_by(mo) %>%
  summarize(nI = sum(inj)) %>%
  mutate(MonthF = factor(month.abb[mo], levels = month.abb)) %>%
  ggplot(., aes(x = MonthF, y = nI)) +
  geom_bar(stat = "identity") +
  scale_y_continuous() +
  xlab("") + ylab("Number of Tornado Injuries")
dfD = data.frame(table(df$inj[df$inj > 0]))
dfD$Size = as.integer(dfD$Var1)
D = ggplot(dfD, aes(x = Size, y = Freq)) +
  scale_x_log10() + scale_y_log10() +
  geom_point() + 
  xlab(expression(paste("Number of Injuries"))) +
  ylab("Number of Tornadoes") 

source("multiplot.txt")
mat = matrix(c(1, 2, 3, 4), nrow = 2, byrow = TRUE)
A = A + ggtitle("A") + 
  theme(plot.title=element_text(hjust=0))
B = B + ggtitle("B") + 
  theme(plot.title=element_text(hjust=0))
C = C + ggtitle("C") + 
  theme(plot.title=element_text(hjust=0))
D = D + ggtitle("D") + 
  theme(plot.title=element_text(hjust=0))
multiplot(A, B, C, D, layout = mat)

## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

Figure 3: Tornado injuries.

cf. Brooks & Doswell 2002, Ashley 2007: Some devastating tornadoes since the time of these articles including the 2011 season.

2. Overlay method & proportional allocation

Determine the number of tornado paths intersecting each cell.

ct = over(spT, TornP3, returnList = TRUE)
nT = sapply(ct, function(x) nrow(x))
nt = r
values(nt) = nT
dim(TornP3)[1]

## [1] 29283

sum(nT); mean(nT); min(nT); max(nT); sd(nT); var(nT)/mean(nT)

## [1] 35650

## [1] 40.51136

## [1] 0

## [1] 136

## [1] 19.38567

## [1] 9.276513

The average number of tornadoes per cell is 40.5, with a minimum of 0 and a maximum of 136 tornadoes. There are 203 cells that have between 35 and 45 tornado paths. The standard deviation of the tornadoes is 19.4 with a variance to mean ratio (VMR) of 9.28.

Map the number of tornadoes. First get county & state boundaries and convert to the geographic coordinates of the raster.

library("mapproj")
library("maptools")

## Checking rgeos availability: TRUE
## 
## Attaching package: 'maptools'
## 
## The following object is masked from 'package:xtable':
## 
##     label
## 
## The following object is masked from 'package:sp':
## 
##     nowrapSpatialLines

ext = as.vector(extent(r))
bndryS = map("state", fill = TRUE,
            xlim = ext[1:2],
            ylim = ext[3:4],
            plot = FALSE)
IDs = sapply(strsplit(bndryS$names, ":"), function(x) x[1])
bndrySP = map2SpatialPolygons(bndryS, IDs = IDs,
                              proj4string = CRS(projection(r)))

library("rasterVis")

## Loading required package: lattice
## Loading required package: latticeExtra
## Loading required package: RColorBrewer
## 
## Attaching package: 'latticeExtra'
## 
## The following object is masked from 'package:ggplot2':
## 
##     layer

library("wesanderson")
range(values(nt))

## [1]   0 136

rng = seq(0, 140, 20)
cr = wes_palette(7, name = "Zissou", type = "continuous")
levelplot(nt, margin = FALSE, 
          sub = "             Number of Reported Tornadoes", 
          xlab = NULL, ylab = NULL, 
          col.regions = cr, at = rng, 
          colorkey = list(space = 'bottom'),
          par.settings = list(fontsize = list(text = 15))) +
  latticeExtra::layer(sp.polygons(bndrySP, col = gray(.15), lwd = 1))

Figure 4. Number of tornadoes by grid cell.

Casualties in the SPC are given per tornado but where they occurred within the path (or surrounding) is unknown. The likelihood of a casualty obviously increases with the number of people affected so we assign to each cell under the tornado path a fraction of the casualty magnitude based on area of the path within the cell and the proportion of people in the area.

Here we overlay a raster of population density to proportionally allocate causalties geographically. This is done on a regular grid within the domain. Here we choose a grid cell resolution of .5 degree latitude and longitude.

Determine proportion of the tornado path that falls inside each cell. The rows index the tornado number and the columns index the cell number. Call these tornado segments. This takes about 5 minutes.

AreaTor = gArea(TornP3, byid = TRUE)
AreaCell = gArea(spT, byid = TRUE)
inter = gIntersects(spT, TornP3, byid = TRUE)
ids = which(inter, arr.ind = TRUE)
Areas = list()
for(i in 1:nrow(ids)){
    Areas[[i]] = gArea(gIntersection(spT[ids[i, 2], ], TornP3[ids[i, 1], ]))
    }
Areas.df = data.frame(matrix(unlist(Areas), ncol = 1, byrow = TRUE))
names(Areas.df) = "Area"
Areas.df$Cell = ids[, 2]
Areas.df$Torn = ids[, 1]

Get population density. The raster population data are imported then cropped to match the domain & interpolated to match the resolution of the tornado grid cells. Population density is per square km. Check this. Next add population density values and cell areas to the rows in the Areas.df data frame. Next add per tornado casualties and tornado area in the cell to the columns of Areas.df. Finally create a weight column as a product of cell population and area of tornado within the cell.

#download.file(url = "http://myweb.fsu.edu/jelsner/data/usadens.zip",
#              destfile = "usadens.zip")
#unzip("usadens.zip")
Pop = raster("usadens/usads00g/w001001.adf")
Pop = crop(Pop, r)
pop = resample(aggregate(Pop, 
                         fact = c(nrow(Pop)/nrow(r), ncol(Pop)/ncol(r)), 
                         fun = mean), r)

Areas.df$pop = values(pop)[ids[, 2]]
Areas.df$AreaCell = AreaCell[ids[, 2]]
Areas.df$Tfat = TornP3$fat[ids[, 1]]
Areas.df$Tinj = TornP3$inj[ids[, 1]]
Areas.df$TArea = AreaTor[ids[, 1]]
Areas.df$weight = Areas.df$pop * Areas.df$Area

For each tornado determine the total area-weighted population. Also determine the total proportion of each tornado inside the domain. Then for each tornado segment adjust the area-weighted population for proportion of tornado inside the domain and compute the proportion of casualties.

df = Areas.df %>%
  group_by(Torn) %>%
  summarize(tweight = sum(weight),
            propin = sum(Area)/TArea[1])
Areas.df$tweight = (df$tweight/df$propin)[ids[, 1]]
Areas.df$propfat = Areas.df$Tfat * Areas.df$weight/Areas.df$tweight
Areas.df$propinj = Areas.df$Tinj * Areas.df$weight/Areas.df$tweight
# number of fatalities attributed to a given area or cell
df1 = Areas.df %>%
  group_by(Cell) %>%
  summarize(propfat = sum(propfat),
            propinj = sum(propinj),
            fatrat = sum(propfat)/(pop[1] * AreaCell[1]) * 80 * 1000000/ (eyr - syr + 1),
            injrat = sum(propinj)/(pop[1] * AreaCell[1]) * 80 * 1000000/ (eyr - syr + 1))

3. Results: Maps of casualties

Create rasters. fat is the number of deaths within the cell and pfat is the probability of death at any location within the cell. inj is the number of injuries within the cell. Subset on cell number ensures a cell without a tornado gets an NA. Total number of deaths in the SPC dataset should match the total deaths across the cells.

fat = r
values(fat)[df1$Cell] = df1$propfat
sum(TornP3$fat)

## [1] 3511

sum(df1$propfat)

## [1] 3483.385

sum(TornP3$fat) - sum(df1$propfat)

## [1] 27.61516

(sum(TornP3$fat) - sum(df1$propfat))/sum(TornP3$fat) * 100

## [1] 0.7865326

fatrat = r
values(fatrat)[df1$Cell] = df1$fatrat
inj = r
values(inj)[df1$Cell] = df1$propinj
sum(TornP3$inj)

## [1] 55445

sum(df1$propinj)

## [1] 54868.08

sum(TornP3$inj) - sum(df1$propinj)

## [1] 576.9231

(sum(TornP3$inj) - sum(df1$propinj))/sum(TornP3$inj) * 100

## [1] 1.040532

Map the estimated number of deaths. The overlay is performed on the transformed spatial polygons data frame containing the tornado paths. It can take a few minutes to render because of the large number of tornado paths.

range(values(log10(fat)), na.rm = TRUE)

## [1]     -Inf 2.065035

rng = seq(0, 2.5, .5)
labs = as.character(round(10^rng, 0))
cr = brewer.pal(5, "Reds")
levelplot(log10(fat), margin = FALSE,
          col.regions = cr, at = rng,
          sub = "                        Estimated Number of Tornado Deaths (1955-2014)", 
          xlab = NULL, ylab = NULL,
          colorkey = list(space = 'bottom', labels = labs),
          par.settings = list(fontsize = list(text = 15))) +
  latticeExtra::layer(sp.polygons(bndrySP, col = gray(.15), lwd = 1)) +
  latticeExtra::layer(sp.polygons(TornP3T, fill = gray(.4), alpha = .3))

Figure 5: Estimated number of tornado deaths

Map the mortality rate per 100,000 people. Smooth first.

fatrat3 = focal(fatrat, w = matrix(1/9, nrow = 3, ncol = 3))
fatrat5 = focal(fatrat, w = matrix(1/25, nrow = 5, ncol = 5))
range(values(log10(fatrat5)), na.rm = TRUE)
rng = seq(-6, -3, .5)
cr = brewer.pal(6, "Reds")
levelplot(log10(fatrat5), margin = FALSE,
          col.regions = cr, at = rng,
          sub = "             Lifetime Tornado Mortality Rate Per Million People (log10 scale) ", 
          xlab = NULL, ylab = NULL,
          colorkey = list(space = 'bottom'),
          par.settings = list(fontsize = list(text = 15))) +
  latticeExtra::layer(sp.polygons(bndrySP, col = gray(.15), lwd = 1))
#  latticeExtra::layer(sp.polygons(TornP3T, fill = gray(.4), alpha = .3))

Map the estimated number of injuries.

range(values(log10(inj)), na.rm = TRUE)

## [1]     -Inf 3.176014

rng = seq(0, 3.5, .5)
labs = as.character(round(10^rng, 0))
cr = brewer.pal(7, "Purples")
levelplot(log10(inj), margin = FALSE,
          col.regions = cr, at = rng,
          sub = "                        Estimated Number of Tornado Injuries (1955-2014)", 
          xlab = NULL, ylab = NULL,
          colorkey = list(space = 'bottom', labels = labs),
          par.settings = list(fontsize = list(text = 15))) +
  latticeExtra::layer(sp.polygons(bndrySP, col = gray(.15), lwd = 1)) +
  latticeExtra::layer(sp.polygons(TornP3T, fill = gray(.4), alpha = .3))

Figure 6: Estimated number of tornado injuries

4. County level example from Tennessee

Get administrative boundaries. https://www.census.gov/geo/maps-data/data/cbf/cbf_counties.html. The 20m = 1:20,000,000 low resolution boundaries. Here we use the 5m = 1:5,000,000.

#download.file("http://www2.census.gov/geo/tiger/GENZ2013/cb_2013_us_county_5m.zip",
#              "cb_2013_us_county_5m.zip", mode = "wb")
#unzip("cb_2013_us_county_5m.zip")
library("rgdal")

## rgdal: version: 1.0-4, (SVN revision 548)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 1.11.2, released 2015/02/10
##  Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/gdal
##  Loaded PROJ.4 runtime: Rel. 4.9.1, 04 March 2015, [PJ_VERSION: 491]
##  Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/proj
##  Linking to sp version: 1.1-1

US.sp = readOGR(dsn = ".", layer = "cb_2013_us_county_5m", 
                                    stringsAsFactors = FALSE)

## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "cb_2013_us_county_5m"
## with 3234 features
## It has 9 fields

Extract Tennessee using the state Federal Information Processing Standard (FIPS).

FP = 47
AB = "TN"
ST.sp = US.sp[US.sp$STATEFP == FP, ]
nrow(ST.sp)

## [1] 95

county = paste(ST.sp$STATEFP, ST.sp$COUNTYFP, sep = "")
countiesun.sp = geometry(spChFIDs(ST.sp, county)) 
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))
rgeos::gArea(counties.sp) / 10^6

## [1] 108310.3

Overlay paths onto the domain extent and return a vector indicating either NA (tornado is not inside extent) or the cell number. Subset the tornadoes by the raster extent (domain). Remove duplicate paths from the data frame.

tc = over(TornP, counties.sp)
TornP4 = subset(TornP, !is.na(tc) & yr >= syr)
df = data.frame(SLAT = TornP4$slat, 
                SLON = TornP4$slon, 
                ELAT = TornP4$elat, 
                ELON = TornP4$elon,
                DATE = TornP4$date)
dup = duplicated(df)
sum(dup)

## [1] 4

sum(dup)/dim(TornP4@data)[1] * 100

## [1] 0.360036

TornP5 = subset(TornP4, !dup)
dim(TornP5@data)

## [1] 1107   21

Create a latex table

tA = table(TornP5$fat, TornP5$mag)[-1, ]
tA = rbind(tA[1:5,], colSums(tA[6:dim(tA)[1],]))
rownames(tA)[6] = "6+"
xtable(tA, digits = 0)

## % latex table generated in R 3.2.2 by xtable 1.8-0 package
## % Fri Nov 13 11:30:15 2015
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrr}
##   \hline
##  & 0 & 1 & 2 & 3 & 4 & 5 \\ 
##   \hline
## 1 & 0 & 14 & 12 & 9 & 3 & 0 \\ 
##   2 & 0 & 4 & 5 & 5 & 1 & 0 \\ 
##   3 & 0 & 0 & 1 & 7 & 6 & 1 \\ 
##   4 & 0 & 0 & 1 & 3 & 2 & 0 \\ 
##   5 & 0 & 0 & 0 & 0 & 1 & 0 \\ 
##   6+ & 0 & 0 & 0 & 7 & 7 & 2 \\ 
##    \hline
## \end{tabular}
## \end{table}

tB = table(TornP5$inj, TornP5$mag)[-1, ]
tB = rbind(tB[1:5,], colSums(tB[6:dim(tB)[1],]))
rownames(tB)[6] = "6+"
xtable(tB, digits = 0)

## % latex table generated in R 3.2.2 by xtable 1.8-0 package
## % Fri Nov 13 11:30:15 2015
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrr}
##   \hline
##  & 0 & 1 & 2 & 3 & 4 & 5 \\ 
##   \hline
## 1 & 7 & 36 & 22 & 7 & 0 & 0 \\ 
##   2 & 3 & 16 & 14 & 5 & 0 & 0 \\ 
##   3 & 2 & 13 & 15 & 7 & 0 & 0 \\ 
##   4 & 2 & 4 & 6 & 3 & 0 & 0 \\ 
##   5 & 0 & 0 & 4 & 5 & 1 & 0 \\ 
##   6+ & 1 & 10 & 33 & 44 & 19 & 3 \\ 
##    \hline
## \end{tabular}
## \end{table}

Table 2

There are 1107 tornado paths that intersect the state. Table 2 shows the distributon of fatalities by EF rating.

Use ggmap. This requires the spatial polygons data frame be converted to the geographic projection of the raster.

TornP5T = spTransform(TornP5, CRS(projection(r)))
mapdata = fortify(TornP5T[TornP5T$fat > 0, ])

## Regions defined for each Polygons

Map = get_map(location = "Nashville", 
              source = "google",
              zoom = 6,
              color = "bw",
              maptype = "terrain")

## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Nashville&zoom=6&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Nashville&sensor=false

ggmap(Map, dev = "extent") + 
  geom_polygon(aes(x = long, y = lat, group = group), 
               color = "black", fill = "black",
               data = mapdata) +
  labs(x = expression(paste("Longitude (", degree, "W)")), 
       y = expression(paste("Latitude (", degree, "N)"))) +
  theme(legend.position = "none")

Determine the number of tornado paths intersecting each county.

ct = over(counties.sp, TornP5, returnList = TRUE)
nT = sapply(ct, function(x) nrow(x))
ID = sapply(slot(counties.sp, "polygons"), function(x) slot(x, "ID"))
df = data.frame(nT)
rownames(df) = ID
spdf = SpatialPolygonsDataFrame(counties.sp, df)
sum(nT); mean(nT); min(nT); max(nT); sd(nT); var(nT)/mean(nT)

## [1] 1338

## [1] 14.08421

## [1] 1

## [1] 54

## [1] 9.658711

## [1] 6.62378

The average number of tornadoes per cell is 14.1, with a minimum of 1 and a maximum of 54 tornadoes. There are 203 cells that have between 35 and 45 tornado paths. The standard deviation of the tornadoes is 9.7 with a variance to mean ratio (VMR) of 6.62.

Map number of tornadoes

lcc = "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-87 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
spdf = spTransform(spdf, CRS(lcc))
range(nT)

## [1]  1 54

rng = seq(0, 60, 10)
cr = wes_palette(6, name = "Zissou", type = "continuous")
spplot(spdf, "nT", col = "gray", at = rng, 
       col.regions = cr,
       colorkey = list(space = "bottom", labels = paste(rng)),
       par.settings = list(axis.line = list(col = NA)),
       sub = "Number of Tornadoes (1955-2014)")

Figure 7. Number of tornadoes by county.

Determine proportion of the tornado path in square meters that falls inside each county. The rows index the tornado number and the columns index the county. Call these tornado segments.

AreaTor = gArea(TornP5, byid = TRUE)
AreaCounty = gArea(counties.sp, byid = TRUE)
inter = gIntersects(counties.sp, TornP5, byid = TRUE)
ids = which(inter, arr.ind = TRUE)
Areas = list()
for(i in 1:nrow(ids)){
    Areas[[i]] = gArea(gIntersection(counties.sp[ids[i, 2], ], TornP5[ids[i, 1], ]))
    }
Areas.df = data.frame(matrix(unlist(Areas), ncol = 1, byrow = TRUE))
names(Areas.df) = "Area"
Areas.df$County = ids[, 2]
Areas.df$Torn = ids[, 1]

Determine the mean population density per county.

Pop = raster("usadens/usads00g/w001001.adf")
Pop = crop(Pop, countiesun.sp)
PopT = projectRaster(Pop, crs = crs(counties.sp))
pop = extract(PopT, counties.sp, fun = mean, na.rm = TRUE)[, 1]

Areas.df$pop = pop[ids[, 2]]
Areas.df$AreaCounty = AreaCounty[ids[, 2]]
Areas.df$Tfat = TornP5$fat[ids[, 1]]
Areas.df$Tinj = TornP5$inj[ids[, 1]]
Areas.df$TArea = AreaTor[ids[, 1]]
Areas.df$weight = Areas.df$pop * Areas.df$Area

df = Areas.df %>%
  group_by(Torn) %>%
  summarize(tweight = sum(weight),
            propin = sum(Area)/TArea[1])
Areas.df$tweight = (df$tweight/df$propin)[ids[, 1]]
Areas.df$propfat = Areas.df$Tfat * Areas.df$weight/Areas.df$tweight
Areas.df$propinj = Areas.df$Tinj * Areas.df$weight/Areas.df$tweight
# number of fatalities attributed to a given area or county
df1 = Areas.df %>%
  group_by(County) %>%
  summarize(propfat = sum(propfat),
            propinj = sum(propinj),
            propfatratio = sum(propfat)/(pop[1] * AreaCounty[1]),
            propinjratio = sum(propinj)/(pop[1] * AreaCounty[1]))
sum(TornP5$inj)

## [1] 5104

sum(df1$propinj)

## [1] 3814.537

Map estimated injuries

range(df1$propinj)

## [1]   0.0000 418.4107

range(df1$propfat)

## [1]  0.00000 19.77719

spdf$propfat = df1$propfat
spdf$propinj = df1$propinj
rng = seq(0, 450, 50)
rng = seq(0, 20, 4)
cr = brewer.pal(5, "Reds")
A = spplot(spdf, "propfat", col = "gray", at = rng, 
       col.regions = cr,
       colorkey = list(space = "bottom", labels = paste(rng)),
       par.settings = list(axis.line = list(col = NA)),
       sub = "Estimated Number of Tornado Deaths (1955-2014)")

Compare with county-level casualty data from NCDC Storm Data. http://www.ncdc.noaa.gov/stormevents/ Search Tennessee > All Counties > Sort by Deaths/Inj Clean data in google sheets with add-on to remove cells. Duplicate rows with multiple county names. Read the cleaned data then sum the number of casualties by county. List them out and create a

Casualties = read.csv("http://myweb.fsu.edu/jelsner/data/storm_data_search_results.csv", header = TRUE)
AC = Casualties %>%
  group_by(CZ_NAME_STR) %>%
  summarize(nD = sum(DEATHS_DIRECT),
            nI = sum(INJURIES_DIRECT))

Match county names.

NMc = as.character(AC$CZ_NAME_STR)
NMc = unlist(strsplit(NMc, split = " CO."))
AC$NAME = NMc
#AC = AC[order(NMc), ]
nm = data.frame(NAME = toupper(ST.sp$NAME[order(ST.sp$NAME)]), stringsAsFactors=FALSE)
AC2 = left_join(nm, AC[, -1])

## Joining by: "NAME"

AC2[is.na(AC2)] = 0

Get the list of polygons IDs. Create a data frame of the IDs and the county names with rows alphabetical by county name.

IDs = as.integer(sapply(slot(spdf, "polygons"), function(x) slot(x, "ID")))
Names = toupper(ST.sp$NAME)
IDs = IDs[order(Names)]

df = data.frame(ID = ST.sp$GEOID[order(Names)], 
           Name = Names[order(Names)],
           AC2,
           spdf@data[order(Names), ])
cor(df$nD, df$propfat)

## [1] 0.8737883

cor(df$nI, df$propinj)

## [1] 0.9387231

sum(abs(df$nD - df$propfat))/length(df$nD)

## [1] 1.134387

sum(abs(df$nI - df$propinj))/length(df$nI)

## [1] 12.17411

Attach the observed casualties to the spatial polygons data frame and make a map.

IDs = as.integer(sapply(slot(spdf, "polygons"), function(x) slot(x, "ID")))
Match = match(IDs, rownames(df))
spdf$nD = df$nD[Match]
spdf$nI = df$nI[Match]

range(spdf$nD)

## [1]  0 20

rng = seq(0, 20, 4)
spdf$nD[which.max(spdf$nD)] = spdf$nD[which.max(spdf$nD)] - .001
cr = brewer.pal(5, "Reds")
B = spplot(spdf, "nD", col = "gray", at = rng, 
       col.regions = cr,
       colorkey = list(space = "bottom", labels = paste(rng)),
       par.settings = list(axis.line = list(col = NA)),
       sub = "Actual Number of Tornado Deaths (1955-2014)")

A = update(A, main = textGrob("A", x = unit(.05, "npc"), gp=gpar(fontsize=16)))
B = update(B, main = textGrob("B", x = unit(.05, "npc"), gp=gpar(fontsize=16)))
library("gridExtra")
grid.arrange(A, B, ncol = 1)

Figure 8: Estimated and actual number of tornado deaths.

Make line graphs.

A = ggplot(df, aes(x = nD, y = propfat)) +
  geom_point() +
  xlab("Actual Number of Deaths") +
  ylab("Estimated Number of Deaths") +
  geom_abline()
B = ggplot(df, aes(x = nI, y = propinj)) +
  geom_point() +
  xlab("Actual Number of Injuries") +
  ylab("Estimated Number of Injuries") +
  geom_abline()
mat = matrix(c(1, 2), nrow = 1, byrow = TRUE)
A = A + ggtitle("A") + 
  theme(plot.title=element_text(hjust=0))
B = B + ggtitle("B") + 
  theme(plot.title=element_text(hjust=0))
multiplot(A, B, layout = mat)

Figure 9: Actual vs estimated number of tornado casualties.

Leaflet maps. https://rstudio.github.io/leaflet/

Choropleth map of estimated injuries in Tennessee.

#devtools::install_github("rstudio/leaflet")
library("leaflet")

pal = colorNumeric(palette = cr, domain = range(spdf$propinj),
  na.color = "transparent")

spdfT = spTransform(spdf, CRS(projection(r)))

pal <- colorNumeric(
  palette = "YlGnBu",
  domain = spdfT$propinj
)

leaflet(spdfT) %>%
  addPolygons(stroke = FALSE, fillOpacity = 0.5, smoothFactor = 0.8,
    color = ~pal(spdfT$propinj)
    ) %>%
  addTiles() %>%
  addLegend(pal = pal, values = ~spdfT$propinj, opacity = 1,
    title = "Est. Injuries (1955-2014)")

Figure 10: Leaflet choropleth map

Raster map of estimated deaths across the domain.

fat[is.na(fat)] = 0
df = TornP3T@data
pal = colorNumeric(palette = cr, domain = range(values(fat)),
  na.color = "transparent")
leaflet(data = df[df$fat > 0, ]) %>% 
  addTiles() %>%
#  addProviderTiles("Acetate.terrain") %>%
  addRasterImage(fat, colors = pal, opacity = .7) %>%
  addLegend(pal = pal, values = values(fat),
    title = "Est. Fatalities (1955-2014)") %>%
  addPolygons(data = TornP3T[TornP3T$fat > 0, ], color = "black", fill = "black") %>%
  addMarkers(~slon, ~slat, popup = ~as.character(fat), clusterOptions = markerClusterOptions())

## Warning in colors(.): Some values were outside the color scale and will be
## treated as NA

Figure 11: Leaflet raster map

Sensitivity of the results to changes in domain and resolution.