Examining U.S. Tornado Vulnerability Using the National Weather Service Damage Assessment Toolkit

For submission to BAMS.

Preliminaries.

library(rgdal); library(ggmap); library(ggplot2);  
## Loading required package: sp
## rgdal: version: 0.9-3, (SVN revision 530)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 1.11.2, released 2015/02/10
##  Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/gdal
##  Loaded PROJ.4 runtime: Rel. 4.9.1, 04 March 2015, [PJ_VERSION: 491]
##  Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/proj
##  Linking to sp version: 1.1-0
## Loading required package: ggplot2
library(rgeos); library(raster); library(sp); library(dplyr); 
## rgeos version: 0.3-8, (SVN revision 460)
##  GEOS runtime version: 3.4.2-CAPI-1.8.2 r3921 
##  Polygon checking: TRUE
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:raster':
## 
##     intersect, select, union
## The following objects are masked from 'package:rgeos':
## 
##     intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(RColorBrewer); library(IDPmisc); library(xtable); 
## Loading required package: grid
## Loading required package: lattice
## 
## Attaching package: 'IDPmisc'
## The following object is masked from 'package:raster':
## 
##     zoom
library(maptools); library(scales)
## Checking rgeos availability: TRUE
## 
## Attaching package: 'maptools'
## The following object is masked from 'package:xtable':
## 
##     label
## The following object is masked from 'package:sp':
## 
##     nowrapSpatialLines
setwd("~/Dropbox/Tornadoes/Holly/DATexamples")

Download data from the NWS Damage Assessment Toolkit using the Damage Survey Viewer at https://apps.dat.noaa.gov/StormDamage/DamageViewer/. Use the Extract Toolkit icon to obtain a shapefile of polygons by drawing a polygon around the desired data. After double-clicking the last vertex, a link to the file is generated. Download the data using the link and read in the tornado path shapefile (unzipped from “extractDamageFinal.zip”). Then obtain the coordinate ranges of the data extent and examine the EF Scale ratings.

#unzip("extractDamageFinal.zip")
Torn = readOGR(dsn = "extractDamagePolys.shp", 
               layer = "extractDamagePolys")
## OGR data source with driver: ESRI Shapefile 
## Source: "extractDamagePolys.shp", layer: "extractDamagePolys"
## with 2347 features
## It has 13 fields
plot(Torn, col = rainbow(nrow(Torn)))

coords = as.data.frame(coordinates(Torn))
colnames(coords) = c("Lon", "Lat")
Torn$Lat = coords$Lat
Torn$Lon = coords$Lon

lat = range(Torn$Lat)
lon = range(Torn$Lon)

table(Torn$efscale)
## 
##       EF0       EF1       EF2       EF3      EF3+       EF4       EF5 
##       768       886       284       108         4        46         8 
##       N/A TSTM/Wind   UNKNOWN 
##        16       193        34

There are 34 swaths with unknown EF Scale rating, 4 with EF3+ rating, 193 rated as thunderstorm/wind events, and 16 rated N/A. These 247 cases are removed from further consideration.
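
As a sanity check (a one-line sketch; output omitted), the excluded categories can be tallied directly:

#Count swaths in the categories removed from the analysis
sum(table(Torn$efscale)[c("UNKNOWN", "EF3+", "TSTM/Wind", "N/A")])  #247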

Retain only swaths with ratings on the EF Scale. Recode injuries and fatalities values of -99 (or any negative value) to NA, and recode non-positive length and width values to NA. Then select specific columns to remove unwanted fields.

Torn = subset(Torn, efscale != "TSTM/Wind" & 
                 efscale != "UNKNOWN" &
                 efscale != "N/A" &
                 efscale != "EF3+")
              
Torn@data = Torn@data %>%
  mutate(injuries = replace(injuries, injuries == -99, NA), 
         fatalities = replace(fatalities, fatalities < 0, NA),
         length = replace(length, length <= 0, NA), 
         width = replace(width, width <= 0, NA))

Torn@data = Torn@data %>%
  dplyr::select(efscale, comments, stormdate:fatalities, shape_STAr:Lon)

Compute the center of the data extent and retrieve a base map centered on that location.

loc = data.frame(lon = mean(lon), lat = mean(lat))
loc = unlist(loc)
Map = get_map(location=loc, source="google", 
              maptype="terrain", zoom = 5, color = "bw")
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=37.2473,-93.122327&zoom=5&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false

Fortify the tornado spdf to obtain a data frame that is easy to plot with ggplot2. Then plot the base map and tornado paths.

mapdata = fortify(Torn)
## Regions defined for each Polygons
Torn$id = rownames(Torn@data)
mapdata = plyr::join(mapdata, Torn@data, by="id")

source('ScaleBarNorth.R')
ggmap(Map) + 
  geom_polygon(aes(x = long, y = lat, group = group, color = efscale, fill = efscale), data = mapdata, alpha = .8) +
  theme_bw() +
  theme(axis.text.x = element_blank(),
          axis.text.y = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.ticks.x = element_blank(),
          axis.ticks.y = element_blank(),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          strip.background = element_blank(),
          panel.border = element_rect(colour = "white"),
          legend.key.width = unit(.25, "in"),
          legend.margin = unit(0, "in"),
          legend.title = element_blank()) + 
  scale_fill_discrete() +
  scaleBar(lon = -95, lat = 27, 
              distanceLon = 200, distanceLat = 30,
              distanceLegend = 50, dist.unit = "km",
              arrow.length = 100, arrow.distance = 100, 
              arrow.North.size = 10)

Figure 2: Map of tornado swaths and paths available in the DAT from 2008–2016.

Project tornado polygons using the Lambert Conformal Conic projection defined with the North American Datum 1983.

proj4 = "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
TornP = spTransform(Torn, CRS(proj4))

Make an identifier for the polygons (swaths) belonging to each tornado. Identification is based on the uniqueness of the five-element vector of injuries, fatalities, stormdate, length, and width.

TornP$TornId = as.integer(plyr::id(TornP@data[c("injuries", 
            "fatalities", "stormdate", "length", "width")],
            drop = TRUE))
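
As a quick check on the identifier (a one-line sketch; output omitted), count the number of unique tornado events it implies:

length(unique(TornP$TornId))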

Define the maximum EF rating per tornado. Each EF rating stored as a factor (e.g., EF0) is split as a character string so that its numeric part can be used to assign a maximum EF rating to each tornado event.

table(TornP$efscale)
## 
##       EF0       EF1       EF2       EF3      EF3+       EF4       EF5 
##       768       886       284       108         0        46         8 
##       N/A TSTM/Wind   UNKNOWN 
##         0         0         0
TornP$efscale = as.numeric(lapply(strsplit(as.character(TornP$efscale),
                "EF"), function(x) x[2]))

df = TornP@data
df = df %>%
  group_by(TornId) %>%
  mutate(MaxEF = max(efscale))

TornP$MaxEF = df$MaxEF

Of the 2,100 tornado swaths, 768 are rated EF0, 886 EF1, 284 EF2, 108 EF3, 46 EF4, and 8 EF5. There are proportionally fewer EF0 swaths than EF0 tornadoes in the SPC dataset, indicating a preference for giving more damaging tornadoes a more comprehensive survey.

Calculate swath (by EF category) and path (total) areas in m^2. Use gArea to calculate all swath areas, then group by the tornado identifier to determine the total path area of each event as the maximum swath area. Remove duplicate path areas. Finally, subset the tornado spdf by maximum swath area, using the ids from the data frame (df) above, to keep only the whole paths.

TornP$SwathArea = gArea(TornP, byid=TRUE)

df = TornP@data %>%
  group_by(TornId) %>%
  filter(SwathArea == max(SwathArea)) %>%
  mutate(PathArea = SwathArea)

dup = duplicated(df$PathArea)
sum(dup)
## [1] 1
df = df[!dup, ]

TornPs = TornP[TornP$id %in% df$id, ]

Add the path area as a new column in the tornado dataset (TornPs) and graph the distribution of path areas.

TornPs$PathArea = df$PathArea

as.data.frame(TornPs) %>%
  ggplot(., aes(PathArea/1000000)) +
  geom_histogram(binwidth = .5, color = "white") +
  scale_x_continuous(breaks = c(1, 10, 100),
                     trans = "log1p", expand = c(0, 0),
                     name = expression("Path Area" ~ (km^2))) + 
  ylab("Number of Tornadoes")

Figure 3: Distribution of tornado path areas (km^2).

Download Census Tract data from the NHGIS at https://data2.nhgis.org/main. Apply filters to obtain the desired information. For this study, Census Tracts are chosen as the geographic level and 2008-2012 5-year estimates are chosen for the socioeconomic factors. Topic filters include Total Population, Households, Age, Language, Educational Attainment, Labor Force and Employment Status, and Poverty. Specific table names include Total Population, Housing Units, Sex by Age, Age by Language Spoken at Home by Ability to Speak English for the Population 5 Years and Over, Educational Attainment for the Population 25 Years and Over, Employment Status for the Population 16 Years and Over, and Poverty Status in the Past 12 Months by Household Type by Age of Householder.

Read in the Census data: tables of 2008-2012 ACS Census Tract (Block Groups and Larger) estimates for the contiguous US, and a shapefile of Census Tracts from the 2010 Decennial Census. Subset to eliminate unnecessary columns.

#unzip("nhgis0004_csv.zip")
#unzip("nhgis0005_shape.zip")
#unzip("nhgis0005_shapefile_tl2010_us_tract_2010.zip")

Conus.df = read.csv("nhgis0004_ds191_20125_2012_tract.csv", header = TRUE, sep = ",")

Conus = readOGR(dsn = "US_tract_2010.shp", 
               layer = "US_tract_2010")
## OGR data source with driver: ESRI Shapefile 
## Source: "US_tract_2010.shp", layer: "US_tract_2010"
## with 73669 features
## It has 15 fields
ConusP = spTransform(Conus, CRS(proj4))

ConusP@data = ConusP@data %>%
  left_join(Conus.df, by = "GISJOIN") %>%
  dplyr::select(STATEFP10:YEAR, STATE:COUNTYA, TRACTA, NAME_E:QZ4M025)
## Warning in left_join_impl(x, y, by$x, by$y): joining factors with different
## levels, coercing to character vector

Intersect the Census data with the tornado polygons, using gIntersects to identify which polygons intersect and gIntersection to compute the actual geometric intersections. Obtain the total areas (from the original datasets) and the intersection areas in square meters. Then use these areas to perform area weighting of the Census data.

interids = gIntersects(ConusP, TornPs, byid = T) 
ids = which(interids, arr.ind = T)
areasT = gArea(TornPs, byid = T)
areasC = gArea(ConusP, byid = T)

Areas = list()
for(i in 1:nrow(ids)){
  Areas[[i]] = gArea(gIntersection(ConusP[ids[i, 2],], TornPs[ids[i, 1],]))
}

Areas.df = data.frame(matrix(unlist(Areas), ncol = 1, byrow = T))
names(Areas.df) = "InterArea"
Areas.df$Tractid = ids[, 2]  
Areas.df$Tornid = ids[, 1]

Areas.df$AreaTract = areasC[ids[, 2]]
Areas.df$AreaTorn = areasT[ids[, 1]]
Areas.df$WeightArea = Areas.df$InterArea/Areas.df$AreaTract
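
To make the weighting concrete (hypothetical numbers, not taken from the data): a tract of 10 km^2 with 5,000 residents that overlaps a tornado path over 2 km^2 gets a weight of 0.2 and contributes 1,000 people to the path total.

#Hypothetical illustration of the area weighting (made-up values)
tractArea = 10e6                #tract area in m^2
interArea = 2e6                 #intersection area in m^2
tractPop = 5000                 #tract population estimate
weight = interArea/tractArea    #0.2
weight*tractPop                 #1000 people attributed to the path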

Join attributes of tornadoes and Census data using row.names since the intersection only kept the row.names and geometries.

TornPs$Tornid = as.integer(row.names(df))
ConusP$Tractid = as.integer(row.names(ConusP@data))

Areas.df = Areas.df %>%
  left_join(TornPs@data, by = "Tornid") %>%
  left_join(ConusP@data, by = "Tractid")

Create the desired columns for analysis, including total casualties (sum of fatalities and injuries), elderly (sum of males and females ages 65+), non-English speakers (sum of individuals ages 5+ who speak English not well or not at all), and educational attainment under the high school level (subtract the higher levels of education from the total).

Areas.df = Areas.df %>%
  mutate(Cas = fatalities + injuries,
         Eld = QSEE020 + QSEE021 + QSEE022 +
               QSEE023 + QSEE024 + QSEE025 +
               QSEE044 + QSEE045 + QSEE046 + 
               QSEE047 + QSEE048 + QSEE049, 
                  #elderly males and females ages 65+
         NES = QUUE007 + QUUE008 + QUUE012 + QUUE013 + 
               QUUE017 + QUUE018 + QUUE022 + QUUE023 +
               QUUE029 + QUUE030 + QUUE034 + QUUE035 + 
               QUUE039 + QUUE040 + QUUE044 + QUUE045 +
               QUUE051 + QUUE052 + QUUE056 + QUUE057 +
               QUUE061 + QUUE062 + QUUE066 + QUUE067, 
                  #NES 5+ speak English not at all or not well
         EdUndrHS = QUSE001 - QUSE017 - QUSE018 - QUSE019 -
                    QUSE020 - QUSE021 - QUSE022 - QUSE023 - 
                    QUSE024 - QUSE025) 
                  #Education under HS level

Create a data frame of descriptive statistics of tornado characteristics and socioeconomic estimates. Omit missing values (NAs) in the data.

Inc.df = Areas.df %>%
    group_by(TornId) %>%
 #  NaRV.omit() %>%
    summarize(avgEF = as.integer(mean(efscale, na.rm = TRUE)), 
              MaxEF = max(MaxEF, na.rm = TRUE), 
              totPA = mean(PathArea, na.rm = TRUE), 
              totFatal = mean(abs(fatalities), na.rm = TRUE), 
              totInj = mean(injuries, na.rm = TRUE), 
              totCas = mean(Cas, na.rm = TRUE), 
              interArea = sum(InterArea, na.rm = TRUE), 
              totPop = sum(QSPE001*WeightArea, na.rm = TRUE), 
              totHU = sum(QX6E001*WeightArea, na.rm = TRUE), 
              totPov = sum(QUYE001*WeightArea, na.rm = TRUE), 
              totEld = sum(Eld*WeightArea, na.rm = TRUE), 
                  #elderly males and females ages 65+
              totNES = sum(NES*WeightArea, na.rm = TRUE), 
                  #NES 5+ speak English not at all or not well
              totUEmp = sum(QXSE005*WeightArea, na.rm = TRUE), 
              totEdUndrHS = sum(EdUndrHS*WeightArea, na.rm = TRUE),
              totMH = sum(QYYE010*WeightArea, na.rm = TRUE)) %>%
    mutate(totPA = totPA/1000000,
           rateCas = totCas/totPop,
           PopDen = totPop/totPA, 
           HUDen = totHU/totPA,
           percPov = totPov/totPop*100, 
           percEld = totEld/totPop*100, 
           percNES = totNES/totPop*100, 
           percUEmp = totUEmp/totPop*100, 
           percMH = totMH/totHU*100,
           percEdUndrHS = totEdUndrHS/totPop*100) 

Subset the dataset to obtain only the tornado events with at least one casualty.

Mag.df = subset(Inc.df, totCas > 0)
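
The correlation tests below report df = 114, which implies 116 casualty-causing events; this can be confirmed directly (a one-line check, output omitted):

nrow(Mag.df)  #expect 116, consistent with df = 114 in the correlation tests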

Subset the datasets to exclude the Tuscaloosa tornado outlier.

MagS.df = subset(Mag.df, totCas < 1000)
IncS.df = subset(Inc.df, totCas < 1000)

With the outlier removed, the correlations are slightly lower but remain close to the initial results obtained with all tornado events, and the regression models change very little.
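
For example, the key correlation can be re-run on the outlier-excluded data (a sketch of the comparison described above; output omitted):

#Max EF and casualties without the Tuscaloosa outlier
cor.test(MagS.df$MaxEF, log(MagS.df$totCas))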

Exploratory/Descriptive Figures and Correlations

Bar charts of tornado counts by maximum EF rating, for all tornadoes (Inc.df) and for those causing at least one casualty (Mag.df).

ggplot(Inc.df, aes(x=factor(MaxEF))) + 
  geom_bar() + 
  ylab("Tornadoes") + 
  xlab("Maximum EF Scale Rating") +
  theme(legend.position="none")

ggplot(Mag.df, aes(x=factor(MaxEF))) + 
  geom_bar() + 
  ylab("Tornadoes") + 
  xlab("Maximum EF Scale Rating")  +
  theme(legend.position="none")

Distribution of casualties.

ggplot(Mag.df, aes(totCas)) +
  geom_histogram(binwidth = .25, color = "white") +
  ylab("Number of Tornadoes") +
  scale_x_log10("Number of Casualties", 
                breaks = c(1, 10, 100, 1000))

Figure 6: Distribution of total casualties per tornado.

Distribution (box-and-whisker plot) of total casualties by maximum EF rating for the DAT and SPC records.

#DAT
ggplot(Inc.df, aes(x = as.character(MaxEF), y = totCas)) +
  geom_boxplot() +
  xlab("Maximum EF Rating") +
  scale_y_log10("Number of Casualties", 
       breaks = c(1, 10, 100, 1000),
       labels = c("1", "10", "100", "1000"))
## Warning: Removed 910 rows containing non-finite values (stat_boxplot).

Figure 4: Box and whisker plot illustrating positive relationship between maximum EF Scale rating and casualties.

Examine the relationships between casualties and measures of physical exposure and social vulnerability.

#Max EF and casualties
cor.test(Mag.df$MaxEF, log(Mag.df$totCas))
## 
##  Pearson's product-moment correlation
## 
## data:  Mag.df$MaxEF and log(Mag.df$totCas)
## t = 6.6406, df = 114, p-value = 1.11e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3826624 0.6480508
## sample estimates:
##       cor 
## 0.5281334
#Path Area and casualties
ggplot(Mag.df, aes(x = totPA, y = totCas)) +
  geom_point() +
  scale_x_log10("Total Path Area" ~ (m^{2})) + 
  scale_y_log10("Number of Casualties")

cor.test(log(Mag.df$totPA), log(Mag.df$totCas))
## 
##  Pearson's product-moment correlation
## 
## data:  log(Mag.df$totPA) and log(Mag.df$totCas)
## t = 5.2553, df = 114, p-value = 6.965e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2819995 0.5774368
## sample estimates:
##       cor 
## 0.4416115
#Pop and casualties
ggplot(Mag.df, aes(x = totPop, y = totCas)) +
  geom_point() +
  scale_x_log10("Estimated Number of People Exposed") +
  scale_y_log10("Number of Casualties",
                 breaks = c(1, 10, 100, 1000))

cor.test(log(Mag.df$totPop), log(Mag.df$totCas)) 
## 
##  Pearson's product-moment correlation
## 
## data:  log(Mag.df$totPop) and log(Mag.df$totCas)
## t = 6.4341, df = 114, p-value = 3.029e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3684955 0.6383814
## sample estimates:
##       cor 
## 0.5161366
cor.test(log(Mag.df$PopDen), log(Mag.df$totCas))
## 
##  Pearson's product-moment correlation
## 
## data:  log(Mag.df$PopDen) and log(Mag.df$totCas)
## t = 2.9805, df = 114, p-value = 0.003519
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.09101076 0.43009978
## sample estimates:
##       cor 
## 0.2688661
#Housing units and casualties
ggplot(Mag.df, aes(x = totHU, y = totCas)) +
  geom_point() +
  scale_x_log10("Estimated Number of Housing Units Exposed") +
  scale_y_log10("Number of Casualties")

cor.test(log(Mag.df$totHU), log(Mag.df$totCas))
## 
##  Pearson's product-moment correlation
## 
## data:  log(Mag.df$totHU) and log(Mag.df$totCas)
## t = 6.3452, df = 114, p-value = 4.646e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3623069 0.6341304
## sample estimates:
##       cor 
## 0.5108777
cor.test(log(Mag.df$HUDen), log(Mag.df$totCas))
## 
##  Pearson's product-moment correlation
## 
## data:  log(Mag.df$HUDen) and log(Mag.df$totCas)
## t = 2.74, df = 114, p-value = 0.007134
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.06939436 0.41220269
## sample estimates:
##       cor 
## 0.2485662
#Mobile homes and casualties
ggplot(Mag.df, aes(x = totMH, y = totCas)) +
  geom_point() +
  scale_x_log10("Estimated Number of Mobile Homes") +
  scale_y_log10("Number of Casualties")

cor.test(log(Mag.df$totMH), log(Mag.df$totCas))
## 
##  Pearson's product-moment correlation
## 
## data:  log(Mag.df$totMH) and log(Mag.df$totCas)
## t = 5.9291, df = 114, p-value = 3.321e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3326045 0.6134970
## sample estimates:
##       cor 
## 0.4854819
#Elderly and casualties
ggplot(Mag.df, aes(x=totEld, y=totCas)) +
  geom_point() +
  scale_x_log10("Estimated Number of Elderly (Ages 65-75)") +
  scale_y_log10("Number of Casualties")

cor.test(log(Mag.df$totEld), log(Mag.df$totCas))
## 
##  Pearson's product-moment correlation
## 
## data:  log(Mag.df$totEld) and log(Mag.df$totCas)
## t = 6.2566, df = 114, p-value = 7.098e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3560829 0.6298386
## sample estimates:
##       cor 
## 0.5055775
#Poverty and casualties
ggplot(Mag.df, aes(x=totPov, y=totCas)) +
  geom_point() +
  scale_x_log10("Estimated Number of Population in Poverty") +
  scale_y_log10("Number of Casualties") 

cor.test(log(Mag.df$totPov), log(Mag.df$totCas))
## 
##  Pearson's product-moment correlation
## 
## data:  log(Mag.df$totPov) and log(Mag.df$totCas)
## t = 6.4171, df = 114, p-value = 3.288e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.367314 0.637571
## sample estimates:
##       cor 
## 0.5151334
#Education and casualties
ggplot(Mag.df, aes(x=totEdUndrHS, y=totCas)) +
  geom_point() +
  scale_x_log10("Estimated Number of Population without High School Degree") +
  scale_y_log10("Number of Casualties") 

cor.test(log(Mag.df$totEdUndrHS), log(Mag.df$totCas))
## 
##  Pearson's product-moment correlation
## 
## data:  log(Mag.df$totEdUndrHS) and log(Mag.df$totCas)
## t = 6.4486, df = 114, p-value = 2.823e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3695046 0.6390729
## sample estimates:
##       cor 
## 0.5169931
#Unemployment and casualties
ggplot(Mag.df, aes(x=totUEmp, y=totCas)) +
  geom_point() +
  scale_x_log10("Estimated Number of Unemployed Population") +
  scale_y_log10("Number of Casualties") 

cor.test(log(Mag.df$totUEmp), log(Mag.df$totCas))
## 
##  Pearson's product-moment correlation
## 
## data:  log(Mag.df$totUEmp) and log(Mag.df$totCas)
## t = 6.4477, df = 114, p-value = 2.837e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3694362 0.6390261
## sample estimates:
##      cor 
## 0.516935
#Non-English speakers and casualties
ggplot(Mag.df, aes(x=totNES, y=totCas)) +
  geom_point() +
  scale_x_log10("Estimated Number of Non-English Speakers") +
  scale_y_log10("Number of Casualties") 

cor.test(log(I(Mag.df$totNES + .00001)), log(Mag.df$totCas))
## 
##  Pearson's product-moment correlation
## 
## data:  log(I(Mag.df$totNES + 1e-05)) and log(Mag.df$totCas)
## t = 5.3566, df = 114, p-value = 4.464e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2897970 0.5830693
## sample estimates:
##       cor 
## 0.4484214
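
The individual tests above can also be collected into a compact vector of correlations (a sketch assuming the Mag.df column names used above; the small offset guards against log(0), as in the totNES test):

vars = c("totPop", "totHU", "totMH", "totEld", "totPov",
         "totEdUndrHS", "totUEmp", "totNES")
sapply(vars, function(v)
  cor(log(Mag.df[[v]] + 1e-05), log(Mag.df$totCas),
      use = "complete.obs"))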

Linear models

  1. Log-linear regression model for magnitude
  2. Binomial logistic regression model for occurrence

Check for collinearity in the data.

#Collinearity in tornado variables
pairs(Mag.df[, c(2:5)], panel = panel.smooth)

#Collinearity in social variables
pairs(Mag.df[, c(10:15)], panel = panel.smooth)

pairs(Mag.df[, c(18:25)], panel = panel.smooth)
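
A numeric complement to the pairs plots (a sketch; the column indices are assumed to match those used above):

#Pairwise correlations among the social variables
round(cor(Mag.df[, c(18:25)], use = "pairwise.complete.obs"), 2)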

Model for magnitude of impact.

lm1 = lm(log(totCas) ~ factor(MaxEF) + totPA + PopDen, data = Mag.df)
lm2 = lm(log(totCas) ~ factor(MaxEF) + totPA + PopDen + percNES, data = Mag.df)
lm3 = lm(log(totCas) ~ factor(MaxEF) + totPA + PopDen + percPov, data = Mag.df)
lm4 = lm(log(totCas) ~ factor(MaxEF) + totPA + PopDen + percUEmp, data = Mag.df)
lm5 = lm(log(totCas) ~ factor(MaxEF) + totPA + PopDen + percEdUndrHS, data = Mag.df)
lm6 = lm(log(totCas) ~ factor(MaxEF) + totPA + PopDen + percEld, data = Mag.df)
lm7 = lm(log(totCas) ~ factor(MaxEF) + totPA + PopDen + percMH, data = Mag.df)
#Get AIC for each model
AIC(lm1, lm2, lm3, lm4, lm5, lm6, lm7)
##     df      AIC
## lm1  9 340.2636
## lm2 10 342.2554
## lm3 10 338.9862
## lm4 10 342.1841
## lm5 10 342.2632
## lm6 10 334.5812
## lm7 10 342.0588
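
The elderly model (lm6) has the lowest AIC; its coefficients can be inspected directly (output omitted here):

summary(lm6)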

Model for occurrence of at least one casualty.

summary(glm(I(totCas > 0) ~  factor(MaxEF) + totPA + PopDen, data = Inc.df, family = "binomial"))
## 
## Call:
## glm(formula = I(totCas > 0) ~ factor(MaxEF) + totPA + PopDen, 
##     family = "binomial", data = Inc.df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2834  -0.3495  -0.3403  -0.2736   2.5717  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -3.270e+00  2.741e-01 -11.927  < 2e-16 ***
## factor(MaxEF)1  4.492e-01  3.332e-01   1.348 0.177697    
## factor(MaxEF)2  2.479e+00  3.303e-01   7.506 6.11e-14 ***
## factor(MaxEF)3  2.444e+00  4.218e-01   5.793 6.92e-09 ***
## factor(MaxEF)4  1.847e+00  7.307e-01   2.528 0.011457 *  
## factor(MaxEF)5  2.891e+00  1.358e+00   2.129 0.033265 *  
## totPA           3.185e-02  8.639e-03   3.687 0.000227 ***
## PopDen          3.293e-06  5.271e-04   0.006 0.995016    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 722.40  on 1018  degrees of freedom
## Residual deviance: 568.03  on 1011  degrees of freedom
##   (7 observations deleted due to missingness)
## AIC: 584.03
## 
## Number of Fisher Scoring iterations: 6
summary(glm(I(totCas > 0) ~  factor(MaxEF) + totPA + PopDen + percEdUndrHS, data = Inc.df, family = "binomial")) 
## 
## Call:
## glm(formula = I(totCas > 0) ~ factor(MaxEF) + totPA + PopDen + 
##     percEdUndrHS, family = "binomial", data = Inc.df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2668  -0.3694  -0.3133  -0.2625   2.6341  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -3.6526366  0.3718098  -9.824  < 2e-16 ***
## factor(MaxEF)1  0.4035288  0.3342770   1.207 0.227367    
## factor(MaxEF)2  2.4593638  0.3303751   7.444 9.76e-14 ***
## factor(MaxEF)3  2.4213829  0.4226168   5.729 1.01e-08 ***
## factor(MaxEF)4  1.9238792  0.7289016   2.639 0.008305 ** 
## factor(MaxEF)5  2.9687273  1.3562337   2.189 0.028600 *  
## totPA           0.0302974  0.0085153   3.558 0.000374 ***
## PopDen          0.0001598  0.0005224   0.306 0.759736    
## percEdUndrHS    0.0348499  0.0218942   1.592 0.111442    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 722.40  on 1018  degrees of freedom
## Residual deviance: 565.52  on 1010  degrees of freedom
##   (7 observations deleted due to missingness)
## AIC: 583.52
## 
## Number of Fisher Scoring iterations: 6
summary(glm(I(totCas > 0) ~  factor(MaxEF) + totPA + PopDen + percUEmp, data = Inc.df, family = "binomial"))
## 
## Call:
## glm(formula = I(totCas > 0) ~ factor(MaxEF) + totPA + PopDen + 
##     percUEmp, family = "binomial", data = Inc.df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0150  -0.3763  -0.2955  -0.2422   2.7070  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -4.1110111  0.3626837 -11.335  < 2e-16 ***
## factor(MaxEF)1  0.3920834  0.3350616   1.170 0.241927    
## factor(MaxEF)2  2.4409032  0.3331274   7.327 2.35e-13 ***
## factor(MaxEF)3  2.4666966  0.4281887   5.761 8.37e-09 ***
## factor(MaxEF)4  1.9820059  0.7289148   2.719 0.006546 ** 
## factor(MaxEF)5  3.0465948  1.3944659   2.185 0.028905 *  
## totPA           0.0295557  0.0084558   3.495 0.000474 ***
## PopDen         -0.0004601  0.0005271  -0.873 0.382758    
## percUEmp        0.2213939  0.0564322   3.923 8.74e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 722.40  on 1018  degrees of freedom
## Residual deviance: 553.03  on 1010  degrees of freedom
##   (7 observations deleted due to missingness)
## AIC: 571.03
## 
## Number of Fisher Scoring iterations: 6
summary(glm(I(totCas > 0) ~  factor(MaxEF) + totPA + PopDen + percPov, data = Inc.df, family = "binomial")) 
## 
## Call:
## glm(formula = I(totCas > 0) ~ factor(MaxEF) + totPA + PopDen + 
##     percPov, family = "binomial", data = Inc.df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2763  -0.3507  -0.3383  -0.2742   2.5735  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -3.505e+00  1.116e+00  -3.141 0.001683 ** 
## factor(MaxEF)1  4.507e-01  3.333e-01   1.352 0.176333    
## factor(MaxEF)2  2.482e+00  3.306e-01   7.507 6.04e-14 ***
## factor(MaxEF)3  2.443e+00  4.219e-01   5.792 6.98e-09 ***
## factor(MaxEF)4  1.851e+00  7.307e-01   2.533 0.011312 *  
## factor(MaxEF)5  2.909e+00  1.361e+00   2.137 0.032570 *  
## totPA           3.190e-02  8.640e-03   3.693 0.000222 ***
## PopDen          2.776e-06  5.268e-04   0.005 0.995796    
## percPov         6.083e-03  2.797e-02   0.217 0.827827    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 722.40  on 1018  degrees of freedom
## Residual deviance: 567.98  on 1010  degrees of freedom
##   (7 observations deleted due to missingness)
## AIC: 585.98
## 
## Number of Fisher Scoring iterations: 6
summary(glm(I(totCas > 0) ~  factor(MaxEF) + totPA + PopDen + percMH, data = Inc.df, family = "binomial")) 
## 
## Call:
## glm(formula = I(totCas > 0) ~ factor(MaxEF) + totPA + PopDen + 
##     percMH, family = "binomial", data = Inc.df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2520  -0.3609  -0.3178  -0.2650   2.6087  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -3.4596954  0.3321490 -10.416  < 2e-16 ***
## factor(MaxEF)1  0.4153023  0.3346463   1.241 0.214599    
## factor(MaxEF)2  2.4827451  0.3303869   7.515 5.71e-14 ***
## factor(MaxEF)3  2.4370316  0.4221337   5.773 7.78e-09 ***
## factor(MaxEF)4  1.8842409  0.7338742   2.568 0.010243 *  
## factor(MaxEF)5  2.8971144  1.3582855   2.133 0.032931 *  
## totPA           0.0312532  0.0086054   3.632 0.000281 ***
## PopDen          0.0002101  0.0005542   0.379 0.704622    
## percMH          0.0113112  0.0107045   1.057 0.290658    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 722.40  on 1018  degrees of freedom
## Residual deviance: 566.92  on 1010  degrees of freedom
##   (7 observations deleted due to missingness)
## AIC: 584.92
## 
## Number of Fisher Scoring iterations: 6
summary(glm(I(totCas > 0) ~  factor(MaxEF) + totPA + PopDen + percEld, data = Inc.df, family = "binomial")) 
## 
## Call:
## glm(formula = I(totCas > 0) ~ factor(MaxEF) + totPA + PopDen + 
##     percEld, family = "binomial", data = Inc.df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3290  -0.3610  -0.3147  -0.2699   2.6267  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -2.7076847  0.5088378  -5.321 1.03e-07 ***
## factor(MaxEF)1  0.4531760  0.3336798   1.358 0.174427    
## factor(MaxEF)2  2.4657030  0.3310360   7.448 9.44e-14 ***
## factor(MaxEF)3  2.4409589  0.4228544   5.773 7.81e-09 ***
## factor(MaxEF)4  1.8324107  0.7336119   2.498 0.012497 *  
## factor(MaxEF)5  2.7584487  1.3612091   2.026 0.042717 *  
## totPA           0.0318987  0.0087640   3.640 0.000273 ***
## PopDen         -0.0002013  0.0005631  -0.357 0.720765    
## percEld        -0.0347863  0.0270233  -1.287 0.198001    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 722.40  on 1018  degrees of freedom
## Residual deviance: 566.33  on 1010  degrees of freedom
##   (7 observations deleted due to missingness)
## AIC: 584.33
## 
## Number of Fisher Scoring iterations: 6
summary(glm(I(totCas > 0) ~  factor(MaxEF) + totPA + PopDen + percNES, data = Inc.df, family = "binomial"))  
## 
## Call:
## glm(formula = I(totCas > 0) ~ factor(MaxEF) + totPA + PopDen + 
##     percNES, family = "binomial", data = Inc.df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2625  -0.3503  -0.3389  -0.2752   2.5764  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -3.253e+00  2.794e-01 -11.643  < 2e-16 ***
## factor(MaxEF)1  4.471e-01  3.333e-01   1.341 0.179761    
## factor(MaxEF)2  2.483e+00  3.306e-01   7.510 5.90e-14 ***
## factor(MaxEF)3  2.443e+00  4.219e-01   5.792 6.97e-09 ***
## factor(MaxEF)4  1.846e+00  7.307e-01   2.526 0.011527 *  
## factor(MaxEF)5  2.881e+00  1.358e+00   2.121 0.033908 *  
## totPA           3.181e-02  8.630e-03   3.686 0.000227 ***
## PopDen          1.304e-05  5.274e-04   0.025 0.980270    
## percNES        -1.531e-02  5.167e-02  -0.296 0.766947    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 722.40  on 1018  degrees of freedom
## Residual deviance: 567.94  on 1010  degrees of freedom
##   (7 observations deleted due to missingness)
## AIC: 585.94
## 
## Number of Fisher Scoring iterations: 6

Statistical averages for discussion

Calculate averages of all variables over the whole study area as well as over the different regions.

Determine the bounding-box regions using http://boundingbox.klokantech.com/. Regional bounding boxes (west, south, east, north):

SE -> -92.85, 30.17, -82.23, 36.57
MW -> -93.22, 36.58, -82.61, 42.49
CP -> -103.93, 32.06, -93.04, 44.02

Subset Areas.df by latitude and longitude to obtain the regions. Then make a map.

#SE 
xmn1 = -93
xmx1 = -82
ymn1 = 30
ymx1 = 37
SE = data.frame(lon = c(xmn1, xmn1, xmx1, xmx1, xmn1), lat = c(ymn1, ymx1, ymx1, ymn1, ymn1))

#MW 
xmn2 = -93
xmx2 = -82
ymn2 = 37
ymx2 = 43
MW = data.frame(lon = c(xmn2, xmn2, xmx2, xmx2, xmn2), lat = c(ymn2, ymx2, ymx2, ymn2, ymn2))

#CP 
xmn3 = -104
xmx3 = -93
ymn3 = 32
ymx3 = 44
CP = data.frame(lon = c(xmn3, xmn3, xmx3, xmx3, xmn3), lat = c(ymn3, ymx3, ymx3, ymn3, ymn3))

TornSE = subset(Areas.df, Lat >= ymn1 & Lat <= ymx1 & 
                  Lon >= xmn1 & Lon <= xmx1)
TornMW = subset(Areas.df, Lat >= ymn2 & Lat <= ymx2 & 
                  Lon >= xmn2 & Lon <= xmx2)
TornCP = subset(Areas.df, Lat >= ymn3 & Lat <= ymx3 & 
                  Lon >= xmn3 & Lon <= xmx3)
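
A quick tally of intersection rows in each regional subset (a one-line sketch; output omitted):

c(SE = nrow(TornSE), MW = nrow(TornMW), CP = nrow(TornCP))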

Map2 = get_map(location = loc, source="google", 
               maptype="roadmap", color="bw", zoom=5)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=37.2473,-93.122327&zoom=5&size=640x640&scale=2&maptype=roadmap&language=en-EN&sensor=false
ggmap(Map2, extent="device") + 
    geom_polygon(data=SE, aes(x=lon, y=lat), 
                 color="red", fill="red", alpha=.2) +
    geom_polygon(data=MW, aes(x=lon, y=lat), 
                 color="blue", fill="blue", alpha=.2) +
    geom_polygon(data=CP, aes(x=lon, y=lat), 
                 color="green", fill="green", alpha=.2) +
    theme_bw() +
    theme(axis.text.x = element_blank(),
          axis.text.y = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.ticks.x = element_blank(),
          axis.ticks.y = element_blank(),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          strip.background = element_blank(),
          panel.border = element_rect(colour = "white")) +
    scale_fill_discrete() +
    scaleBar(lon = -95, lat = 27, 
              distanceLon = 200, distanceLat = 30,
              distanceLegend = 50, dist.unit = "km",
              arrow.length = 100, arrow.distance = 100, 
              arrow.North.size = 10)

Figure 5: Bounding boxes to delineate the Central Plains (green), Midwest (blue), and Southeast (red) regions.

First calculate averages for all tornadoes in the dataset.

avgStats.df = Inc.df %>%
#    NaRV.omit() %>%
    summarize(avgEF = mean(avgEF, na.rm = TRUE), 
              avgMaxEF = mean(MaxEF, na.rm = TRUE), 
              avgPA = mean(totPA, na.rm = TRUE), 
              avgTotFatal = mean(totFatal, na.rm = TRUE), 
              avgTotInj = mean(totInj, na.rm = TRUE), 
              avgTotCas = mean(totCas, na.rm = TRUE), 
              avgTotPop= mean(totPop, na.rm = TRUE), 
              avgPopDen = mean(PopDen, na.rm = TRUE),
              avgPercPov = mean(percPov, na.rm = TRUE),
              avgPercEld = mean(percEld, na.rm = TRUE),
              avgPercNES = mean(percNES, na.rm = TRUE),
              avgPercUEmp = mean(percUEmp, na.rm = TRUE),
              avgPercMH = mean(percMH, na.rm = TRUE),
              avgPercEdUndrHS = mean(percEdUndrHS, na.rm = TRUE))

Then calculate averages of the variables associated with casualty-causing tornado events over the whole study area.

avgFinal.df = Mag.df %>%
#  NaRV.omit() %>%
  summarize(avgEF = mean(avgEF, na.rm = TRUE), 
              avgMaxEF = mean(MaxEF, na.rm = TRUE), 
              avgPA = mean(totPA, na.rm = TRUE), 
              avgTotFatal = mean(totFatal, na.rm = TRUE), 
              avgTotInj = mean(totInj, na.rm = TRUE), 
              avgTotCas = mean(totCas, na.rm = TRUE), 
              avgTotPop= mean(totPop, na.rm = TRUE),
              avgPopDen = mean(PopDen, na.rm = TRUE),
              avgPercPov = mean(percPov, na.rm = TRUE),
              avgPercEld = mean(percEld, na.rm = TRUE),
              avgPercNES = mean(percNES, na.rm = TRUE),
              avgPercUEmp = mean(percUEmp, na.rm = TRUE),
              avgPercMH = mean(percMH, na.rm = TRUE),
              avgPercEdUndrHS = mean(percEdUndrHS, na.rm = TRUE))

Re-run code to obtain results for each region. SE:

StatsSE.df = TornSE %>%
    group_by(TornId) %>%
 #  NaRV.omit() %>%
    summarize(avgEF = as.integer(mean(efscale, na.rm = TRUE)), 
              MaxEF = max(MaxEF, na.rm = TRUE), 
              totPA = mean(PathArea, na.rm = TRUE), 
              totFatal = mean(abs(fatalities), na.rm = TRUE), 
              totInj = mean(injuries, na.rm = TRUE), 
              totCas = mean(Cas, na.rm = TRUE), 
              interArea = sum(InterArea, na.rm = TRUE), 
              totPop = sum(QSPE001*WeightArea, na.rm = TRUE), 
              totHU = sum(QX6E001*WeightArea, na.rm = TRUE), 
              totPov = sum(QUYE001*WeightArea, na.rm = TRUE), 
              totEld = sum(Eld*WeightArea, na.rm = TRUE), 
                  #elderly males and females ages 65+
              totNES = sum(NES*WeightArea, na.rm = TRUE), 
                  #NES 5+ speak English not at all or not well
              totUEmp = sum(QXSE005*WeightArea, na.rm = TRUE), 
              totEdUndrHS = sum(EdUndrHS*WeightArea, na.rm = TRUE),
              totMH = sum(QYYE010*WeightArea, na.rm = TRUE)) %>%
    mutate(totPA = totPA/1000000,
           rateCas = totCas/totPop,
           PopDen = totPop/totPA, 
           HUDen = totHU/totPA,
           percPov = totPov/totPop*100, 
           percEld = totEld/totPop*100, 
           percNES = totNES/totPop*100, 
           percUEmp = totUEmp/totPop*100, 
           percMH = totMH/totHU*100,
           percEdUndrHS = totEdUndrHS/totPop*100)
#Average stats for SE region
avgStatsSE.df = StatsSE.df %>%
#   NaRV.omit() %>%
  summarize(avgEF = mean(avgEF, na.rm = TRUE), 
              avgMaxEF = mean(MaxEF, na.rm = TRUE), 
              avgPA = mean(totPA, na.rm = TRUE), 
              avgTotFatal = mean(totFatal, na.rm = TRUE), 
              avgTotInj = mean(totInj, na.rm = TRUE), 
              avgTotCas = mean(totCas, na.rm = TRUE), 
              avgTotPop= mean(totPop, na.rm = TRUE),
              avgPopDen = mean(PopDen, na.rm = TRUE),
              avgPercPov = mean(percPov, na.rm = TRUE),
              avgPercEld = mean(percEld, na.rm = TRUE),
              avgPercNES = mean(percNES, na.rm = TRUE),
              avgPercUEmp = mean(percUEmp, na.rm = TRUE),
              avgPercMH = mean(percMH, na.rm = TRUE),
              avgPercEdUndrHS = mean(percEdUndrHS, na.rm = TRUE))
#Average stats for tornadoes that caused at least 1 casualty
avgFinalSE.df = StatsSE.df[StatsSE.df$totCas>0, ] %>%
#  NaRV.omit() %>%
  summarize(avgEF = mean(avgEF, na.rm = TRUE), 
              avgMaxEF = mean(MaxEF, na.rm = TRUE), 
              avgPA = mean(totPA, na.rm = TRUE), 
              avgTotFatal = mean(totFatal, na.rm = TRUE), 
              avgTotInj = mean(totInj, na.rm = TRUE), 
              avgTotCas = mean(totCas, na.rm = TRUE), 
              avgTotPop= mean(totPop, na.rm = TRUE), 
              avgPopDen = mean(PopDen, na.rm = TRUE),
              avgPercPov = mean(percPov, na.rm = TRUE),
              avgPercEld = mean(percEld, na.rm = TRUE),
              avgPercNES = mean(percNES, na.rm = TRUE),
              avgPercUEmp = mean(percUEmp, na.rm = TRUE),
              avgPercMH = mean(percMH, na.rm = TRUE),
              avgPercEdUndrHS = mean(percEdUndrHS, na.rm = TRUE))

MW:

StatsMW.df = TornMW %>%
    group_by(TornId) %>%
 #  NaRV.omit() %>%
    summarize(avgEF = as.integer(mean(efscale, na.rm = TRUE)), 
              MaxEF = max(MaxEF, na.rm = TRUE), 
              totPA = mean(PathArea, na.rm = TRUE), 
              totFatal = mean(abs(fatalities), na.rm = TRUE), 
              totInj = mean(injuries, na.rm = TRUE), 
              totCas = mean(Cas, na.rm = TRUE), 
              interArea = sum(InterArea, na.rm = TRUE), 
              totPop = sum(QSPE001*WeightArea, na.rm = TRUE), 
              totHU = sum(QX6E001*WeightArea, na.rm = TRUE), 
              totPov = sum(QUYE001*WeightArea, na.rm = TRUE), 
              totEld = sum(Eld*WeightArea, na.rm = TRUE), 
                  #elderly males and females ages 65+
              totNES = sum(NES*WeightArea, na.rm = TRUE), 
                  #NES 5+ speak English not at all or not well
              totUEmp = sum(QXSE005*WeightArea, na.rm = TRUE), 
              totEdUndrHS = sum(EdUndrHS*WeightArea, na.rm = TRUE),
              totMH = sum(QYYE010*WeightArea, na.rm = TRUE)) %>%
    mutate(totPA = totPA/1000000,
           rateCas = totCas/totPop,
           PopDen = totPop/totPA, 
           HUDen = totHU/totPA,
           percPov = totPov/totPop*100, 
           percEld = totEld/totPop*100, 
           percNES = totNES/totPop*100, 
           percUEmp = totUEmp/totPop*100, 
           percMH = totMH/totHU*100,
           percEdUndrHS = totEdUndrHS/totPop*100)
#Average stats for MW region
avgStatsMW.df = StatsMW.df %>%
#    NaRV.omit() %>%
    summarize(avgEF = mean(avgEF, na.rm = TRUE), 
              avgMaxEF = mean(MaxEF, na.rm = TRUE), 
              avgPA = mean(totPA, na.rm = TRUE), 
              avgTotFatal = mean(totFatal, na.rm = TRUE), 
              avgTotInj = mean(totInj, na.rm = TRUE), 
              avgTotCas = mean(totCas, na.rm = TRUE), 
              avgTotPop= mean(totPop, na.rm = TRUE), 
              avgPopDen = mean(PopDen, na.rm = TRUE),
              avgPercPov = mean(percPov, na.rm = TRUE),
              avgPercEld = mean(percEld, na.rm = TRUE),
              avgPercNES = mean(percNES, na.rm = TRUE),
              avgPercUEmp = mean(percUEmp, na.rm = TRUE),
              avgPercMH = mean(percMH, na.rm = TRUE),
              avgPercEdUndrHS = mean(percEdUndrHS, na.rm = TRUE))
#Average stats for tornadoes that caused at least 1 casualty
avgFinalMW.df = StatsMW.df[StatsMW.df$totCas>0, ] %>%
#  NaRV.omit() %>%
  summarize(avgEF = mean(avgEF, na.rm = TRUE), 
              avgMaxEF = mean(MaxEF, na.rm = TRUE), 
              avgPA = mean(totPA, na.rm = TRUE), 
              avgTotFatal = mean(totFatal, na.rm = TRUE), 
              avgTotInj = mean(totInj, na.rm = TRUE), 
              avgTotCas = mean(totCas, na.rm = TRUE), 
              avgTotPop= mean(totPop, na.rm = TRUE), 
              avgPopDen = mean(PopDen, na.rm = TRUE),
              avgPercPov = mean(percPov, na.rm = TRUE),
              avgPercEld = mean(percEld, na.rm = TRUE),
              avgPercNES = mean(percNES, na.rm = TRUE),
              avgPercUEmp = mean(percUEmp, na.rm = TRUE),
              avgPercMH = mean(percMH, na.rm = TRUE),
              avgPercEdUndrHS = mean(percEdUndrHS, na.rm = TRUE))

CP:

StatsCP.df = TornCP %>%
    group_by(TornId) %>%
 #  NaRV.omit() %>%
    summarize(avgEF = as.integer(mean(efscale, na.rm = TRUE)), 
              MaxEF = max(MaxEF, na.rm = TRUE), 
              totPA = mean(PathArea, na.rm = TRUE), 
              totFatal = mean(abs(fatalities), na.rm = TRUE), 
              totInj = mean(injuries, na.rm = TRUE), 
              totCas = mean(Cas, na.rm = TRUE), 
              interArea = sum(InterArea, na.rm = TRUE), 
              totPop = sum(QSPE001*WeightArea, na.rm = TRUE), 
              totHU = sum(QX6E001*WeightArea, na.rm = TRUE), 
              totPov = sum(QUYE001*WeightArea, na.rm = TRUE), 
              totEld = sum(Eld*WeightArea, na.rm = TRUE), 
                  #elderly males and females ages 65+
              totNES = sum(NES*WeightArea, na.rm = TRUE), 
                  #NES ages 5+ speak English not at all or not well
              totUEmp = sum(QXSE005*WeightArea, na.rm = TRUE), 
              totEdUndrHS = sum(EdUndrHS*WeightArea, na.rm = TRUE),
              totMH = sum(QYYE010*WeightArea, na.rm = TRUE)) %>%
    mutate(totPA = totPA/1000000,
           rateCas = totCas/totPop,
           PopDen = totPop/totPA, 
           HUDen = totHU/totPA,
           percPov = totPov/totPop*100, 
           percEld = totEld/totPop*100, 
           percNES = totNES/totPop*100, 
           percUEmp = totUEmp/totPop*100, 
           percMH = totMH/totHU*100,
           percEdUndrHS = totEdUndrHS/totPop*100)
#Average stats for CP region
avgStatsCP.df = StatsCP.df %>%
#    NaRV.omit() %>%
    summarize(avgEF = mean(avgEF, na.rm = TRUE), 
              avgMaxEF = mean(MaxEF, na.rm = TRUE), 
              avgPA = mean(totPA, na.rm = TRUE), 
              avgTotFatal = mean(totFatal, na.rm = TRUE), 
              avgTotInj = mean(totInj, na.rm = TRUE), 
              avgTotCas = mean(totCas, na.rm = TRUE), 
              avgTotPop= mean(totPop, na.rm = TRUE),
              avgPopDen = mean(PopDen, na.rm = TRUE),
              avgPercPov = mean(percPov, na.rm = TRUE),
              avgPercEld = mean(percEld, na.rm = TRUE),
              avgPercNES = mean(percNES, na.rm = TRUE),
              avgPercUEmp = mean(percUEmp, na.rm = TRUE),
              avgPercMH = mean(percMH, na.rm = TRUE),
              avgPercEdUndrHS = mean(percEdUndrHS, na.rm = TRUE))
#Average stats for tornadoes that caused at least 1 casualty
avgFinalCP.df = StatsCP.df[StatsCP.df$totCas>0, ] %>%
#  NaRV.omit() %>%
  summarize(avgEF = mean(avgEF, na.rm = TRUE), 
              avgMaxEF = mean(MaxEF, na.rm = TRUE), 
              avgPA = mean(totPA, na.rm = TRUE), 
              avgTotFatal = mean(totFatal, na.rm = TRUE), 
              avgTotInj = mean(totInj, na.rm = TRUE), 
              avgTotCas = mean(totCas, na.rm = TRUE), 
              avgTotPop= mean(totPop, na.rm = TRUE),
              avgPopDen = mean(PopDen, na.rm = TRUE),
              avgPercPov = mean(percPov, na.rm = TRUE),
              avgPercEld = mean(percEld, na.rm = TRUE),
              avgPercNES = mean(percNES, na.rm = TRUE),
              avgPercUEmp = mean(percUEmp, na.rm = TRUE),
              avgPercMH = mean(percMH, na.rm = TRUE),
              avgPercEdUndrHS = mean(percEdUndrHS, na.rm = TRUE))
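
The SE, MW, and CP blocks repeat the same summarize/mutate pipeline. A refactoring sketch (it assumes the column names used above; only a few of the totals are shown) wraps the shared logic in one function:

regionStats = function(d) {
  d %>%
    group_by(TornId) %>%
    summarize(MaxEF = max(MaxEF, na.rm = TRUE),
              totPA = mean(PathArea, na.rm = TRUE)/1000000,
              totCas = mean(Cas, na.rm = TRUE),
              totPop = sum(QSPE001*WeightArea, na.rm = TRUE))
    #... remaining totals and percentages as in the blocks above
}
#StatsSE.df = regionStats(TornSE); likewise for TornMW and TornCP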

Combine rows of each dataset.

avgInc.df = rbind(avgStats.df, avgStatsSE.df, avgStatsMW.df, avgStatsCP.df)
avgMag.df = rbind(avgFinal.df, avgFinalSE.df, avgFinalMW.df, avgFinalCP.df)

Tabulate the resulting averages and keep the desired columns.

xtable(avgInc.df[,c(2,3,6,8:14)])
## % latex table generated in R 3.2.4 by xtable 1.8-2 package
## % Sun Feb  5 10:54:11 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrrrrrr}
##   \hline
##  & avgMaxEF & avgPA & avgTotCas & avgPopDen & avgPercPov & avgPercEld & avgPercNES & avgPercUEmp & avgPercMH & avgPercEdUndrHS \\ 
##   \hline
## 1 & 0.92 & 4.15 & 3.27 & 79.79 & 38.38 & 15.92 & 1.12 & 3.83 & 16.32 & 11.10 \\ 
##   2 & 1.07 & 6.72 & 8.18 & 85.96 & 37.65 & 14.76 & 1.14 & 4.77 & 22.87 & 14.60 \\ 
##   3 & 0.94 & 2.52 & 4.03 & 111.97 & 38.09 & 14.84 & 0.42 & 4.34 & 10.40 & 9.22 \\ 
##   4 & 0.94 & 3.90 & 0.36 & 48.13 & 38.77 & 16.84 & 1.36 & 3.05 & 14.87 & 9.82 \\ 
##    \hline
## \end{tabular}
## \end{table}
xtable(avgMag.df[,c(2,3,6,8:14)])
## % latex table generated in R 3.2.4 by xtable 1.8-2 package
## % Sun Feb  5 10:54:11 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrrrrrr}
##   \hline
##  & avgMaxEF & avgPA & avgTotCas & avgPopDen & avgPercPov & avgPercEld & avgPercNES & avgPercUEmp & avgPercMH & avgPercEdUndrHS \\ 
##   \hline
## 1 & 1.82 & 14.72 & 28.72 & 79.40 & 38.20 & 15.17 & 1.13 & 4.58 & 17.21 & 12.15 \\ 
##   2 & 2.00 & 19.21 & 40.63 & 96.84 & 37.71 & 14.11 & 1.26 & 5.16 & 19.38 & 13.87 \\ 
##   3 & 2.08 & 9.97 & 47.08 & 114.56 & 37.68 & 14.36 & 0.53 & 4.68 & 15.19 & 12.08 \\ 
##   4 & 1.47 & 10.25 & 4.28 & 36.94 & 38.72 & 17.20 & 1.38 & 3.66 & 14.40 & 9.96 \\ 
##    \hline
## \end{tabular}
## \end{table}