Tornado Statistics On Spatial Clusters

James B. Elsner & Tyler Fricker

Set working directory and load packages.

setwd("~/Dropbox/Tornadoes/SpatialClusters")
suppressPackageStartupMessages(library("MASS"))
suppressPackageStartupMessages(library("ggplot2"))
suppressPackageStartupMessages(library("rgdal"))
suppressPackageStartupMessages(library("rgeos"))
suppressPackageStartupMessages(library("raster"))

## Warning: no function found corresponding to methods exports from 'raster'
## for: 'overlay'

suppressPackageStartupMessages(library("ggmap"))
suppressPackageStartupMessages(library("dplyr"))
suppressPackageStartupMessages(library("lubridate"))
suppressPackageStartupMessages(library("raster"))

Read the tornado data. Data source: Storm Prediction Center (SPC) http://www.spc.noaa.gov/gis/svrgis/.

TornL = readOGR(dsn = "torn", layer = "torn", 
                stringsAsFactors = FALSE)

## OGR data source with driver: ESRI Shapefile 
## Source: "torn", layer: "torn"
## with 60114 features
## It has 22 fields

TornL$DateTime = as.POSIXct(paste(TornL$date, TornL$time), format = "%Y-%m-%d %H:%M:%S")
TornL$Length = as.numeric(TornL$len) * 1609.34
TornL$Width = as.numeric(TornL$wid) * .9144
TornL$Area = TornL$Length * TornL$Width
Torn.df = as.data.frame(TornL)

Compute per tornado kinetic energy. Percentages are from Table 3 of NRC report.

perc = c(1, 0, 0, 0, 0, 0)
perc = c(perc, .772, .228, 0, 0, 0, 0)
perc = c(perc, .616, .268, .115, 0, 0, 0)
perc = c(perc, .529, .271, .133, .067, 0, 0)
perc = c(perc, .543, .238, .131, .056, .032, 0)
perc = c(perc, .538, .223, .119, .07, .033, .017)
percM = matrix(perc, ncol = 6, byrow = TRUE)
threshW = c(29.06, 38.45, 49.62, 60.8, 74.21, 89.41)

ef = Torn.df$mag + 1
EW2 = numeric()
for(i in 1:length(ef)){
EW2[i] = threshW^2 %*% percM[ef[i], ]
}

Torn.df$KE = .5 * EW2 * Torn.df$Area * 1000

Summarize the tornado path area (units of sq. km) and path length (units of km).

Torn.df %>%
# filter(yr >= 1994) %>%
 mutate(pathArea = Length * Width) %>%
 summarize(mArea = mean(pathArea) / 10^6,
           mLength = mean(Length) / 1000,
           qA75 = quantile(pathArea, p = .75) / 10^6,
           qA95 = quantile(pathArea, p = .95) / 10^6,
           qL75 = quantile(Length, p = .75) / 1000,
           qL95 = quantile(Length, p = .95) / 1000)

##      mArea  mLength      qA75     qA95    qL75     qL95
## 1 1.396464 5.601662 0.3311056 5.179963 4.82802 25.58851

Create a spatial points data frame. Assign a geographic projection (epsg:4326)

TornP = Torn.df
coordinates(TornP) = ~ slon + slat
proj4string(TornP) = CRS("+init=epsg:4326")

Create a raster and rasterize the spatial points. The CRS for the raster is geographic (latitude/longitude) because it is a raster. Then subset tornado data by raster extent.

xmn = -110; xmx = -74; ymn = 28; ymx = 48; res = 2
r = raster(xmn = xmn, xmx = xmx,
           ymn = ymn, ymx = ymx,
           resolution = res, 
           crs = "+init=epsg:4326")
ncell(r); nrow(r); ncol(r)

## [1] 180

## [1] 10

## [1] 18

TornR = rasterize(TornP, r, field = 1,  fun = 'count')
TornP = crop(TornP, r)
extent(TornP)

## class       : Extent 
## xmin        : -110 
## xmax        : -74 
## ymin        : 28 
## ymax        : 48

dim(TornP)[1]/dim(TornL)[1] * 100

## [1] 92.49759

The raster extent contains 92.5% of all U.S. tornadoes.

Plot the counts and add state borders. These are total counts of genesis points by cells within the extent of the raster. Cells without tornadoes are NA.

suppressPackageStartupMessages(library("mapproj"))
suppressPackageStartupMessages(library("maptools"))
suppressPackageStartupMessages(library("rasterVis"))
suppressPackageStartupMessages(library("wesanderson"))
ext = as.vector(extent(r))
bndryS = map("state", fill = TRUE,
            xlim = ext[1:2],
            ylim = ext[3:4],
            plot = FALSE)
IDs = sapply(strsplit(bndryS$names, ":"), function(x) x[1])
bndrySP = map2SpatialPolygons(bndryS, IDs = IDs,
                              proj4string = CRS(projection(r)))
range(values(TornR), na.rm = TRUE)

## [1]    3 1037

#rng = seq(0, 450, 50) # res = 1
rng = seq(0, 1200, 200)
cr = wes_palette(6, name = "Zissou", type = "continuous")
levelplot(TornR, margin = FALSE, 
          sub = expression("         Number of Tornadoes"), 
          xlab = NULL, ylab = NULL, 
          col.regions = cr, at = rng, 
          colorkey = list(space = 'bottom'),
          par.settings = list(fontsize = list(text = 15))) +
  latticeExtra::layer(sp.polygons(bndrySP, col = gray(.45), lwd = 1))

Get the dates on which there were at least N tornadoes in the domain.

N = 16
Day.df = as.data.frame(TornP) %>%
  filter(mag > 0) %>%
  group_by(date) %>%
  summarize(nT = n(),
            nST = sum(mag >= 3),
            avgL = mean(Length),
            avgW = mean(Width)) %>%
  filter(nT >= N)

DD = Day.df$date

sum(TornP$inj[TornP$date %in% DD])/sum(TornP$inj) * 100

## [1] 47.97762

sum(TornP$fat[TornP$date %in% DD])/sum(TornP$fat) * 100

## [1] 51.81448

48% of all injuries and 53% of all fatalities occur on days with at least 16 tornadoes.

Filter the tornado data by the above dates and select columns.

Torn.df = as.data.frame(TornP) %>%
  filter(date %in% Day.df$date) %>%
  mutate(dayF = as.factor(date),
         hour = hour(DateTime)) %>%
  dplyr::select(date, DateTime, hour, yr, mo, 
         Length, Width,
         mag, st, inj, fat,
         loss, closs, slat, slon, elat, elon)
TornP = subset(TornP, date %in% Day.df$date)

Rasterize a single day. Begin by subsetting the data for 1959-05-10, then rasterize on the subset point data. Next, use the clump function to clump regions based on a Queen’s Case (8 directions) contiguity.

#TornP1 = subset(TornP, date == "2011-04-27")
TornP1 = subset(TornP, date == "1959-05-10")

TornR = rasterize(TornP1, r, field = 1,  fun = 'count')
range(values(TornR), na.rm = TRUE)

## [1] 1 5

#rng = seq(0, 25, 5)
#rng = seq(0, 50, 10)
rng = seq(0, 5, 1)
cr = wes_palette(5, name = "Zissou", type = "continuous")
levelplot(TornR, margin = FALSE, 
          sub = expression("         Number of Tornadoes [5-10-1959]"), 
          xlab = NULL, ylab = NULL, 
          col.regions = cr, at = rng, 
          colorkey = list(space = 'bottom'),
          par.settings = list(fontsize = list(text = 15))) +
  latticeExtra::layer(sp.polygons(bndrySP, col = gray(.15), lwd = 1))

Cluster

suppressPackageStartupMessages(library("igraph"))
C1 = clump(TornR, directions = 8, gaps = FALSE)
range(values(C1), na.rm  = TRUE)

## [1] 1 3

rng = seq(0, 6, 1)
cr = brewer.pal("Set1", n = 7)
levelplot(C1, margin = FALSE, 
          sub = expression("         Tornadoes Clusters [5-10-1959]"), 
          xlab = NULL, ylab = NULL, 
          col.regions = cr, at = rng, 
          colorkey = NULL,
          par.settings = list(fontsize = list(text = 15))) +
  latticeExtra::layer(sp.polygons(bndrySP, col = gray(.15), lwd = 1)) +
  latticeExtra::layer(sp.polygons(TornP1, fill = gray(.4), alpha = .3))

TornP1$Cluster = raster::extract(C1, TornP1)

Get population density (/sq. km) raster, convert to points and project.

Pop = raster("../usadens/usads00g/w001001.adf")
Pop = crop(Pop, r)
pop = resample(aggregate(Pop, 
                         fact = c(nrow(Pop)/nrow(r), ncol(Pop)/ncol(r)), 
                         fun = mean), r)

Loop over all big tornado days and determine clusters based on clumps. This loop takes time. Remove the lines that creates the cluster stack to speed the loop. Subset the population grid and compute the mean pop density. Compute the approximate cell areas (sq. km) and sum the areas over each cluster.

DD = Day.df$date
ClusNum = numeric()
PopDen = numeric()
ClusArea = numeric()
#Cs = stack()
for(dd in DD){
#  print(dd)
  Torn = subset(TornP, date == dd)
  TornR = rasterize(Torn, r, field = 1,  fun = 'count')
  C = clump(TornR, directions = 8, gaps = FALSE)
#  Cs = stack(Cs, C)
clusNum = raster::extract(C, Torn)
ClusNum = c(ClusNum, clusNum)
Area = raster::area(C)
clusArea = numeric()
popDen = numeric()
  for(i in 1:max(clusNum)){
  popDen[i] = mean(pop[C == i], na.rm = TRUE)
  clusArea[i] = sum(Area[C == i])
  }
PopDen = c(PopDen, popDen[clusNum])
ClusArea = c(ClusArea, clusArea[clusNum])
}

TornP$ClusNum = ClusNum
TornP$PopDen = PopDen
TornP$ClusArea = ClusArea
TornP$DateCluster = paste(TornP$date, "-", TornP$ClusNum, sep = "")
#names(Cs) = DD

Determine cluster statistics.

df = as.data.frame(TornP) %>%
  group_by(DateCluster) %>%
  summarize(nT = n(),
            Year = yr[1],
            mo = mo[1],
            Ma = factor(month.abb[mo], 
                       levels = month.abb[1:12]),
            mLength = mean(Length),
            totalLength = sum(Length),
            mWidth = mean(Width),
            totalWidth = sum(Width),
            maxEF = max(mag),
            totalKE = sum(KE),
            totInj = sum(inj),
            totFat = sum(fat),
            clusArea = ClusArea[1],
            clusDensity = nT/clusArea,
            clusPop = PopDen[1] * clusArea) %>%
  arrange(desc(clusDensity))
df = as.data.frame(df)

Plots

suppressPackageStartupMessages(library("scales"))
df %>% 
  filter(totInj > 0) %>%
ggplot(., aes(totalKE/10^9, totInj)) +
  geom_point() +
  scale_x_log10(labels = comma) +
  scale_y_log10() +
  geom_smooth() +
  xlab("Tornado Energy Dissipation [GJ/kg]") + ylab("Number of Injuries")

df %>% 
  filter(totInj > 0) %>%
ggplot(., aes(clusPop, totInj)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10() +
  geom_smooth()

Models

summary(lm(log(totFat) ~ log(totalKE) + log(clusPop), data = df[df$totFat > 0 & df$Year >= 2007, ]))

## 
## Call:
## lm(formula = log(totFat) ~ log(totalKE) + log(clusPop), data = df[df$totFat > 
##     0 & df$Year >= 2007, ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6913 -0.7663 -0.1686  0.8778  2.8737 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -23.7057     4.4818  -5.289 4.69e-06 ***
## log(totalKE)   0.6020     0.1264   4.761 2.53e-05 ***
## log(clusPop)   0.3656     0.1837   1.991   0.0534 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.07 on 40 degrees of freedom
## Multiple R-squared:  0.4494, Adjusted R-squared:  0.4218 
## F-statistic: 16.32 on 2 and 40 DF,  p-value: 6.566e-06

summary(lm(log(totInj) ~ log(totalKE) + log(clusPop), data = df[df$totInj > 0 & df$Year >= 2007, ]))

## 
## Call:
## lm(formula = log(totInj) ~ log(totalKE) + log(clusPop), data = df[df$totInj > 
##     0 & df$Year >= 2007, ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4111 -0.8653  0.1392  0.8734  3.3598 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -23.62180    4.15573  -5.684 3.35e-07 ***
## log(totalKE)   0.58524    0.09995   5.855 1.72e-07 ***
## log(clusPop)   0.50441    0.18211   2.770   0.0073 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.421 on 65 degrees of freedom
## Multiple R-squared:  0.4092, Adjusted R-squared:  0.391 
## F-statistic: 22.51 on 2 and 65 DF,  p-value: 3.728e-08

A 100% increase in KE (doubling) leads to a 2^(.6) - 1 = 51% increase in the number of injuries (and fatalities) since 2007. The population effect is also significant although less than the KE effect.

Trends

df2 = df %>%
  filter(maxEF > 0, Year >= 1955) %>%
  group_by(Year) %>%
  summarize(nC = n(),
            clusArea = sum(clusArea),
            areaPerClus = clusArea/nC,
            nT = sum(nT),
            clusDensity = nT/clusArea * 10^6,
            mLength = mean(mLength),
            mWidth = mean(mWidth),
            mKE = mean(totalKE),
            sFat = sum(totFat),
            sInj = sum(totInj),
            mFat = sum(totFat)/nC,
            mInj = sum(totInj)/nC)

ggplot(df2, aes(x = Year, y = mKE/10^12)) +
  geom_point() +
  geom_smooth() +
  scale_y_continuous(labels = comma) +
  ylab("Mean Energy Dissipation (TJ)")

ggplot(df2, aes(x = Year, y = sInj)) +
  geom_point() +
  geom_smooth(method = lm) +
  ylab("Total Injuries")

ggplot(df2, aes(x = Year, y = sFat)) +
  geom_point() +
  geom_smooth(method = lm) +
  ylab("Total Fatalities")

ggplot(df2, aes(x = Year, y = nC)) +
  geom_point() +
  geom_smooth(method = lm) +
  ylab("Number of Clusters")

ggplot(df2, aes(x = Year, y = areaPerClus)) +
  geom_point() +
  geom_smooth(method = lm) +
  scale_y_continuous(labels = comma) +
  ylab("Area Per Cluster (sq. km)")

ggplot(df2, aes(x = Year, y = clusDensity)) +
  geom_point() +
  geom_smooth(method = lm) +
  ylab("Number of Tornadoes [per 10,000 sq. km]") +
  scale_x_continuous(breaks = seq(1955, 2015, 10)) +
  xlab("Year")

ggplot(df2, aes(x = Year, y = nT)) +
  geom_point() +
  geom_smooth(method = lm) +
  ylab("Number of Tornadoes in Clusters")

ggplot(df2, aes(x = Year, y = mLength/1000)) +
  geom_point() +
  geom_smooth(method = lm) +
  ylab("Average Path Length (km)")

Clusters are getting larger and there are more tornadoes per cluster. The average track length has declined but has been holding roughly steady since the 1980s.