Set working directory and load packages.
setwd("~/Dropbox/Tornadoes/SpatialClusters")
suppressPackageStartupMessages(library("MASS"))
suppressPackageStartupMessages(library("ggplot2"))
suppressPackageStartupMessages(library("rgdal"))
suppressPackageStartupMessages(library("rgeos"))
suppressPackageStartupMessages(library("raster"))
## Warning: no function found corresponding to methods exports from 'raster'
## for: 'overlay'
suppressPackageStartupMessages(library("ggmap"))
suppressPackageStartupMessages(library("dplyr"))
suppressPackageStartupMessages(library("lubridate"))
suppressPackageStartupMessages(library("raster"))
Read the tornado data. Data source: Storm Prediction Center (SPC) http://www.spc.noaa.gov/gis/svrgis/.
TornL = readOGR(dsn = "torn", layer = "torn",
stringsAsFactors = FALSE)
## OGR data source with driver: ESRI Shapefile
## Source: "torn", layer: "torn"
## with 60114 features
## It has 22 fields
TornL$DateTime = as.POSIXct(paste(TornL$date, TornL$time), format = "%Y-%m-%d %H:%M:%S")
TornL$Length = as.numeric(TornL$len) * 1609.34
TornL$Width = as.numeric(TornL$wid) * .9144
TornL$Area = TornL$Length * TornL$Width
Torn.df = as.data.frame(TornL)
Compute per tornado kinetic energy. Percentages are from Table 3 of NRC report.
perc = c(1, 0, 0, 0, 0, 0)
perc = c(perc, .772, .228, 0, 0, 0, 0)
perc = c(perc, .616, .268, .115, 0, 0, 0)
perc = c(perc, .529, .271, .133, .067, 0, 0)
perc = c(perc, .543, .238, .131, .056, .032, 0)
perc = c(perc, .538, .223, .119, .07, .033, .017)
percM = matrix(perc, ncol = 6, byrow = TRUE)
threshW = c(29.06, 38.45, 49.62, 60.8, 74.21, 89.41)
ef = Torn.df$mag + 1
EW2 = numeric()
for(i in 1:length(ef)){
EW2[i] = threshW^2 %*% percM[ef[i], ]
}
Torn.df$KE = .5 * EW2 * Torn.df$Area * 1000
Summarize the tornado path area (units of sq. km) and path length (units of km).
Torn.df %>%
# filter(yr >= 1994) %>%
mutate(pathArea = Length * Width) %>%
summarize(mArea = mean(pathArea) / 10^6,
mLength = mean(Length) / 1000,
qA75 = quantile(pathArea, p = .75) / 10^6,
qA95 = quantile(pathArea, p = .95) / 10^6,
qL75 = quantile(Length, p = .75) / 1000,
qL95 = quantile(Length, p = .95) / 1000)
## mArea mLength qA75 qA95 qL75 qL95
## 1 1.396464 5.601662 0.3311056 5.179963 4.82802 25.58851
Create a spatial points data frame. Assign a geographic projection (epsg:4326)
TornP = Torn.df
coordinates(TornP) = ~ slon + slat
proj4string(TornP) = CRS("+init=epsg:4326")
Create a raster and rasterize the spatial points. The CRS for the raster is geographic (latitude/longitude) because it is a raster. Then subset tornado data by raster extent.
xmn = -110; xmx = -74; ymn = 28; ymx = 48; res = 2
r = raster(xmn = xmn, xmx = xmx,
ymn = ymn, ymx = ymx,
resolution = res,
crs = "+init=epsg:4326")
ncell(r); nrow(r); ncol(r)
## [1] 180
## [1] 10
## [1] 18
TornR = rasterize(TornP, r, field = 1, fun = 'count')
TornP = crop(TornP, r)
extent(TornP)
## class : Extent
## xmin : -110
## xmax : -74
## ymin : 28
## ymax : 48
dim(TornP)[1]/dim(TornL)[1] * 100
## [1] 92.49759
The raster extent contains 92.5% of all U.S. tornadoes.
Plot the counts and add state borders. These are total counts of genesis points by cells within the extent of the raster. Cells without tornadoes are NA.
suppressPackageStartupMessages(library("mapproj"))
suppressPackageStartupMessages(library("maptools"))
suppressPackageStartupMessages(library("rasterVis"))
suppressPackageStartupMessages(library("wesanderson"))
ext = as.vector(extent(r))
bndryS = map("state", fill = TRUE,
xlim = ext[1:2],
ylim = ext[3:4],
plot = FALSE)
IDs = sapply(strsplit(bndryS$names, ":"), function(x) x[1])
bndrySP = map2SpatialPolygons(bndryS, IDs = IDs,
proj4string = CRS(projection(r)))
range(values(TornR), na.rm = TRUE)
## [1] 3 1037
#rng = seq(0, 450, 50) # res = 1
rng = seq(0, 1200, 200)
cr = wes_palette(6, name = "Zissou", type = "continuous")
levelplot(TornR, margin = FALSE,
sub = expression(" Number of Tornadoes"),
xlab = NULL, ylab = NULL,
col.regions = cr, at = rng,
colorkey = list(space = 'bottom'),
par.settings = list(fontsize = list(text = 15))) +
latticeExtra::layer(sp.polygons(bndrySP, col = gray(.45), lwd = 1))
Get the dates on which there were at least N tornadoes in the domain.
N = 16
Day.df = as.data.frame(TornP) %>%
filter(mag > 0) %>%
group_by(date) %>%
summarize(nT = n(),
nST = sum(mag >= 3),
avgL = mean(Length),
avgW = mean(Width)) %>%
filter(nT >= N)
DD = Day.df$date
sum(TornP$inj[TornP$date %in% DD])/sum(TornP$inj) * 100
## [1] 47.97762
sum(TornP$fat[TornP$date %in% DD])/sum(TornP$fat) * 100
## [1] 51.81448
48% of all injuries and 53% of all fatalities occur on days with at least 16 tornadoes.
Filter the tornado data by the above dates and select columns.
Torn.df = as.data.frame(TornP) %>%
filter(date %in% Day.df$date) %>%
mutate(dayF = as.factor(date),
hour = hour(DateTime)) %>%
dplyr::select(date, DateTime, hour, yr, mo,
Length, Width,
mag, st, inj, fat,
loss, closs, slat, slon, elat, elon)
TornP = subset(TornP, date %in% Day.df$date)
Rasterize a single day. Begin by subsetting the data for 1959-05-10, then rasterize on the subset point data. Next, use the clump function to clump regions based on a Queen’s Case (8 directions) contiguity.
#TornP1 = subset(TornP, date == "2011-04-27")
TornP1 = subset(TornP, date == "1959-05-10")
TornR = rasterize(TornP1, r, field = 1, fun = 'count')
range(values(TornR), na.rm = TRUE)
## [1] 1 5
#rng = seq(0, 25, 5)
#rng = seq(0, 50, 10)
rng = seq(0, 5, 1)
cr = wes_palette(5, name = "Zissou", type = "continuous")
levelplot(TornR, margin = FALSE,
sub = expression(" Number of Tornadoes [5-10-1959]"),
xlab = NULL, ylab = NULL,
col.regions = cr, at = rng,
colorkey = list(space = 'bottom'),
par.settings = list(fontsize = list(text = 15))) +
latticeExtra::layer(sp.polygons(bndrySP, col = gray(.15), lwd = 1))
Cluster
suppressPackageStartupMessages(library("igraph"))
C1 = clump(TornR, directions = 8, gaps = FALSE)
range(values(C1), na.rm = TRUE)
## [1] 1 3
rng = seq(0, 6, 1)
cr = brewer.pal("Set1", n = 7)
levelplot(C1, margin = FALSE,
sub = expression(" Tornadoes Clusters [5-10-1959]"),
xlab = NULL, ylab = NULL,
col.regions = cr, at = rng,
colorkey = NULL,
par.settings = list(fontsize = list(text = 15))) +
latticeExtra::layer(sp.polygons(bndrySP, col = gray(.15), lwd = 1)) +
latticeExtra::layer(sp.polygons(TornP1, fill = gray(.4), alpha = .3))
TornP1$Cluster = raster::extract(C1, TornP1)
Get population density (/sq. km) raster, convert to points and project.
Pop = raster("../usadens/usads00g/w001001.adf")
Pop = crop(Pop, r)
pop = resample(aggregate(Pop,
fact = c(nrow(Pop)/nrow(r), ncol(Pop)/ncol(r)),
fun = mean), r)
Loop over all big tornado days and determine clusters based on clumps. This loop takes time. Remove the lines that creates the cluster stack to speed the loop. Subset the population grid and compute the mean pop density. Compute the approximate cell areas (sq. km) and sum the areas over each cluster.
DD = Day.df$date
ClusNum = numeric()
PopDen = numeric()
ClusArea = numeric()
#Cs = stack()
for(dd in DD){
# print(dd)
Torn = subset(TornP, date == dd)
TornR = rasterize(Torn, r, field = 1, fun = 'count')
C = clump(TornR, directions = 8, gaps = FALSE)
# Cs = stack(Cs, C)
clusNum = raster::extract(C, Torn)
ClusNum = c(ClusNum, clusNum)
Area = raster::area(C)
clusArea = numeric()
popDen = numeric()
for(i in 1:max(clusNum)){
popDen[i] = mean(pop[C == i], na.rm = TRUE)
clusArea[i] = sum(Area[C == i])
}
PopDen = c(PopDen, popDen[clusNum])
ClusArea = c(ClusArea, clusArea[clusNum])
}
TornP$ClusNum = ClusNum
TornP$PopDen = PopDen
TornP$ClusArea = ClusArea
TornP$DateCluster = paste(TornP$date, "-", TornP$ClusNum, sep = "")
#names(Cs) = DD
Determine cluster statistics.
df = as.data.frame(TornP) %>%
group_by(DateCluster) %>%
summarize(nT = n(),
Year = yr[1],
mo = mo[1],
Ma = factor(month.abb[mo],
levels = month.abb[1:12]),
mLength = mean(Length),
totalLength = sum(Length),
mWidth = mean(Width),
totalWidth = sum(Width),
maxEF = max(mag),
totalKE = sum(KE),
totInj = sum(inj),
totFat = sum(fat),
clusArea = ClusArea[1],
clusDensity = nT/clusArea,
clusPop = PopDen[1] * clusArea) %>%
arrange(desc(clusDensity))
df = as.data.frame(df)
Plots
suppressPackageStartupMessages(library("scales"))
df %>%
filter(totInj > 0) %>%
ggplot(., aes(totalKE/10^9, totInj)) +
geom_point() +
scale_x_log10(labels = comma) +
scale_y_log10() +
geom_smooth() +
xlab("Tornado Energy Dissipation [GJ/kg]") + ylab("Number of Injuries")
df %>%
filter(totInj > 0) %>%
ggplot(., aes(clusPop, totInj)) +
geom_point() +
scale_x_log10() +
scale_y_log10() +
geom_smooth()
Models
summary(lm(log(totFat) ~ log(totalKE) + log(clusPop), data = df[df$totFat > 0 & df$Year >= 2007, ]))
##
## Call:
## lm(formula = log(totFat) ~ log(totalKE) + log(clusPop), data = df[df$totFat >
## 0 & df$Year >= 2007, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6913 -0.7663 -0.1686 0.8778 2.8737
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -23.7057 4.4818 -5.289 4.69e-06 ***
## log(totalKE) 0.6020 0.1264 4.761 2.53e-05 ***
## log(clusPop) 0.3656 0.1837 1.991 0.0534 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.07 on 40 degrees of freedom
## Multiple R-squared: 0.4494, Adjusted R-squared: 0.4218
## F-statistic: 16.32 on 2 and 40 DF, p-value: 6.566e-06
summary(lm(log(totInj) ~ log(totalKE) + log(clusPop), data = df[df$totInj > 0 & df$Year >= 2007, ]))
##
## Call:
## lm(formula = log(totInj) ~ log(totalKE) + log(clusPop), data = df[df$totInj >
## 0 & df$Year >= 2007, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4111 -0.8653 0.1392 0.8734 3.3598
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -23.62180 4.15573 -5.684 3.35e-07 ***
## log(totalKE) 0.58524 0.09995 5.855 1.72e-07 ***
## log(clusPop) 0.50441 0.18211 2.770 0.0073 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.421 on 65 degrees of freedom
## Multiple R-squared: 0.4092, Adjusted R-squared: 0.391
## F-statistic: 22.51 on 2 and 65 DF, p-value: 3.728e-08
A 100% increase in KE (doubling) leads to a 2^(.6) - 1 = 51% increase in the number of injuries (and fatalities) since 2007. The population effect is also significant although less than the KE effect.
Trends
df2 = df %>%
filter(maxEF > 0, Year >= 1955) %>%
group_by(Year) %>%
summarize(nC = n(),
clusArea = sum(clusArea),
areaPerClus = clusArea/nC,
nT = sum(nT),
clusDensity = nT/clusArea * 10^6,
mLength = mean(mLength),
mWidth = mean(mWidth),
mKE = mean(totalKE),
sFat = sum(totFat),
sInj = sum(totInj),
mFat = sum(totFat)/nC,
mInj = sum(totInj)/nC)
ggplot(df2, aes(x = Year, y = mKE/10^12)) +
geom_point() +
geom_smooth() +
scale_y_continuous(labels = comma) +
ylab("Mean Energy Dissipation (TJ)")
ggplot(df2, aes(x = Year, y = sInj)) +
geom_point() +
geom_smooth(method = lm) +
ylab("Total Injuries")
ggplot(df2, aes(x = Year, y = sFat)) +
geom_point() +
geom_smooth(method = lm) +
ylab("Total Fatalities")
ggplot(df2, aes(x = Year, y = nC)) +
geom_point() +
geom_smooth(method = lm) +
ylab("Number of Clusters")
ggplot(df2, aes(x = Year, y = areaPerClus)) +
geom_point() +
geom_smooth(method = lm) +
scale_y_continuous(labels = comma) +
ylab("Area Per Cluster (sq. km)")
ggplot(df2, aes(x = Year, y = clusDensity)) +
geom_point() +
geom_smooth(method = lm) +
ylab("Number of Tornadoes [per 10,000 sq. km]") +
scale_x_continuous(breaks = seq(1955, 2015, 10)) +
xlab("Year")
ggplot(df2, aes(x = Year, y = nT)) +
geom_point() +
geom_smooth(method = lm) +
ylab("Number of Tornadoes in Clusters")
ggplot(df2, aes(x = Year, y = mLength/1000)) +
geom_point() +
geom_smooth(method = lm) +
ylab("Average Path Length (km)")
Clusters are getting larger and there are more tornadoes per cluster. The average track length has declined but has been holding roughly steady since the 1980s.