New Methods in Tornado Climatology

Widen/Fricker/Elsner

Set the working directory and load the required packages; startup messages are suppressed.

slibrary <- function(...) suppressMessages(library(...))  # load a package quietly
slibrary(ggplot2); slibrary(plyr); slibrary(maps); slibrary(ggmap);
slibrary(rgdal); slibrary(sp); slibrary(lubridate); slibrary(gamlss.cens);
slibrary(fpc); slibrary(MASS); slibrary(scales); slibrary(rgeos); slibrary(dplyr);
slibrary(ggthemes); slibrary(wesanderson); slibrary(httr); slibrary(moments);
slibrary(spdep); slibrary(RColorBrewer)

Figure 1: Timeline of technological improvements in tornado reporting.

Event = c("Systematic Tabulation of Tornado Data", "Tornado Watch and Warning Program", "Tornado Spotter Network", 
          "Fujita Scale", "Discovery of Microbursts", "Doppler Radar", "Mobile Radar", "Enhanced Fujita Scale")
Event = factor(Event, levels = rev(Event))
StartYear = c(1916, 1952, 1953, 1971, 1973, 1988, 1996, 2007)
EndYear = rep(2014, 8)
EndYear[4] = 2007
df = data.frame(Event, StartYear, EndYear)
ggplot(df) +
  geom_segment(aes(x = StartYear, xend = EndYear, y = Event, yend = Event), size = 4) +
#               arrow = arrow(length = unit(.25, "cm"), type = "open")) +
  xlab("Year") + ylab("") +
  scale_x_continuous(limits = c(1900, 2020)) +
  theme_bw() +
  theme(text = element_text(size = 20))

plot of chunk Timeline

The tornado data come from the Storm Prediction Center (SPC): http://www.spc.noaa.gov/gis/svrgis/.

download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip",
              "tornado.zip", mode="wb")
unzip("tornado.zip")

Read the tornado data.

TornL = readOGR(dsn = ".", layer = "tornado", stringsAsFactors=FALSE)
## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "tornado"
## with 57988 features and 21 fields
## Feature type: wkbLineString with 2 dimensions
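
The layer's native projection is reused below whenever points are transformed to match the tracks; to inspect it:

proj4string(TornL)  # proj4 string of the track layer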

Convert the columns in the data slot to appropriate types; path length and width are also converted to metric units.

TornL$Year = as.integer(TornL$YR)
TornL$Month = as.integer(TornL$MO)
TornL$EF = as.integer(TornL$MAG)
TornL$Date = as.Date(TornL$DATE)
TornL$Length = as.numeric(TornL$LEN) * 1609.34  # path length: miles to meters
TornL$Width = as.numeric(TornL$WID) * 0.9144    # path width: yards to meters
TornL$FAT = as.integer(TornL$FAT)
TornL$SLON = as.numeric(TornL$SLON)
TornL$SLAT = as.numeric(TornL$SLAT)
TornL$ELON = as.numeric(TornL$ELON)
TornL$ELAT = as.numeric(TornL$ELAT)
TornL$INJ = as.numeric(TornL$INJ)
TornL$ID = as.integer(row.names(TornL)) + 1  # 1-based ID from the 0-based feature row names
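
A quick sanity check that the conversions left Length and Width in meters (a sketch; typical paths are kilometers long and tens to hundreds of meters wide):

summary(TornL$Length)  # should be in meters
summary(TornL$Width)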

Figure 2: The number of tornadoes and tornado days.

Torn2.df = as.data.frame(TornL)
Torn3.df = subset(Torn2.df, 
                  Year >= 1953 & EF > 0)
byYear =  group_by(Torn3.df, Year)
TorDays.df = summarize(byYear, 
                       nT = n(), 
                       nDay = length(unique(Date)))
p2a = ggplot(TorDays.df, aes(x = Year, y = nT)) +
  geom_point(size = 3) +
  ylab("Number of Tornadoes") +
  geom_smooth(method="lm") +
  theme_bw()
p2b = ggplot(TorDays.df, aes(x = Year, y = nDay)) +
  geom_point(size = 3) +
  ylab("Number of Tornado Days") +
  geom_smooth() +
  theme_bw()
source("multiplot.txt")
mat = matrix(c(1, 2), nrow = 1, byrow = TRUE)
p2a = p2a + ggtitle("a") + theme(plot.title = element_text(hjust = 0))
p2b = p2b + ggtitle("b") + theme(plot.title = element_text(hjust = 0))              
multiplot(p2a, p2b, layout = mat)
## Loading required package: grid
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk TornNum

Panel a shows a linear trend in the annual number of tornadoes; panel b shows a nonlinear (loess) trend in the annual number of tornado days.
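
One way to attach a number to the trend in panel a (a minimal sketch, not part of the original analysis) is a Poisson regression of the annual counts on year; the exponentiated slope is the multiplicative change per year.

fitT = glm(nT ~ Year, family = poisson, data = TorDays.df)
exp(coef(fitT)["Year"])  # multiplicative change in the annual count per year

A quasipoisson family is a simple guard against overdispersion in these counts.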

Figure 3: Proportion of all tornadoes occurring on days with at least N tornadoes for N = 4, 8, and 16.

byDay = group_by(Torn3.df, Date)
Day.df = summarise(byDay,
                   nT = n())
Day.df$Year = year(Day.df$Date)
tmp = table(Day.df$Year, Day.df$nT)
tmp1 = t(apply(t(tmp), 2, function(x) rev(cumsum(rev(x)))))
nTYear.df = data.frame(tmp1)
dimnames(nTYear.df) = dimnames(tmp)
nTYear.df$Year = as.integer(rownames(nTYear.df))
nT1 = numeric()
nT4 = numeric()
nT8 = numeric()
nT16 = numeric()
for(i in 1:length(nTYear.df$Year)){
  nT1 = c(nT1, sum(nTYear.df[i, 1:54]))  # columns 1:54 span all daily-count classes
  nT4 = c(nT4, sum(nTYear.df[i, 4:54]))
  nT8 = c(nT8, sum(nTYear.df[i, 8:54]))
  nT16 = c(nT16, sum(nTYear.df[i, 16:54]))
}
pTYear.df = data.frame(Year = rep(1953:2013, 3),
                Value = c(nT4, nT8, nT16),
                Value2 = c(nT4/nT1, nT8/nT1, nT16/nT1),
                Type = factor(rep(c("N = 4", "N = 8", "N = 16"), 
                           each = length(nTYear.df$Year)), levels =
                           c("N = 4", "N = 8", "N = 16")))
ggplot(pTYear.df, aes(Year, Value2)) + 
  geom_point(size = 3) +
  theme_bw() +
  facet_wrap(~ Type, nrow = 3, 
             scales = "free_y") +
  stat_smooth(method = "glm", family = "binomial", 
              formula = y ~ x, 
              data = pTYear.df, se = TRUE) +
  ylab("Proportion of All Tornadoes Occurring On Days\nWith At Least N Tornadoes")
## Warning: non-integer #successes in a binomial glm!
## Warning: non-integer #successes in a binomial glm!
## Warning: non-integer #successes in a binomial glm!

plot of chunk TornProp

Gray bands indicate the pointwise 95% confidence interval around the fitted logistic trend.
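
The counting step above hinges on a reversed cumulative sum: rev(cumsum(rev(x))) converts the number of days with exactly n tornadoes into the number with at least n. A toy illustration:

x = c(5, 3, 2, 1)    # days with exactly 1, 2, 3, 4 tornadoes
rev(cumsum(rev(x)))  # 11 6 3 1: days with at least 1, 2, 3, 4 tornadoes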

Figure 4: Clustering procedure.

N = 16 #Days with at least 16 (E)F1+ tornadoes
Day2.df = subset(Day.df, nT >= N)
Torn4.df = subset(Torn3.df, Date %in% Day2.df$Date)
Torn4.df$dayF = as.factor(as.character(Torn4.df$Date))
Torn4.spdf = SpatialPointsDataFrame(coords = cbind(Torn4.df$SLON, Torn4.df$SLAT), 
                                    data = Torn4.df)
ll = "+proj=longlat +datum=NAD83 +ellps=GRS80"
proj4string(Torn4.spdf) = CRS(ll)
Torn4.spdf = spTransform(Torn4.spdf, CRS(proj4string(TornL)))
kmax = N - 1
nc = numeric()
meds = numeric()
size = numeric()
area = numeric()
ID = numeric()
member = numeric()
for(i in 1:dim(Day2.df)[1]){
  xx = subset(Torn4.spdf, Date == Day2.df$Date[i])
  cc = coordinates(xx)
  best = pamk(cc, krange = 1:kmax, alpha = .05)  # PAM clustering with automatic choice of k
  nc[i] = best$nc
  meds = rbind(meds, best$pamobject$medoids)
  clusSize = summary(best$pamobject)$clusinfo[, 1]
  size = c(size, clusSize)
  ID = c(ID, xx$ID[best$pamobject$id.med])
  member = c(member, best$pamobject$clustering)
  for(j in 1:best$nc){
    zz = SpatialPoints(matrix(best$pamobject$data[best$pamobject$clustering == j, ], ncol = 2))
    proj4string(zz) = CRS(proj4string(TornL))
    if(clusSize[j] > 1) CHull = gConvexHull(zz)
    if(clusSize[j] == 1) CHull = zz
    Area = gArea(gBuffer(CHull, width = 40000))  # cluster area with a 40-km buffer (m^2)
    area = c(area, Area)
  }
}
df0 = cbind(Day2.df, nc)
df0$Year = year(df0$Date)
df = data.frame(Date = rep(Day2.df$Date, nc), 
                nT = rep(Day2.df$nT, nc),
                meds,
                size,
                area,
                ID)
spdf = SpatialPointsDataFrame(coords = meds, 
                              data = df,
                              proj4string = CRS(proj4string(TornL)))
spdf = spTransform(spdf, CRS(ll))
df2 = as.data.frame(spdf)
df2$x = df2$coords.x1.1  # medoid longitude after transformation
df2$y = df2$coords.x2.1  # medoid latitude
loc = geocode("Athens, GA")
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Athens,+GA&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms
loc = unlist(loc)
Map = get_map(location = loc, source = "google", 
              maptype = "roadmap", zoom = 6,
              color = "color")
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=33.95,-83.383333&zoom=6&size=%20640x640&scale=%202&maptype=roadmap&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms
dd = "2008-05-08"
xx = subset(Torn4.spdf, Date == dd)
cc = coordinates(xx)
best = pamk(cc, krange = 1:kmax, alpha = .05)
H1 = gConvexHull(SpatialPoints(best$pamobject$data[best$pamobject$clustering == 1, ]))
H2 = gConvexHull(SpatialPoints(best$pamobject$data[best$pamobject$clustering == 2, ]))
H1 = gBuffer(H1, width = 40000) #40 km buffer
H2 = gBuffer(H2, width = 40000)
proj4string(H1) = proj4string(TornL)
proj4string(H2) = proj4string(TornL)
H1 = spTransform(H1, CRS(ll))
H2 = spTransform(H2, CRS(ll))
H1.df = fortify(H1)
H2.df = fortify(H2)
p4a = ggmap(Map, extent = "normal") +
  geom_point(aes(x = SLON, y = SLAT),
             data = Torn4.df[Torn4.df$Date == dd, ],
             color = "black", size = 2) +
  geom_point(aes(x = x, y = y, size = factor(size)), 
             data = df2[df2$Date == dd, ], 
             color = "red", alpha = .5) +
  geom_polygon(aes(x = long, y = lat),
            data = H1.df, fill = "gray", alpha = .45, color = "black", size = 1) +
  geom_polygon(aes(x = long, y = lat),
            data = H2.df, fill = "gray", alpha = .45, color = "black", size = 1) +
    theme(panel.grid.minor = element_blank(), 
        panel.grid.major = element_blank(), 
        panel.background = element_blank(),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        legend.position = "bottom") +
  scale_size_discrete("Cluster Size\n(Number of Tornadoes)", range = c(4, 6)) +
  xlab("") + ylab("")
loc = geocode("St Louis, MO")
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=St+Louis,+MO&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms
loc = unlist(loc)
Map2 = get_map(location = loc, source = "google", 
              maptype = "roadmap", zoom = 5,
              color = "color")
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=38.627003,-90.199404&zoom=5&size=%20640x640&scale=%202&maptype=roadmap&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms
lla = attr(Map2, "bb")$ll.lat
llo = attr(Map2, "bb")$ll.lon
ula = attr(Map2, "bb")$ur.lat
ulo = attr(Map2, "bb")$ur.lon
df3 = subset(df2, y <= ula & y >= lla & 
                  x <= ulo & x >= llo)
p4b = ggmap(Map2, extent = "normal") + 
  geom_point(aes(x = x, y = y, size = size), 
             data = df3, color = "red", alpha = .5) +
  xlab("") + ylab("") +
  theme(panel.grid.minor = element_blank(), 
        panel.grid.major = element_blank(), 
        panel.background = element_blank(),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        legend.position = "bottom") + 
   scale_size_continuous("Cluster Size\n(Number of Tornadoes)")
mat = matrix(c(1, 2), nrow = 1, byrow = TRUE)
p4a = p4a + ggtitle("a") + theme(plot.title = element_text(hjust = 0))
p4b = p4b + ggtitle("b") + theme(plot.title = element_text(hjust = 0))              
multiplot(p4a, p4b, layout = mat)

plot of chunk Clusters
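
The number of clusters on each day comes from pamk(), which fits partitioning-around-medoids solutions over krange and selects the best by average silhouette width (with a Duda-Hart test against k = 1 when 1 is in krange). A toy illustration on two well-separated point clouds (a sketch, independent of the tornado data):

set.seed(1)
toy = rbind(cbind(rnorm(25, 0), rnorm(25, 0)),
            cbind(rnorm(25, 10), rnorm(25, 10)))
pamk(toy, krange = 1:5, alpha = .05)$nc  # expect 2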

Figure 5: Length, width and EF scale.

Torn3.df = subset(Torn2.df, 
                  Year >= 1994 & EF >= 0)
Torn3.df$EFF = factor(Torn3.df$EF, ordered = TRUE)
p5a = ggplot(Torn3.df, aes(x = EFF, y = Length/1000)) + 
  geom_boxplot(fill = 'grey') + theme_bw() + coord_flip() +
  ylab("Path Length (km)") + xlab("EF Category")
p5b = ggplot(Torn3.df, aes(x = EFF, y = Width)) + 
  geom_boxplot(fill = 'grey') + theme_bw() + coord_flip() +
  ylab("Path Width (m)") + xlab("EF Category")
mat = matrix(c(1, 2), nrow = 1, byrow = TRUE)
p5a = p5a + ggtitle("a") + theme(plot.title = element_text(hjust = 0))
p5b = p5b + ggtitle("b") + theme(plot.title = element_text(hjust = 0))              
multiplot(p5a, p5b, layout = mat)

plot of chunk LenWidEF
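
The boxplots show strongly right-skewed path dimensions. The moments package (loaded above) quantifies this; positive values indicate right skew (a quick check):

skewness(Torn3.df$Length)
skewness(Torn3.df$Width)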

Figure 6: Total annual kinetic energy.

Torn3.df = subset(Torn2.df, 
                  Year >= 1994 & EF >= 0)
Torn3.df$Area = .5 * Torn3.df$Length * .5 * Torn3.df$Width * pi  # elliptical path area (semi-axes Length/2 and Width/2)
threshold = 64.9
Low = (c(65, 86, 111, 136, 166, 200) - threshold) * 0.44704   # EF-category lower wind bounds: mph above threshold, to m/s
High = (c(86, 111, 136, 166, 200, 250) - threshold) * 0.44704 # EF-category upper wind bounds
ef = Torn3.df$EF + 1
WS = Surv(Low[ef], High[ef], rep(3, length(ef)), "interval")  # interval-censored winds (event code 3 = interval)
Torn3.df$WS = WS
rm(WS)
WEIic = cens(WEI, type = "interval", local = FALSE)  # interval-censored Weibull family for gamlss
fitw = gamlss(WS ~ Length + Width, 
              sigma.formula = ~ Width + Length,
              data = Torn3.df, family = WEIic)
## GAMLSS-RS iteration 1: Global Deviance = 47589 
## GAMLSS-RS iteration 2: Global Deviance = 42327 
## GAMLSS-RS iteration 3: Global Deviance = 40620 
## GAMLSS-RS iteration 4: Global Deviance = 40188 
## GAMLSS-RS iteration 5: Global Deviance = 40108 
## GAMLSS-RS iteration 6: Global Deviance = 40095 
## GAMLSS-RS iteration 7: Global Deviance = 40093 
## GAMLSS-RS iteration 8: Global Deviance = 40093 
## GAMLSS-RS iteration 9: Global Deviance = 40093 
## GAMLSS-RS iteration 10: Global Deviance = 40093 
## GAMLSS-RS iteration 11: Global Deviance = 40093 
## GAMLSS-RS iteration 12: Global Deviance = 40093 
## GAMLSS-RS iteration 13: Global Deviance = 40093 
## GAMLSS-RS iteration 14: Global Deviance = 40093 
## GAMLSS-RS iteration 15: Global Deviance = 40093 
## GAMLSS-RS iteration 16: Global Deviance = 40093 
## GAMLSS-RS iteration 17: Global Deviance = 40093 
## GAMLSS-RS iteration 18: Global Deviance = 40093 
## GAMLSS-RS iteration 19: Global Deviance = 40093 
## GAMLSS-RS iteration 20: Global Deviance = 40093
## Warning: Algorithm RS has not yet converged
samplefitw <- function(W, mu, sigma, n = 1){
  # evaluate the fitted cdf at each censored observation, then invert on the Weibull scale
  out <- qweibull(pWEIic(q = W[rep(1:nrow(W), n)], mu = mu, sigma = sigma),
                  scale = mu, shape = sigma)
  return(matrix(out, ncol = n))
}
samples = samplefitw(Torn3.df$WS, mu = fitw$mu.fv,
                      sigma = fitw$sigma.fv, n = 1)
Torn3.df$Wmax = samples + threshold * .44704  # add back the threshold, converted to m/s
thresh = 29.06  # ~65 mph in m/s
perMax = c(1, .228, .115, .067, .032, .017)  # fraction of the damage path at maximum winds, by EF rating
perMax[1] = 0
EW = thresh + (Torn3.df$Wmax - thresh)/log(1/perMax[Torn3.df$EF + 1])
EW2 = EW^2 + (EW - thresh)^2/(log(1/perMax[Torn3.df$EF + 1])^2)
Torn3.df$KE = .5 * EW2 * Torn3.df$Area * 1000
totalKE = ddply(Torn3.df, .(Year), summarize,
                count = length(Year),
                tKE = sum(KE))
pal = wes.palette(name = "Zissou", type = "continuous")
or = order(totalKE$tKE, decreasing = FALSE)
totalKE$YearF = factor(totalKE$Year, levels = totalKE$Year[or])
ggplot(totalKE, aes(x = YearF, y = tKE/10^15, fill = count)) + 
  geom_bar(stat = "identity") + 
  coord_flip() + 
  xlab("Year") + 
  ylab("Total Kinetic Energy of All U.S. Tornadoes (petajoules)\nRanked by Year") + 
  scale_fill_gradientn(colours = pal(20), name = "Annual\nNumber of\nTornadoes") +
  theme_bw() 

plot of chunk TKE
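
The samplefitw() helper above rests on the probability integral transform: pWEIic() evaluates the fitted cdf at each interval-censored observation and qweibull() maps that probability back to a wind speed. The round trip is easiest to see with the uncensored base-R pair (a minimal sketch with arbitrary shape and scale, not the fitted values):

u = pweibull(50, shape = 2, scale = 60)    # cdf value of a 50 m/s wind
qweibull(u, shape = 2, scale = 60)         # recovers 50
qweibull(runif(3), shape = 2, scale = 60)  # inverse-cdf draws from the same Weibull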

Figure 7: Kansas tornado counts. Get administrative county boundaries from https://www.census.gov/geo/maps-data/data/cbf/cbf_counties.html. Here we use the 5m (1:5,000,000) generalized boundaries; the 20m (1:20,000,000) files are the lowest resolution offered.

download.file("http://www2.census.gov/geo/tiger/GENZ2013/cb_2013_us_county_5m.zip",
              "cb_2013_us_county_5m.zip", mode = "wb")
unzip("cb_2013_us_county_5m.zip")
US.sp = readOGR(dsn = ".", layer = "cb_2013_us_county_5m", 
                                    stringsAsFactors = FALSE)
## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "cb_2013_us_county_5m"
## with 3234 features and 9 fields
## Feature type: wkbPolygon with 2 dimensions

Extract Kansas using the state Federal Information Processing Standard (FIPS).

FP = 20
AB = "KS"
ST.sp = US.sp[US.sp$STATEFP == FP, ]
nrow(ST.sp)
## [1] 105
county = paste(ST.sp$STATEFP, ST.sp$COUNTYFP, sep = "")
countiesun.sp = geometry(spChFIDs(ST.sp, county)) 
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))
rgeos::gArea(counties.sp) / 10^6  # area of Kansas in km^2
## [1] 210845
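
Rather than hard-coding, the state FIPS code can be looked up from the state.fips table in the maps package (loaded above); a small sketch:

unique(maps::state.fips$fips[maps::state.fips$abb == "KS"])  # 20, matching FP above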

Choropleth map of tornado counts & tornado days.

#useDrop <- function(x, use.fun = source, ...){
#                    tf <- tempfile()
#                    tmp <- GET(x)
#                    stop_for_status(tmp)
#                    writeBin(content(tmp, type = "raw"), tf)
#                    use.fun(tf, ...)
#                    }
#useDrop("https://dl.dropboxusercontent.com/s/bw5hvvj9e732e8i/CountyStatisticsSupport.R", source)
TornL2 = subset(TornL, Year >= 1970 & EF >= 1)
ct = over(counties.sp, TornL2, returnList = TRUE)  # tornado tracks over each county polygon
names(ct) = county
nT = sapply(ct, function(x) nrow(x))                   # tornado count per county
nTd = sapply(ct, function(x) length(unique(x$Date)))   # tornado-day count per county
Nyears = diff(range(TornL2$Year)) + 1
TracksAll.df = ldply(ct, data.frame)
TracksAll.df$county = rep(county, sapply(ct, nrow))
nTor.df = data.frame(state = AB, county = county, Nyears =  Nyears,
                     nT = nT, nTd = nTd,
                     stringsAsFactors = FALSE, ID = 1:length(county))
spdf = SpatialPolygonsDataFrame(counties.sp, nTor.df)
crq = wes.palette(name = "Zissou", type = "continuous")
range(spdf$nT)
## [1]  1 26
rng = seq(0, 30, 5)
p7a = spplot(spdf, "nT", col = "white", at = rng, 
       col.regions = crq(8),
       colorkey = list(space = "bottom", labels = paste(rng)),
       par.settings = list(axis.line = list(col = NA)),
       sub = "Tornadoes (1970-2013)")
p7a = update(p7a, main=textGrob("a", x=unit(.05, "npc")), par.settings=list(fontsize=list(text=12)))
#plotTorn(x = spdf, tracks = TracksAll.df) +
#  theme(plot.title = element_text(vjust = -2,
#                                  hjust = .3,
#                                  size = 20, 
#                                  face = "bold")) +
#  scale_fill_gradientn(colours = crq(8))
#OR
#plotTorn(x = spdf, tracks = TracksAll.df) +
#  scale_fill_gradientn("EF1+\nTornado Reports\n(1970-2013)", 
#                     colours = crq(10))
range(spdf$nTd)
## [1]  1 19
rng = seq(0, 20, 4)
p7b = spplot(spdf, "nTd", col = "white", at = rng, 
       col.regions = crq(8),
       colorkey = list(space = "bottom", labels = paste(rng)),
       par.settings = list(axis.line = list(col = NA)),
       sub = "Tornado Days (1970-2013)")
p7b = update(p7b, main=textGrob("b", x=unit(.05, "npc")), par.settings=list(fontsize=list(text=12)))
print(p7a, position = c(0,.5,1,1),more=T) 
print(p7b, position = c(0,0,1,.5)) 

plot of chunk CountyTorn

cor.test(spdf$nT, spdf$nTd)
## 
##  Pearson's product-moment correlation
## 
## data:  spdf$nT and spdf$nTd
## t = 22.58, df = 103, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8731 0.9395
## sample estimates:
##    cor 
## 0.9121
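
Counts in neighboring counties are not independent, which matters for the county-level model in Figure 8. The spdep package (loaded above) can check for spatial autocorrelation; a minimal sketch using queen contiguity, not part of the original analysis:

nb = poly2nb(spdf)              # neighbors sharing a border or corner
lw = nb2listw(nb, style = "W")  # row-standardized spatial weights
moran.test(spdf$nT, lw)         # Moran's I for the county tornado counts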

Figure 8: Model for county-level tornado counts. See http://rpubs.com/jelsner/RegionalTornadoStudies for a similar figure made with this code (chunk CountyModel, dev='tiff', fig.width=6, fig.height=9):

CRS0 = CRS("+proj=longlat +datum=WGS84")
bc = getBordersAndCenters(spdf, CRS0)
df2 = bc$centers
df3 = cbind(df, df2)
names(df3)[5] = 'newfield'
borders.df = merge(bc$borders, df3[c("county", "newfield", "Sig")], by = "county")
borders2.df = borders.df[borders.df$Sig, ]
crq = brewer.pal(8, "BrBG")
ggplotSpdata(spdf, "RandomQ50", tfun = function(x) round((exp(x) - 1) * 100)) +
  scale_fill_gradientn("Tornado Occurrence ...
  ... aes(x = long, y = lat, group = group, fill = newfield),
      color = muted("brown"), size = 2) +
  geom_text(data = df3[df3$Sig, ],
            aes(x = long, y = lat, group = NULL, label = round(newfield)),
            vjust = .5, size = 5, color = 'black')