Widen/Fricker/Elsner
Set working directory and load packages.
slibrary <- function(...) suppressMessages(library(...))
slibrary(ggplot2); slibrary(plyr); slibrary(maps); slibrary(ggmap);
slibrary(rgdal); slibrary(sp); slibrary(lubridate); slibrary(gamlss.cens);
slibrary(fpc); slibrary(MASS); slibrary(scales); slibrary(rgeos); slibrary(dplyr);
slibrary(ggthemes); slibrary(wesanderson); slibrary(httr); slibrary(moments);
slibrary(spdep); slibrary(RColorBrewer)
Figure 1: Timeline of technological improvements in tornado reporting.
Event = c("Systematic Tabulation of Tornado Data", "Tornado Watch and Warning Program", "Tornado Spotter Network",
"Fujita Scale", "Discovery of Microbursts", "Doppler Radar", "Mobile Radar", "Enhanced Fujita Scale")
Event = factor(Event, levels = rev(Event))
StartYear = c(1916, 1952, 1953, 1971, 1973, 1988, 1996, 2007)
EndYear = rep(2014, 8)
EndYear[4] = 2007
df = data.frame(Event, StartYear, EndYear)
ggplot(df) +
geom_segment(aes(x = StartYear, xend = EndYear, y = Event, yend = Event), size = 4) +
# arrow = arrow(length = unit(.25, "cm"), type = "open")) +
xlab("Year") + ylab("") +
scale_x_continuous(limits = c(1900, 2020)) +
theme_bw() +
theme(text = element_text(size = 20))
Data from the Storm Prediction Center (SPC) http://www.spc.noaa.gov/gis/svrgis/.
download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip",
"tornado.zip", mode="wb")
unzip("tornado.zip")
Read the tornado data.
TornL = readOGR(dsn = ".", layer = "tornado", stringsAsFactors=FALSE)
## OGR data source with driver: ESRI Shapefile
## Source: ".", layer: "tornado"
## with 57988 features and 21 fields
## Feature type: wkbLineString with 2 dimensions
Change the data types in the data slot.
TornL$Year = as.integer(TornL$YR)
TornL$Month = as.integer(TornL$MO)
TornL$EF = as.integer(TornL$MAG)
TornL$Date = as.Date(TornL$DATE)
TornL$Length = as.numeric(TornL$LEN) * 1609.34
TornL$Width = as.numeric(TornL$WID) * 0.9144
TornL$FAT = as.integer(TornL$FAT)
TornL$SLON = as.numeric(TornL$SLON)
TornL$SLAT = as.numeric(TornL$SLAT)
TornL$ELON = as.numeric(TornL$ELON)
TornL$ELAT = as.numeric(TornL$ELAT)
TornL$INJ = as.numeric(TornL$INJ)
TornL$ID = as.integer(row.names(TornL)) + 1
Figure 2: The number of tornadoes and tornado days.
Torn2.df = as.data.frame(TornL)
Torn3.df = subset(Torn2.df,
Year >= 1953 & EF > 0)
byYear = group_by(Torn3.df, Year)
TorDays.df = summarize(byYear,
nT = n(),
nDay = length(unique(Date)))
p2a = ggplot(TorDays.df, aes(x = Year, y = nT)) +
geom_point(size = 3) +
ylab("Number of Tornadoes") +
geom_smooth(method="lm") +
theme_bw()
p2b = ggplot(TorDays.df, aes(x = Year, y = nDay)) +
geom_point(size = 3) +
ylab("Number of Tornado Days") +
geom_smooth() +
theme_bw()
source("multiplot.txt")
mat = matrix(c(1, 2), nrow = 1, byrow = TRUE)
p2a = p2a + ggtitle("a") + theme(plot.title = element_text(hjust = 0))
p2b = p2b + ggtitle("b") + theme(plot.title = element_text(hjust = 0))
multiplot(p2a, p2b, layout = mat)
## Loading required package: grid
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
a displays linear trend, b nonlinear trend
Figure 3: Proportion of all tornadoes occurring on days with at least N tornadoes for N = 4, 8, and 16.
byDay = group_by(Torn3.df, Date)
Day.df = summarise(byDay,
nT = n())
Day.df$Year = year(Day.df$Date)
tmp = table(Day.df$Year, Day.df$nT)
tmp1 = t(apply(t(tmp), 2, function(x) rev(cumsum(rev(x)))))
nTYear.df = data.frame(tmp1)
dimnames(nTYear.df) = dimnames(tmp)
nTYear.df$Year = as.integer(rownames(nTYear.df))
nT1 = numeric()
nT4 = numeric()
nT8 = numeric()
nT16 = numeric()
for(i in 1:length(nTYear.df$Year)){
nT1 = c(nT1, sum(nTYear.df[i, 1:54]))
nT4 = c(nT4, sum(nTYear.df[i, 4:54]))
nT8 = c(nT8, sum(nTYear.df[i, 8:54]))
nT16 = c(nT16, sum(nTYear.df[i, 16:54]))
}
pTYear.df = data.frame(Year = rep(1953:2013, 3),
Value = c(nT4, nT8, nT16),
Value2 = c(nT4/nT1, nT8/nT1, nT16/nT1),
Type = factor(rep(c("N = 4", "N = 8", "N = 16"),
each = length(nTYear.df$Year)), levels =
c("N = 4", "N = 8", "N = 16")))
ggplot(pTYear.df, aes(Year, Value2)) +
geom_point(size = 3) +
theme_bw() +
facet_wrap(~ Type, nrow = 3,
scales = "free_y") +
stat_smooth(method = "glm", family = "binomial",
formula = y ~ x,
data = pTYear.df, se = TRUE) +
ylab("Proportion of All Tornadoes Occurring On Days\nWith At Least N Tornadoes")
## Warning: non-integer #successes in a binomial glm!
## Warning: non-integer #successes in a binomial glm!
## Warning: non-integer #successes in a binomial glm!
Gray bands indicate prediction interval.
Figure 4: Clustering procedure.
N = 16 #Days with at least 16 (E)F1+ tornadoes
Day2.df = subset(Day.df, nT >= N)
Torn4.df = subset(Torn3.df, Date %in% Day2.df$Date)
Torn4.df$dayF = as.factor(as.character(Torn4.df$Date))
Torn4.spdf = SpatialPointsDataFrame(coords = cbind(Torn4.df$SLON, Torn4.df$SLAT),
data = Torn4.df)
ll = "+proj=longlat +datum=NAD83 +ellps=GRS80"
proj4string(Torn4.spdf) = CRS(ll)
Torn4.spdf = spTransform(Torn4.spdf, CRS(proj4string(TornL)))
kmax = N - 1
nc = numeric()
meds = numeric()
size = numeric()
area = numeric()
ID = numeric()
member = numeric()
for(i in 1:dim(Day2.df)[1]){
xx = subset(Torn4.spdf, Date == Day2.df$Date[i])
cc = coordinates(xx)
best = pamk(cc, krange = 1:kmax, alpha = .05)
nc[i] = best$nc
meds = rbind(meds, best$pamobject$medoids)
clusSize = summary(best$pamobject)$clusinfo[, 1]
# print(clusSize)
size = c(size, clusSize)
ID = c(ID, xx$ID[best$pamobject$id.med])
member = c(member, best$pamobject$clustering)
for(j in 1:best$nc){
zz = SpatialPoints(matrix(best$pamobject$data[best$pamobject$clustering == j, ], ncol = 2))
proj4string(zz) = CRS(proj4string(TornL))
if(clusSize[j] > 1) CHull = gConvexHull(zz)
if(clusSize[j] == 1) CHull = zz
Area = gArea(gBuffer(CHull, width = 40000))
area = c(area, Area)
}
}
df0 = cbind(Day2.df, nc)
df0$Year = year(df0$Date)
df = data.frame(Date = rep(Day2.df$Date, nc),
nT = rep(Day2.df$nT, nc),
meds,
size,
area,
ID)
spdf = SpatialPointsDataFrame(coords = meds,
data = df,
proj4string = CRS(proj4string(TornL)))
spdf = spTransform(spdf, CRS(ll))
df2 = as.data.frame(spdf)
df2$x = df2$coords.x1.1
df2$y = df2$coords.x2.1
loc = geocode("Athens, GA")
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Athens,+GA&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms
loc = unlist(loc)
Map = get_map(location = loc, source = "google",
maptype = "roadmap", zoom = 6,
color = "color")
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=33.95,-83.383333&zoom=6&size=%20640x640&scale=%202&maptype=roadmap&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms
dd = "2008-05-08"
xx = subset(Torn4.spdf, Date == dd)
cc = coordinates(xx)
best = pamk(cc, krange = 1:kmax, alpha = .05)
H1 = gConvexHull(SpatialPoints(best$pamobject$data[best$pamobject$clustering == 1, ]))
H2 = gConvexHull(SpatialPoints(best$pamobject$data[best$pamobject$clustering == 2, ]))
H1 = gBuffer(H1, width = 40000) #40 km buffer
H2 = gBuffer(H2, width = 40000)
proj4string(H1) = proj4string(TornL)
proj4string(H2) = proj4string(TornL)
H1 = spTransform(H1, CRS(ll))
H2 = spTransform(H2, CRS(ll))
H1.df = fortify(H1)
H2.df = fortify(H2)
p4a = ggmap(Map, extent = "normal") +
geom_point(aes(x = SLON, y = SLAT),
data = Torn4.df[Torn4.df$Date == dd, ],
color = "black", size = 2) +
geom_point(aes(x = x, y = y, size = factor(size)),
data = df2[df2$Date == dd, ],
color = "red", alpha = .5) +
geom_polygon(aes(x = long, y = lat),
data = H1.df, fill = "gray", alpha = .45, color = "black", size = 1) +
geom_polygon(aes(x = long, y = lat),
data = H2.df, fill = "gray", alpha = .45, color = "black", size = 1) +
theme(panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom") +
scale_size_discrete("Cluster Size\n(Number of Tornadoes)", range = c(4, 6)) +
xlab("") + ylab("")
loc = geocode("St Louis, MO")
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=St+Louis,+MO&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms
loc = unlist(loc)
Map2 = get_map(location = loc, source = "google",
maptype = "roadmap", zoom = 5,
color = "color")
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=38.627003,-90.199404&zoom=5&size=%20640x640&scale=%202&maptype=roadmap&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms
lla = attr(Map2, "bb")$ll.lat
llo = attr(Map2, "bb")$ll.lon
ula = attr(Map2, "bb")$ur.lat
ulo = attr(Map2, "bb")$ur.lon
df3 = subset(df2, y <= ula & y >= lla &
x <= ulo & x >= llo)
p4b = ggmap(Map2, extent = "normal") +
geom_point(aes(x = x, y = y, size = size),
data = df3, color = "red", alpha = .5) +
xlab("") + ylab("") +
theme(panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom") +
scale_size_continuous("Cluster Size\n(Number of Tornadoes)")
mat = matrix(c(1, 2), nrow = 1, byrow = TRUE)
p4a = p4a + ggtitle("a") + theme(plot.title = element_text(hjust = 0))
p4b = p4b + ggtitle("b") + theme(plot.title = element_text(hjust = 0))
multiplot(p4a, p4b, layout = mat)
Figure 5: Length, width and EF scale.
Torn3.df = subset(Torn2.df,
Year >= 1994 & EF >= 0)
Torn3.df$EFF = factor(Torn3.df$EF, ordered = TRUE)
p5a = ggplot(Torn3.df, aes(x = EFF, y = Length/1000)) +
geom_boxplot(fill = 'grey') + theme_bw() + coord_flip() +
ylab("Path Length (km)") + xlab("EF Category")
p5b = ggplot(Torn3.df, aes(x = EFF, y = Width)) +
geom_boxplot(fill = 'grey') + theme_bw() + coord_flip() +
ylab("Path Width (m)") + xlab("EF Category")
mat = matrix(c(1, 2), nrow = 1, byrow = TRUE)
p5a = p5a + ggtitle("a") + theme(plot.title = element_text(hjust = 0))
p5b = p5b + ggtitle("b") + theme(plot.title = element_text(hjust = 0))
multiplot(p5a, p5b, layout = mat)
Figure 6: Total annual kinetic energy
Torn3.df = subset(Torn2.df,
Year >= 1994 & EF >= 0)
Torn3.df$Area = .5 * Torn3.df$Length * .5 * Torn3.df$Width * pi
threshold = 64.9
Low = (c(65, 86, 111, 136, 166, 200) - threshold) * 0.44704
High = (c(86, 111, 136, 166, 200, 250) - threshold) * 0.44704
ef = Torn3.df$EF + 1
WS = Surv(Low[ef], High[ef], rep(3, length(ef)), "interval")
Torn3.df$WS = WS
rm(WS)
WEIic = cens(WEI, type = "interval", local = "FALSE")
fitw = gamlss(WS ~ Length + Width,
sigma.formula = ~ Width + Length,
data = Torn3.df, family = WEIic)
## GAMLSS-RS iteration 1: Global Deviance = 47589
## GAMLSS-RS iteration 2: Global Deviance = 42327
## GAMLSS-RS iteration 3: Global Deviance = 40620
## GAMLSS-RS iteration 4: Global Deviance = 40188
## GAMLSS-RS iteration 5: Global Deviance = 40108
## GAMLSS-RS iteration 6: Global Deviance = 40095
## GAMLSS-RS iteration 7: Global Deviance = 40093
## GAMLSS-RS iteration 8: Global Deviance = 40093
## GAMLSS-RS iteration 9: Global Deviance = 40093
## GAMLSS-RS iteration 10: Global Deviance = 40093
## GAMLSS-RS iteration 11: Global Deviance = 40093
## GAMLSS-RS iteration 12: Global Deviance = 40093
## GAMLSS-RS iteration 13: Global Deviance = 40093
## GAMLSS-RS iteration 14: Global Deviance = 40093
## GAMLSS-RS iteration 15: Global Deviance = 40093
## GAMLSS-RS iteration 16: Global Deviance = 40093
## GAMLSS-RS iteration 17: Global Deviance = 40093
## GAMLSS-RS iteration 18: Global Deviance = 40093
## GAMLSS-RS iteration 19: Global Deviance = 40093
## GAMLSS-RS iteration 20: Global Deviance = 40093
## Warning: Algorithm RS has not yet converged
samplefitw <- function(W, mu, sigma, n = 1){
out <- qweibull(pWEIic(q = W[rep(1:nrow(W), n)],
mu = mu, sigma = sigma),
scale = mu, shape = sigma)
return(matrix(out, ncol = n))
}
samples = samplefitw(Torn3.df$WS, mu = fitw$mu.fv,
sigma = fitw$sigma.fv, n = 1)
Torn3.df$Wmax = samples + threshold * .44704
thresh = 29.06
perMax = c(1, .228, .115, .067, .032, .017)
perMax[1] = 0
EW = thresh + (Torn3.df$Wmax - thresh)/log(1/perMax[Torn3.df$EF + 1])
EW2 = EW^2 + (EW - thresh)^2/(log(1/perMax[Torn3.df$EF + 1])^2)
Torn3.df$KE = .5 * EW2 * Torn3.df$Area * 1000
totalKE = ddply(Torn3.df, .(Year), summarize,
count = length(Year),
tKE = sum(KE))
pal = wes.palette(name = "Zissou", type = "continuous")
or = order(totalKE$tKE, decreasing = FALSE)
totalKE$YearF = factor(totalKE$Year, levels = totalKE$Year[or])
ggplot(totalKE, aes(x = YearF, y = tKE/10^15, fill = count)) +
geom_histogram(stat = "identity") +
coord_flip() +
xlab("Year") +
ylab("Total Kinetic Energy of All U.S. Tornadoes (petajoules)\nRanked by Year") +
scale_fill_gradientn(colours = pal(20), name = "Annual\nNumber of\nTornadoes") +
theme_bw()
Figure 7: Kansas tornado counts. Get administrative boundaries. https://www.census.gov/geo/maps-data/data/cbf/cbf_counties.html. Here we use the 20m = 1:20,000,000 low resolution boundaries. 5m = 1:5,000,000
download.file("http://www2.census.gov/geo/tiger/GENZ2013/cb_2013_us_county_5m.zip",
"cb_2013_us_county_5m.zip", mode = "wb")
unzip("cb_2013_us_county_5m.zip")
US.sp = readOGR(dsn = ".", layer = "cb_2013_us_county_5m",
stringsAsFactors = FALSE)
## OGR data source with driver: ESRI Shapefile
## Source: ".", layer: "cb_2013_us_county_5m"
## with 3234 features and 9 fields
## Feature type: wkbPolygon with 2 dimensions
Extract Kansas using the state Federal Information Processing Standard (FIPS).
FP = 20
AB = "KS"
ST.sp = US.sp[US.sp$STATEFP == FP, ]
nrow(ST.sp)
## [1] 105
county = paste(ST.sp$STATEFP, ST.sp$COUNTYFP, sep = "")
countiesun.sp = geometry(spChFIDs(ST.sp, county))
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))
rgeos::gArea(counties.sp) / 10^6
## [1] 210845
Choropleth map of tornado counts & tornado days.
#useDrop <- function(x, use.fun = source, ...){
# tf <- tempfile()
# tmp <- GET(x)
# stop_for_status(tmp)
# writeBin(content(tmp, type = "raw"), tf)
# use.fun(tf, ...)
# }
#useDrop("https://dl.dropboxusercontent.com/s/bw5hvvj9e732e8i/CountyStatisticsSupport.R", source)
TornL2 = subset(TornL, Year >= 1970 & EF >= 1)
ct = over(counties.sp, TornL2, returnList = TRUE)
names(ct) = county
nT = sapply(ct, function(x) nrow(x))
nTd = sapply(ct, function(x) length(unique(x$Date)))
Nyears = diff(range(TornL2$Year)) + 1
TracksAll.df = ldply(ct, data.frame)
TracksAll.df$county = rep(county, sapply(ct, nrow))
nTor.df = data.frame(state = AB, county = county, Nyears = Nyears,
nT = nT, nTd = nTd,
stringsAsFactors = FALSE, ID = 1:length(county))
spdf = SpatialPolygonsDataFrame(counties.sp, nTor.df)
crq = wes.palette(name = "Zissou", type = "continuous")
range(spdf$nT)
## [1] 1 26
rng = seq(0, 30, 5)
p7a = spplot(spdf, "nT", col = "white", at = rng,
col.regions = crq(8),
colorkey = list(space = "bottom", labels = paste(rng)),
par.settings = list(axis.line = list(col = NA)),
sub = "Tornadoes (1970-2013)")
p7a = update(p7a, main=textGrob("a", x=unit(.05, "npc")), par.settings=list(fontsize=list(text=12)))
#plotTorn(x = spdf, tracks = TracksAll.df) +
# theme(plot.title = element_text(vjust = -2,
# hjust = .3,
# size = 20,
# face = "bold")) +
# scale_fill_gradientn(colours = crq(8))
#OR
#plotTorn(x = spdf, tracks = TracksAll.df) +
# scale_fill_gradientn("EF1+\nTornado Reports\n(1970-2013)",
# colours = crq(10))
range(spdf$nTd)
## [1] 1 19
rng = seq(0, 20, 4)
p7b = spplot(spdf, "nTd", col = "white", at = rng,
col.regions = crq(8),
colorkey = list(space = "bottom", labels = paste(rng)),
par.settings = list(axis.line = list(col = NA)),
sub = "Tornado Days (1970-2013)")
p7b = update(p7b, main=textGrob("b", x=unit(.05, "npc")), par.settings=list(fontsize=list(text=12)))
print(p7a, position = c(0,.5,1,1),more=T)
print(p7b, position = c(0,0,1,.5))
cor.test(spdf$nT, spdf$nTd)
##
## Pearson's product-moment correlation
##
## data: spdf$nT and spdf$nTd
## t = 22.58, df = 103, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8731 0.9395
## sample estimates:
## cor
## 0.9121
Figure 8: Model for county-level tornado counts See http://rpubs.com/jelsner/RegionalTornadoStudies for a similar figure with this code {r CountyModel, dev=‘tiff’, fig.width=6, fig.height=9} CRS0 = CRS(“+proj=longlat +datum=WGS84”) bc = getBordersAndCenters(spdf, CRS0) df2 = bc\(centers df3 = cbind(df, df2) names(df3)[5] = 'newfield' borders.df = merge(bc\)borders, df3[c(“county”, “newfield”, “Sig”)], by = “county”) borders2.df = borders.df[borders.df$Sig, ] crq = brewer.pal(8, “BrBG”) ggplotSpdata(spdf, “RandomQ50”, tfun = function(x) round((exp(x) - 1) * 100)) + scale_fill_gradientn(“Tornado Occurrence, aes(x = long, y = lat, group = group, fill = newfield), color = muted(”brown“), size = 2) + geom_text(data = df3[df3$Sig,], aes(x = long, y = lat, group = NULL, label = round(newfield)), vjust = .5, size = 5, color = ‘black’)