Set working directory and load packages.
setwd("~/Dropbox/Tornadoes")
library(ggplot2)
library(rgdal)
## Loading required package: sp
## rgdal: version: 0.8-16, (SVN revision 498)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.9.2, released 2012/10/08
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rgdal/gdal
## Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012, [PJ_VERSION: 480]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rgdal/proj
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(maps)
library(ggmap)
library(rgeos)
## rgeos version: 0.3-4, (SVN revision 438)
## GEOS runtime version: 3.3.3-CAPI-1.7.4
## Polygon checking: TRUE
library(lubridate)
Data from the Storm Prediction Center (SPC) http://www.spc.noaa.gov/gis/svrgis/.
download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip", "tornado.zip",
mode = "wb")
Read the tornado data.
unzip("tornado.zip")
TornL = readOGR(dsn = ".", layer = "tornado", stringsAsFactors = FALSE)
## OGR data source with driver: ESRI Shapefile
## Source: ".", layer: "tornado"
## with 57988 features and 21 fields
## Feature type: wkbLineString with 2 dimensions
Change the data types in the data slot.
TornL$OM = as.integer(TornL$OM)
TornL$Year = as.integer(TornL$YR)
TornL$Month = as.integer(TornL$MO)
TornL$EF = as.integer(TornL$MAG)
TornL$Date = as.Date(TornL$DATE)
TornL$Length = as.numeric(TornL$LEN) * 1609.34
TornL$Width = as.numeric(TornL$WID) * 0.9144
TornL$FAT = as.integer(TornL$FAT)
TornL$SLON = as.numeric(TornL$SLON)
TornL$SLAT = as.numeric(TornL$SLAT)
TornL$ELON = as.numeric(TornL$ELON)
TornL$ELAT = as.numeric(TornL$ELAT)
TornL$INJ = as.numeric(TornL$INJ)
TornL$ID = as.integer(row.names(TornL)) + 1
Torn2.df = as.data.frame(TornL)
Torn3.df = subset(Torn2.df, Year >= 1954 & EF > 0)
byDay = group_by(Torn3.df, Date)
Day.df = summarise(byDay, nT = n())
Day.df$Year = year(Day.df$Date)
Is the probability of large outbreaks changing over time?
Years = 1954:2013
N = 16
Day2.df = subset(Day.df, nT >= N)
Day2.df$Year = year(Day2.df$Date)
byYear = group_by(Day2.df, Year)
OutByYear = summarise(byYear, nBig = n(), maxSize = max(nT), nTBig = sum(nT))
# Years without tor days fill with zeros
nBig = rep(0, length(Years))
nTBig = rep(0, length(Years))
nBig[Years %in% OutByYear$Year] = OutByYear$nBig
nTBig[Years %in% OutByYear$Year] = OutByYear$nTBig
Fill.df = data.frame(Year = Years, nBig = nBig, nTBig = nTBig)
Years = 1954:2013
N = 1
Day2.df = subset(Day.df, nT >= N)
Day2.df$Year = year(Day2.df$Date)
byYear = group_by(Day2.df, Year)
OutByYear = summarise(byYear, nSmall = n(), maxSize = max(nT), nTSmall = sum(nT))
Fill2.df = data.frame(Year = Years, nSmall = OutByYear$nSmall, nTSmall = OutByYear$nTSmall)
Figure 1
Fill.df$nSmall = Fill2.df$nSmall
Fill.df$nTSmall = Fill2.df$nTSmall
Fill.df$ratio = Fill.df$nBig/Fill.df$nSmall
Fill.df$nTratio = Fill.df$nTBig/Fill.df$nTSmall
byYear = group_by(Day.df, Year)
byYear.df = summarise(byYear, nT = sum(nT))
Fill.df$nT = byYear.df$nT
p1a = ggplot(Fill.df, aes(x = Year, y = nT)) + geom_point(size = 3) + geom_line() +
ylab("Number of Tornadoes") + theme_bw()
p1b = ggplot(Fill.df, aes(x = Year, y = nSmall)) + geom_point(size = 3) + geom_line() +
ylab("Number of Tornado Days") + theme_bw()
source("multiplot.txt")
mat = matrix(c(1, 2), nrow = 2, byrow = TRUE)
p1a = p1a + ggtitle("a") + theme(plot.title = element_text(hjust = 0))
p1b = p1b + ggtitle("b") + theme(plot.title = element_text(hjust = 0))
multiplot(p1a, p1b, layout = mat)
## Loading required package: grid
p1c = ggplot(Fill.df, aes(x = Year, y = nTratio)) + geom_point(size = 3) + geom_line()
tmp = table(Day.df$Year, Day.df$nT)
tmp1 = t(apply(t(tmp), 2, function(x) rev(cumsum(rev(x)))))
nTYear.df = data.frame(tmp1)
dimnames(nTYear.df) = dimnames(tmp)
nTYear.df$Year = as.integer(rownames(nTYear.df))
Figure 2
df = data.frame(Year = rep(1954:2013, 4), Value = c(nTYear.df[, "4"], nTYear.df[,
"8"], nTYear.df[, "16"], nTYear.df[, "32"]), Type = factor(rep(c("N = 4",
"N = 8", "N = 16", "N = 32"), each = length(nTYear.df$Year)), levels = c("N = 4",
"N = 8", "N = 16", "N = 32")))
p2 = ggplot(df, aes(Year, Value)) + geom_point(size = 3) + geom_line() + theme_bw() +
facet_wrap(~Type, nrow = 2, scales = "free_y") + ylab("Annual Number of Days With At Least N Tornadoes")
p2
df = data.frame(Year = rep(1954:2013, 6), Value = c(nTYear.df[, "1"], nTYear.df[,
"2"], nTYear.df[, "4"], nTYear.df[, "8"], nTYear.df[, "16"], nTYear.df[,
"32"]), Type = factor(rep(c("N = 1", "N = 2", "N = 4", "N = 8", "N = 16",
"N = 32"), each = length(nTYear.df$Year)), levels = c("N = 1", "N = 2",
"N = 4", "N = 8", "N = 16", "N = 32")))
p2 = ggplot(df, aes(Year, Value)) + geom_point(size = 3) + geom_line() + theme_bw() +
facet_wrap(~Type, ncol = 2, scales = "free_y") + ylab("Annual Number of Days With At Least N Tornadoes")
df = data.frame(Year = rep(1954:2013, 4), Value = c(nTYear.df[, "4"]/nTYear.df[,
"1"], nTYear.df[, "8"]/nTYear.df[, "1"], nTYear.df[, "16"]/nTYear.df[, "1"],
nTYear.df[, "32"]/nTYear.df[, "1"]), Type = factor(rep(c("N = 4", "N = 8",
"N = 16", "N = 32"), each = length(nTYear.df$Year)), levels = c("N = 4",
"N = 8", "N = 16", "N = 32")))
p3 = ggplot(df, aes(Year, Value)) + geom_point(size = 3) + geom_line() + theme_bw() +
facet_wrap(~Type, nrow = 2, scales = "free_y") + stat_smooth(method = "glm",
family = "binomial", formula = y ~ x, data = df, se = FALSE) + ylab("Probability of a Day With At Least N Tornadoes\nOn Days With At Least One Tornado")
p3
## Warning: non-integer #successes in a binomial glm!
## Warning: non-integer #successes in a binomial glm!
## Warning: non-integer #successes in a binomial glm!
## Warning: non-integer #successes in a binomial glm!
Proportion of tornadoes occurring on big days
nT1 = numeric()
nT4 = numeric()
nT8 = numeric()
nT16 = numeric()
nT32 = numeric()
for (i in 1:length(nTYear.df$Year)) {
nT1 = c(nT1, sum(nTYear.df[i, 1:54]))
nT4 = c(nT4, sum(nTYear.df[i, 4:54]))
nT8 = c(nT8, sum(nTYear.df[i, 8:54]))
nT16 = c(nT16, sum(nTYear.df[i, 16:54]))
nT32 = c(nT32, sum(nTYear.df[i, 32:54]))
}
pTYear.df = data.frame(Year = rep(1954:2013, 4), Value = c(nT4, nT8, nT16, nT32),
Value2 = c(nT4/nT1, nT8/nT1, nT16/nT1, nT32/nT1), Type = factor(rep(c("N = 4",
"N = 8", "N = 16", "N = 32"), each = length(nTYear.df$Year)), levels = c("N = 4",
"N = 8", "N = 16", "N = 32")))
p4 = ggplot(pTYear.df, aes(Year, Value2)) + geom_point(size = 3) + geom_line() +
theme_bw() + facet_wrap(~Type, nrow = 2, scales = "free_y") + stat_smooth(method = "glm",
family = "binomial", formula = y ~ x, data = pTYear.df, se = FALSE) + ylab("Proportion of All Tornadoes Occurring On Days\nWith At Least N Tornadoes")
p4
## Warning: non-integer #successes in a binomial glm!
## Warning: non-integer #successes in a binomial glm!
## Warning: non-integer #successes in a binomial glm!
## Warning: non-integer #successes in a binomial glm!
model1 = glm(family = "binomial", data = nTYear.df, cbind(nTYear.df[, "30"],
nTYear.df[, "1"] - nTYear.df[, "30"]) ~ I(Year - 1980))
summary(model1)
##
## Call:
## glm(formula = cbind(nTYear.df[, "30"], nTYear.df[, "1"] - nTYear.df[,
## "30"]) ~ I(Year - 1980), family = "binomial", data = nTYear.df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.808 -0.944 -0.416 0.537 1.902
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.32313 0.18879 -28.20 < 2e-16 ***
## I(Year - 1980) 0.05381 0.00898 5.99 2.1e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 99.334 on 59 degrees of freedom
## Residual deviance: 57.011 on 58 degrees of freedom
## AIC: 139.8
##
## Number of Fisher Scoring iterations: 5
model2 = glm(family = "binomial", data = nTYear.df, I(nTYear.df[, "15"]/nTYear.df[,
"1"]) ~ I(Year - 1980), weights = nTYear.df[, "1"])
summary(model2)
##
## Call:
## glm(formula = I(nTYear.df[, "15"]/nTYear.df[, "1"]) ~ I(Year -
## 1980), family = "binomial", data = nTYear.df, weights = nTYear.df[,
## "1"])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1144 -0.7889 -0.0783 0.4072 3.1087
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.21190 0.06123 -52.46 < 2e-16 ***
## I(Year - 1980) 0.02031 0.00342 5.94 2.8e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 105.423 on 59 degrees of freedom
## Residual deviance: 69.677 on 58 degrees of freedom
## AIC: 276.6
##
## Number of Fisher Scoring iterations: 4
nTYear.df2 = subset(nTYear.df, Year >= 1954)
summary(glm(family = "binomial", data = nTYear.df2, I(nTYear.df2[, "16"]/nTYear.df2[,
"1"]) ~ Year, weights = nTYear.df2[, "1"]))
##
## Call:
## glm(formula = I(nTYear.df2[, "16"]/nTYear.df2[, "1"]) ~ Year,
## family = "binomial", data = nTYear.df2, weights = nTYear.df2[,
## "1"])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.930 -0.762 -0.224 0.454 3.299
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -52.53344 7.33240 -7.16 7.8e-13 ***
## Year 0.02482 0.00369 6.73 1.7e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 113.695 on 59 degrees of freedom
## Residual deviance: 67.314 on 58 degrees of freedom
## AIC: 264.5
##
## Number of Fisher Scoring iterations: 4
plt.df = data.frame(Year = nTYear.df2$Year, ratio = nTYear.df2[, "16"]/nTYear.df2[,
"1"], weights = nTYear.df2[, "1"])
Figure 5
eY = c(1954, 1974, 1984, 1994)
nT = c(4, 8, 16, 32)
tt = c("N = 4", "N = 8", "N = 16", "N = 32")
trendD = numeric()
SED = numeric()
trendP = numeric()
SEP = numeric()
for (j in 1:4) {
nTYear.df2 = subset(nTYear.df, Year >= eY[j])
pTYear.df2 = subset(pTYear.df, Year >= eY[j])
ii = 0
for (i in nT) {
ii = ii + 1
m1 = summary(glm(family = "binomial", data = nTYear.df2, I(nTYear.df2[,
as.character(i)]/nTYear.df2[, "1"]) ~ Year, weights = nTYear.df2[,
"1"]))
trendD = c(trendD, coef(m1)[2, 1])
SED = c(SED, coef(m1)[2, 2])
m2 = summary(glm(family = "binomial", data = pTYear.df2[pTYear.df2$Type ==
tt[ii], ], Value2 ~ Year, weights = nTYear.df2[, "1"]))
trendP = c(trendP, coef(m2)[2, 1])
SEP = c(SEP, coef(m2)[2, 2])
}
}
df = data.frame(nT, trendD, trendP, tDLo = trendD - 2 * SED, tDHi = trendD +
2 * SED, tPLo = trendP - 2 * SEP, tPHi = trendP + 2 * SEP, Years = c(rep("1954-2013",
4), rep("1974-2013", 4), rep("1984-2013", 4), rep("1994-2013", 4)))
p5a = ggplot(df, aes(factor(nT), trendD * 100)) + geom_point(size = 3) + geom_linerange(aes(x = factor(nT),
ymax = tDHi * 100, ymin = tDLo * 100)) + ylab("Percent Change in Annual Probability\nof a Day With At Least N Tornadoes\nOn Days With At Least One Tornado") +
xlab("N") + facet_wrap(~Years) + theme_bw()
p5b = ggplot(df, aes(factor(nT), trendP * 100)) + geom_point(size = 3) + geom_linerange(aes(x = factor(nT),
ymax = tPHi * 100, ymin = tPLo * 100)) + # geom_hline(yintercept = 0) +
scale_y_continuous(limits = c(0, 15)) + ylab("Percent Change in Annual Proportion\nof Tornadoes Occurring On Days\nWith At Least N Tornadoes") +
xlab("N") + facet_wrap(~Years) + theme_bw()
mat = matrix(c(1, 2), nrow = 2, byrow = TRUE)
p5a = p5a + ggtitle("a") + theme(plot.title = element_text(hjust = 0))
p5b = p5b + ggtitle("b") + theme(plot.title = element_text(hjust = 0))
multiplot(p5a, p5b, layout = mat)