Fewer Days With Tornadoes But More Tornadoes On Days With Tornadoes

Set working directory and load packages.

setwd("~/Dropbox/Tornadoes")
library(ggplot2)
library(rgdal)
## Loading required package: sp
## rgdal: version: 0.8-16, (SVN revision 498)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.9.2, released 2012/10/08
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rgdal/gdal
## Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012, [PJ_VERSION: 480]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rgdal/proj
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(maps)
library(ggmap)
library(rgeos)
## rgeos version: 0.3-4, (SVN revision 438)
##  GEOS runtime version: 3.3.3-CAPI-1.7.4 
##  Polygon checking: TRUE
library(lubridate)

Data from the Storm Prediction Center (SPC) http://www.spc.noaa.gov/gis/svrgis/.

download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip", "tornado.zip", 
    mode = "wb")

Read the tornado data.

unzip("tornado.zip")
TornL = readOGR(dsn = ".", layer = "tornado", stringsAsFactors = FALSE)
## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "tornado"
## with 57988 features and 21 fields
## Feature type: wkbLineString with 2 dimensions

Change the data types in the data slot.

TornL$OM = as.integer(TornL$OM)
TornL$Year = as.integer(TornL$YR)
TornL$Month = as.integer(TornL$MO)
TornL$EF = as.integer(TornL$MAG)
TornL$Date = as.Date(TornL$DATE)
TornL$Length = as.numeric(TornL$LEN) * 1609.34
TornL$Width = as.numeric(TornL$WID) * 0.9144
TornL$FAT = as.integer(TornL$FAT)
TornL$SLON = as.numeric(TornL$SLON)
TornL$SLAT = as.numeric(TornL$SLAT)
TornL$ELON = as.numeric(TornL$ELON)
TornL$ELAT = as.numeric(TornL$ELAT)
TornL$INJ = as.numeric(TornL$INJ)
TornL$ID = as.integer(row.names(TornL)) + 1
Torn2.df = as.data.frame(TornL)
Torn3.df = subset(Torn2.df, Year >= 1954 & EF > 0)
byDay = group_by(Torn3.df, Date)
Day.df = summarise(byDay, nT = n())
Day.df$Year = year(Day.df$Date)

Is the probability of large outbreaks changing over time?

Years = 1954:2013
N = 16
Day2.df = subset(Day.df, nT >= N)
Day2.df$Year = year(Day2.df$Date)
byYear = group_by(Day2.df, Year)
OutByYear = summarise(byYear, nBig = n(), maxSize = max(nT), nTBig = sum(nT))
# Years without tor days fill with zeros
nBig = rep(0, length(Years))
nTBig = rep(0, length(Years))
nBig[Years %in% OutByYear$Year] = OutByYear$nBig
nTBig[Years %in% OutByYear$Year] = OutByYear$nTBig
Fill.df = data.frame(Year = Years, nBig = nBig, nTBig = nTBig)
Years = 1954:2013
N = 1
Day2.df = subset(Day.df, nT >= N)
Day2.df$Year = year(Day2.df$Date)

byYear = group_by(Day2.df, Year)
OutByYear = summarise(byYear, nSmall = n(), maxSize = max(nT), nTSmall = sum(nT))
Fill2.df = data.frame(Year = Years, nSmall = OutByYear$nSmall, nTSmall = OutByYear$nTSmall)

Figure 1

Fill.df$nSmall = Fill2.df$nSmall
Fill.df$nTSmall = Fill2.df$nTSmall
Fill.df$ratio = Fill.df$nBig/Fill.df$nSmall
Fill.df$nTratio = Fill.df$nTBig/Fill.df$nTSmall

byYear = group_by(Day.df, Year)
byYear.df = summarise(byYear, nT = sum(nT))
Fill.df$nT = byYear.df$nT

p1a = ggplot(Fill.df, aes(x = Year, y = nT)) + geom_point(size = 3) + geom_line() + 
    ylab("Number of Tornadoes") + theme_bw()
p1b = ggplot(Fill.df, aes(x = Year, y = nSmall)) + geom_point(size = 3) + geom_line() + 
    ylab("Number of Tornado Days") + theme_bw()
source("multiplot.txt")
mat = matrix(c(1, 2), nrow = 2, byrow = TRUE)
p1a = p1a + ggtitle("a") + theme(plot.title = element_text(hjust = 0))
p1b = p1b + ggtitle("b") + theme(plot.title = element_text(hjust = 0))
multiplot(p1a, p1b, layout = mat)
## Loading required package: grid

plot of chunk figure1


p1c = ggplot(Fill.df, aes(x = Year, y = nTratio)) + geom_point(size = 3) + geom_line()
tmp = table(Day.df$Year, Day.df$nT)
tmp1 = t(apply(t(tmp), 2, function(x) rev(cumsum(rev(x)))))
nTYear.df = data.frame(tmp1)
dimnames(nTYear.df) = dimnames(tmp)
nTYear.df$Year = as.integer(rownames(nTYear.df))

Figure 2

df = data.frame(Year = rep(1954:2013, 4), Value = c(nTYear.df[, "4"], nTYear.df[, 
    "8"], nTYear.df[, "16"], nTYear.df[, "32"]), Type = factor(rep(c("N = 4", 
    "N = 8", "N = 16", "N = 32"), each = length(nTYear.df$Year)), levels = c("N = 4", 
    "N = 8", "N = 16", "N = 32")))
p2 = ggplot(df, aes(Year, Value)) + geom_point(size = 3) + geom_line() + theme_bw() + 
    facet_wrap(~Type, nrow = 2, scales = "free_y") + ylab("Annual Number of Days With At Least N Tornadoes")
p2

plot of chunk figure2

df = data.frame(Year = rep(1954:2013, 6), Value = c(nTYear.df[, "1"], nTYear.df[, 
    "2"], nTYear.df[, "4"], nTYear.df[, "8"], nTYear.df[, "16"], nTYear.df[, 
    "32"]), Type = factor(rep(c("N = 1", "N = 2", "N = 4", "N = 8", "N = 16", 
    "N = 32"), each = length(nTYear.df$Year)), levels = c("N = 1", "N = 2", 
    "N = 4", "N = 8", "N = 16", "N = 32")))
p2 = ggplot(df, aes(Year, Value)) + geom_point(size = 3) + geom_line() + theme_bw() + 
    facet_wrap(~Type, ncol = 2, scales = "free_y") + ylab("Annual Number of Days With At Least N Tornadoes")
df = data.frame(Year = rep(1954:2013, 4), Value = c(nTYear.df[, "4"]/nTYear.df[, 
    "1"], nTYear.df[, "8"]/nTYear.df[, "1"], nTYear.df[, "16"]/nTYear.df[, "1"], 
    nTYear.df[, "32"]/nTYear.df[, "1"]), Type = factor(rep(c("N = 4", "N = 8", 
    "N = 16", "N = 32"), each = length(nTYear.df$Year)), levels = c("N = 4", 
    "N = 8", "N = 16", "N = 32")))
p3 = ggplot(df, aes(Year, Value)) + geom_point(size = 3) + geom_line() + theme_bw() + 
    facet_wrap(~Type, nrow = 2, scales = "free_y") + stat_smooth(method = "glm", 
    family = "binomial", formula = y ~ x, data = df, se = FALSE) + ylab("Probability of a Day With At Least N Tornadoes\nOn Days With At Least One Tornado")
p3
## Warning: non-integer #successes in a binomial glm!
## Warning: non-integer #successes in a binomial glm!
## Warning: non-integer #successes in a binomial glm!
## Warning: non-integer #successes in a binomial glm!

plot of chunk figure3

Proportion of tornadoes occurring on big days

nT1 = numeric()
nT4 = numeric()
nT8 = numeric()
nT16 = numeric()
nT32 = numeric()
for (i in 1:length(nTYear.df$Year)) {
    nT1 = c(nT1, sum(nTYear.df[i, 1:54]))
    nT4 = c(nT4, sum(nTYear.df[i, 4:54]))
    nT8 = c(nT8, sum(nTYear.df[i, 8:54]))
    nT16 = c(nT16, sum(nTYear.df[i, 16:54]))
    nT32 = c(nT32, sum(nTYear.df[i, 32:54]))
}
pTYear.df = data.frame(Year = rep(1954:2013, 4), Value = c(nT4, nT8, nT16, nT32), 
    Value2 = c(nT4/nT1, nT8/nT1, nT16/nT1, nT32/nT1), Type = factor(rep(c("N = 4", 
        "N = 8", "N = 16", "N = 32"), each = length(nTYear.df$Year)), levels = c("N = 4", 
        "N = 8", "N = 16", "N = 32")))
p4 = ggplot(pTYear.df, aes(Year, Value2)) + geom_point(size = 3) + geom_line() + 
    theme_bw() + facet_wrap(~Type, nrow = 2, scales = "free_y") + stat_smooth(method = "glm", 
    family = "binomial", formula = y ~ x, data = pTYear.df, se = FALSE) + ylab("Proportion of All Tornadoes Occurring On Days\nWith At Least N Tornadoes")
p4
## Warning: non-integer #successes in a binomial glm!
## Warning: non-integer #successes in a binomial glm!
## Warning: non-integer #successes in a binomial glm!
## Warning: non-integer #successes in a binomial glm!

plot of chunk figure4

model1 = glm(family = "binomial", data = nTYear.df, cbind(nTYear.df[, "30"], 
    nTYear.df[, "1"] - nTYear.df[, "30"]) ~ I(Year - 1980))
summary(model1)
## 
## Call:
## glm(formula = cbind(nTYear.df[, "30"], nTYear.df[, "1"] - nTYear.df[, 
##     "30"]) ~ I(Year - 1980), family = "binomial", data = nTYear.df)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.808  -0.944  -0.416   0.537   1.902  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -5.32313    0.18879  -28.20  < 2e-16 ***
## I(Year - 1980)  0.05381    0.00898    5.99  2.1e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 99.334  on 59  degrees of freedom
## Residual deviance: 57.011  on 58  degrees of freedom
## AIC: 139.8
## 
## Number of Fisher Scoring iterations: 5
model2 = glm(family = "binomial", data = nTYear.df, I(nTYear.df[, "15"]/nTYear.df[, 
    "1"]) ~ I(Year - 1980), weights = nTYear.df[, "1"])
summary(model2)
## 
## Call:
## glm(formula = I(nTYear.df[, "15"]/nTYear.df[, "1"]) ~ I(Year - 
##     1980), family = "binomial", data = nTYear.df, weights = nTYear.df[, 
##     "1"])
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1144  -0.7889  -0.0783   0.4072   3.1087  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -3.21190    0.06123  -52.46  < 2e-16 ***
## I(Year - 1980)  0.02031    0.00342    5.94  2.8e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 105.423  on 59  degrees of freedom
## Residual deviance:  69.677  on 58  degrees of freedom
## AIC: 276.6
## 
## Number of Fisher Scoring iterations: 4

nTYear.df2 = subset(nTYear.df, Year >= 1954)
summary(glm(family = "binomial", data = nTYear.df2, I(nTYear.df2[, "16"]/nTYear.df2[, 
    "1"]) ~ Year, weights = nTYear.df2[, "1"]))
## 
## Call:
## glm(formula = I(nTYear.df2[, "16"]/nTYear.df2[, "1"]) ~ Year, 
##     family = "binomial", data = nTYear.df2, weights = nTYear.df2[, 
##         "1"])
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.930  -0.762  -0.224   0.454   3.299  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -52.53344    7.33240   -7.16  7.8e-13 ***
## Year          0.02482    0.00369    6.73  1.7e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 113.695  on 59  degrees of freedom
## Residual deviance:  67.314  on 58  degrees of freedom
## AIC: 264.5
## 
## Number of Fisher Scoring iterations: 4
plt.df = data.frame(Year = nTYear.df2$Year, ratio = nTYear.df2[, "16"]/nTYear.df2[, 
    "1"], weights = nTYear.df2[, "1"])

Figure 5

eY = c(1954, 1974, 1984, 1994)
nT = c(4, 8, 16, 32)
tt = c("N = 4", "N = 8", "N = 16", "N = 32")
trendD = numeric()
SED = numeric()
trendP = numeric()
SEP = numeric()
for (j in 1:4) {
    nTYear.df2 = subset(nTYear.df, Year >= eY[j])
    pTYear.df2 = subset(pTYear.df, Year >= eY[j])
    ii = 0
    for (i in nT) {
        ii = ii + 1
        m1 = summary(glm(family = "binomial", data = nTYear.df2, I(nTYear.df2[, 
            as.character(i)]/nTYear.df2[, "1"]) ~ Year, weights = nTYear.df2[, 
            "1"]))
        trendD = c(trendD, coef(m1)[2, 1])
        SED = c(SED, coef(m1)[2, 2])
        m2 = summary(glm(family = "binomial", data = pTYear.df2[pTYear.df2$Type == 
            tt[ii], ], Value2 ~ Year, weights = nTYear.df2[, "1"]))
        trendP = c(trendP, coef(m2)[2, 1])
        SEP = c(SEP, coef(m2)[2, 2])
    }
}
df = data.frame(nT, trendD, trendP, tDLo = trendD - 2 * SED, tDHi = trendD + 
    2 * SED, tPLo = trendP - 2 * SEP, tPHi = trendP + 2 * SEP, Years = c(rep("1954-2013", 
    4), rep("1974-2013", 4), rep("1984-2013", 4), rep("1994-2013", 4)))
p5a = ggplot(df, aes(factor(nT), trendD * 100)) + geom_point(size = 3) + geom_linerange(aes(x = factor(nT), 
    ymax = tDHi * 100, ymin = tDLo * 100)) + ylab("Percent Change in Annual Probability\nof a Day With At Least N Tornadoes\nOn Days With At Least One Tornado") + 
    xlab("N") + facet_wrap(~Years) + theme_bw()

p5b = ggplot(df, aes(factor(nT), trendP * 100)) + geom_point(size = 3) + geom_linerange(aes(x = factor(nT), 
    ymax = tPHi * 100, ymin = tPLo * 100)) + # geom_hline(yintercept = 0) +
scale_y_continuous(limits = c(0, 15)) + ylab("Percent Change in Annual Proportion\nof Tornadoes Occurring On Days\nWith At Least N Tornadoes") + 
    xlab("N") + facet_wrap(~Years) + theme_bw()

mat = matrix(c(1, 2), nrow = 2, byrow = TRUE)
p5a = p5a + ggtitle("a") + theme(plot.title = element_text(hjust = 0))
p5b = p5b + ggtitle("b") + theme(plot.title = element_text(hjust = 0))
multiplot(p5a, p5b, layout = mat)

plot of chunk figure5