Estimating kinetic energy of historical tornadoes

Elsner/Jagger/Fricker

Preliminaries

Set working directory and load packages.

date()

## [1] "Thu Jun  5 08:58:20 2014"

setwd("~/Dropbox/Tornadoes")
require(MASS)

## Loading required package: MASS

require(ggplot2)

## Loading required package: ggplot2

require(rgdal)

## Loading required package: rgdal
## Loading required package: sp
## rgdal: version: 0.8-16, (SVN revision 498)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.9.2, released 2012/10/08
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rgdal/gdal
## Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012, [PJ_VERSION: 480]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rgdal/proj

library(plyr)
library(reshape2)
library(lubridate)

## 
## Attaching package: 'lubridate'
## 
## The following object is masked from 'package:plyr':
## 
##     here

require(gamlss.cens)

## Loading required package: gamlss.cens
## Loading required package: gamlss
## Loading required package: splines
## Loading required package: gamlss.data
## Loading required package: gamlss.dist
## Loading required package: nlme
##  **********   GAMLSS Version 4.2-8  ********** 
## For more on GAMLSS look at http://www.gamlss.org/
## Type gamlssNews() to see new features/changes/bug fixes.
## 
## Loading required package: survival
## 
## Attaching package: 'survival'
## 
## The following object is masked from 'package:gamlss':
## 
##     ridge

Tornado data

download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip",
              "tornado.zip", mode = "wb")

Read the tornado data and change data types

#unzip("tornado.zip")
TornL = readOGR(dsn = ".", layer = "tornado", 
                stringsAsFactors = FALSE)

## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "tornado"
## with 57988 features and 21 fields
## Feature type: wkbLineString with 2 dimensions

TornL$OM = as.integer(TornL$OM)
TornL$Year = as.integer(TornL$YR)
TornL$Month = as.integer(TornL$MO)
TornL$EF = as.integer(TornL$MAG)
TornL$Date = as.Date(TornL$DATE)
TornL$Length = as.numeric(TornL$LEN) * 1609.34
TornL$Width = as.numeric(TornL$WID) * .9144
TornL$FAT = as.integer(TornL$FAT)
TornL$SLON = as.numeric(TornL$SLON)
TornL$SLAT = as.numeric(TornL$SLAT)
TornL$ELON = as.numeric(TornL$ELON)
TornL$ELAT = as.numeric(TornL$ELAT)
TornL$INJ = as.numeric(TornL$INJ)
TornL$LOSS = as.numeric(TornL$LOSS)
TornL$WDAY = wday(TornL$Date)

Remove tornadoes without an EF scale rating. Compute area of damage path using an ellipse. Here we are interested in the area damaged by the tornado and not the area covered by debris scattered by the tornado.

Torn = subset(TornL, TornL$EF >= 0)
Torn.df = as.data.frame(Torn)
Torn.df$Area = .5 * Torn.df$Length * .5 * Torn.df$Width * pi
Torn2.df = subset(Torn.df, Year >= 1994)

Tornado wind speeds (3 sec gusts, mph); EF scale implemented in the U.S. on 1 February 2007. Values in parentheses are m/s. Operational EF Scale (Table 2-1 of NRC2007)

EF0 = 65, 85 (29.06, 38.45) EF1 = 86, 110 (38.45, 49.62) EF2 = 111, 135 (49.62, 60.80) EF3 = 136, 165 (60.80, 74.21) EF4 = 166, 200 (74.21, 89.41) EF5 > 200 (> 89.41)

Method 1: Empirical model (NRC 2007)

perc = c(1, 0, 0, 0, 0, 0)
perc = c(perc, .772, .228, 0, 0, 0, 0)
perc = c(perc, .616, .268, .115, 0, 0, 0)
perc = c(perc, .529, .271, .133, .067, 0, 0)
perc = c(perc, .543, .238, .131, .056, .032, 0)
perc = c(perc, .538, .223, .119, .07, .033, .017)
percM = matrix(perc, ncol = 6, byrow = TRUE)
threshW = c(29.06, 38.45, 49.62, 60.8, 74.21, 89.41)

ef = Torn2.df$EF + 1
EW2 = numeric()
for(i in 1:length(ef)){
EW2[i] = threshW^2 %*% percM[ef[i], ]
}

Torn2.df$KE1 = .5 * EW2 * Torn2.df$Area * 1000

Method 2a: Exponential distribution + threshold wind speed

Let \( p \) be the percentage of the path area covered and \( W \) be any value of tornado intensity. We can interpret \( p \) then as the probability that a random observed intensity within the storm's path exceeds some value \( w \).

One choice is the exponential distribution, since \( (W - a)/b \) is also exponential. That is it scales and has a power law. Suppose, \( W_{min} \) is the minimum observed intensity and covers 100% of the area, then \[ P(W > w|W > W_{min}) = \exp(-(w - W_{min})/a)) = p, \] The expected value of \( W \) is \( E(W) = W_{min} + a \). So \[ w = W_{min} - a \log(p) = W_{min} - \log(p^a). \]

Now \( a \) is determined by any other value of \( W \) and the probability. For example, given the 1 percent value for the maximum of \( W \), \( W_{max} \) then \[ W_{max} - W_{min} = a \log(p), \] So \[ a = \frac{(W_{max} - W_{min}}{\log(p_{max}} = \frac{(W_{max} - W_{min}}{\log(.01)} \] \[ w(p) = W_{min} + \frac{W_{max} - W_{min}}{\log(1/p_{max}) \log(1/p)} \] For \( p_{max} \) = .01 \[ w(p) = W_{min} + (W_{max} - W_{min})/(\log(100) \log(1/p)) \]

Some values for a uniform random variable X, E(g(X))=E(g(x(Z))), where Z has a uniform distribution, and X(Z) is the quantile funtion for X. Assume Z has a uniform 0,1 distribution, then

E(log(1/Z)^k) = integral(0,1) log(1/Z))^k = k! E(log(1/Z)) = 1 E(log(1/Z)²⁾ = 2

E(X) = Xmin + (Xmax-Xmin)/log(1/pmax) = Xmin(1-1/log(1/pmax))+Xmax(1/log(pmax)) E(X²⁾ = Xmin² + (Xmin *(Xmax-Xmin))/log(1/pmax) + 2((Xmax-Xmin)/log(1/pmax))² =E(X)^{2+((Xmax-Xmin)/log(1/pmax))²} = E(X)^{2+(E(X)-Xmin)^2,} as would be true for an exponential distribution.

So using wind speeds say Wmin = 40 m/s, Wmax = 200 m/s, pmin = 1, pmax = .01

E(W) = 40 + 160 /log(1/.01) = 74.75 E(W²⁾ = 74.75² + (74.5 - 40)^{2/((log(1/.01))²⁾} = 5644

So using wind speeds say Wmin = 30 m/s, Wmax = 90 m/s pmin = 1, pmax = .00124 (Moore/Newcastle 2013)

EW = 30 + 60/log(1/.00124) E(W²⁾ = EW² + (EW - 30)^{2/((log(1/.00124))²⁾}

perMax = c(1, .228, .115, .067, .032, .017)
threshW = c(29.06, 38.45, 49.62, 60.8, 74.21, 89.41)
EW = 29.06
EW2 = EW^2
for(i in 1:5){
x = threshW[1] + (threshW[i + 1] - threshW[1])/log(1/perMax[i + 1])
x2 = x^2 + (x - threshW[1])^2/(log(1/perMax[i + 1])^2)
EW = c(EW, x)
EW2 = c(EW2, x2)
}

Torn2.df$KE2a = .5 * EW2 * Torn2.df$Area * 1000

Method 2b: Exponential distribution + Weibull model for maximum intensity

We assume the data have a Weibull distribution with the P(V>v)=exp(-(v-v0/a)^k), as was done in Jagger, Elsner, Niu (2000) for county wind data from hurricanes.

In the model in which W ~ weibull(scale=mu, shape=sigma)*I(a, b) we use a and b as the lower and upper limits of the EF category speeds minus 85.9 mph.

We fit the model and then estimate the value of mu and sigma for each torndao using an interval censored model. From this model we generate samples of the actual wind speed conditional on the estimated values of mu and sigma.

threshold = 64.9
Low = (c(65, 86, 111, 136, 166, 200) - threshold) * 0.44704
High = (c(86, 111, 136, 166, 200, 250) - threshold) * 0.44704
ef = Torn2.df$EF + 1
WS = Surv(Low[ef], High[ef], rep(3, length(ef)), "interval")
Torn2.df$WS = WS
rm(WS)
WEIic = cens(WEI, type = "interval", local = "FALSE")
fitw = gamlss(WS ~ Length + Width, 
              sigma.formula = ~ Width + Length,
              data = Torn2.df, family = WEIic)

## GAMLSS-RS iteration 1: Global Deviance = 47588 
## GAMLSS-RS iteration 2: Global Deviance = 42326 
## GAMLSS-RS iteration 3: Global Deviance = 40620 
## GAMLSS-RS iteration 4: Global Deviance = 40188 
## GAMLSS-RS iteration 5: Global Deviance = 40108 
## GAMLSS-RS iteration 6: Global Deviance = 40095 
## GAMLSS-RS iteration 7: Global Deviance = 40093 
## GAMLSS-RS iteration 8: Global Deviance = 40093 
## GAMLSS-RS iteration 9: Global Deviance = 40093 
## GAMLSS-RS iteration 10: Global Deviance = 40093 
## GAMLSS-RS iteration 11: Global Deviance = 40093 
## GAMLSS-RS iteration 12: Global Deviance = 40093 
## GAMLSS-RS iteration 13: Global Deviance = 40093 
## GAMLSS-RS iteration 14: Global Deviance = 40093 
## GAMLSS-RS iteration 15: Global Deviance = 40093

samplefitw <- function(W, mu, sigma, n = 1){
  out <- qweibull(pWEIic(q = W[rep(1:nrow(W), n)], 
                         mu = mu, sigma = sigma),
                         scale = mu, shape = sigma)
  return(matrix(out, ncol = n))
  }
nsamples = 1000
samples = samplefitw(Torn2.df$WS[1:36], mu = fitw$mu.fv[1:36], 
                     sigma = fitw$sigma.fv[1:36], n = nsamples)
samplesplus = samples + threshold * .44704

samples2 = samplefitw(Torn2.df$WS, mu = fitw$mu.fv,
                      sigma = fitw$sigma.fv, n = 1)
Torn2.df$Wmax = samples2 + threshold * .44704

thresh = 29.06
perMax[1] = 0
EW = thresh + (Torn2.df$Wmax - thresh)/log(1/perMax[Torn2.df$EF + 1])
EW2 = EW^2 + (EW - thresh)^2/(log(1/perMax[Torn2.df$EF + 1])^2)
Torn2.df$KE2b = .5 * EW2 * Torn2.df$Area * 1000
cor(Torn2.df$KE1, Torn2.df$LOSS)

## [1] 0.347

cor(Torn2.df$KE2a, Torn2.df$LOSS)

## [1] 0.345

cor(Torn2.df$KE2b, Torn2.df$LOSS)

##        [,1]
## [1,] 0.3594

cor(Torn2.df$Area, Torn2.df$LOSS)

## [1] 0.3276

Torn2.df2 = Torn2.df[order(-Torn2.df$KE1), ]

Plots

dfWide = as.data.frame(t(samplesplus))
names(dfWide) = paste(Torn2.df$OM[1:36], Torn2.df$DATE[1:36], Torn2.df$TIME[1:36])
dfLong = melt(dfWide)

## No id variables; using all as measure variables

dfLong$EF = rep(Torn2.df$EF[1:36], each = nsamples)
ggplot(dfLong, aes(value, fill = factor(EF))) +
  geom_density(color = "transparent") +
  facet_wrap(~ variable) +
  theme(legend.position = "none") +
  xlab("Tornado Wind Speed (m/s)") +
  ylab("Density") +
  theme_bw()

plot of chunk plotSamples

df = data.frame(OM = Torn2.df$OM, KE1 = Torn2.df$KE1, 
                KE2a = Torn2.df$KE2a, KE2b = Torn2.df$KE2b)
dfLong = melt(df, id = "OM")
ggplot(dfLong, aes(log10(value))) + 
  geom_histogram(binwidth = .5) +
  facet_wrap(~variable)

plot of chunk distribution

Total KE by year

totalKE = ddply(Torn2.df, .(Year), summarize,
                count = length(Year),
                tKE1 = sum(KE1),
                tKE2a = sum(KE2a),
                tKE2b = sum(KE2b))
ggplot(totalKE, aes(Year, tKE1/10^15)) + 
  geom_point() + geom_smooth()

## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk totalAnnualKEbyYear

ggplot(totalKE, aes(Year, tKE1/count)) + 
  geom_point() + geom_smooth()

## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk totalAnnualKEbyYear

library(ggthemes)
library(wesanderson)
pal = wes.palette(name = "Zissou", type = "continuous")
or = order(totalKE$tKE1, decreasing = FALSE)
totalKE$YearF = factor(totalKE$Year, levels = totalKE$Year[or])
ggplot(totalKE, aes(x = YearF, y = tKE1/10^15, fill = count)) + 
  geom_histogram(stat = "identity") + 
  coord_flip() + 
  xlab("Year") + 
  ylab("Total Kinetic Energy of All U.S. Tornadoes (petajoules)\nRanked by Year") + 
  scale_fill_gradientn(colours = pal(20), name = "Number of\nTornadoes") +
  theme_bw()

plot of chunk barchartYear

#  theme(legend.position="none")
or = order(totalKE$tKE2b, decreasing = FALSE)
totalKE$YearF = factor(totalKE$Year, levels = totalKE$Year[or])
ggplot(totalKE, aes(x = YearF, y = tKE2b/10^15, fill = count)) + 
  geom_histogram(stat = "identity") + 
  coord_flip() + 
  xlab("Year") + 
  ylab("Total Kinetic Energy of All U.S. Tornadoes (petajoules)\nRanked by Year") + 
  scale_fill_gradientn(colours = pal(20), name = "Number of\nTornadoes") +
  theme_bw()

plot of chunk barchartYear

Total KE by day (top ten)

totalKE = ddply(Torn2.df, .(Date), summarize,
                count = length(Year),
                tKE1 = sum(KE1),
                tKE2a = sum(KE2a),
                tKE2b = sum(KE2b))

totalKE2 = totalKE[order(-totalKE$tKE2b), ]
totalKE2 = totalKE2[1:20, ]

library(ggthemes)
library(wesanderson)
pal = wes.palette(name = "Zissou", type = "continuous")
or = order(totalKE2$tKE2b, decreasing = FALSE)
totalKE2$DateF = factor(totalKE2$Date, 
                        levels = as.character(totalKE2$Date[or]))
ggplot(totalKE2, aes(x = DateF, y = tKE2b/10^15, fill = count)) + 
  geom_histogram(stat = "identity") + 
  coord_flip() + 
  xlab("Date (Year-Month-Day)") + 
  ylab("Total Kinetic Energy of U.S. Tornadoes (petajoules)\nRanked by Day") + 
  scale_fill_gradientn(colours = pal(20), name = "Number of\nTornadoes") +
  theme_bw()

plot of chunk barchartDay