A Statistical Model for Tornado Intensity

James B. Elsner, Thomas H. Jagger, and Ian Elsner

Code in support of our paper in review with PLoS ONE.

date()

## [1] "Sun Aug 10 18:57:20 2014"

Preliminaries

Set working directory and load packages.

setwd("~/Dropbox/Tornadoes")
library(MASS)
library(RColorBrewer)
library(ggplot2)
library(rgdal)

## Loading required package: sp
## rgdal: version: 0.8-16, (SVN revision 498)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.9.2, released 2012/10/08
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rgdal/gdal
## Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012, [PJ_VERSION: 480]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rgdal/proj

library(quantreg)

## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## 
## The following object is masked from 'package:base':
## 
##     backsolve

library(plyr)
library(reshape)

## 
## Attaching package: 'reshape'
## 
## The following objects are masked from 'package:plyr':
## 
##     rename, round_any

library(maptools)

## Checking rgeos availability: TRUE

library(rgeos)

## rgeos version: 0.3-4, (SVN revision 438)
##  GEOS runtime version: 3.3.3-CAPI-1.7.4 
##  Polygon checking: TRUE

library(lubridate)

## 
## Attaching package: 'lubridate'
## 
## The following object is masked from 'package:reshape':
## 
##     stamp
## 
## The following object is masked from 'package:plyr':
## 
##     here

source("Multiplot.txt")

Tornadoes

From the Storm Prediction Center (SPC) http://www.spc.noaa.gov/gis/svrgis/

download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip",
              "tornado.zip", mode = "wb")
unzip("tornado.zip")

TornL = readOGR(dsn = ".", layer = "tornado", 
                stringsAsFactors = FALSE)

## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "tornado"
## with 57988 features and 21 fields
## Feature type: wkbLineString with 2 dimensions

TornL$OM = as.integer(TornL$OM)
TornL$Year = as.integer(TornL$YR)
TornL$Month = as.integer(TornL$MO)
TornL$EF = as.integer(TornL$MAG)
TornL$Date = as.Date(TornL$DATE)
TornL$Length = as.numeric(TornL$LEN) * 1609.34
TornL$Width = as.numeric(TornL$WID) * .9144
TornL$FAT = as.integer(TornL$FAT)
TornL$SLON = as.numeric(TornL$SLON)
TornL$SLAT = as.numeric(TornL$SLAT)
TornL$ELON = as.numeric(TornL$ELON)
TornL$ELAT = as.numeric(TornL$ELAT)
TornL$INJ = as.numeric(TornL$INJ)
TornL$LOSS = as.numeric(TornL$LOSS)
TornL$WDAY = wday(TornL$Date)

Exploratory analysis

Remove tornadoes without a damage rating and tornadoes before 2007

TornL2 = TornL
TornL2 = subset(TornL, TornL$EF >= 0 & TornL$Year >= 2007)
table(TornL2$EF)

## 
##    0    1    2    3    4    5 
## 4994 2642  818  232   57    9

Occurrence rates

Torn2.df = as.data.frame(TornL2)
trpyWide = ddply(Torn2.df, .(Year), summarize, 
                 F1 = sum(EF == 1), F2 = sum(EF == 2),
                 F3 = sum(EF == 3), F4 = sum(EF == 4), 
                 F5 = sum(EF == 5), F1F5 = sum(EF >= 1), 
                 F2F5 = sum(EF >= 2), F3F5 = sum(EF >= 3),
                 F4F5 = sum(EF >= 4))
trpyLong = melt(trpyWide, id = "Year", 
                measure.vars = c("F1F5", "F2F5", "F3F5", "F4F5"))
trpyLong$"F1-F5" = trpyLong$F1F5
trpyLong$"F2-F5" = trpyLong$F2F5
trpyLong$"F3-F5" = trpyLong$F3F5
trpyLong$"F4-F5" = trpyLong$F4F5
ggplot(trpyLong, aes(x = Year, y = value)) + 
  geom_bar(stat = "identity", fill = 'gray') + 
  facet_grid(variable ~ ., scale="free_y") +
  theme_bw() + 
  ylab("Number of Tornado Reports (2007-2013)") + 
  xlab("")

plot of chunk unnamed-chunk-5

Fraction of all tornadoes since 2007 that are violent and path relationships.

nrow(Torn2.df[Torn2.df$EF >= 4, ])/nrow(Torn2.df)

## [1] 0.007541

cor(Torn2.df$Length, Torn2.df$EF)^2

## [1] 0.3048

cor(Torn2.df$Width, Torn2.df$EF)^2

## [1] 0.369

cor(Torn2.df$Length, Torn2.df$Width)

## [1] 0.5917

sum(Torn2.df$Width < 75)

## [1] 5114

cor(Torn2.df$Length[Torn2.df$Width < 75], Torn2.df$Width[Torn2.df$Width < 75])

## [1] 0.2182

sum(Torn2.df$Width >= 75)

## [1] 3638

cor(Torn2.df$Length[Torn2.df$Width >= 75], Torn2.df$Width[Torn2.df$Width >= 75])

## [1] 0.5198

Create an ordered factor for the EF category.

Torn2.df$EFF = factor(Torn2.df$EF, ordered = TRUE)
Torn2.df$FAT2 = Torn2.df$FAT > 0

Plot relationships between EF category and path length/width

p1a = ggplot(Torn2.df, aes(x = EFF, y = Length/1000)) + 
  geom_boxplot() + theme_bw() + coord_flip() +
  ylab("Path Length (km)") + xlab("EF Category")
p1b = ggplot(Torn2.df, aes(x = EFF, y = Width)) + 
  geom_boxplot() + theme_bw() + coord_flip() +
  ylab("Path Width (m)") + xlab("EF Category")
mat = matrix(c(1, 2), nrow = 1, byrow = TRUE)
p1a = p1a + ggtitle("a") + theme(plot.title=element_text(hjust=0))
p1b = p1b + ggtitle("b") + theme(plot.title=element_text(hjust=0))                         
multiplot(p1a, p1b, layout = mat)

## Loading required package: grid

plot of chunk unnamed-chunk-8 FIGURE 1

In table form.

ddply(Torn2.df, ~ EFF, summarize, 
      avgL = mean(Length/1000),
      q25L = quantile(Length/1000, prob = .25),
      q50L = quantile(Length/1000, prob = .5),
      q75L = quantile(Length/1000, prob = .75),
      avgW = mean(Width),
      q25W = quantile(Width, prob = .25),
      q50W = quantile(Width, prob = .5),
      q75W = quantile(Width, prob = .75),
      nr = length(Year)
      )

##   EFF   avgL    q25L    q50L  q75L    avgW    q25W    q50W    q75W   nr
## 1   0  2.267  0.2897  0.8047  2.70   54.95   22.86   45.72   68.58 4994
## 2   1  7.096  1.7703  4.3855  9.33  163.78   68.58   91.44  182.88 2642
## 3   2 14.290  4.5263 10.0342 19.25  344.10  137.16  228.60  402.34  818
## 4   3 29.093 12.3758 23.2791 36.36  736.28  339.47  548.64 1005.84  232
## 5   4 52.551 17.7027 34.8422 64.63  997.95  603.50  804.67 1207.01   57
## 6   5 71.948 45.5121 58.9501 65.93 1635.76 1207.01 1609.34 1920.24    9

Weibull regression model

Fit a Weibull model.

library(gamlss.cens)

## Loading required package: gamlss
## Loading required package: splines
## Loading required package: gamlss.data
## Loading required package: gamlss.dist
## Loading required package: nlme
##  **********   GAMLSS Version 4.2-8  ********** 
## For more on GAMLSS look at http://www.gamlss.org/
## Type gamlssNews() to see new features/changes/bug fixes.
## 
## Loading required package: survival
## 
## Attaching package: 'survival'
## 
## The following object is masked from 'package:gamlss':
## 
##     ridge
## 
## The following object is masked from 'package:quantreg':
## 
##     untangle.specials

threshold = 64.9
#Low = (c(65, 86, 111, 136, 166, 200) - threshold) * 0.44704
#High = (c(86, 111, 136, 166, 200, 250) - threshold) * 0.44704
Low = (c(65, 86, 110, 138, 168, 200) - threshold) * 0.44704
High = (c(86, 110, 138, 168, 200, 234) - threshold) * 0.44704

ef = Torn2.df$EF + 1
WS = Surv(Low[ef], High[ef], rep(3, length(ef)), "interval")
Torn2.df$WS = WS
rm(WS)
WEIic = cens(family = "WEI", type = "interval", local = "FALSE")
fitw = gamlss(WS ~ Length + Width + FAT2, 
              sigma.formula = ~ Width + Length,
              data = Torn2.df, family = WEIic)

## GAMLSS-RS iteration 1: Global Deviance = 17961 
## GAMLSS-RS iteration 2: Global Deviance = 15761 
## GAMLSS-RS iteration 3: Global Deviance = 15106 
## GAMLSS-RS iteration 4: Global Deviance = 14978 
## GAMLSS-RS iteration 5: Global Deviance = 14961 
## GAMLSS-RS iteration 6: Global Deviance = 14959 
## GAMLSS-RS iteration 7: Global Deviance = 14959 
## GAMLSS-RS iteration 8: Global Deviance = 14959 
## GAMLSS-RS iteration 9: Global Deviance = 14959 
## GAMLSS-RS iteration 10: Global Deviance = 14959 
## GAMLSS-RS iteration 11: Global Deviance = 14959

Res.df = data.frame(Res = residuals(fitw))
summary(fitw)

## Warning: summary: vcov has failed, option qr is used instead

## *******************************************************************
## Family:  c("WEIic", "interval censored Weibull") 
## 
## Call:  
## gamlss(formula = WS ~ Length + Width + FAT2, sigma.formula = ~Width +  
##     Length, family = WEIic, data = Torn2.df) 
## 
## Fitting method: RS() 
## 
## -------------------------------------------------------------------
## Mu link function:  log
## Mu Coefficients:
##              Estimate  Std. Error  t value   Pr(>|t|)
## (Intercept)  2.05e+00    7.41e-03   276.81   0.00e+00
## Length       2.27e-05    9.35e-07    24.24  1.16e-125
## Width        1.54e-03    4.42e-05    34.94  1.48e-250
## FAT2TRUE     3.59e-01    5.51e-02     6.51   8.01e-11
## 
## -------------------------------------------------------------------
## Sigma link function:  log
## Sigma Coefficients:
##               Estimate  Std. Error  t value  Pr(>|t|)
## (Intercept)   6.32e-01    9.30e-03    68.02  0.00e+00
## Width        -3.12e-04    4.03e-05    -7.73  1.19e-14
## Length       -4.04e-06    8.47e-07    -4.77  1.85e-06
## 
## -------------------------------------------------------------------
## No. of observations in the fit:  8752 
## Degrees of Freedom for the fit:  7
##       Residual Deg. of Freedom:  8745 
##                       at cycle:  11 
##  
## Global Deviance:     14959 
##             AIC:     14973 
##             SBC:     15022 
## *******************************************************************

Interpretation

The model quantifies the significant relationship between damage-rating wind speed intervals and path length and width. Model coefficients are ratios based on the exceedance over the threshold of 29 m~s$^{-1}$ (lower bound on the EF0 rating). The coefficient on the length term is 2.265$$10$^{-5}$ so $\exp(.2265)$ = 1.25 or a 25% increase over the threshold for a 100 km increase in path length. The coefficient on the width term is 1.544$$10${^-3}$ so $\exp(.1544)$ = 1.17 or a 17% increase over the threshold for a 1 km increase in path width. The fatality term is significant and indicates a $\exp(.3586)$ = 1.43 or a 43% increase in the expected intensity when fatalities are observed. That is, on average, tornadoes that kill have been about 43% stronger than those that did not.

For a fixed variance, a 50 m~s$^{-1}$ ten km long tornado will on average be a (50 $-$ 29) $\times$ 1.25 $+$ 29 = 55.3 m~s$^{-1}$ tornado and a 50 m~s$^{-1}$ 100 m wide tornado will be a (50 - 29) $\times$ 1.17 $+$ 29 = 53.6 m~s$^{-1}$ tornado. A 50 m~s$^{-1}$ tornado observed with no fatalities with the same width and length would be a (50 - 29) $\times$ 1.43 $+$ 29 = 59.0 m~s$^{-1}$ tornado.

sigma1 = predict(fitw, "sigma",
        newdata = data.frame(Length = 10000, Width = 100))
sigma2 = predict(fitw, "sigma",
        newdata = data.frame(Length = 20000, Width = 200))
gamma(1/exp(sigma1) + 1)

## [1] 0.8906

gamma(1/exp(sigma2) + 1)

## [1] 0.8951

The Weibull shape parameter (sigma) is allowed to vary with length and width in the model, but the coefficients on this term are about an order of magnitude smaller than the coefficients on the mean term. For instance, $\Gamma(1/\sigma+1)$ = .891 for a 100 m wide, 10 km long tornado and .895 for a 200 m wide, 20 km long tornado. This is a change of .4%, so we can ignore this variation when interpreting the results, although we note that the signs on the length and width coefficients in the model for the scale parameter are negative indicating that the shape parameter decreases for larger widths and lengths. The shape of the intensity distribution becomes broader as tornado path width and length increase.

samplefitw <- function(W, mu, sigma, n = 1){
  out <- qweibull(pWEIic(q = W[rep(1:nrow(W), n)], 
                         mu = mu, sigma = sigma),
                         scale = mu, shape = sigma)
  return(matrix(out, ncol = n))
  }
nsamples = 1000
s = sort(Torn2.df$EF)
runs = cumsum(rle(s)$lengths)
or = order(Torn2.df$EF)
ncases = 6
index = c(or[sample(1:runs[1], ncases)], 
          or[sample((runs[1] + 1):runs[2], ncases)],
          or[sample((runs[2] + 1):runs[3], ncases)],
          or[sample((runs[3] + 1):runs[4], ncases)],
          or[sample((runs[4] + 1):runs[5], ncases)],
          or[sample((runs[5] + 1):runs[6], ncases)])
WmaxS = samplefitw(Torn2.df$WS[index], mu = fitw$mu.fv[index], 
                   sigma = fitw$sigma.fv[index], n = nsamples) +
                   threshold * .44704
dfWide = as.data.frame(t(WmaxS))
names(dfWide) = paste(Torn2.df$DATE[index], Torn2.df$TIME[index])
dfLong = melt(dfWide)

## Using  as id variables

dfLong$EF = rep(Torn2.df$EF[index], each = nsamples)
p2 = ggplot(dfLong, aes(value, fill = factor(EF))) +
  geom_histogram(binwidth = 1) +
  facet_wrap(~ variable) +
  theme_bw() +
  theme(legend.position = "none") +
  xlab("Tornado Wind Speed (m/s)") +
  ylab("Frequency") +
  theme(strip.text.x = element_text(size = 8))
p2

plot of chunk unnamed-chunk-12 FIGURE 2

Given an EF rating along with path length, path width, and whether or not there was a fatality, the model generates samples of predictive intensity. The plot shows histograms based on 1000 samples from 36 tornadoes since 2007. Six randomly chosen tornadoes are included from each of the six EF ratings (top row lowest to highest) with the date/time displayed in the panel heading. The histograms are bounded by the wind speeds assigned to the EF category. In some cases the histogram is flat indicating that the length and width do not provide information on tornado intensity beyond the EF rating. However there are exceptions especially for tornadoes with ratings between EF1 and EF3. Here we see cases where the distribution is positively skewed indicating that length and width suggest a lower-end intensity for the given rating.

Validity and adequacy

WmaxS = samplefitw(Torn2.df$WS, mu = fitw$mu.fv,
                   sigma = fitw$sigma.fv, n = 1) +
                   threshold * .44704
WmaxS.df = as.data.frame(WmaxS)
names(WmaxS.df) = "Intensity"
p3a = ggplot(WmaxS.df, aes(Intensity)) +
  geom_histogram(binwidth = 1, color = 'black') +
  ylab("Frequency") +
  xlab("Tornado Intensity (m/s)") +
  theme_bw()
p3b = ggplot(Res.df, aes(Res)) +
  geom_histogram(binwidth = .1, color = 'black') +
  ylab("Frequency") + 
  xlab("Standardized Residuals") +
  theme_bw()
qqplot.data <- function (vec){
  y <- quantile(vec[!is.na(vec)], c(0.25, 0.75))
  x <- qnorm(c(0.25, 0.75))
  slope <- diff(y)/diff(x)
  int <- y[1L] - slope * x[1L]
  d <- data.frame(resids = vec)
  ggplot(d, aes(sample = resids)) + 
    stat_qq() + 
    geom_abline(slope = 1, intercept = 0) +
    theme_bw() +
    xlab("Theoretical Quantiles") +
    ylab("Residual Quantiles")
}
p3c = qqplot.data(Res.df$Res)
mat = matrix(c(1, 2, 3), nrow = 1, byrow = TRUE)
p3a = p3a + ggtitle("a") + theme(plot.title=element_text(hjust=0))
p3b = p3b + ggtitle("b") + theme(plot.title=element_text(hjust=0))
p3c = p3c + ggtitle("c") + theme(plot.title=element_text(hjust=0))                          
multiplot(p3a, p3b, p3c, layout = mat)

plot of chunk unnamed-chunk-13 FIGURE 3

Compare the model-estimated wind speeds with observations for 9 tornadoes that had estimates.

Recent = c(57154, 57162, 57203, 57357, 57364, 57389, 57447, 57487, 57835)
Names = c("Adairsville GA 2013-01-30", "Hattiesburg MS 2013-02-10", "Kilpatrick AL 2013-03-18",
          "Rozel KS 2013-05-18", "Sedgwick KS 2013-05-19", "New Castle/Moore OK 2013-05-20",
          "Lebanon KS 2013-05-27", "York NE 2013-05-29", "Wayne NE 2013-10-10" )
x = as.numeric(rownames(Torn2.df)) %in% Recent
index = which(x)
nsamples = 1000
WmaxS = samplefitw(Torn2.df$WS[index], mu = fitw$mu.fv[index], 
                   sigma = fitw$sigma.fv[index], n = nsamples) +
                   threshold * .44704
dfWide = as.data.frame(t(WmaxS))
names(dfWide) = Names
dfLong = melt(dfWide)

## Using  as id variables

dfLong$EF = rep(Torn2.df$EF[index], each = nsamples)
Estimated = c(160, 170, 125, 175, 135, 205, 140, 110, 170) * .44704
dfLong$Estimated = rep(Estimated, each = nsamples)
dfLong$High = High[dfLong$EF + 1] + threshold * .44704
dfLong$Low = Low[dfLong$EF + 1] + threshold * .44704
p4 = ggplot(dfLong, aes(value, fill = factor(EF))) +
  geom_histogram(binwidth = .5) +
  facet_wrap(~ variable) +
  theme_bw() +
  theme(legend.position = "none") +
  xlab("Tornado Wind Speed (m/s)") +
  ylab("Frequency") +
  geom_segment(aes(x = High, xend = Low, y = -10, yend = -10), size = 2, color = "gray85") +
  geom_point(aes(x = Estimated, y = -10), size = 3, color = "black") +
  theme(strip.text.x = element_text(size = 8))
p4

plot of chunk unnamed-chunk-14 FIGURE 4

Histograms of predicted tornado intensities for nine tornadoes from 2013 for which a wind speed was estimated. The location of the estimated wind speed is shown as a dot and the range of wind speeds defined by the corresponding EF rating is shown as a gray horizontal bar.

dfStats = ddply(dfLong, .(variable), summarize,
                Avg = mean(value),
                Med = median(value))
dfStats$Estimated = Estimated
cor.test(dfStats$Avg, dfStats$Estimated)

## 
##  Pearson's product-moment correlation
## 
## data:  dfStats$Avg and dfStats$Estimated
## t = 11.44, df = 7, p-value = 8.769e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8787 0.9948
## sample estimates:
##    cor 
## 0.9743

sqrt(mean((dfStats$Avg - dfStats$Estimated)^2))

## [1] 5.264

library(moments)
nsamples = 100
WmaxS = samplefitw(Torn2.df$WS, mu = fitw$mu.fv,
                   sigma = fitw$sigma.fv, n = nsamples) +
                   threshold * .44704

dfWide = as.data.frame(t(WmaxS))
dfLong = melt(dfWide)

## Using  as id variables

dfStats = ddply(dfLong, .(variable), summarize,
                Avg = mean(value),
                Med = median(value),
                Min = min(value),
                Max = max(value),
                Q1 = quantile(value, prob = .25),
                Q3 = quantile(value, prob = .75),
                Q05 = quantile(value, prob = .05),
                Q95 = quantile(value, prob = .95),
                Skew = skewness(value))
All = cbind(Torn2.df, dfStats)

WmaxS2 = numeric()
for(i in 1:dim(Torn2.df)[1]){
WmaxS2 = c(WmaxS2, runif(nsamples,
               Low[Torn2.df$EF[i] + 1] + threshold * .44704,
               High[Torn2.df$EF[i] + 1] + threshold * .44704))
}
dfLong2 = data.frame(variable = rep(1:dim(Torn2.df)[1], each = nsamples), value = WmaxS2)
dfStats2 = ddply(dfLong2, .(variable), summarize,
                Avg2 = mean(value),
                Med2 = median(value),
                Skew2 = skewness(value))
All = cbind(All, dfStats2)

Model skill is estimated relative to a null model of choosing a random intensity within the wind speed ranges. Skill is assessed as the percentage increase in the coefficient of variation between path dimension and predicted intensities.

The correlation between path length and EF rating is .552. The correlation between length and tornado intensity using the Weibull model is .604 for an increase in the coefficient of variation of (.604^2 - .552^2)/.5522 * 100 = 20%. The correlation between length and tornado intensity from the null model is .566 for an increase in the coefficient of variation of (.566^2 - .552^2)/.5522 * 100 = 5%.

The correlation between path width and EF rating is .607. The correlation between width and tornado intensity using the Weibull model is .663 for an increase in the cofficient of variation of (.663^2 - .607^2)/.6072 * 100 = 19%. The correlation between width and tornado intensity from the null model is .621 for an increase in the coefficient of variation of (.621^2 - .607^2)/.6072 * 100 = 5%.

mu1 = predict(fitw, "mu",
        newdata = data.frame(Length = 20000, Width = 200, FAT2 = TRUE))
sigma1 = predict(fitw, "sigma",
        newdata = data.frame(Length = 20000, Width = 200))
eff = 2
WS = Surv(Low[eff + 1], High[eff + 1], 3, "interval") 
Wmax = qweibull(pWEIic(q = WS, mu = mu1, sigma = sigma1), 
                scale = mu1, shape = sigma1)
Wmax + threshold * .44704

## status 
##  55.86

Compare with Doppler radar estimates

Torn3.df = as.data.frame(TornL)
Torn3.df = subset(Torn3.df, EF > 0)
Torn3.df$FAT2 = Torn3.df$FAT > 0
DowTors = numeric()
DowTors = c(DowTors, as.numeric(rownames(Torn3.df[Torn3.df$Date == "1995/05/16", ][5, ]))) # May 16, 1995 Hanston, KS
DowTors = c(DowTors, as.numeric(rownames(Torn3.df[Torn3.df$Date == "1996/05/25", ][3, ]))) # May 25, 1996 Friona, TX
DowTors = c(DowTors, as.numeric(rownames(Torn3.df[Torn3.df$Date == "1997/04/10", ][4, ]))) # April 10, 1997 Tulia, TX
DowTors = c(DowTors, as.numeric(rownames(Torn3.df[Torn3.df$Date == "1997/05/26", ][2, ]))) # May 26, 1997 Kiefer, OK
DowTors = c(DowTors, as.numeric(rownames(Torn3.df[Torn3.df$Date == "1998/05/30", ][4, ]))) # May 30, 1998 Spencer, SD
DowTors = c(DowTors, as.numeric(rownames(Torn3.df[Torn3.df$Date == "1999/05/01", ][1, ]))) # May 1, 1999 Tarzan, TX
DowTors = c(DowTors, as.numeric(rownames(Torn3.df[Torn3.df$Date == "1999/05/03", ][4, ]))) # May 3, 1999 Moore, OK
DowTors = c(DowTors, as.numeric(rownames(Torn3.df[Torn3.df$Date == "1999/05/31", ][3, ]))) # May 31, 1999 Sitka, KS
DowTors = c(DowTors, as.numeric(rownames(Torn3.df[Torn3.df$Date == "1999/06/04", ][6, ]))) # June 4 1999 Thedford, NE
DowTors = c(DowTors, as.numeric(rownames(Torn3.df[Torn3.df$Date == "2004/05/24", ][3, ]))) # May 24 2004 Hebron, NE
DowTors = c(DowTors, as.numeric(rownames(Torn3.df[Torn3.df$Date == "2008/05/10", ][13, ]))) # May 10 2008 Stuttgart, AR
DowTors = c(DowTors, as.numeric(rownames(Torn3.df[Torn3.df$Date == "2009/06/01", ][1, ]))) # June 5 2009 Goshen County, WY

x = as.numeric(rownames(Torn3.df)) %in% DowTors
Torn4.df = Torn3.df[x, ]

mu1 = predict(fitw, "mu",
        newdata = data.frame(Length = Torn4.df$Length, Width = Torn4.df$Width, FAT2 = Torn4.df$FAT2))
sigma1 = abs(predict(fitw, "sigma",
        newdata = data.frame(Length = Torn4.df$Length, Width = Torn4.df$Width)))
eff = Torn4.df$EF
Wmax = numeric()
for(i in 1:length(DowTors)){
WS = Surv(Low[eff[i] + 1], High[eff[i] + 1], 3, "interval") 
Wmax = c(Wmax, qweibull(pWEIic(q = WS, mu = mu1[i], sigma = sigma1[i]), 
                scale = mu1[i], shape = sigma1[i]) + threshold * .44704)
 }

Idow = c(82.6, 114.2, 54.1, 90.2, 205.5, 65.8, 117, 65, 96.4, 80.5, 113.4, 108.6)
Iwsr = c(65, 64, 46.5, 48.6, 111.9, 42.5, 92.9, 48, 74.8, 62.6, 81, 53)
Mdow = c(62.88, 64.38, 53.64, 37.69, 98.70, 38.97, 83.46, 57.88, 84.3, 59.44, 91.25, 52.67)

df = data.frame(Date = Torn4.df$Date, Length = Torn4.df$Length, Width = Torn4.df$Width, 
                EF = Torn4.df$EF, FAT2 = Torn4.df$FAT2, Wmax, Idow, Iwsr, Mdow)
cor.test(df$Wmax, df$Idow)

## 
##  Pearson's product-moment correlation
## 
## data:  df$Wmax and df$Idow
## t = 2.844, df = 10, p-value = 0.01742
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1539 0.8980
## sample estimates:
##    cor 
## 0.6688

cor.test(df$Wmax, df$Iwsr)

## 
##  Pearson's product-moment correlation
## 
## data:  df$Wmax and df$Iwsr
## t = 5.837, df = 10, p-value = 0.0001646
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6164 0.9658
## sample estimates:
##    cor 
## 0.8792

cor.test(df$Wmax, df$Mdow)

## 
##  Pearson's product-moment correlation
## 
## data:  df$Wmax and df$Mdow
## t = 4.705, df = 10, p-value = 0.0008357
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4889 0.9509
## sample estimates:
##    cor 
## 0.8299

sqrt(mean((df$Wmax - df$Mdow)^2))

## [1] 17.16

Annual variation

nsamples = 1
WmaxS = samplefitw(Torn2.df$WS, mu = fitw$mu.fv,
                   sigma = fitw$sigma.fv, n = nsamples) +
                   threshold * .44704
df = data.frame(Year = Torn2.df$Year, Intensity = WmaxS, EF = Torn2.df$EF)
coef(lm(Intensity ~ Year, data = df))[2]

##   Year 
## 0.1963

ggplot(df, aes(factor(Year), Intensity)) + 
  geom_boxplot() +
  theme_bw() +
  xlab("Year") +
  ylab("Tornado Intensity (m/s)")

plot of chunk unnamed-chunk-19 FIGURE 5

df2 = ddply(df, .(Year, EF), summarize,
            mI = mean(Intensity),
            nT = length(Intensity))
ggplot(df2, aes(Year, nT)) + 
  geom_point() +
  geom_line() +
  facet_wrap(~ EF, scale = "free_y")