The North Atlantic Oscillation and May severe hail risk in the central and eastern United States

Code in support of our paper submitted to Geophysical Research Letters

R version3.1 (2013-05-16) – “Good Sport” Copyright © 2013 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: x86_64-apple-darwin10.8.0 (64-bit)

Set working directory and load packages.

setwd("~/GDrive/hail")
library(sp)
## Warning: package 'sp' was built under R version 3.0.2
library(rgdal)
## Warning: package 'rgdal' was built under R version 3.0.2
## rgdal: version: 0.8-13, (SVN revision 494)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.9.2, released 2012/10/08
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.0/Resources/library/rgdal/gdal
## Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012, [PJ_VERSION: 480]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.0/Resources/library/rgdal/proj
library(maptools)
## Checking rgeos availability: TRUE
library(maps)
library(colorspace)
library(ggplot2)
library(gamlss)
## Loading required package: splines
## Loading required package: gamlss.dist
## Loading required package: MASS
## Loading required package: gamlss.data
## Loading required package: nlme
## Warning: package 'nlme' was built under R version 3.0.2
##  **********   GAMLSS Version 4.2-6  ********** 
## For more on GAMLSS look at http://www.gamlss.org/
## Type gamlssNews() to see new features/changes/bug fixes.
library(gamlss.util)
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 3.0.2
## Loading required package: rgenoud
## Loading required package: parallel
## ##  rgenoud (Version 5.7-12, Build Date: 2013-06-28)
## ##  See http://sekhon.berkeley.edu/rgenoud for additional documentation.
## ##  Please cite software as:
## ##   Walter Mebane, Jr. and Jasjeet S. Sekhon. 2011.
## ##   ``Genetic Optimization Using Derivatives: The rgenoud package for R.''
## ##   Journal of Statistical Software, 42(11): 1-26. 
## ##
## 
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following object is masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Loading required package: gamlss.add
## Loading required package: mgcv
## This is mgcv 1.7-27. For overview type 'help("mgcv-package")'.
## Loading required package: nnet
## Loading required package: gamlss.nl
## Loading required package: survival
## 
## Attaching package: 'survival'
## 
## The following object is masked from 'package:gamlss':
## 
##     ridge
## 
## Loading required package: lattice
## Loading required package: spam
## Loading required package: grid
## Spam version 0.40-0 (2013-09-11) is loaded.
## Type 'help( Spam)' or 'demo( spam)' for a short introduction 
## and overview of this package.
## Help for individual functions is also obtained by adding the
## suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
## 
## Attaching package: 'spam'
## 
## The following object is masked from 'package:base':
## 
##     backsolve, forwardsolve
require(gridExtra)
## Loading required package: gridExtra

Download hail data from http://www.spc.noaa.gov/gis/svrgis/ and read into R session. Data is appended with hail 2012 data which was manually edited.

# tmp =
# download.file('http://www.spc.noaa.gov/gis/svrgis/zipped/hail.zip','hail.zip',
# mode='wb') unzip('hail.zip') hail1 =
# readOGR('/Users/robhodges/GDrive/hail', 'hail')
# #http://www.spc.noaa.gov/gis/svrgis/ p.h = proj4string(hail1) hail2012 =
# read.csv('http://dl.dropboxusercontent.com/u/616635/data/2012_hail_edit.csv',T)
# #add 2012 data hail2012$LON = hail2012$ELON #to make similar to 'hail' for
# merging hail2012$LAT = hail2012$ELAT #same coordinates(hail2012) = ~ LON +
# LAT #make SPDF proj4string(hail2012) = '+proj=longlat' hail.2012 =
# spTransform(hail2012, CRS(p.h)) hail <- spRbind(hail1, hail.2012) #spatial
# merge save(hail, file = 'hail.RData')
load(url("http://dl.dropboxusercontent.com/u/616635/data/hail.RData"))
nao = read.table("http://dl.dropboxusercontent.com/u/616635/data/NAO.txt", T)
h500 = read.table("http://dl.dropboxusercontent.com/u/616635/data/H500-US-NCEP.txt", 
    T)

Subset the hail data spatially and temporally.

years = 1955:2012
hail = hail[hail$SLON > -105 & hail$SLON < -65 & hail$SLAT < 50 & hail$SLAT > 
    20 & hail$YEAR %in% years & hail$SIZE >= 1 & hail$MONTH == 5, ]

Total US severe hail events for the month of May, 1955 through 2010.

counts.df = data.frame(table(hail$YEAR))
colnames(counts.df) = c("Years", "Counts")
pre = data.frame(Years = years, NAO = nao$May)
df = merge(pre, counts.df, by = "Years", all = T)
df[is.na(df)] <- 0  #missing years same as 0 counts

# postscript(file = '~/GDrive/hail/tex/figs/counts.eps', height = 3, width =
# 6, horizontal=F)
ggplot(df, aes(x = Years, y = Counts)) + geom_bar(stat = "identity", width = 0.8) + 
    # stat_smooth(col='red', lwd=2, se=F) +
ylab("US Severe Hail Reports (May)") + xlab("")

plot of chunk hailTotals

# dev.off()

The time series is visibily non-stationary in terms of changing mean and variance, likely due to population bias and observation advances.

ACF/PACF plots of hail counts.

# postscript(file = '~/GDrive/hail/tex/figs/cf.eps', height = 5, width = 6,
# horizontal=F)
y.acf <- acf(df$Counts, plot = FALSE)
y.pacf <- pacf(df$Counts, plot = FALSE)
ci <- 0.95  # Indicates 95% confidence level
clim0 <- qnorm((1 + ci)/2)/sqrt(y.acf$n.used)
clim <- c(-clim0, clim0)
hline.data <- data.frame(z = c(0, clim), type = c("base", "ci", "ci"))

acfPlot <- ggplot(data.frame(lag = y.acf$lag, acf = y.acf$acf)) + geom_hline(aes(yintercept = z, 
    colour = type, linetype = type), hline.data) + geom_linerange(aes(x = lag, 
    ymin = 0, ymax = acf)) + scale_colour_manual(values = c("black", "blue")) + 
    scale_linetype_manual(values = c("solid", "dashed")) + ylab("ACF") + theme(plot.title = element_text(size = 10), 
    axis.title.x = element_text(size = 8)) + annotate("text", x = 17, y = 0.95, 
    label = "(a)")

pacfPlot <- ggplot(data.frame(lag = y.pacf$lag, pacf = y.pacf$acf)) + geom_hline(aes(yintercept = z, 
    colour = type, linetype = type), hline.data) + geom_linerange(aes(x = lag, 
    ymin = 0, ymax = pacf)) + scale_colour_manual(values = c("black", "blue")) + 
    scale_linetype_manual(values = c("solid", "dashed")) + ylab("PACF") + theme(plot.title = element_text(size = 10), 
    axis.title.x = element_text(size = 8)) + annotate("text", x = 17, y = 0.75, 
    label = "(b)")

grid.arrange(acfPlot, pacfPlot, nrow = 2)

plot of chunk ggplot2ACFPACF

# dev.off()

Our goal here is predictive climatology. That is, we are interested in improving our understanding of severe hail events in response to climatic phenomena like the North Atlantic Oscillation. The North Atlantic Oscillation (NAO) index is the monthly difference in normalized sea-level pressures between Stykkisholmur, Iceland and Ponta Delgada, Azores.

The North Atlantic Oscillation (NAO) index is the monthly difference in normalized sea-level pressures between Stykkisholmur, Iceland and Ponta Delgada, Azores. When pressures are low over Iceland (Icelandic low) they tend to be high over the Azores (Azores high), and vice-versa. The NAO is an important pattern of global interannual variability (Hurrell and van Loon, 1997; Mann and Park, 1994).

# postscript(file = '~/GDrive/hail/tex/figs/nao.eps', height = 3, width = 6,
# horizontal=F)
p1 = ggplot(df, aes(x = Years, y = NAO)) + stat_smooth(se = F, lwd = 1.5) + 
    geom_bar(stat = "identity", width = 0.8) + ylab("May NAO (s.d.)") + xlab("") + 
    scale_y_continuous(breaks = seq(-1, 2, 1), labels = c("-1", "0", "+1", "+2"))
print(p1)
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
## Warning: Stacking not well defined when ymin != 0

plot of chunk naoPlot

# dev.off()

Use a GARMA (1,0) model to examine the NAO influence on severe hail frequency. The family is negative binomial.

hail.garma = garmaFit(Counts ~ I(Years - mean(Years)) + NAO, order = c(1, 0), 
    family = NBI, data = df)
## deviance of linear model=  730.1 
## deviance of  garma model=  714.5
summary(hail.garma)
## 
## Family:  c("NBI", "Negative Binomial type I") 
## Fitting method: "nlminb" 
## 
## Call:  
## garmaFit(formula = Counts ~ I(Years - mean(Years)) + NAO, order = c(1,  
##     0), data = df, family = NBI) 
## 
## 
## Coefficient(s):
##                                Estimate  Std. Error   t value   Pr(>|t|)
## beta.(Intercept)             6.07472886  0.05544476 109.56363 < 2.22e-16
## beta.I(Years - mean(Years))  0.05929190  0.00330985  17.91380 < 2.22e-16
## beta.NAO                    -0.12461504  0.04238276  -2.94023  0.0032797
## phi                          0.25536547  0.12366587   2.06496  0.0389265
## sigma                        0.08712649  0.01673553   5.20608 1.9287e-07
##                                
## beta.(Intercept)            ***
## beta.I(Years - mean(Years)) ***
## beta.NAO                    ** 
## phi                         *  
## sigma                       ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Degrees of Freedom for the fit: 5 Residual Deg. of Freedom   53 
## Global Deviance:     714.5 
##             AIC:     724.5 
##             SBC:     734.9
hail.garma$sd = sqrt(diag(vcov(hail.garma)))  #model parameter SD

Examine model coefficients.

hail.garma$coef + 2 * hail.garma$sd
##            beta.(Intercept) beta.I(Years - mean(Years)) 
##                     6.18562                     0.06591 
##                    beta.NAO                         phi 
##                    -0.03985                     0.50270 
##                       sigma 
##                     0.12060
hail.garma$coef - 2 * hail.garma$sd
##            beta.(Intercept) beta.I(Years - mean(Years)) 
##                    5.963839                    0.052672 
##                    beta.NAO                         phi 
##                   -0.209381                    0.008034 
##                       sigma 
##                    0.053655

Examine model residuals.

# postscript(file = '~/GDrive/hail/tex/figs/garmaResid.eps', height = 5,
# width = 6, horizontal=F)
y.acf <- acf(hail.garma$resid, plot = FALSE)
y.pacf <- pacf(hail.garma$resid, plot = FALSE)
ci <- 0.95  # Indicates 95% confidence level
clim0 <- qnorm((1 + ci)/2)/sqrt(y.acf$n.used)
clim <- c(-clim0, clim0)
hline.data <- data.frame(z = c(0, clim), type = c("base", "ci", "ci"))

acfPlot <- ggplot(data.frame(lag = y.acf$lag, acf = y.acf$acf)) + geom_hline(aes(yintercept = z, 
    colour = type, linetype = type), hline.data) + geom_linerange(aes(x = lag, 
    ymin = 0, ymax = acf)) + scale_colour_manual(values = c("black", "blue")) + 
    scale_linetype_manual(values = c("solid", "dashed")) + ylab("ACF") + theme(plot.title = element_text(size = 10), 
    axis.title.x = element_text(size = 8)) + annotate("text", x = 17, y = 0.95, 
    label = "(a)")

pacfPlot <- ggplot(data.frame(lag = y.pacf$lag, pacf = y.pacf$acf)) + geom_hline(aes(yintercept = z, 
    colour = type, linetype = type), hline.data) + geom_linerange(aes(x = lag, 
    ymin = 0, ymax = pacf)) + scale_colour_manual(values = c("black", "blue")) + 
    scale_linetype_manual(values = c("solid", "dashed")) + ylab("PACF") + theme(plot.title = element_text(size = 10), 
    axis.title.x = element_text(size = 8)) + annotate("text", x = 17, y = 0.33, 
    label = "(b)")

grid.arrange(acfPlot, pacfPlot, nrow = 2)

plot of chunk garima.resid

# dev.off()

NAO ~ 500 hPa geopotential heights relationship.

h500 = h500[h500$Year %in% years, ]
summary(lm(h500$H500 ~ df$NAO))
## 
## Call:
## lm(formula = h500$H500 ~ df$NAO)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -34.56  -9.71   0.52  11.20  46.68 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5733.59       2.33  2465.7   <2e-16 ***
## df$NAO          5.63       2.45     2.3    0.025 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.7 on 56 degrees of freedom
## Multiple R-squared:  0.0862, Adjusted R-squared:  0.0698 
## F-statistic: 5.28 on 1 and 56 DF,  p-value: 0.0253
summary(lm(h500$H500 ~ df$NAO + df$Year))
## 
## Call:
## lm(formula = h500$H500 ~ df$NAO + df$Year)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -37.52  -8.68  -1.03  12.76  44.89 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 5275.094    274.958   19.19   <2e-16 ***
## df$NAO         6.306      2.445    2.58    0.013 *  
## df$Year        0.231      0.139    1.67    0.101    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.4 on 55 degrees of freedom
## Multiple R-squared:  0.13,   Adjusted R-squared:  0.0985 
## F-statistic: 4.11 on 2 and 55 DF,  p-value: 0.0216