A Modeling Framework for Regional Tornado Studies

Thomas H. Jagger, James B. Elsner, and Holly M. Widen

Journal of Advances in Modeling Earth Systems

suppressMessages(library(httr))
suppressMessages(library(moments))
suppressMessages(library(raster))
suppressMessages(library(spdep))
suppressMessages(library(splines))
useDrop <- function(x, use.fun = source, ...){
                    tf <- tempfile()
                    tmp <- GET(x)
                    stop_for_status(tmp)
                    writeBin(content(tmp, type = "raw"), tf)
                    use.fun(tf, ...)
                    }
useDrop("https://dl.dropboxusercontent.com/s/bw5hvvj9e732e8i/CountyStatisticsSupport.R", source)

## [1] "Sourcing CountyStatisticsSupport.R on Wed Sep 17 09:27:10 2014"
## [1] "loading packages"

## rgeos version: 0.3-4, (SVN revision 438)
##  GEOS runtime version: 3.3.3-CAPI-1.7.4 
##  Polygon checking: TRUE 
## 
## rgdal: version: 0.8-16, (SVN revision 498)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.9.2, released 2012/10/08
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rgdal/gdal
## Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012, [PJ_VERSION: 480]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rgdal/proj
## Checking rgeos availability: TRUE

The SPC Tornado Dataset

download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip",
              "tornado.zip", mode="wb")

#setwd("~/Dropbox/Tornadoes/CountyStatistics")
unzip("tornado.zip")
TornL = changeTornDataTypes(readOGR(dsn = ".", layer = "tornado", 
                                    stringsAsFactors = FALSE))

## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "tornado"
## with 57988 features and 21 fields
## Feature type: wkbLineString with 2 dimensions

Tornado Counts By County

useDrop("http://data.biogeo.ucdavis.edu/data/gadm2/R/USA_adm2.RData",
        load, envir = sys.frame())
KS.sp = gadm[gadm$NAME_1 == "Kansas", ]
county = tolower(KS.sp$NAME_2)
countiesun.sp = geometry(spChFIDs(KS.sp, county))                 
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))

TornL2 = subset(TornL, Year >= 1954 & EF >= 1)
ct = over(counties.sp, TornL2, returnList = TRUE)
nT = sapply(ct, function(x) nrow(x))
area = rgeos::gArea(counties.sp, byid = TRUE)
Nyears = diff(range(TornL2$Year)) + 1
nTor.df = data.frame(state = "KS", county = county, Nyears =  Nyears,
                     nT = nT, area = area/1000000, rate = nT/area * 10^6 * 10000 / Nyears,
                     stringsAsFactors = FALSE, ID = 1:length(county))

TracksAll.df = ldply(ct, data.frame)
TracksAll.df$county = rep(county, sapply(ct, nrow))
nTor.df.day = ddply(TracksAll.df, .(county, Year), summarize, 
                    nT = length(county), 
                    nTd = length(unique(Date)), .drop = FALSE)
spdf = SpatialPolygonsDataFrame(counties.sp, nTor.df)

suppressMessages(library(wesanderson))
crq = wes.palette(name = "Zissou", type = "continuous")
range(spdf$nT)

## [1]  2 39

rng = seq(0, 40, 5)
spplot(spdf, "nT", col = "white", at = rng, 
       col.regions = crq(8),
       colorkey = list(space = "bottom", labels = paste(rng)),
       sub = "Number of EF1+ Tornadoes (1954-2013)")

plot of chunk E

plotTorn <- function(x, tracks){
  CRS0 = CRS("+proj=longlat +datum=WGS84")
  #x is the spatial polygons object
  #tracks is the track file for each polygon.
  bc = getBordersAndCenters(x, CRS0)
  bc$centers$nT = x$nT
  bc$centers$rate = round(x$rate, 2)
  borders.df = merge(bc$centers[c("county", "nT", "rate")], 
                     bc$borders, by = "county")
  #Use this data to make the plot of intensity
  mapT = ggplot(borders.df, aes(x = long, y = lat, group = county, fill = nT)) +
    geom_polygon(colour = "gray", size = .2) +
    coord_map(project="polyconic") + 
    theme(panel.grid.minor=element_blank(), 
          panel.grid.major=element_blank(), 
          panel.background=element_blank(),
          axis.ticks = element_blank(),
          axis.text = element_blank(),
          legend.position="none") +
    #  labs(title="Number of EF1+ Tornadoes\n(1954-2013)") + 
    xlab("") + ylab("") +
    geom_text(data=bc$centers, aes(x = long, y = lat, group = NULL, label = nT), 
              vjust = .5, size = 5, color = "white")
  #generate the tracks data from the track.df file 
  tracks = tracks[!duplicated(tracks$OM), ]
  tracks = subset(tracks, ELAT != 0)
  mapT +
    geom_segment(aes(x = SLON, xend = ELON, y = SLAT, yend = ELAT, 
                     group = NULL, fill = NULL),
                 data = tracks, lineend = "round",
                 color = "black", size = 1.5, alpha = .6) +
    geom_text(data=bc$centers, aes(x = long, y = lat, group = NULL, label = nT), 
              vjust = .5, size = 5, color="white") +
    theme(plot.title = element_text(vjust = -2,
                                  hjust = .3,
                                  size = 20, 
                                  face = "bold"))
}

print(plotTorn(x = spdf, tracks = TracksAll.df) +
  scale_fill_gradientn(colours = crq(10)))

plot of chunk G

cor.test(spdf$nT, spdf$area)

## 
##  Pearson's product-moment correlation
## 
## data:  spdf$nT and spdf$area
## t = 2.856, df = 103, p-value = 0.005184
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.0836 0.4398
## sample estimates:
##    cor 
## 0.2709

County-Level Population Data

pop = useDrop("https://www.dropbox.com/s/o8s6423r5k290xs/PEP_2012_PEPANNRES.csv",
              read.csv, stringsAsFactors = FALSE)  
#pop = read.csv("PEP_2013_PEPANNRES.csv", stringsAsFactors = FALSE)
spdf$pop = processCity(pop, field = "respop72012")
spdf$Lpop = log10(spdf$pop)

sort(spdf$pop)

##   [1]   1298   1517   1704   1913   1963   2175   2181   2256   2496   2538
##  [11]   2560   2578   2639   2678   2720   2729   2757   2784   2871   2979
##  [21]   2986   3046   3068   3169   3174   3220   3278   3571   3765   3806
##  [31]   3968   4256   4358   4396   4858   4861   4937   5223   5519   5612
##  [41]   5756   5758   5854   5911   6030   6072   6113   6355   6454   6494
##  [51]   6928   6946   7039   7863   7864   7917   7923   7941   8502   8531
##  [61]   9105   9397   9441   9728   9881   9985  10022  10132  12347  13319
##  [71]  13449  14897  16142  16406  16813  18945  19762  21226  21284  22302
##  [81]  23547  23674  25906  27557  29053  29356  32612  33748  34459  34752
##  [91]  34852  36288  37200  38013  39361  55988  64438  65827  75508  77739
## [101] 112864 159129 178991 503889 559913

spdf$county[order(spdf$pop)]

##   [1] "greeley"      "wallace"      "lane"         "comanche"    
##   [5] "hodgeman"     "stanton"      "clark"        "wichita"     
##   [9] "kiowa"        "sheridan"     "rawlins"      "graham"      
##  [13] "hamilton"     "cheyenne"     "elk"          "gove"        
##  [17] "chase"        "logan"        "decatur"      "edwards"     
##  [21] "trego"        "jewell"       "ness"         "morton"      
##  [25] "lincoln"      "rush"         "woodson"      "chautauqua"  
##  [29] "smith"        "osborne"      "kearny"       "haskell"     
##  [33] "stafford"     "meade"        "republic"     "barber"      
##  [37] "scott"        "rooks"        "phillips"     "norton"      
##  [41] "stevens"      "washington"   "morris"       "harper"      
##  [45] "gray"         "ottawa"       "sherman"      "mitchell"    
##  [49] "greenwood"    "ellsworth"    "pawnee"       "russell"     
##  [53] "wabaunsee"    "kingman"      "doniphan"     "anderson"    
##  [57] "grant"        "thomas"       "coffey"       "clay"        
##  [61] "wilson"       "cloud"        "linn"         "pratt"       
##  [65] "brown"        "rice"         "marshall"     "nemaha"      
##  [69] "marion"       "allen"        "jackson"      "bourbon"     
##  [73] "osage"        "neosho"       "atchison"     "jefferson"   
##  [77] "dickinson"    "cherokee"     "labette"      "pottawatomie"
##  [81] "seward"       "sumner"       "franklin"     "barton"      
##  [85] "ellis"        "mcpherson"    "miami"        "lyon"        
##  [89] "montgomery"   "ford"         "harvey"       "cowley"      
##  [93] "finney"       "geary"        "crawford"     "saline"      
##  [97] "reno"         "butler"       "riley"        "leavenworth" 
## [101] "douglas"      "wyandotte"    "shawnee"      "sedgwick"    
## [105] "johnson"

cor.test(spdf$nT, spdf$pop)

## 
##  Pearson's product-moment correlation
## 
## data:  spdf$nT and spdf$pop
## t = 1.867, df = 103, p-value = 0.06467
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.01107  0.36015
## sample estimates:
##   cor 
## 0.181

suppressMessages(library(RColorBrewer))
range(spdf$Lpop)

## [1] 3.113 5.748

rng = seq(3, 6, .5)
crq = brewer.pal(7, "Blues")

spdf$density = spdf$pop/spdf$area
spdf$Ldensity = log10(spdf$density)
range(spdf$Ldensity)

## [1] -0.1881  2.6598

rng = seq(-.2, 2.8, .6)
crq = brewer.pal(6, "Blues")
labs = as.character(round(10^rng, 0))
spplot(spdf, "Ldensity", col = "white", at = rng, 
       col.regions = crq,
       colorkey = list(space = "bottom", labels = labs),
       sub = "Population Density (2013) [persons per square km]")

plot of chunk K

cor.test(spdf$nT, spdf$area)

## 
##  Pearson's product-moment correlation
## 
## data:  spdf$nT and spdf$area
## t = 2.856, df = 103, p-value = 0.005184
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.0836 0.4398
## sample estimates:
##    cor 
## 0.2709

A Statistical Model for County-Level Tornado Counts

#source("http://www.math.ntnu.no/inla/givemeINLA.R")
nb = poly2nb(spdf)
wts = nb2listw(nb)
suppressMessages(library(INLA))
nb2INLA("tornb.inla", nb)
tornb.inla = inla.read.graph("tornb.inla")
spdf.inla.formula = nT ~ Ldensity + 
                         f(ID, model = "besag", graph = tornb.inla)
spdf.inla = inla(formula = spdf.inla.formula, family = "nbinomial", E = area,
                  data = spdf@data, control.predictor = list(compute = TRUE),
                  control.results = list(return.marginals.random = TRUE),
                  control.compute = list(config = TRUE, mlik = TRUE, cpo = TRUE,
                                        dic = TRUE, po = TRUE))       
summary(spdf.inla)

## 
## Call:
## c("inla(formula = spdf.inla.formula, family = \"nbinomial\", data = spdf@data, ",  "    E = area, control.compute = list(config = TRUE, mlik = TRUE, ",  "        cpo = TRUE, dic = TRUE, po = TRUE), control.predictor = list(compute = TRUE), ",  "    control.results = list(return.marginals.random = TRUE))" )
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.1138          0.1722          0.0843          0.3703 
## 
## Fixed effects:
##                mean     sd 0.025quant 0.5quant 0.975quant   mode kld
## (Intercept) -4.9262 0.0781    -5.0801  -4.9261     -4.773 -4.926   0
## Ldensity     0.3524 0.0953     0.1653   0.3522      0.540  0.352   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## 
## Model hyperparameters:
##                                                      mean   sd    
## size for the nbinomial observations (overdispersion)  9.329  2.146
## Precision for ID                                     17.992 17.994
##                                                      0.025quant 0.5quant
## size for the nbinomial observations (overdispersion)  5.692      9.146  
## Precision for ID                                      4.198     12.519  
##                                                      0.975quant mode  
## size for the nbinomial observations (overdispersion) 14.079      8.819
## Precision for ID                                     64.588      7.443
## 
## Expected number of effective parameters(std dev): 18.35(7.739)
## Number of equivalent replicates : 5.723 
## 
## Deviance Information Criterion: 707.38
## Effective number of parameters: 20.09
## 
## Marginal Likelihood:  -451.62 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed

spdf$random = spdf.inla$summary.random$ID$mean
spdf$fitted = spdf.inla$summary.fitted.values$mean

print(ggplotSpdata(spdf, "random", tfun = function(x) round((exp(x) - 1) * 100)) +
#  ggtitle("Posterior Mean Random Effect (%)") +
  theme(plot.title = element_text(vjust = 0, hjust = .5, size = 16, face = "bold")))

plot of chunk L1

spdf$nTadj = mean(spdf$nT) + (mean(spdf$nT) * (exp(spdf$random) - 1))
rng = seq(12, 27, 3)
crq = brewer.pal(6, "Reds")
spplot(spdf, "nTadj", col = "white", at = rng, 
       col.regions = crq,
       colorkey = list(space = "bottom", labels = as.character(rng)),
       sub = "Adjusted Tornado Occurrence")

Spatial Pattern of Tornado Activity

Kansas

spdf$county[(exp(spdf$random) - 1) * 100 > 25]

## [1] "barton"   "brown"    "edwards"  "pawnee"   "stafford"

TornL2.df = as.data.frame(TornL2)
TornL2.df = subset(TornL2.df, ST == "KS")
sum(TornL2.df$Month >= 4 & TornL2.df$Month <= 6)/dim(TornL2.df)[1] * 100

## [1] 72.8

Illinois Tornadoes

IL.sp = gadm[gadm$NAME_1 == "Illinois", ]
county = tolower(IL.sp$NAME_2)
county = county[-50] #Remove Lake Michigan from Ill
IL.sp = IL.sp[-50, ] #Remove Lake Michigan from Ill
countiesun.sp = geometry(spChFIDs(IL.sp, county))                 
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))

TornL2 = subset(TornL, Year >= 1954 & EF >= 1)
ct = over(counties.sp, TornL2, returnList = TRUE)
nT = sapply(ct, function(x) nrow(x))
area = rgeos::gArea(counties.sp, byid = TRUE)
Nyears = diff(range(TornL2$Year)) + 1
nTor.df = data.frame(state = "IL", county = county, Nyears =  Nyears,
        nT = nT, area = area/1000000, rate = nT/area * 10^6 * 10000 / Nyears,
        stringsAsFactors = FALSE, ID = 1:length(county))
TracksAll.df = ldply(ct, data.frame)
TracksAll.df$county = rep(county, sapply(ct, nrow))
nTor.df.day = ddply(TracksAll.df, .(county, Year), summarize, 
         nT = length(county), 
         nTd = length(unique(Date)), .drop = FALSE)
spdfIL = SpatialPolygonsDataFrame(counties.sp, nTor.df)
# set lat_0 and lon_0 based on Wolfram Alpha center coordinates Illinois
p4sIL = "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=40.07 +lon_0=-89.2 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
spdfIL = spTransform(spdfIL, CRS(p4sIL))

range(spdfIL$nT)

## [1]  1 48

crq = wes.palette(name = "Zissou", type = "continuous")
rng = seq(0, 50, 5)
spplot(spdfIL, "nT", col = "white", at = rng, 
       col.regions = crq(10),
       colorkey = list(space = "bottom", labels = paste(rng)),
       sub = "Number of EF1+ Tornadoes (1954-2013)")

plot of chunk S

print(plotTorn(x = spdfIL, tracks = TracksAll.df) +
  scale_fill_gradientn(colours = crq(10)))

plot of chunk T

cor.test(spdfIL$nT, spdfIL$area)

## 
##  Pearson's product-moment correlation
## 
## data:  spdfIL$nT and spdfIL$area
## t = 9.68, df = 100, p-value = 4.441e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5794 0.7839
## sample estimates:
##    cor 
## 0.6955

pop = read.csv("PEP_2013_PEPANNRES_IL.csv", stringsAsFactors = FALSE)
pop[82, 3] = "Saint Clair County, Illinois"
pop[19, 3] = "De Kalb County, Illinois"
pop[50, 3] = "La Salle County, Illinois"
#spdfIL$pop = processCity(pop, field = "respop72012")
ctys = sapply(strsplit(pop$GEO.display.label, ","), function(x) x[1])
ctys2 = tolower(sapply(strsplit(ctys, " "), function(x) x[1]))
pop = pop[order(ctys2), ]
all(tolower(substring(pop$GEO.display.label, 1, nchar(spdfIL$county))) == spdfIL$county)

## [1] TRUE

spdfIL$pop = pop$respop72012
spdfIL$Lpop = log10(spdfIL$pop)

sort(spdfIL$pop)

##   [1]    4256    4271    5021    5282    5439    5876    5948    5960
##   [9]    6724    6908    7062    7490    7752    8386    9656   10977
##  [17]   11746   12315   12704   12786   13421   13622   13746   13997
##  [25]   14372   14584   14640   14946   15047   15174   16214   16223
##  [33]   16276   16298   16484   16511   16592   16622   17612   17649
##  [41]   17808   18160   18900   19605   19883   22025   22078   22205
##  [49]   22552   22738   25007   29257   29688   30027   32572   32956
##  [57]   33310   34314   34335   34597   35132   35309   36673   38083
##  [65]   38525   38677   38868   38965   39482   46943   47205   50177
##  [73]   50208   52254   52850   53659   53859   57832   60113   66714
##  [81]   67243   80715  104622  109978  112847  112944  118108  136122
##  [89]  147514  172295  187255  199269  203435  267899  268714  291844
##  [97]  307729  521306  681590  701219  927418 5227992

spdfIL$county[order(spdfIL$pop)]

##   [1] "hardin"      "pope"        "calhoun"     "scott"       "gallatin"   
##   [6] "putnam"      "stark"       "pulaski"     "edwards"     "brown"      
##  [11] "henderson"   "schuyler"    "alexander"   "hamilton"    "jasper"     
##  [16] "cumberland"  "wabash"      "marshall"    "menard"      "johnson"    
##  [21] "cass"        "greene"      "clay"        "ford"        "mason"      
##  [26] "white"       "washington"  "moultrie"    "carroll"     "massac"     
##  [31] "richland"    "mercer"      "pike"        "clark"       "de witt"    
##  [36] "piatt"       "lawrence"    "wayne"       "bond"        "union"      
##  [41] "warren"      "edgar"       "hancock"     "crawford"    "douglas"    
##  [46] "fayette"     "perry"       "shelby"      "jo daviess"  "jersey"     
##  [51] "saline"      "iroquois"    "montgomery"  "logan"       "mcdonough"  
##  [56] "randolph"    "monroe"      "bureau"      "effingham"   "christian"  
##  [61] "lee"         "morgan"      "fulton"      "clinton"     "livingston" 
##  [66] "jefferson"   "marion"      "woodford"    "franklin"    "stephenson" 
##  [71] "macoupin"    "henry"       "grundy"      "knox"        "ogle"       
##  [76] "coles"       "boone"       "whiteside"   "jackson"     "williamson" 
##  [81] "adams"       "vermilion"   "de kalb"     "macon"       "kankakee"   
##  [86] "la salle"    "kendall"     "tazewell"    "rock island" "mclean"     
##  [91] "peoria"      "sangamon"    "champaign"   "madison"     "saint clair"
##  [96] "winnebago"   "mchenry"     "kane"        "will"        "lake"       
## [101] "dupage"      "cook"

cor.test(spdfIL$nT, spdfIL$pop)

## 
##  Pearson's product-moment correlation
## 
## data:  spdfIL$nT and spdfIL$pop
## t = 3.385, df = 100, p-value = 0.001019
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1345 0.4849
## sample estimates:
##    cor 
## 0.3206

range(spdfIL$Lpop)

## [1] 3.629 6.718

rng = seq(3, 7, .5)
crq = brewer.pal(9, "Blues")

spdfIL$density = spdfIL$pop/spdfIL$area
spdfIL$Ldensity = log10(spdfIL$density)
range(spdfIL$Ldensity)

## [1] 0.6438 3.3254

rng = seq(.6, 3.6, .6)
crq = brewer.pal(6, "Blues")
labs = as.character(round(10^rng, 0))
spplot(spdfIL, "Ldensity", col = "white", at = rng, 
       col.regions = crq,
       colorkey = list(space = "bottom", labels = labs),
       sub = "Population Density (2013) [per square km]")

plot of chunk X

cor.test(spdfIL$nT, spdfIL$area)

## 
##  Pearson's product-moment correlation
## 
## data:  spdfIL$nT and spdfIL$area
## t = 9.68, df = 100, p-value = 4.441e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5794 0.7839
## sample estimates:
##    cor 
## 0.6955

cor.test(spdfIL$nT, spdfIL$density)

## 
##  Pearson's product-moment correlation
## 
## data:  spdfIL$nT and spdfIL$density
## t = 3.16, df = 100, p-value = 0.002089
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1135 0.4683
## sample estimates:
##    cor 
## 0.3013

nb = poly2nb(spdfIL)
wts = nb2listw(nb)

nb2INLA("tornb.inla", nb)
tornb.inla = inla.read.graph("tornb.inla")
spdf.inla.formula = nT ~ Ldensity + 
                         f(ID, model = "besag", graph = tornb.inla)
spdf.inla = inla(formula = spdf.inla.formula, family = "nbinomial", E = area,
                  data = spdfIL@data, control.predictor = list(compute = TRUE),
                  control.results = list(return.marginals.random = TRUE),
                  control.compute = list(config = TRUE, mlik = TRUE, cpo = TRUE,
                                        dic = TRUE, po = TRUE))       
summary(spdf.inla)

## 
## Call:
## c("inla(formula = spdf.inla.formula, family = \"nbinomial\", data = spdfIL@data, ",  "    E = area, control.compute = list(config = TRUE, mlik = TRUE, ",  "        cpo = TRUE, dic = TRUE, po = TRUE), control.predictor = list(compute = TRUE), ",  "    control.results = list(return.marginals.random = TRUE))" )
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.0809          0.1245          0.0567          0.2621 
## 
## Fixed effects:
##               mean     sd 0.025quant 0.5quant 0.975quant    mode kld
## (Intercept) -5.002 0.1515    -5.3045  -5.0003    -4.7102 -4.9960   0
## Ldensity     0.336 0.1002     0.1444   0.3343     0.5371  0.3305   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## 
## Model hyperparameters:
##                                                      mean    sd     
## size for the nbinomial observations (overdispersion)  10.760   2.313
## Precision for ID                                     148.642 411.556
##                                                      0.025quant 0.5quant
## size for the nbinomial observations (overdispersion)   6.854     10.551 
## Precision for ID                                      10.581     56.553 
##                                                      0.975quant mode   
## size for the nbinomial observations (overdispersion)  15.883     10.164
## Precision for ID                                     851.706     20.674
## 
## Expected number of effective parameters(std dev): 8.361(4.534)
## Number of equivalent replicates : 12.20 
## 
## Deviance Information Criterion: 626.15
## Effective number of parameters: 10.63
## 
## Marginal Likelihood:  -405.04 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed

spdfIL$random = spdf.inla$summary.random$ID$mean
spdfIL$fitted = spdf.inla$summary.fitted.values$mean

print(ggplotSpdata(spdfIL, "random", tfun = function(x) round((exp(x) - 1) * 100)))

plot of chunk AA

TornL2.df = as.data.frame(TornL2)
IL.df = subset(TornL2.df, ST == "IL")
IL.df$MLAT = IL.df$SLAT > 40.07
IL.df$NS = ifelse(IL.df$MLAT, "North", "South")
IL.df$MonthABB = factor(month.abb[IL.df$Month], levels = month.abb)
ggplot(IL.df, aes(MonthABB, fill = NS)) + 
  geom_bar() + 
  facet_wrap(~NS, nrow = 2) +
  theme_bw() +
  theme(legend.position = "none") +
  xlab("") +
  ylab("Number of Tornado Reports in Illinois")

plot of chunk BA

xx = ddply(IL.df, .(Month, MLAT), summarize, 
                    nT = length(OM))
sum(xx$nT[xx$MLAT & xx$Month >= 4 & xx$Month <= 6])

## [1] 367

sum(xx$nT[!xx$MLAT & xx$Month >= 4 & xx$Month <= 6])

## [1] 362

sum(xx$nT[!xx$MLAT & (xx$Month <=3 | xx$Month >= 10)])

## [1] 181

sum(xx$nT[xx$MLAT & (xx$Month <=3 | xx$Month >= 10)])

## [1] 87

sum(xx$nT[!xx$MLAT & (xx$Month <=3 | xx$Month >= 10)])

## [1] 181

87/(87+181)

## [1] 0.3246

sum(xx$nT[xx$MLAT & xx$Month >= 4  & xx$Month <= 9])

## [1] 485

sum(xx$nT[!xx$MLAT & xx$Month >= 4 & xx$Month <= 9])

## [1] 432

485/(485+432)

## [1] 0.5289

Mississippi Tornadoes

MS.sp = gadm[gadm$NAME_1 == "Mississippi", ]
county = tolower(MS.sp$NAME_2)
countiesun.sp = geometry(spChFIDs(MS.sp, county))                 
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))

TornL2 = subset(TornL, Year >= 1954 & EF >= 1)
ct = over(counties.sp, TornL2, returnList = TRUE)
nT = sapply(ct, function(x) nrow(x))
area = rgeos::gArea(counties.sp, byid = TRUE)
Nyears = diff(range(TornL2$Year)) + 1
nTor.df = data.frame(state = "MS", county = county, Nyears =  Nyears,
        nT = nT, area = area/1000000, rate = nT/area * 10^6 * 10000 / Nyears,
        stringsAsFactors = FALSE, ID = 1:length(county))
TracksAll.df = ldply(ct, data.frame)
TracksAll.df$county = rep(county, sapply(ct, nrow))
nTor.df.day = ddply(TracksAll.df, .(county, Year), summarize, 
         nT = length(county), 
         nTd = length(unique(Date)), .drop = FALSE)
spdfMS = SpatialPolygonsDataFrame(counties.sp, nTor.df)
# set lat_0 and lon_0 based on Wolfram Alpha center coordinates Mississippi
p4sMS = "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=32.8 +lon_0=-89.7 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
spdfMS = spTransform(spdfMS, CRS(p4sMS))

range(spdfMS$nT)

## [1]  6 57

crq = wes.palette(name = "Zissou", type = "continuous")
rng = seq(0, 60, 5)
spplot(spdfMS, "nT", col = "white", at = rng, 
       col.regions = crq(12),
       colorkey = list(space = "bottom", labels = paste(rng)),
       sub = "Number of EF1+ Tornadoes (1954-2013)")

plot of chunk FA

print(plotTorn(x = spdfMS, tracks = TracksAll.df) +
  scale_fill_gradientn(colours = crq(10)))

plot of chunk GA

cor.test(spdfMS$nT, spdfMS$area)

## 
##  Pearson's product-moment correlation
## 
## data:  spdfMS$nT and spdfMS$area
## t = 5.326, df = 80, p-value = 9.005e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3314 0.6558
## sample estimates:
##    cor 
## 0.5116

pop = read.csv("PEP_2013_PEPANNRES_MS.csv", stringsAsFactors = FALSE, header = TRUE)
pop = pop[-1, ]
pop[c(33, 32), ] = pop[c(32, 33), ]
#spdfMS$pop = processCity(pop, field = "respop72012")
ctys = sapply(strsplit(pop$GEO.display.label, ","), function(x) x[1])
ctys2 = tolower(sapply(strsplit(ctys, " "), function(x) x[1]))
pop = pop[order(ctys2), ]
all(tolower(substring(pop$GEO.display.label, 1, nchar(spdfMS$county))) == spdfMS$county)

## [1] TRUE

spdfMS$pop = as.numeric(pop$respop72012)
spdfMS$Lpop = log10(spdfMS$pop)

sort(spdfMS$pop)

##  [1]   1393   4805   7659   7798   7912   8333   8713   9207   9377   9457
## [11]  10074  10379  10439  10510  10596  11210  12089  12095  12402  12557
## [21]  12966  14300  14842  15103  15106  16366  16539  16554  17443  18060
## [31]  18743  19011  19107  19566  19615  20428  20656  21568  21613  21925
## [41]  22885  23289  23353  25306  25667  26395  27345  27398  28220  28260
## [51]  28442  28499  28964  29698  30498  31565  32145  34078  34427  34896
## [61]  36422  36545  37228  40118  45266  48108  48992  50078  50502  55248
## [71]  57739  59698  68582  76925  80258  84960  98555 140039 145173 166317
## [81] 193702 248149

spdfMS$county[order(spdfMS$pop)]

##  [1] "issaquena"       "sharkey"         "jefferson"      
##  [4] "quitman"         "franklin"        "choctaw"        
##  [7] "benton"          "humphreys"       "claiborne"      
## [10] "wilkinson"       "webster"         "kemper"         
## [13] "carroll"         "tunica"          "montgomery"     
## [16] "noxubee"         "jefferson davis" "perry"          
## [19] "yalobusha"       "lawrence"        "amite"          
## [22] "greene"          "calhoun"         "walthall"       
## [25] "tallahatchie"    "smith"           "clarke"         
## [28] "jasper"          "chickasaw"       "stone"          
## [31] "holmes"          "winston"         "attala"         
## [34] "covington"       "tishomingo"      "clay"           
## [37] "wayne"           "newton"          "grenada"        
## [40] "tippah"          "george"          "leake"          
## [43] "itawamba"        "prentiss"        "coahoma"        
## [46] "marion"          "simpson"         "union"          
## [49] "yazoo"           "scott"           "sunflower"      
## [52] "tate"            "copiah"          "neshoba"        
## [55] "pontotoc"        "leflore"         "adams"          
## [58] "bolivar"         "panola"          "lincoln"        
## [61] "monroe"          "marshall"        "alcorn"         
## [64] "pike"            "hancock"         "warren"         
## [67] "oktibbeha"       "washington"      "lafayette"      
## [70] "pearl river"     "lamar"           "lowndes"        
## [73] "jones"           "forrest"         "lauderdale"     
## [76] "lee"             "madison"         "jackson"        
## [79] "rankin"          "desoto"          "harrison"       
## [82] "hinds"

cor.test(spdfMS$nT, spdfMS$pop)

## 
##  Pearson's product-moment correlation
## 
## data:  spdfMS$nT and spdfMS$pop
## t = 5.433, df = 80, p-value = 5.818e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3405 0.6616
## sample estimates:
##    cor 
## 0.5192

range(spdfMS$Lpop)

## [1] 3.144 5.395

rng = seq(3, 6, .5)
crq = brewer.pal(9, "Blues")

spdfMS$density = spdfMS$pop/spdfMS$area
spdfMS$Ldensity = log10(spdfMS$density)
range(spdfMS$Ldensity)

## [1] 0.08499 2.11568

rng = seq(.08, 2.5, .4)
crq = brewer.pal(7, "Blues")
labs = as.character(round(10^rng, 0))
spplot(spdfMS, "Ldensity", col = "white", at = rng, 
       col.regions = crq,
       colorkey = list(space = "bottom", labels = labs),
       sub = "Population Density (2013) [people per square km]")

plot of chunk LA

cor.test(spdfMS$nT, spdfMS$area)

## 
##  Pearson's product-moment correlation
## 
## data:  spdfMS$nT and spdfMS$area
## t = 5.326, df = 80, p-value = 9.005e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3314 0.6558
## sample estimates:
##    cor 
## 0.5116

cor.test(spdfMS$nT, spdfMS$density)

## 
##  Pearson's product-moment correlation
## 
## data:  spdfMS$nT and spdfMS$density
## t = 3.604, df = 80, p-value = 0.0005431
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1706 0.5464
## sample estimates:
##    cor 
## 0.3737

nb = poly2nb(spdfMS)
wts = nb2listw(nb)

nb2INLA("tornb.inla", nb)
tornb.inla = inla.read.graph("tornb.inla")
spdf.inla.formula = nT ~ Ldensity + 
                         f(ID, model = "besag", graph = tornb.inla)
spdf.inla = inla(formula = spdf.inla.formula, family = "nbinomial", E = area,
                  data = spdfMS@data, control.predictor = list(compute = TRUE),
                  control.results = list(return.marginals.random = TRUE),
                  control.compute = list(config = TRUE, mlik = TRUE, cpo = TRUE,
                                        dic = TRUE, po = TRUE))       
summary(spdf.inla)

## 
## Call:
## c("inla(formula = spdf.inla.formula, family = \"nbinomial\", data = spdfMS@data, ",  "    E = area, control.compute = list(config = TRUE, mlik = TRUE, ",  "        cpo = TRUE, dic = TRUE, po = TRUE), control.predictor = list(compute = TRUE), ",  "    control.results = list(return.marginals.random = TRUE))" )
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.0809          0.1113          0.0536          0.2458 
## 
## Fixed effects:
##                mean     sd 0.025quant 0.5quant 0.975quant   mode kld
## (Intercept) -4.7792 0.1747    -5.1234  -4.7790    -4.4364 -4.779   0
## Ldensity     0.4122 0.1363     0.1453   0.4118     0.6811  0.411   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## 
## Model hyperparameters:
##                                                      mean    sd     
## size for the nbinomial observations (overdispersion)   9.784   2.254
## Precision for ID                                      28.311  34.084
##                                                      0.025quant 0.5quant
## size for the nbinomial observations (overdispersion)   5.998      9.579 
## Precision for ID                                       5.287     18.047 
##                                                      0.975quant mode   
## size for the nbinomial observations (overdispersion)  14.802      9.207
## Precision for ID                                     113.588      9.822
## 
## Expected number of effective parameters(std dev): 12.79(5.601)
## Number of equivalent replicates : 6.41 
## 
## Deviance Information Criterion: 560.38
## Effective number of parameters: 14.51
## 
## Marginal Likelihood:  -359.45 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed

spdfMS$random = spdf.inla$summary.random$ID$mean
spdfMS$fitted = spdf.inla$summary.fitted.values$mean

print(ggplotSpdata(spdfMS, "random", tfun = function(x) round((exp(x) - 1) * 100)))

plot of chunk OA

Surface roughness and low-level inflow hypothesis

KS.sp = gadm[gadm$NAME_1 == "Kansas", ]
county = tolower(KS.sp$NAME_2)
countiesun.sp = geometry(spChFIDs(KS.sp, county))                 
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))

download.file(url = "http://www.viewfinderpanoramas.org/DEM/TIF15/15-H.zip",
              destfile = "15-H.ZIP", mode = "wb")
unzip("15-H.ZIP")

Elev = raster("15-H.tif")
Elev1 = as(crop(Elev, extent(countiesun.sp)), "SpatialGridDataFrame")
proj4string(Elev1) = proj4string(countiesun.sp) #same projection, different order of parameters

## Warning: A new CRS was assigned to an object with an existing CRS:
## +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0
## without reprojecting.
## For reprojection, use function spTransform in package rgdal

Elev.data = over(countiesun.sp, Elev1, returnList = TRUE)
Elev.df = data.frame(county = rep(county, sapply(Elev.data,nrow)),
                     Elev = unlist(Elev.data), stringsAsFactors = FALSE)
CE.df = ddply(Elev.df, .(county), summarize,
              elev = mean(Elev, na.rm = TRUE),
              elevS = sd(Elev, na.rm = TRUE),
              elevK = kurtosis(Elev, na.rm = TRUE),
              elevCV = elevS/elev)
all(spdf$county == CE.df$county)

## [1] TRUE

spdf@data[colnames(CE.df)] = CE.df

mycolors = div_gradient_pal(low = muted("blue"), 
                            mid = "white", 
                            high = muted("orange"), 
                            space = "rgb")(seq(0, 1, length = 104))
t1 = list("sp.lines", as(KS.sp, "SpatialLines"))
range(Elev1@data)

## [1]  211 1227

spplot(Elev1, col.regions = mycolors, 
       scales = list(draw = TRUE),
       at = seq(211, 1227, length = 104),
       sp.layout = t1,
       colorkey = list(space = "bottom"),
       sub = "Elevation (m)")

plot of chunk SA

range(spdf$elevS)

## [1] 11.34 73.53

rng = seq(10, 80, 10)
crq = brewer.pal(7, "YlOrBr")
spplot(spdf, "elevS", col = "white", at = rng, 
       col.regions = crq,
       colorkey = list(space = "bottom"),
       sub = "Elevation Standard Deviation [m]")

plot of chunk TA

cor.test(spdf$nT, spdf$elevS)

## 
##  Pearson's product-moment correlation
## 
## data:  spdf$nT and spdf$elevS
## t = -2.201, df = 103, p-value = 0.02998
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.38784 -0.02112
## sample estimates:
##     cor 
## -0.2119

nb = poly2nb(spdf)
wts = nb2listw(nb)
nb2INLA("tornb.inla", nb)
tornb.inla = inla.read.graph("tornb.inla")
spdf.inla.formula = nT ~ elevS + Ldensity +
                         f(ID, model = "besag", graph = tornb.inla)
spdf.inla = inla(formula = spdf.inla.formula, family = "nbinomial", E = area,
                  data = spdf@data, control.predictor = list(compute = TRUE),
                  control.results = list(return.marginals.random = TRUE),
                  control.compute = list(config = TRUE, mlik = TRUE, cpo = TRUE,
                                        dic = TRUE, po = TRUE))       
summary(spdf.inla)

## 
## Call:
## c("inla(formula = spdf.inla.formula, family = \"nbinomial\", data = spdf@data, ",  "    E = area, control.compute = list(config = TRUE, mlik = TRUE, ",  "        cpo = TRUE, dic = TRUE, po = TRUE), control.predictor = list(compute = TRUE), ",  "    control.results = list(return.marginals.random = TRUE))" )
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.0835          0.2443          0.0549          0.3827 
## 
## Fixed effects:
##                mean     sd 0.025quant 0.5quant 0.975quant    mode kld
## (Intercept) -4.3926 0.1868    -4.7600  -4.3926    -4.0258 -4.3925   0
## elevS       -0.0151 0.0048    -0.0246  -0.0151    -0.0056 -0.0151   0
## Ldensity     0.2392 0.0979     0.0477   0.2389     0.4325  0.2382   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## 
## Model hyperparameters:
##                                                      mean   sd    
## size for the nbinomial observations (overdispersion) 10.259  2.332
## Precision for ID                                     20.551 20.803
##                                                      0.025quant 0.5quant
## size for the nbinomial observations (overdispersion)  6.317     10.056  
## Precision for ID                                      4.755     14.221  
##                                                      0.975quant mode  
## size for the nbinomial observations (overdispersion) 15.431      9.690
## Precision for ID                                     74.281      8.411
## 
## Expected number of effective parameters(std dev): 18.34(7.344)
## Number of equivalent replicates : 5.724 
## 
## Deviance Information Criterion: 698.61
## Effective number of parameters: 20.22
## 
## Marginal Likelihood:  -455.58 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed

spdf$random2 = spdf.inla$summary.random$ID$mean

ggplotmargin(spdf.inla, type = "fixed", effect = "elevS",
             xlab = "Elevation Standard Deviation Coefficient") +  
  theme(title = element_text(size = 20, face = "bold"))

plot of chunk VA

temp = inla.posterior.sample(n = 10000, spdf.inla)

## Warning: inla.posterior.sample: THIS FUNCTION IS EXPERIMENTAL!!!

temp1 = sapply(temp, function(x) x$latent["elevS.1", 1] * 100)
df1 = as.data.frame(temp1)
ggplot(df1, aes(temp1)) +
  geom_histogram(binwidth = .05, fill = 'gray') +
  geom_vline(xintercept = 0, color = "red") +
  ylab("Number of Posterior Samples") +
  xlab("Elevation Standard Deviation Coefficient\n[Tornadoes/10,000 sq km/century per m]") +
  theme_bw()

plot of chunk WA

sum(df1$temp1 < 0)/length(df1$temp1) * 100

## [1] 99.95

quantile(df1$temp1, probs = c(.025, .975))

##    2.5%   97.5% 
## -2.4634 -0.5467

rm(list=c("temp", "temp1"))

hiWays = read.table("MajorHiWays_KS.txt", header = TRUE,
                    stringsAsFactors = FALSE)
spdf$highway = hiWays$highway[order(hiWays$county)]