Relative Risk of Intense Tornadoes By State

Group term project for Applied Spatial Statistics: Spring 2013

South Dakota

Download the data and read it into R. Use the rgdal package for the readOGR() function. Print the projection information.

tmp = download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip", 
    "tornado.zip", mode = "wb")

unzip("tornado.zip")
require(rgdal)

## Warning: package 'rgdal' was built under R version 2.15.2

## Warning: package 'sp' was built under R version 2.15.2

Torn = readOGR(dsn = ".", layer = "tornado")

## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "tornado"
## with 56221 features and 28 fields
## Feature type: wkbPoint with 2 dimensions

names(Torn)

##  [1] "OM"         "YEAR"       "MONTH"      "DAY"        "DATE"      
##  [6] "TIME"       "TIMEZONE"   "STATE"      "FIPS"       "STATENUMBE"
## [11] "FSCALE"     "INJURIES"   "FATALITIES" "LOSS"       "CROPLOSS"  
## [16] "SLAT"       "SLON"       "ELAT"       "ELON"       "LENGTH"    
## [21] "WIDTH"      "NS"         "SN"         "SG"         "F1"        
## [26] "F2"         "F3"         "F4"

proj4string(Torn)

## [1] "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"

Subset on year and F scale and create a factor variable (Category) for the F-scale. Convert length from miles and the width from yards to meters. Compute path area.

Torn2 = subset(Torn, FSCALE > 0 & YEAR >= 1950)
Torn2$Category = factor(Torn2$FSCALE, ordered = TRUE)
Torn2$Length = Torn2$LENGTH * 1609.34
Torn2$Width = Torn2$WIDTH * 0.9144
Torn2$Area = Torn2$Width * Torn2$Length

Choose a domain. Here I'm interested in the state of South Dakota (46).

tmp = download.file("http://www.census.gov/geo/cob/bdy/st/st00shp/st46_d00_shp.zip", 
    "st46_d00_shp.zip", mode = "wb")
unzip("st46_d00_shp.zip")
STATE = readOGR(dsn = ".", layer = "st46_d00")

## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "st46_d00"
## with 1 features and 10 fields
## Feature type: wkbPolygon with 2 dimensions

Add projection information and transform the state boundaries.

ll = "+proj=longlat +ellps=GRS80 +datum=NAD83"
proj4string(STATE) = CRS(ll)
STATE = spTransform(STATE, CRS(proj4string(Torn2)))
require(rgeos)

## Warning: package 'rgeos' was built under R version 2.15.2

stArea = gArea(STATE)

Subset by state boundary.

st = overlay(Torn2, STATE)
st = !is.na(st)
Torn2 = Torn2[st, ]

Check the subsetting.

plot(STATE)
plot(Torn2, add = TRUE)
table(Torn2$STATE)

## 
##  AK  AL  AR  AZ  CA  CO  CT  DC  DE  FL  GA  HI  IA  ID  IL  IN  KS  KY 
##   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0   0 
##  LA  MA  MD  ME  MI  MN  MO  MS  MT  NC  ND  NE  NH  NJ  NM  NV  NY  OH 
##   0   0   0   0   0   2   0   0   0   0   0   0   0   0   0   0   0   0 
##  OK  OR  PA  PR  RI  SC  SD  TN  TX  UT  VA  VT  WA  WI  WV  WY 
##   0   0   0   0   0   0 622   0   0   0   0   0   0   0   0   1

plot(Torn2[Torn2$STATE == "NE", ], add = TRUE, col = "red")

plot of chunk plotsChecks

Compute total area of tornado paths and annual average area.

tArea = sum(Torn2$Area)
areaPerYear = tArea/length(1950:2011)
stArea/areaPerYear  # Annual avg recurrence interval for any location

## [1] 11530

areaPerYear/stArea * 100  # Annual probability of getting struck by a tornado

## [1] 0.008673

Next, create and summarize a planar point pattern object.

require(spatstat)

## Warning: package 'spatstat' was built under R version 2.15.2

## Warning: package 'deldir' was built under R version 2.15.2

require(maptools)

## Warning: package 'maptools' was built under R version 2.15.2

W = as(STATE, "owin")

## Checking 1 polygon...[Checking polygon with 1700 edges...]
## done.

All.ppp = as.ppp(Torn2["Category"])
All.ppp = All.ppp[W]
unitname(All.ppp) = c("meter", "meters")
summary(All.ppp)

## Marked planar point pattern: 626 points
## Average intensity 3.14e-09 points per square meter  
## 
## *Pattern contains duplicated points*
## Multitype:
##   frequency proportion intensity
## 1       337     0.5380  1.69e-09
## 2       225     0.3590  1.13e-09
## 3        55     0.0879  2.76e-10
## 4         8     0.0128  4.01e-11
## 5         1     0.0016  5.02e-12
## 
## Window: polygonal boundary
## single connected closed polygon with 1700 vertices
## enclosing rectangle: [-653762, -35425]x[384624, 796798]meters
## Window area =  1.99367e+11 square meters 
## Unit of length: 1 meter

Relative risk of intense tornadoes (F3 or higher).

Naming convention

F0-F1: weak

F2-F3: strong

F4-F5: violent

F2-F5: significant

F3-F5: intense (as an opposite we could use 'moderate')

Enhanced Fujita scale (mph)

EF0 = 65, 85

EF1 = 86, 110

EF2 = 111, 135

EF3 = 136, 165

EF4 = 166, 200

EF5 > 200

multiply by .44704 to get m/s

Create two ppp objects and join them. This is needed for the relative risk function.

T = All.ppp
plot(split(T))

plot of chunk summaryAnalysis

H = unmark(T[T$marks <= 2 & T$marks >= 1])
I = unmark(T[T$marks >= 3])
T2 = superimpose(H = H, I = I)

## Warning: data contain duplicated points

summary(T2)

## Marked planar point pattern: 626 points
## Average intensity 3.14e-09 points per square meter  
## 
## *Pattern contains duplicated points*
## Multitype:
##   frequency proportion intensity
## H       562      0.898  2.82e-09
## I        64      0.102  3.21e-10
## 
## Window: polygonal boundary
## single connected closed polygon with 1700 vertices
## enclosing rectangle: [-653762, -35425]x[384624, 796798]meters
## Window area =  1.99367e+11 square meters 
## Unit of length: 1 meter

Plot the intensy events.

plot(I, pch = 25, cols = "red", main = "")

plot of chunk plotIntenseTDevents

Compute spatial densities and relative risk.

eps = 320
dAll = density(unmark(T2), dimyx = c(eps, eps))
mean(dAll$v, na.rm = TRUE)

## [1] 3.168e-09

dH = density(H, dimyx = c(eps, eps))
dI = density(I, dimyx = c(eps, eps))
bw = bw.relrisk(T2, hmax = 1e+10)
rr = relrisk(T2, dimyx = c(eps, eps), sigma = bw)

Statewide mean relative risk.

mean(rr$v, na.rm = TRUE)

## [1] 0.1018

sd(as.vector(rr$v), na.rm = TRUE)

## [1] 0.002445

range(rr$v, na.rm = TRUE)

## [1] 0.09611 0.10561

Map them on a 4-panel plot.

par(mfrow = c(2, 2))
plot(dAll, col = terrain.colors(10), ribbon = FALSE, main = "")
contour(dAll, add = TRUE)
plot(dH, col = terrain.colors(10), ribbon = FALSE, main = "")
contour(dH, add = TRUE)
plot(dI, col = terrain.colors(10), ribbon = FALSE, main = "")
contour(dI, add = TRUE)
plot(rr, ribbon = FALSE, main = "")
contour(rr, add = TRUE)

plot of chunk create4panelPlot

Create a SpatialGridDataFrame (S4 class).

rr.sgdf = as(rr, "SpatialGridDataFrame")
proj4string(rr.sgdf) = CRS(proj4string(Torn2))

Map the relative risk.

require(maps)

## Loading required package: maps

brd = map("county", "south dakota")

plot of chunk mapRR

brd_ll = map2SpatialLines(brd, proj4string = CRS(ll))
brd_lcc = spTransform(brd_ll, CRS(proj4string(Torn2)))
require(RColorBrewer)

## Loading required package: RColorBrewer

## Warning: package 'RColorBrewer' was built under R version 2.15.2

range(rr.sgdf$v, na.rm = TRUE)

## [1] 0.09611 0.10561

rng = seq(0.095, 0.107, 0.002)
cls = brewer.pal(7, "YlOrRd")
l3 = list("sp.lines", brd_lcc, lwd = 0.5, first = FALSE)
p1 = spplot(rr.sgdf, "v", col.regions = cls, at = rng, sp.layout = l3, colorkey = list(space = "bottom", 
    labels = list(cex = 1.2)), sub = list("Relative probability that a tornado will be intense (EF3 or stronger)", 
    cex = 1.2, font = 1))
p1

plot of chunk mapRR

Extract relative risk by county

Download the county boundaries (all US).

tmp = download.file("http://www.nws.noaa.gov/geodata/catalog/county/data/c_02ap13.zip", 
    "c_02ap13.zip", mode = "wb")
unzip("c_02ap13.zip")
COUNTY = readOGR(dsn = ".", layer = "c_02ap13")

## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "c_02ap13"
## with 3312 features and 11 fields
## Feature type: wkbPolygon with 2 dimensions

COUNTY = COUNTY[COUNTY$STATE == "SD", ]
COUNTY = spTransform(COUNTY, CRS(proj4string(Torn2)))

Extract the mean relative risk per county.

COUNTY$v = over(COUNTY, rr.sgdf, fn = mean)
spplot(COUNTY, "v")

plot of chunk countyMeanRR

county.df = as.data.frame(COUNTY)
county.df[rev(order(county.df$v))[1:6], ]

##      OBJECTID_1 STATE CWA COUNTYNAME  FIPS TIME_ZONE FE_AREA    LON   LAT
## 1872       1873    SD FSD      Union 46127         C      se -96.66 42.83
## 1832       1833    SD FSD    Lincoln 46083         C      se -96.72 43.28
## 1856       1857    SD FSD       Clay 46027         C      se -96.98 42.91
## 1825       1826    SD FSD      Moody 46101         C      ec -96.67 44.02
## 1826       1827    SD FSD  Minnehaha 46099         C      se -96.79 43.67
## 1851       1852    SD ABR      Deuel 46039         C      ne -96.67 44.76
##      Shape_Leng Shape_Area      v
## 1872      2.500     0.1332 0.1054
## 1832      2.007     0.1662 0.1052
## 1856      1.428     0.1190 0.1052
## 1825      1.567     0.1515 0.1051
## 1826      2.049     0.2352 0.1051
## 1851      1.730     0.1874 0.1050

Spatial point pattern models

Get data on the location of cities.

tmp = download.file("http://www.nws.noaa.gov/geodata/catalog/national/data/ci08au12.zip", 
    "ci08au12.zip", mode = "wb")
unzip("ci08au12.zip")
US = readOGR(dsn = ".", layer = "ci08au12", p4s = ll)

## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "ci08au12"
## with 43603 features and 23 fields
## Feature type: wkbPoint with 2 dimensions

UST = spTransform(US, CRS(proj4string(Torn2)))

st = overlay(UST, STATE)
st = !is.na(st)
Cities = UST[st, ]
plot(unmark(T), pch = 16, cex = 0.4)
plot(Cities, pch = 15, col = "green", cex = 0.4, add = TRUE)

plot of chunk subsetByStateBoundary2

C = as.ppp(Cities)
window = as.owin(T)
C = C[window]

Create a map of distance from nearest city centers. Distance from nearest city is a covariate in the point pattern model (ppm).

Zc = distmap(C)
plot(Zc)

Examine the smoothed estimate of spatial density as a function of the covariate.

H = unmark(T[T$marks <= 2 & T$marks >= 1])
I = unmark(T[T$marks >= 3])
plot(rhohat(H, Zc, smoother = "kernel", method = "transform"), legend = FALSE, 
    main = "")

plot of chunk smoothingEstimateCovariateTransform

plot(rhohat(I, Zc, smoother = "kernel", method = "transform"), legend = FALSE, 
    main = "")

plot of chunk smoothingEstimateCovariateTransform

rhat = rhohat(unmark(T), Zc, smoother = "kernel", method = "transform")

## Warning: Some tiles with zero area contain points

## Warning: Some weights are zero

rptsPerYear = max(rhat$rho)/length(1950:2011) * stArea
areaPerYear = rptsPerYear * mean(Torn2$Area)
stArea/areaPerYear  # Annual avg recurrence interval for any location

## [1] 4387

areaPerYear/stArea * 100  # Annual probability of a tornado at any location

## [1] 0.02279

Model the spatial density of the intense tornadoes using distance from city center as a covariate.

fit1 = ppm(I, ~Zc, covariates = list(Zc = Zc), correction = "border", rbord = 5)

## Warning: Some tiles with zero area contain points

## Warning: Some weights are zero

summary(fit1)

## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation) 
## Model was fitted using glm() 
## Algorithm converged
## Call:
## ppm(Q = I, trend = ~Zc, covariates = list(Zc = Zc), correction = "border", 
##     rbord = 5)
## Edge correction: "border" 
##  [border correction distance r = 5 ]
## 
## ----------------------------------------------------
## Quadrature scheme = data + dummy + weights
## Data pattern:
## Planar point pattern: 64 points
## Average intensity 3.21e-10 points per square meter  
## 
## Window: polygonal boundary
## single connected closed polygon with 1700 vertices
## enclosing rectangle: [-653762, -35425]x[384624, 796798]meters
## Window area =  1.99367e+11 square meters 
## Unit of length: 1 meter 
## 
## 
## Dummy quadrature points:
## ( 32 x 32 grid, plus 4 corner points)
## Planar point pattern: 841 points
## Average intensity 4.22e-09 points per square meter  
## 
## Window: polygonal boundary
## single connected closed polygon with 1700 vertices
## enclosing rectangle: [-653762, -35425]x[384624, 796798]meters
## Window area =  1.99367e+11 square meters 
## Unit of length: 1 meter 
## 
## 
## Quadrature weights:
## (counting weights based on 32 x 32 array of rectangular tiles)
## All weights:
##  range: [0, 2.49e+08]    total: 2e+11
## Weights on data points:
##  range: [0, 1.24e+08]    total: 7.41e+09
## Weights on dummy points:
##  range: [15600000, 2.49e+08] total: 1.92e+11
## 
## ----------------------------------------------------
## FITTED MODEL:
## 
## Nonstationary Poisson process
## 
## 
##  ---- Intensity: ----
## 
## Trend formula: ~Zc
## Model depends on external covariate 'Zc' 
## 
## Covariates provided:
##  Zc: im
## 
## Fitted coefficients for trend formula:
## (Intercept)          Zc 
##  -2.052e+01  -1.886e-04 
## 
##               Estimate      S.E. Ztest    CI95.lo    CI95.hi
## (Intercept) -2.052e+01 2.420e-01    na -2.100e+01 -2.005e+01
## Zc          -1.886e-04 3.613e-05   *** -2.594e-04 -1.178e-04
## 
## 
## ----------- gory details -----
## 
## Fitted regular parameters (theta): 
## (Intercept)          Zc 
##  -2.052e+01  -1.886e-04 
## 
## Fitted exp(theta): 
## (Intercept)          Zc 
##   1.224e-09   9.998e-01

Create another covariate (distance from nearest weak tornado).

Zh = distmap(H)

Model the spatial density using two covariates.

fit2 = ppm(I, ~Zc + Zh, covariates = list(Zc = Zc, Zh = Zh), correction = "border", 
    rbord = 5)

## Warning: Some tiles with zero area contain points

## Warning: Some weights are zero

summary(fit2)

## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation) 
## Model was fitted using glm() 
## Algorithm converged
## Call:
## ppm(Q = I, trend = ~Zc + Zh, covariates = list(Zc = Zc, Zh = Zh), 
##     correction = "border", rbord = 5)
## Edge correction: "border" 
##  [border correction distance r = 5 ]
## 
## ----------------------------------------------------
## Quadrature scheme = data + dummy + weights
## Data pattern:
## Planar point pattern: 64 points
## Average intensity 3.21e-10 points per square meter  
## 
## Window: polygonal boundary
## single connected closed polygon with 1700 vertices
## enclosing rectangle: [-653762, -35425]x[384624, 796798]meters
## Window area =  1.99367e+11 square meters 
## Unit of length: 1 meter 
## 
## 
## Dummy quadrature points:
## ( 32 x 32 grid, plus 4 corner points)
## Planar point pattern: 841 points
## Average intensity 4.22e-09 points per square meter  
## 
## Window: polygonal boundary
## single connected closed polygon with 1700 vertices
## enclosing rectangle: [-653762, -35425]x[384624, 796798]meters
## Window area =  1.99367e+11 square meters 
## Unit of length: 1 meter 
## 
## 
## Quadrature weights:
## (counting weights based on 32 x 32 array of rectangular tiles)
## All weights:
##  range: [0, 2.49e+08]    total: 2e+11
## Weights on data points:
##  range: [0, 1.24e+08]    total: 7.41e+09
## Weights on dummy points:
##  range: [15600000, 2.49e+08] total: 1.92e+11
## 
## ----------------------------------------------------
## FITTED MODEL:
## 
## Nonstationary Poisson process
## 
## 
##  ---- Intensity: ----
## 
## Trend formula: ~Zc + Zh
## Model depends on external covariates 'Zc' and 'Zh' 
## 
## Covariates provided:
##  Zc: im
##  Zh: im
## 
## Fitted coefficients for trend formula:
## (Intercept)          Zc          Zh 
##  -2.012e+01  -1.676e-04  -5.369e-05 
## 
##               Estimate      S.E. Ztest    CI95.lo    CI95.hi
## (Intercept) -2.012e+01 2.773e-01    na -2.067e+01 -1.958e+01
## Zc          -1.676e-04 3.730e-05   *** -2.407e-04 -9.448e-05
## Zh          -5.369e-05 2.064e-05    ** -9.415e-05 -1.323e-05
## 
## 
## ----------- gory details -----
## 
## Fitted regular parameters (theta): 
## (Intercept)          Zc          Zh 
##  -2.012e+01  -1.676e-04  -5.369e-05 
## 
## Fitted exp(theta): 
## (Intercept)          Zc          Zh 
##   1.825e-09   9.998e-01   9.999e-01

Can you drop one of the covariates? no

fit3 = ppm(I, ~Zh, covariates = list(Zh = Zh), correction = "border", rbord = 5)

## Warning: Some tiles with zero area contain points

## Warning: Some weights are zero

AIC(fit1)

## [1] 2852

AIC(fit2)

## [1] 2845

AIC(fit3)

## [1] 2867

2 * (as.numeric(logLik(fit2)) - as.numeric(logLik(fit3)))/I$n

## [1] 0.3733

Does adding a term for clustering improve things?

fit4 = kppm(I, ~Zc + Zh, covariates = list(Zc = Zc, Zh = Zh), interaction = "Thomas", 
    correction = "border", rbord = 5)

## Warning: Some tiles with zero area contain points

## Warning: Some weights are zero

fit4

## Inhomogeneous cluster point process model
## Fitted to point pattern dataset 'I' 
## Fitted using the inhomogeneous K-function 
## 
## Trend formula: ~Zc + Zh
## 
## Fitted coefficients for trend formula:
## (Intercept)          Zc          Zh 
##  -2.012e+01  -1.676e-04  -5.369e-05 
## 
## Cluster model: Thomas process 
## Fitted cluster parameters:
##     kappa    sigma2 
## 1.000e+00 2.816e+09 
## Mean cluster size: [pixel image]

Prediction correlation.

M = quadrat.test(fit2, nx = 8, ny = 8)

## Warning: Some expected counts are small; chi^2 approximation may be
## inaccurate

cor.test(M$expected, M$observed)

## 
##  Pearson's product-moment correlation
## 
## data:  M$expected and M$observed 
## t = 4.336, df = 56, p-value = 6.101e-05
## alternative hypothesis: true correlation is not equal to 0 
## 95 percent confidence interval:
##  0.2792 0.6726 
## sample estimates:
##    cor 
## 0.5014