Class Project: The Relative Risk of Intense Tornadoes

Michael Patterson

date()

## [1] "Tue Apr 02 15:28:32 2013"

This analysis is for the state of Arkansas

Here I download the data and read it into R. I need the rgdal package for the readOGR() function. I print the projection information.

tmp = download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip", 
    "tornado.zip", mode = "wb")

unzip("tornado.zip")
require(rgdal)

## Warning: package 'rgdal' was built under R version 2.15.3

## Warning: package 'sp' was built under R version 2.15.3

Torn = readOGR(dsn = ".", layer = "tornado")

## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "tornado"
## with 56221 features and 28 fields
## Feature type: wkbPoint with 2 dimensions

names(Torn)

##  [1] "OM"         "YEAR"       "MONTH"      "DAY"        "DATE"      
##  [6] "TIME"       "TIMEZONE"   "STATE"      "FIPS"       "STATENUMBE"
## [11] "FSCALE"     "INJURIES"   "FATALITIES" "LOSS"       "CROPLOSS"  
## [16] "SLAT"       "SLON"       "ELAT"       "ELON"       "LENGTH"    
## [21] "WIDTH"      "NS"         "SN"         "SG"         "F1"        
## [26] "F2"         "F3"         "F4"

proj4string(Torn)

## [1] "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"

Create the factor variable (Category) for the F-scale.

Torn2 = subset(Torn, FSCALE >= 0 & YEAR >= 1950)
Torn2$Category = factor(Torn2$FSCALE, ordered = TRUE)

require(maptools)

## Warning: package 'maptools' was built under R version 2.15.2

require(PBSmapping)

## Warning: package 'PBSmapping' was built under R version 2.15.3

unzip("tl_2009_05_county.zip")
AR = readOGR(dsn = ".", layer = "tl_2009_05_county")

## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "tl_2009_05_county"
## with 75 features and 17 fields
## Feature type: wkbPolygon with 2 dimensions

AR = spTransform(AR, CRS(proj4string(Torn2)))
lps = getSpPPolygonsLabptSlots(AR)

## Warning: use coordinates method

IDOneBin = cut(lps[, 1], range(lps[, 1]), include.lowest = TRUE)
ARw = unionSpatialPolygons(AR, IDOneBin)

## Warning: package 'rgeos' was built under R version 2.15.3

Next, create and summarize a planar point pattern object.

require(spatstat)

## Warning: package 'spatstat' was built under R version 2.15.2

## Warning: package 'deldir' was built under R version 2.15.2

W = as(ARw, "owin")

## Checking 1 polygon...[Checking polygon with 18656 edges...]
## done.

All.ppp = as.ppp(Torn2["Category"])
All.ppp = All.ppp[W]
unitname(All.ppp) = c("meter", "meters")
summary(All.ppp)

## Marked planar point pattern: 1611 points
## Average intensity 1.18e-08 points per square meter  
## 
## *Pattern contains duplicated points*
## Multitype:
##   frequency proportion intensity
## 0       451     0.2800  3.29e-09
## 1       602     0.3740  4.39e-09
## 2       389     0.2410  2.84e-09
## 3       142     0.0881  1.04e-09
## 4        27     0.0168  1.97e-10
## 5         0     0.0000  0.00e+00
## 
## Window: polygonal boundary
## single connected closed polygon with 18656 vertices
## enclosing rectangle: [123266, 571076]x[-659256, -259511]meters
## Window area =  1.36987e+11 square meters 
## Unit of length: 1 meter

The native spatial units are meters.

Relative risk of intense tornadoes (F3 or higher).

Create two ppp objects and join them. This is needed for the relative risk function.

T = All.ppp
plot(split(T))

plot of chunk summaryAnalysis

H = unmark(T[T$marks <= 2 & T$marks >= 1])
I = unmark(T[T$marks >= 3])
T2 = superimpose(H = H, I = I)

## Warning: data contain duplicated points

summary(T2)

## Marked planar point pattern: 1160 points
## Average intensity 8.47e-09 points per square meter  
## 
## *Pattern contains duplicated points*
## Multitype:
##   frequency proportion intensity
## H       991      0.854  7.23e-09
## I       169      0.146  1.23e-09
## 
## Window: polygonal boundary
## single connected closed polygon with 18656 vertices
## enclosing rectangle: [123266, 571076]x[-659256, -259511]meters
## Window area =  1.36987e+11 square meters 
## Unit of length: 1 meter

plot(I, pch = 25, cols = "red", main = "")
plot(AR, add = TRUE, lwd = 0.1)

plot of chunk plotIntenseTDevents

Compute spatial densities and relative risk.

eps = 40
dAll = density(unmark(T2), dimyx = c(eps, eps))
# dAllc = density(unmark(T2), xy=list(x=c(13083, 13084), y=c(-97147,
# -97146)))
mean(dAll$v, na.rm = TRUE)

## [1] 8.567e-09

dH = density(H, dimyx = c(eps, eps))
dI = density(I, dimyx = c(eps, eps))
rr = relrisk(T2, dimyx = c(eps, eps), hmax = 150000)

Statewide mean relative risk.

mean(rr$v, na.rm = TRUE)

## [1] 0.1477

sd(as.vector(rr$v), na.rm = TRUE)

## [1] 0.02047

Map them on a 4-panel plot.

par(mfrow = c(2, 2))
plot(dAll, col = terrain.colors(10), ribbon = FALSE, main = "")
plot(AR, add = TRUE, lwd = 0.1)
contour(dAll, add = TRUE)
plot(dH, col = terrain.colors(10), ribbon = FALSE, main = "")
plot(AR, add = TRUE, lwd = 0.1)
contour(dH, add = TRUE)
plot(dI, col = terrain.colors(10), ribbon = FALSE, main = "")
plot(AR, add = TRUE, lwd = 0.1)
contour(dI, add = TRUE)
plot(rr, ribbon = FALSE, main = "")
plot(AR, add = TRUE, lwd = 0.1)
contour(rr, add = TRUE)

plot of chunk create4panelPlot

Create a SpatialGridDataFrame (S4 class) and map the relative risk.

require(maps)

## Loading required package: maps

## Warning: package 'maps' was built under R version 2.15.3

require(maptools)
ll = "+proj=longlat +ellps=GRS80 +datum=NAD83"
brd = map("county", "Arkansas")

plot of chunk mapRR

brd_ll = map2SpatialLines(brd, proj4string = CRS(ll))
brd_lcc = spTransform(brd_ll, CRS(proj4string(Torn2)))
rr.sgdf = as(rr, "SpatialGridDataFrame")
require(RColorBrewer)

## Loading required package: RColorBrewer

## Warning: package 'RColorBrewer' was built under R version 2.15.2

range(rr.sgdf$v, na.rm = TRUE)

## [1] 0.1166 0.1940

rng = seq(0.1, 0.2, 0.02)
cls = brewer.pal(9, "YlOrRd")
l3 = list("sp.lines", brd_lcc, lwd = 0.5, first = FALSE)
p1 = spplot(rr.sgdf, "v", col.regions = cls, at = rng, sp.layout = l3, colorkey = list(space = "bottom", 
    labels = list(cex = 1.2)), sub = list("Relative probability that a tornado will be intense (EF3 or stronger)", 
    cex = 1.2, font = 1))
p1

plot of chunk mapRR

Spatial models

Rescale units to kilometers.

T = affine(All.ppp, mat = diag(c(1/1000, 1/1000)))
unitname(T) = c("km", "kms")
summary(T)
T = All.ppp

Get data on the location of cities.

unzip("ci08au12.zip")
US = readOGR(dsn = ".", layer = "ci08au12", p4s = ll)

## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "ci08au12"
## with 43603 features and 23 fields
## Feature type: wkbPoint with 2 dimensions

UST = spTransform(US, CRS(proj4string(Torn2)))
USt = subset(UST, UST$POP_1990 > 1000)
US.ppp = as.ppp(USt)
# US.ppp = affine(US.ppp, mat = diag(c(1/1000, 1/1000))) unitname(US.ppp)
# = c('km', 'kms')
window = as.owin(T)
CP.ppp = US.ppp[window]
plot(unmark(CP.ppp))

plot of chunk readCityData

Create a map of distance from nearest city centers. This is a covariate in the ppm.

Zc = distmap(CP.ppp)
plot(Zc)

plot of chunk createDistanceMap

Examine the smoothed estimate of spatial density as a function of the covariate.

H = unmark(T[T$marks <= 2 & T$marks >= 1])
I = unmark(T[T$marks >= 3])
plot(rhohat(H, Zc, smoother = "kernel", method = "transform"), legend = FALSE, 
    main = "")

plot of chunk smoothingEstimateCovariateTransform

plot(rhohat(I, Zc, smoother = "kernel", method = "transform"), legend = FALSE, 
    main = "")

plot of chunk smoothingEstimateCovariateTransform

Model the spatial density of the intense tornadoes using distance from city center as a covariate.

fit1 = ppm(I, ~Zc, covariates = list(Zc = Zc), correction = "border", rbord = 5)
summary(fit1)

## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation) 
## Model was fitted using glm() 
## Algorithm converged
## Call:
## ppm(Q = I, trend = ~Zc, covariates = list(Zc = Zc), correction = "border", 
##     rbord = 5)
## Edge correction: "border" 
##  [border correction distance r = 5 ]
## 
## ----------------------------------------------------
## Quadrature scheme = data + dummy + weights
## Data pattern:
## Planar point pattern: 169 points
## Average intensity 1.23e-09 points per square meter  
## 
## Window: polygonal boundary
## single connected closed polygon with 18656 vertices
## enclosing rectangle: [123266, 571076]x[-659256, -259511]meters
## Window area =  1.36987e+11 square meters 
## Unit of length: 1 meter 
## 
## 
## Dummy quadrature points:
## ( 32 x 32 grid, plus 4 corner points)
## Planar point pattern: 833 points
## Average intensity 6.08e-09 points per square meter  
## 
## Window: polygonal boundary
## single connected closed polygon with 18656 vertices
## enclosing rectangle: [123266, 571076]x[-659256, -259511]meters
## Window area =  1.36987e+11 square meters 
## Unit of length: 1 meter 
## 
## 
## Quadrature weights:
## (counting weights based on 32 x 32 array of rectangular tiles)
## All weights:
##  range: [10900000, 1.75e+08] total: 1.37e+11
## Weights on data points:
##  range: [3.5e+07, 87400000]  total: 1.29e+10
## Weights on dummy points:
##  range: [10900000, 1.75e+08] total: 1.24e+11
## 
## ----------------------------------------------------
## FITTED MODEL:
## 
## Nonstationary Poisson process
## 
## 
##  ---- Intensity: ----
## 
## Trend formula: ~Zc
## Model depends on external covariate 'Zc' 
## 
## Covariates provided:
##  Zc: im
## 
## Fitted coefficients for trend formula:
## (Intercept)          Zc 
##  -1.969e+01  -6.888e-05 
## 
##               Estimate      S.E. Ztest    CI95.lo    CI95.hi
## (Intercept) -1.969e+01 1.478e-01    na -1.998e+01 -1.940e+01
## Zc          -6.888e-05 1.214e-05   *** -9.268e-05 -4.508e-05
## 
## 
## ----------- gory details -----
## 
## Fitted regular parameters (theta): 
## (Intercept)          Zc 
##  -1.969e+01  -6.888e-05 
## 
## Fitted exp(theta): 
## (Intercept)          Zc 
##   2.818e-09   9.999e-01

Create another covariate (distance from nearest weak tornado).

Zh = distmap(H)

Model the spatial density using two covariates.

fit2 = ppm(I, ~Zc + Zh, covariates = list(Zc = Zc, Zh = Zh), correction = "border", 
    rbord = 5)
summary(fit2)

## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation) 
## Model was fitted using glm() 
## Algorithm converged
## Call:
## ppm(Q = I, trend = ~Zc + Zh, covariates = list(Zc = Zc, Zh = Zh), 
##     correction = "border", rbord = 5)
## Edge correction: "border" 
##  [border correction distance r = 5 ]
## 
## ----------------------------------------------------
## Quadrature scheme = data + dummy + weights
## Data pattern:
## Planar point pattern: 169 points
## Average intensity 1.23e-09 points per square meter  
## 
## Window: polygonal boundary
## single connected closed polygon with 18656 vertices
## enclosing rectangle: [123266, 571076]x[-659256, -259511]meters
## Window area =  1.36987e+11 square meters 
## Unit of length: 1 meter 
## 
## 
## Dummy quadrature points:
## ( 32 x 32 grid, plus 4 corner points)
## Planar point pattern: 833 points
## Average intensity 6.08e-09 points per square meter  
## 
## Window: polygonal boundary
## single connected closed polygon with 18656 vertices
## enclosing rectangle: [123266, 571076]x[-659256, -259511]meters
## Window area =  1.36987e+11 square meters 
## Unit of length: 1 meter 
## 
## 
## Quadrature weights:
## (counting weights based on 32 x 32 array of rectangular tiles)
## All weights:
##  range: [10900000, 1.75e+08] total: 1.37e+11
## Weights on data points:
##  range: [3.5e+07, 87400000]  total: 1.29e+10
## Weights on dummy points:
##  range: [10900000, 1.75e+08] total: 1.24e+11
## 
## ----------------------------------------------------
## FITTED MODEL:
## 
## Nonstationary Poisson process
## 
## 
##  ---- Intensity: ----
## 
## Trend formula: ~Zc + Zh
## Model depends on external covariates 'Zc' and 'Zh' 
## 
## Covariates provided:
##  Zc: im
##  Zh: im
## 
## Fitted coefficients for trend formula:
## (Intercept)          Zc          Zh 
##  -1.943e+01  -6.229e-05  -5.537e-05 
## 
##               Estimate      S.E. Ztest    CI95.lo    CI95.hi
## (Intercept) -1.943e+01 1.801e-01    na -1.978e+01 -1.908e+01
## Zc          -6.229e-05 1.230e-05   *** -8.640e-05 -3.819e-05
## Zh          -5.537e-05 2.342e-05     * -1.013e-04 -9.465e-06
## 
## 
## ----------- gory details -----
## 
## Fitted regular parameters (theta): 
## (Intercept)          Zc          Zh 
##  -1.943e+01  -6.229e-05  -5.537e-05 
## 
## Fitted exp(theta): 
## (Intercept)          Zc          Zh 
##   3.650e-09   9.999e-01   9.999e-01

Can you drop one of the covariates?

fit3 = ppm(I, ~Zh, covariates = list(Zh = Zh), correction = "border", rbord = 5)
AIC(fit1)

## [1] 7238

AIC(fit2)

## [1] 7234

AIC(fit3)

## [1] 7262

2 * (as.numeric(logLik(fit2)) - as.numeric(logLik(fit3)))/I$n

## [1] 0.1744

Does adding a term for clustering improve things?

fit4 = kppm(I, ~Zc + Zh, covariates = list(Zc = Zc, Zh = Zh), interaction = "Thomas", 
    correction = "border", rbord = 5)
fit4

## Inhomogeneous cluster point process model
## Fitted to point pattern dataset 'I' 
## Fitted using the inhomogeneous K-function 
## 
## Trend formula: ~Zc + Zh
## 
## Fitted coefficients for trend formula:
## (Intercept)          Zc          Zh 
##  -1.943e+01  -6.229e-05  -5.537e-05 
## 
## Cluster model: Thomas process 
## Fitted cluster parameters:
##     kappa    sigma2 
##         1 630599298 
## Mean cluster size: [pixel image]

Prediction correlation.

M = quadrat.test(fit2, nx = 6, ny = 4)

## Warning: Some expected counts are small; chi^2 approximation may be
## inaccurate

cor(M$expected, M$observed)

## [1] 0.7365