Class Project: The Relative Risk of Intense Tornadoes

Study Area:South Dakota

setwd("C:/Users/Guang/Desktop/AdvancedR")
tmp = download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip", 
    "tornado.zip", mode = "wb")
tmp1 = download.file("http://www2.census.gov/cgi-bin/shapefiles2009/state-files?state=46/tl_2009_46_county.zip", 
    "tl_2009_46_county.zip", mode = "wb")

unzip("tornado.zip")
require(rgdal)
## Warning: package 'rgdal' was built under R version 2.15.2
## Warning: package 'sp' was built under R version 2.15.2
Torn = readOGR(dsn = ".", layer = "tornado")
## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "tornado"
## with 56221 features and 28 fields
## Feature type: wkbPoint with 2 dimensions
names(Torn)
##  [1] "OM"         "YEAR"       "MONTH"      "DAY"        "DATE"      
##  [6] "TIME"       "TIMEZONE"   "STATE"      "FIPS"       "STATENUMBE"
## [11] "FSCALE"     "INJURIES"   "FATALITIES" "LOSS"       "CROPLOSS"  
## [16] "SLAT"       "SLON"       "ELAT"       "ELON"       "LENGTH"    
## [21] "WIDTH"      "NS"         "SN"         "SG"         "F1"        
## [26] "F2"         "F3"         "F4"
proj4string(Torn)
## [1] "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"

Create the factor variable (Category) for the F-scale.

Torn2 = subset(Torn, FSCALE >= 0 & YEAR >= 1950)
Torn2$Category = factor(Torn2$FSCALE, ordered = TRUE)

Choose a domain:South Dakota(SD).

require(maptools)
## Warning: package 'maptools' was built under R version 2.15.2
require(PBSmapping)
## Warning: package 'PBSmapping' was built under R version 2.15.3
unzip("tl_2009_46_county.zip")
## Warning: error 1 in extracting from zip file
SD = readOGR(dsn = ".", layer = "tl_2009_46_county")
## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "tl_2009_46_county"
## with 66 features and 17 fields
## Feature type: wkbPolygon with 2 dimensions
SD = spTransform(SD, CRS(proj4string(Torn2)))
lps = getSpPPolygonsLabptSlots(SD)
## Warning: use coordinates method
IDOneBin = cut(lps[, 1], range(lps[, 1]), include.lowest = TRUE)
SDw = unionSpatialPolygons(SD, IDOneBin)
## Warning: package 'rgeos' was built under R version 2.15.2

Use overlay to select tornadoes in South Dakota

require(sp)
class(Torn2)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"
class(SDw)
## [1] "SpatialPolygons"
## attr(,"package")
## [1] "sp"
Torn2$Olay = overlay(SDw, Torn2)
names(Torn2)
##  [1] "OM"         "YEAR"       "MONTH"      "DAY"        "DATE"      
##  [6] "TIME"       "TIMEZONE"   "STATE"      "FIPS"       "STATENUMBE"
## [11] "FSCALE"     "INJURIES"   "FATALITIES" "LOSS"       "CROPLOSS"  
## [16] "SLAT"       "SLON"       "ELAT"       "ELON"       "LENGTH"    
## [21] "WIDTH"      "NS"         "SN"         "SG"         "F1"        
## [26] "F2"         "F3"         "F4"         "Category"   "Olay"
SDTorn = subset(Torn2, Torn2$Olay == 1)
class(SDTorn)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"
plot(SDTorn)
require(spatstat)
## Loading required package: spatstat
## Warning: package 'spatstat' was built under R version 2.15.2
## Loading required package: mgcv
## This is mgcv 1.7-18. For overview type 'help("mgcv-package")'.
## Loading required package: deldir
## Warning: package 'deldir' was built under R version 2.15.2
## deldir 0.0-21
## spatstat 1.29-0 Type 'help(spatstat)' for an overview of spatstat
## 'latest.news()' for news on latest version 'licence.polygons()' for
## licence information on polygon calculations
W = as(SDw, "owin")
## Checking 1 polygon...[Checking polygon with 19998 edges...]
## done.
All.ppp = as.ppp(SDTorn["Category"])
All.ppp = All.ppp[W]
class(All.ppp)
## [1] "ppp"
summary(All.ppp)
## Marked planar point pattern: 1531 points
## Average intensity 7.68e-09 points per square unit  
## 
## *Pattern contains duplicated points*
## Multitype:
##   frequency proportion intensity
## 0       905   0.591000  4.54e-09
## 1       337   0.220000  1.69e-09
## 2       225   0.147000  1.13e-09
## 3        55   0.035900  2.76e-10
## 4         8   0.005230  4.01e-11
## 5         1   0.000653  5.02e-12
## 
## Window: polygonal boundary
## single connected closed polygon with 19998 vertices
## enclosing rectangle: [-653754, -35416]x[384630, 796798]units
## Window area =  1.99368e+11 square units
unitname(All.ppp) = c("meter", "meters")
plot(SDw, add = TRUE)

plot of chunk subsetSPDF, createPPP

The native spatial units are meters.

Relative risk of intense tornadoes (F3 or higher).

Create two ppp objects and join them. This is needed for the relative risk function.

T = All.ppp
plot(split(T))

plot of chunk summaryAnalysis

H = unmark(T[T$marks <= 2 & T$marks >= 1])
I = unmark(T[T$marks >= 3])
T2 = superimpose(H = H, I = I)
## Warning: data contain duplicated points
summary(T2)
## Marked planar point pattern: 626 points
## Average intensity 3.14e-09 points per square meter  
## 
## *Pattern contains duplicated points*
## Multitype:
##   frequency proportion intensity
## H       562      0.898  2.82e-09
## I        64      0.102  3.21e-10
## 
## Window: polygonal boundary
## single connected closed polygon with 19998 vertices
## enclosing rectangle: [-653754, -35416]x[384630, 796798]meters
## Window area =  1.99368e+11 square meters 
## Unit of length: 1 meter
plot(H, pch = 1, cols = "blue", main = "")
plot(I, pch = 25, cols = "red", add = TRUE, main = "")
plot(SDw, add = TRUE, lwd = 0.1)

plot of chunk plotIntenseTDevents

Compute spatial densities and relative risk.

eps = 40
dAll = density(unmark(T2), dimyx = c(eps, eps))
# dAllc = density(unmark(T2), xy=list(x=c(13083, 13084), y=c(-97147,
# -97146)))
mean(dAll$v, na.rm = TRUE)
## [1] 3.167e-09
dH = density(H, dimyx = c(eps, eps))
dI = density(I, dimyx = c(eps, eps))
rr = relrisk(T2, dimyx = c(eps, eps), hmax = 4e+05)

Statewide mean relative risk.

mean(rr$v, na.rm = TRUE)
## [1] 0.101
sd(as.vector(rr$v), na.rm = TRUE)
## [1] 0.004806

Map them on a 4-panel plot.

par(mfrow = c(2, 2))
plot(dAll, col = terrain.colors(10), ribbon = FALSE, main = "")
plot(SD, add = TRUE, lwd = 0.1)
contour(dAll, add = TRUE)
plot(dH, col = terrain.colors(10), ribbon = FALSE, main = "")
plot(SD, add = TRUE, lwd = 0.1)
contour(dH, add = TRUE)
plot(dI, col = terrain.colors(10), ribbon = FALSE, main = "")
plot(SD, add = TRUE, lwd = 0.1)
contour(dI, add = TRUE)
plot(rr, ribbon = FALSE, main = "")
plot(SD, add = TRUE, lwd = 0.1)
contour(rr, add = TRUE)

plot of chunk create4panelPlot

Create a SpatialGridDataFrame (S4 class) and map the relative risk.

require(maps)
## Loading required package: maps
require(maptools)
ll = "+proj=longlat +ellps=GRS80 +datum=NAD83"
brd = map("county", "South Dakota")

plot of chunk mapRR

brd_ll = map2SpatialLines(brd, proj4string = CRS(ll))
brd_lcc = spTransform(brd_ll, CRS(proj4string(Torn2)))
rr.sgdf = as(rr, "SpatialGridDataFrame")
require(RColorBrewer)
## Loading required package: RColorBrewer
## Warning: package 'RColorBrewer' was built under R version 2.15.2
range(rr.sgdf$v, na.rm = TRUE)
## [1] 0.08902 0.10759
rng = seq(0.085, 0.11, 0.005)
cls = brewer.pal(5, "YlOrRd")
l3 = list("sp.lines", brd_lcc, lwd = 0.5, first = FALSE)
p1 = spplot(rr.sgdf, "v", col.regions = cls, at = rng, sp.layout = l3, colorkey = list(space = "bottom", 
    labels = list(cex = 1.2)), sub = list("Relative probability that a tornado will be intense (EF3 or stronger)", 
    cex = 1.2, font = 1))
p1

plot of chunk mapRR

Spatial models

Get data on the location of cities.

unzip("ci08au12.zip")
US = readOGR(dsn = ".", layer = "ci08au12", p4s = ll)
## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "ci08au12"
## with 43603 features and 23 fields
## Feature type: wkbPoint with 2 dimensions
UST = spTransform(US, CRS(proj4string(Torn2)))
USt = subset(UST, UST$POP_1990 > 1000)
US.ppp = as.ppp(USt)
window = as.owin(T)
CP.ppp = US.ppp[window]
plot(unmark(CP.ppp))

plot of chunk readCityData

Create a map of distance from nearest city centers. This is a covariate in the ppm.

Zc = distmap(CP.ppp)
plot(Zc)

plot of chunk createDistanceMap

Examine the smoothed estimate of spatial density as a function of the covariate.

H = unmark(T[T$marks <= 2 & T$marks >= 1])
I = unmark(T[T$marks >= 3])
plot(rhohat(H, Zc, smoother = "kernel", method = "transform"), legend = FALSE, 
    main = "")

plot of chunk smoothingEstimateCovariateTransform

plot(rhohat(I, Zc, smoother = "kernel", method = "transform"), legend = FALSE, 
    main = "")

plot of chunk smoothingEstimateCovariateTransform

Model the spatial density of the intense tornadoes using distance from city center as a covariate.

fit1 = ppm(I, ~Zc, covariates = list(Zc = Zc), correction = "border", rbord = 5)
## Warning: Some tiles with zero area contain points
## Warning: Some weights are zero
summary(fit1)
## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation) 
## Model was fitted using glm() 
## Algorithm converged
## Call:
## ppm(Q = I, trend = ~Zc, covariates = list(Zc = Zc), correction = "border", 
##     rbord = 5)
## Edge correction: "border" 
##  [border correction distance r = 5 ]
## 
## ----------------------------------------------------
## Quadrature scheme = data + dummy + weights
## Data pattern:
## Planar point pattern: 64 points
## Average intensity 3.21e-10 points per square meter  
## 
## Window: polygonal boundary
## single connected closed polygon with 19998 vertices
## enclosing rectangle: [-653754, -35416]x[384630, 796798]meters
## Window area =  1.99368e+11 square meters 
## Unit of length: 1 meter 
## 
## 
## Dummy quadrature points:
## ( 32 x 32 grid, plus 4 corner points)
## Planar point pattern: 841 points
## Average intensity 4.22e-09 points per square meter  
## 
## Window: polygonal boundary
## single connected closed polygon with 19998 vertices
## enclosing rectangle: [-653754, -35416]x[384630, 796798]meters
## Window area =  1.99368e+11 square meters 
## Unit of length: 1 meter 
## 
## 
## Quadrature weights:
## (counting weights based on 32 x 32 array of rectangular tiles)
## All weights:
##  range: [0, 2.49e+08]    total: 1.99e+11
## Weights on data points:
##  range: [0, 1.24e+08]    total: 7.41e+09
## Weights on dummy points:
##  range: [15600000, 2.49e+08] total: 1.92e+11
## 
## ----------------------------------------------------
## FITTED MODEL:
## 
## Nonstationary Poisson process
## 
## 
##  ---- Intensity: ----
## 
## Trend formula: ~Zc
## Model depends on external covariate 'Zc' 
## 
## Covariates provided:
##  Zc: im
## 
## Fitted coefficients for trend formula:
## (Intercept)          Zc 
##  -2.105e+01  -3.758e-05 
## 
##               Estimate      S.E. Ztest    CI95.lo    CI95.hi
## (Intercept) -2.105e+01 2.264e-01    na -2.150e+01 -2.061e+01
## Zc          -3.758e-05 1.027e-05   *** -5.772e-05 -1.745e-05
## 
## 
## ----------- gory details -----
## 
## Fitted regular parameters (theta): 
## (Intercept)          Zc 
##  -2.105e+01  -3.758e-05 
## 
## Fitted exp(theta): 
## (Intercept)          Zc 
##   7.195e-10   1.000e+00

Create another covariate (distance from nearest weak tornado).

Zh = distmap(H)

Model the spatial density using two covariates.

fit2 = ppm(I, ~Zc + Zh, covariates = list(Zc = Zc, Zh = Zh), correction = "border", 
    rbord = 5)
## Warning: Some tiles with zero area contain points
## Warning: Some weights are zero
summary(fit2)
## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation) 
## Model was fitted using glm() 
## Algorithm converged
## Call:
## ppm(Q = I, trend = ~Zc + Zh, covariates = list(Zc = Zc, Zh = Zh), 
##     correction = "border", rbord = 5)
## Edge correction: "border" 
##  [border correction distance r = 5 ]
## 
## ----------------------------------------------------
## Quadrature scheme = data + dummy + weights
## Data pattern:
## Planar point pattern: 64 points
## Average intensity 3.21e-10 points per square meter  
## 
## Window: polygonal boundary
## single connected closed polygon with 19998 vertices
## enclosing rectangle: [-653754, -35416]x[384630, 796798]meters
## Window area =  1.99368e+11 square meters 
## Unit of length: 1 meter 
## 
## 
## Dummy quadrature points:
## ( 32 x 32 grid, plus 4 corner points)
## Planar point pattern: 841 points
## Average intensity 4.22e-09 points per square meter  
## 
## Window: polygonal boundary
## single connected closed polygon with 19998 vertices
## enclosing rectangle: [-653754, -35416]x[384630, 796798]meters
## Window area =  1.99368e+11 square meters 
## Unit of length: 1 meter 
## 
## 
## Quadrature weights:
## (counting weights based on 32 x 32 array of rectangular tiles)
## All weights:
##  range: [0, 2.49e+08]    total: 1.99e+11
## Weights on data points:
##  range: [0, 1.24e+08]    total: 7.41e+09
## Weights on dummy points:
##  range: [15600000, 2.49e+08] total: 1.92e+11
## 
## ----------------------------------------------------
## FITTED MODEL:
## 
## Nonstationary Poisson process
## 
## 
##  ---- Intensity: ----
## 
## Trend formula: ~Zc + Zh
## Model depends on external covariates 'Zc' and 'Zh' 
## 
## Covariates provided:
##  Zc: im
##  Zh: im
## 
## Fitted coefficients for trend formula:
## (Intercept)          Zc          Zh 
##  -2.055e+01  -2.938e-05  -6.631e-05 
## 
##               Estimate      S.E. Ztest    CI95.lo    CI95.hi
## (Intercept) -2.055e+01 2.723e-01    na -2.109e+01 -2.002e+01
## Zc          -2.938e-05 1.056e-05    ** -5.007e-05 -8.691e-06
## Zh          -6.631e-05 2.193e-05    ** -1.093e-04 -2.334e-05
## 
## 
## ----------- gory details -----
## 
## Fitted regular parameters (theta): 
## (Intercept)          Zc          Zh 
##  -2.055e+01  -2.938e-05  -6.631e-05 
## 
## Fitted exp(theta): 
## (Intercept)          Zc          Zh 
##   1.186e-09   1.000e+00   9.999e-01

Can you drop one of the covariates?(No)

fit3 = ppm(I, ~Zh, covariates = list(Zh = Zh), correction = "border", rbord = 5)
## Warning: Some tiles with zero area contain points
## Warning: Some weights are zero
summary(fit3)
## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation) 
## Model was fitted using glm() 
## Algorithm converged
## Call:
## ppm(Q = I, trend = ~Zh, covariates = list(Zh = Zh), correction = "border", 
##     rbord = 5)
## Edge correction: "border" 
##  [border correction distance r = 5 ]
## 
## ----------------------------------------------------
## Quadrature scheme = data + dummy + weights
## Data pattern:
## Planar point pattern: 64 points
## Average intensity 3.21e-10 points per square meter  
## 
## Window: polygonal boundary
## single connected closed polygon with 19998 vertices
## enclosing rectangle: [-653754, -35416]x[384630, 796798]meters
## Window area =  1.99368e+11 square meters 
## Unit of length: 1 meter 
## 
## 
## Dummy quadrature points:
## ( 32 x 32 grid, plus 4 corner points)
## Planar point pattern: 841 points
## Average intensity 4.22e-09 points per square meter  
## 
## Window: polygonal boundary
## single connected closed polygon with 19998 vertices
## enclosing rectangle: [-653754, -35416]x[384630, 796798]meters
## Window area =  1.99368e+11 square meters 
## Unit of length: 1 meter 
## 
## 
## Quadrature weights:
## (counting weights based on 32 x 32 array of rectangular tiles)
## All weights:
##  range: [0, 2.49e+08]    total: 1.99e+11
## Weights on data points:
##  range: [0, 1.24e+08]    total: 7.41e+09
## Weights on dummy points:
##  range: [15600000, 2.49e+08] total: 1.92e+11
## 
## ----------------------------------------------------
## FITTED MODEL:
## 
## Nonstationary Poisson process
## 
## 
##  ---- Intensity: ----
## 
## Trend formula: ~Zh
## Model depends on external covariate 'Zh' 
## 
## Covariates provided:
##  Zh: im
## 
## Fitted coefficients for trend formula:
## (Intercept)          Zh 
##  -2.104e+01  -8.095e-05 
## 
##               Estimate      S.E. Ztest    CI95.lo    CI95.hi
## (Intercept) -2.104e+01 0.2204881    na -2.147e+01 -20.607049
## Zh          -8.095e-05 0.0000214   *** -1.229e-04  -0.000039
## 
## 
## ----------- gory details -----
## 
## Fitted regular parameters (theta): 
## (Intercept)          Zh 
##  -2.104e+01  -8.095e-05 
## 
## Fitted exp(theta): 
## (Intercept)          Zh 
##   7.291e-10   9.999e-01
AIC(fit1)
## [1] 2869
AIC(fit2)
## [1] 2860
AIC(fit3)
## [1] 2867
2 * (as.numeric(logLik(fit2)) - as.numeric(logLik(fit3)))/I$n
## [1] 0.1423

Does adding a term for clustering improve things?

fit4 = kppm(I, ~Zc + Zh, covariates = list(Zc = Zc, Zh = Zh), interaction = "Thomas", 
    correction = "border", rbord = 5)
## Warning: Some tiles with zero area contain points
## Warning: Some weights are zero
fit4
## Inhomogeneous cluster point process model
## Fitted to point pattern dataset 'I' 
## Fitted using the inhomogeneous K-function 
## 
## Trend formula: ~Zc + Zh
## 
## Fitted coefficients for trend formula:
## (Intercept)          Zc          Zh 
##  -2.055e+01  -2.938e-05  -6.631e-05 
## 
## Cluster model: Thomas process 
## Fitted cluster parameters:
##     kappa    sigma2 
## 1.000e+00 2.816e+09 
## Mean cluster size: [pixel image]

Prediction correlation.

M = quadrat.test(fit2, nx = 6, ny = 4)
## Warning: Some expected counts are small; chi^2 approximation may be
## inaccurate
cor(M$expected, M$observed)
## [1] 0.6935