date()
## [1] "Tue Apr 02 15:28:32 2013"
This analysis is for the state of Arkansas
Here I download the data and read it into R. I need the rgdal package for the readOGR() function. I print the projection information.
tmp = download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip",
"tornado.zip", mode = "wb")
unzip("tornado.zip")
require(rgdal)
## Warning: package 'rgdal' was built under R version 2.15.3
## Warning: package 'sp' was built under R version 2.15.3
Torn = readOGR(dsn = ".", layer = "tornado")
## OGR data source with driver: ESRI Shapefile
## Source: ".", layer: "tornado"
## with 56221 features and 28 fields
## Feature type: wkbPoint with 2 dimensions
names(Torn)
## [1] "OM" "YEAR" "MONTH" "DAY" "DATE"
## [6] "TIME" "TIMEZONE" "STATE" "FIPS" "STATENUMBE"
## [11] "FSCALE" "INJURIES" "FATALITIES" "LOSS" "CROPLOSS"
## [16] "SLAT" "SLON" "ELAT" "ELON" "LENGTH"
## [21] "WIDTH" "NS" "SN" "SG" "F1"
## [26] "F2" "F3" "F4"
proj4string(Torn)
## [1] "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
Create the factor variable (Category) for the F-scale.
Torn2 = subset(Torn, FSCALE >= 0 & YEAR >= 1950)
Torn2$Category = factor(Torn2$FSCALE, ordered = TRUE)
require(maptools)
## Warning: package 'maptools' was built under R version 2.15.2
require(PBSmapping)
## Warning: package 'PBSmapping' was built under R version 2.15.3
unzip("tl_2009_05_county.zip")
AR = readOGR(dsn = ".", layer = "tl_2009_05_county")
## OGR data source with driver: ESRI Shapefile
## Source: ".", layer: "tl_2009_05_county"
## with 75 features and 17 fields
## Feature type: wkbPolygon with 2 dimensions
AR = spTransform(AR, CRS(proj4string(Torn2)))
lps = getSpPPolygonsLabptSlots(AR)
## Warning: use coordinates method
IDOneBin = cut(lps[, 1], range(lps[, 1]), include.lowest = TRUE)
ARw = unionSpatialPolygons(AR, IDOneBin)
## Warning: package 'rgeos' was built under R version 2.15.3
Next, create and summarize a planar point pattern object.
require(spatstat)
## Warning: package 'spatstat' was built under R version 2.15.2
## Warning: package 'deldir' was built under R version 2.15.2
W = as(ARw, "owin")
## Checking 1 polygon...[Checking polygon with 18656 edges...]
## done.
All.ppp = as.ppp(Torn2["Category"])
All.ppp = All.ppp[W]
unitname(All.ppp) = c("meter", "meters")
summary(All.ppp)
## Marked planar point pattern: 1611 points
## Average intensity 1.18e-08 points per square meter
##
## *Pattern contains duplicated points*
## Multitype:
## frequency proportion intensity
## 0 451 0.2800 3.29e-09
## 1 602 0.3740 4.39e-09
## 2 389 0.2410 2.84e-09
## 3 142 0.0881 1.04e-09
## 4 27 0.0168 1.97e-10
## 5 0 0.0000 0.00e+00
##
## Window: polygonal boundary
## single connected closed polygon with 18656 vertices
## enclosing rectangle: [123266, 571076]x[-659256, -259511]meters
## Window area = 1.36987e+11 square meters
## Unit of length: 1 meter
The native spatial units are meters.
Create two ppp objects and join them. This is needed for the relative risk function.
T = All.ppp
plot(split(T))
H = unmark(T[T$marks <= 2 & T$marks >= 1])
I = unmark(T[T$marks >= 3])
T2 = superimpose(H = H, I = I)
## Warning: data contain duplicated points
summary(T2)
## Marked planar point pattern: 1160 points
## Average intensity 8.47e-09 points per square meter
##
## *Pattern contains duplicated points*
## Multitype:
## frequency proportion intensity
## H 991 0.854 7.23e-09
## I 169 0.146 1.23e-09
##
## Window: polygonal boundary
## single connected closed polygon with 18656 vertices
## enclosing rectangle: [123266, 571076]x[-659256, -259511]meters
## Window area = 1.36987e+11 square meters
## Unit of length: 1 meter
plot(I, pch = 25, cols = "red", main = "")
plot(AR, add = TRUE, lwd = 0.1)
Compute spatial densities and relative risk.
eps = 40
dAll = density(unmark(T2), dimyx = c(eps, eps))
# dAllc = density(unmark(T2), xy=list(x=c(13083, 13084), y=c(-97147,
# -97146)))
mean(dAll$v, na.rm = TRUE)
## [1] 8.567e-09
dH = density(H, dimyx = c(eps, eps))
dI = density(I, dimyx = c(eps, eps))
rr = relrisk(T2, dimyx = c(eps, eps), hmax = 150000)
Statewide mean relative risk.
mean(rr$v, na.rm = TRUE)
## [1] 0.1477
sd(as.vector(rr$v), na.rm = TRUE)
## [1] 0.02047
Map them on a 4-panel plot.
par(mfrow = c(2, 2))
plot(dAll, col = terrain.colors(10), ribbon = FALSE, main = "")
plot(AR, add = TRUE, lwd = 0.1)
contour(dAll, add = TRUE)
plot(dH, col = terrain.colors(10), ribbon = FALSE, main = "")
plot(AR, add = TRUE, lwd = 0.1)
contour(dH, add = TRUE)
plot(dI, col = terrain.colors(10), ribbon = FALSE, main = "")
plot(AR, add = TRUE, lwd = 0.1)
contour(dI, add = TRUE)
plot(rr, ribbon = FALSE, main = "")
plot(AR, add = TRUE, lwd = 0.1)
contour(rr, add = TRUE)
Create a SpatialGridDataFrame (S4 class) and map the relative risk.
require(maps)
## Loading required package: maps
## Warning: package 'maps' was built under R version 2.15.3
require(maptools)
ll = "+proj=longlat +ellps=GRS80 +datum=NAD83"
brd = map("county", "Arkansas")
brd_ll = map2SpatialLines(brd, proj4string = CRS(ll))
brd_lcc = spTransform(brd_ll, CRS(proj4string(Torn2)))
rr.sgdf = as(rr, "SpatialGridDataFrame")
require(RColorBrewer)
## Loading required package: RColorBrewer
## Warning: package 'RColorBrewer' was built under R version 2.15.2
range(rr.sgdf$v, na.rm = TRUE)
## [1] 0.1166 0.1940
rng = seq(0.1, 0.2, 0.02)
cls = brewer.pal(9, "YlOrRd")
l3 = list("sp.lines", brd_lcc, lwd = 0.5, first = FALSE)
p1 = spplot(rr.sgdf, "v", col.regions = cls, at = rng, sp.layout = l3, colorkey = list(space = "bottom",
labels = list(cex = 1.2)), sub = list("Relative probability that a tornado will be intense (EF3 or stronger)",
cex = 1.2, font = 1))
p1
Rescale units to kilometers.
T = affine(All.ppp, mat = diag(c(1/1000, 1/1000)))
unitname(T) = c("km", "kms")
summary(T)
T = All.ppp
Get data on the location of cities.
unzip("ci08au12.zip")
US = readOGR(dsn = ".", layer = "ci08au12", p4s = ll)
## OGR data source with driver: ESRI Shapefile
## Source: ".", layer: "ci08au12"
## with 43603 features and 23 fields
## Feature type: wkbPoint with 2 dimensions
UST = spTransform(US, CRS(proj4string(Torn2)))
USt = subset(UST, UST$POP_1990 > 1000)
US.ppp = as.ppp(USt)
# US.ppp = affine(US.ppp, mat = diag(c(1/1000, 1/1000))) unitname(US.ppp)
# = c('km', 'kms')
window = as.owin(T)
CP.ppp = US.ppp[window]
plot(unmark(CP.ppp))
Create a map of distance from nearest city centers. This is a covariate in the ppm.
Zc = distmap(CP.ppp)
plot(Zc)
Examine the smoothed estimate of spatial density as a function of the covariate.
H = unmark(T[T$marks <= 2 & T$marks >= 1])
I = unmark(T[T$marks >= 3])
plot(rhohat(H, Zc, smoother = "kernel", method = "transform"), legend = FALSE,
main = "")
plot(rhohat(I, Zc, smoother = "kernel", method = "transform"), legend = FALSE,
main = "")
Model the spatial density of the intense tornadoes using distance from city center as a covariate.
fit1 = ppm(I, ~Zc, covariates = list(Zc = Zc), correction = "border", rbord = 5)
summary(fit1)
## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation)
## Model was fitted using glm()
## Algorithm converged
## Call:
## ppm(Q = I, trend = ~Zc, covariates = list(Zc = Zc), correction = "border",
## rbord = 5)
## Edge correction: "border"
## [border correction distance r = 5 ]
##
## ----------------------------------------------------
## Quadrature scheme = data + dummy + weights
## Data pattern:
## Planar point pattern: 169 points
## Average intensity 1.23e-09 points per square meter
##
## Window: polygonal boundary
## single connected closed polygon with 18656 vertices
## enclosing rectangle: [123266, 571076]x[-659256, -259511]meters
## Window area = 1.36987e+11 square meters
## Unit of length: 1 meter
##
##
## Dummy quadrature points:
## ( 32 x 32 grid, plus 4 corner points)
## Planar point pattern: 833 points
## Average intensity 6.08e-09 points per square meter
##
## Window: polygonal boundary
## single connected closed polygon with 18656 vertices
## enclosing rectangle: [123266, 571076]x[-659256, -259511]meters
## Window area = 1.36987e+11 square meters
## Unit of length: 1 meter
##
##
## Quadrature weights:
## (counting weights based on 32 x 32 array of rectangular tiles)
## All weights:
## range: [10900000, 1.75e+08] total: 1.37e+11
## Weights on data points:
## range: [3.5e+07, 87400000] total: 1.29e+10
## Weights on dummy points:
## range: [10900000, 1.75e+08] total: 1.24e+11
##
## ----------------------------------------------------
## FITTED MODEL:
##
## Nonstationary Poisson process
##
##
## ---- Intensity: ----
##
## Trend formula: ~Zc
## Model depends on external covariate 'Zc'
##
## Covariates provided:
## Zc: im
##
## Fitted coefficients for trend formula:
## (Intercept) Zc
## -1.969e+01 -6.888e-05
##
## Estimate S.E. Ztest CI95.lo CI95.hi
## (Intercept) -1.969e+01 1.478e-01 na -1.998e+01 -1.940e+01
## Zc -6.888e-05 1.214e-05 *** -9.268e-05 -4.508e-05
##
##
## ----------- gory details -----
##
## Fitted regular parameters (theta):
## (Intercept) Zc
## -1.969e+01 -6.888e-05
##
## Fitted exp(theta):
## (Intercept) Zc
## 2.818e-09 9.999e-01
Create another covariate (distance from nearest weak tornado).
Zh = distmap(H)
Model the spatial density using two covariates.
fit2 = ppm(I, ~Zc + Zh, covariates = list(Zc = Zc, Zh = Zh), correction = "border",
rbord = 5)
summary(fit2)
## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation)
## Model was fitted using glm()
## Algorithm converged
## Call:
## ppm(Q = I, trend = ~Zc + Zh, covariates = list(Zc = Zc, Zh = Zh),
## correction = "border", rbord = 5)
## Edge correction: "border"
## [border correction distance r = 5 ]
##
## ----------------------------------------------------
## Quadrature scheme = data + dummy + weights
## Data pattern:
## Planar point pattern: 169 points
## Average intensity 1.23e-09 points per square meter
##
## Window: polygonal boundary
## single connected closed polygon with 18656 vertices
## enclosing rectangle: [123266, 571076]x[-659256, -259511]meters
## Window area = 1.36987e+11 square meters
## Unit of length: 1 meter
##
##
## Dummy quadrature points:
## ( 32 x 32 grid, plus 4 corner points)
## Planar point pattern: 833 points
## Average intensity 6.08e-09 points per square meter
##
## Window: polygonal boundary
## single connected closed polygon with 18656 vertices
## enclosing rectangle: [123266, 571076]x[-659256, -259511]meters
## Window area = 1.36987e+11 square meters
## Unit of length: 1 meter
##
##
## Quadrature weights:
## (counting weights based on 32 x 32 array of rectangular tiles)
## All weights:
## range: [10900000, 1.75e+08] total: 1.37e+11
## Weights on data points:
## range: [3.5e+07, 87400000] total: 1.29e+10
## Weights on dummy points:
## range: [10900000, 1.75e+08] total: 1.24e+11
##
## ----------------------------------------------------
## FITTED MODEL:
##
## Nonstationary Poisson process
##
##
## ---- Intensity: ----
##
## Trend formula: ~Zc + Zh
## Model depends on external covariates 'Zc' and 'Zh'
##
## Covariates provided:
## Zc: im
## Zh: im
##
## Fitted coefficients for trend formula:
## (Intercept) Zc Zh
## -1.943e+01 -6.229e-05 -5.537e-05
##
## Estimate S.E. Ztest CI95.lo CI95.hi
## (Intercept) -1.943e+01 1.801e-01 na -1.978e+01 -1.908e+01
## Zc -6.229e-05 1.230e-05 *** -8.640e-05 -3.819e-05
## Zh -5.537e-05 2.342e-05 * -1.013e-04 -9.465e-06
##
##
## ----------- gory details -----
##
## Fitted regular parameters (theta):
## (Intercept) Zc Zh
## -1.943e+01 -6.229e-05 -5.537e-05
##
## Fitted exp(theta):
## (Intercept) Zc Zh
## 3.650e-09 9.999e-01 9.999e-01
Can you drop one of the covariates?
fit3 = ppm(I, ~Zh, covariates = list(Zh = Zh), correction = "border", rbord = 5)
AIC(fit1)
## [1] 7238
AIC(fit2)
## [1] 7234
AIC(fit3)
## [1] 7262
2 * (as.numeric(logLik(fit2)) - as.numeric(logLik(fit3)))/I$n
## [1] 0.1744
Does adding a term for clustering improve things?
fit4 = kppm(I, ~Zc + Zh, covariates = list(Zc = Zc, Zh = Zh), interaction = "Thomas",
correction = "border", rbord = 5)
fit4
## Inhomogeneous cluster point process model
## Fitted to point pattern dataset 'I'
## Fitted using the inhomogeneous K-function
##
## Trend formula: ~Zc + Zh
##
## Fitted coefficients for trend formula:
## (Intercept) Zc Zh
## -1.943e+01 -6.229e-05 -5.537e-05
##
## Cluster model: Thomas process
## Fitted cluster parameters:
## kappa sigma2
## 1 630599298
## Mean cluster size: [pixel image]
Prediction correlation.
M = quadrat.test(fit2, nx = 6, ny = 4)
## Warning: Some expected counts are small; chi^2 approximation may be
## inaccurate
cor(M$expected, M$observed)
## [1] 0.7365