Here I download the data and read it into R. I need the rgdal package for the readOGR() function. I print the projection information.
setwd("~/ASS")
tmp = download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip",
"tornado.zip", mode = "wb")
unzip("tornado.zip")
require(rgdal)
Torn = readOGR(dsn = ".", layer = "tornado")
## OGR data source with driver: ESRI Shapefile
## Source: ".", layer: "tornado"
## with 56221 features and 28 fields
## Feature type: wkbPoint with 2 dimensions
names(Torn)
## [1] "OM" "YEAR" "MONTH" "DAY" "DATE"
## [6] "TIME" "TIMEZONE" "STATE" "FIPS" "STATENUMBE"
## [11] "FSCALE" "INJURIES" "FATALITIES" "LOSS" "CROPLOSS"
## [16] "SLAT" "SLON" "ELAT" "ELON" "LENGTH"
## [21] "WIDTH" "NS" "SN" "SG" "F1"
## [26] "F2" "F3" "F4"
proj4string(Torn)
## [1] "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
We're only interested in touchdown point (slat/slon)
Create the factor variable (Category) for the F-scale.
Torn2 = subset(Torn, FSCALE >= 0 & YEAR >= 1950)
Torn2$Category = factor(Torn2$FSCALE, ordered = TRUE)
Next I choose a domain. Here I'm interested in Florida. http://www2.census.gov/cgi-bin/shapefiles2009/state-files?state=12
require(maptools)
require(PBSmapping)
unzip("tl_2009_12_county.zip")
FL = readOGR(dsn = ".", layer = "tl_2009_12_county")
## OGR data source with driver: ESRI Shapefile
## Source: ".", layer: "tl_2009_12_county"
## with 67 features and 17 fields
## Feature type: wkbPolygon with 2 dimensions
FL = spTransform(FL, CRS(proj4string(Torn2)))
lps = getSpPPolygonsLabptSlots(FL)
## Warning: use coordinates method
IDOneBin = cut(lps[, 1], range(lps[, 1]), include.lowest = TRUE)
FLw = unionSpatialPolygons(FL, IDOneBin)
Next, create and summarize a planar point pattern object.
require(spatstat)
W = as(FLw, "owin")
## Checking 2 polygons...1, 2.
## [Checking polygon with 19948 edges...]
## done.
## Checking for cross-intersection between 2 polygons... 1.
## done.
All.ppp = as.ppp(Torn2["Category"])
All.ppp = All.ppp[W]
unitname(All.ppp) = c("meter", "meters")
summary(All.ppp)
## Marked planar point pattern: 2936 points
## Average intensity 1.68e-08 points per square meter
##
## *Pattern contains duplicated points*
##
## Coordinates are given to 2 decimal places
## i.e. rounded to the nearest multiple of 0.01 meters
##
## Multitype:
## frequency proportion intensity
## 0 1760 0.598000 1.01e-08
## 1 832 0.283000 4.77e-09
## 2 312 0.106000 1.79e-09
## 3 35 0.011900 2.01e-10
## 4 2 0.000681 1.15e-11
## 5 0 0.000000 0.00e+00
##
## Window: polygonal boundary
## 2 separate polygons (no holes)
## vertices area relative.area
## polygon 1 51 2.942e+08 0.00169
## polygon 2 19948 1.740e+11 0.99800
## enclosing rectangle: [802437, 1628516] x [-1514458, -799607] meters
## Window area = 1.74253e+11 square meters
## Unit of length: 1 meter
plot(All.ppp)
## 0 1 2 3 4 5
## 1 2 3 4 5 6
The native spatial units are meters.
Create two ppp objects and join them. This is needed for the relative risk function.
T = All.ppp
plot(split(T))
H = unmark(T[T$marks <= 2 & T$marks >= 1])
I = unmark(T[T$marks >= 3])
# H = unmark(T[T$marks == 1]) I = unmark(T[T$marks >= 2])
T2 = superimpose(H = H, I = I)
## Warning: data contain duplicated points
summary(T2)
## Marked planar point pattern: 1181 points
## Average intensity 6.78e-09 points per square meter
##
## *Pattern contains duplicated points*
##
## Coordinates are given to 2 decimal places
## i.e. rounded to the nearest multiple of 0.01 meters
##
## Multitype:
## frequency proportion intensity
## H 1140 0.9690 6.57e-09
## I 37 0.0313 2.12e-10
##
## Window: polygonal boundary
## 2 separate polygons (no holes)
## vertices area relative.area
## polygon 1 51 2.942e+08 0.00169
## polygon 2 19948 1.740e+11 0.99800
## enclosing rectangle: [802437, 1628516] x [-1514458, -799607] meters
## Window area = 1.74253e+11 square meters
## Unit of length: 1 meter
plot(I, pch = 25, cols = "red", main = "")
plot(FL, add = TRUE, lwd = 0.1)
Compute spatial densities and relative risk.
eps = 80 # may be different depending on state...
dAll = density(unmark(T2), dimyx = c(eps, eps))
mean(dAll$v, na.rm = TRUE) # v = actual densities (on a regular grid)
## [1] 6.538e-09
dH = density(H, dimyx = c(eps, eps))
dI = density(I, dimyx = c(eps, eps))
rr = relrisk(T2, dimyx = c(eps, eps), hmin = 500, hmax = 1500000)
(had to adjust hmin/hmax to get sufficient bandwidth)
Statewide mean relative risk.
mean(rr$v, na.rm = TRUE)
## [1] 0.03123
sd(as.vector(rr$v), na.rm = TRUE)
## [1] 0.0001498
Given a tornado, there's a 3.12% chance that it will be an F3 or higher.
Map them on a 4-panel plot.
par(mfrow = c(2, 2))
plot(dAll, col = terrain.colors(10), ribbon = FALSE, main = "")
plot(FL, add = TRUE, lwd = 0.1)
contour(dAll, add = TRUE)
## NULL
plot(dH, col = terrain.colors(10), ribbon = FALSE, main = "")
plot(FL, add = TRUE, lwd = 0.1)
contour(dH, add = TRUE)
## NULL
plot(dI, col = terrain.colors(10), ribbon = FALSE, main = "")
plot(FL, add = TRUE, lwd = 0.1)
contour(dI, add = TRUE)
## NULL
plot(rr, ribbon = FALSE, main = "")
plot(FL, add = TRUE, lwd = 0.1)
contour(rr, add = TRUE)
## NULL
Create a SpatialGridDataFrame (S4 class) and map the relative risk.
require(maps)
## Loading required package: maps
require(maptools)
ll = "+proj=longlat +ellps=GRS80 +datum=NAD83"
brd = map("county", "florida", plot = FALSE)
brd_ll = map2SpatialLines(brd, proj4string = CRS(ll))
brd_lcc = spTransform(brd_ll, CRS(proj4string(Torn2)))
rr.sgdf = as(rr, "SpatialGridDataFrame")
require(RColorBrewer)
## Loading required package: RColorBrewer
rr.sgdf$v = rr.sgdf$v * 100
range(rr.sgdf$v, na.rm = TRUE)
## [1] 3.090 3.148
rng = seq(3.09, 3.15, 0.01)
cls = brewer.pal(6, "YlOrRd")
l3 = list("sp.lines", brd_lcc, lwd = 0.5, first = FALSE)
p1 = spplot(rr.sgdf, "v", col.regions = cls, at = rng, sp.layout = l3, colorkey = list(space = "bottom",
labels = list(cex = 1.2)), sub = list("Relative probability (as %) that a tornado will be intense (EF3 or stronger)",
cex = 1.2, font = 1))
p1
I don't quite understand why the relative risk plot looks the way it does. When I subset on >= F3, the relative risk plot shows the highest risk in south/central East FL, with smoothly decreasing risk as you move up toward the western panhandle. Conversely, if I subset on >= F2, the pattern reverses, with the highest risk in the western panhandle and smoothly decreasing risk as you move toward southeast FL. Is there a reason that the pattern would be reversed? Also, why does it so smoothly decrease? Given the density plots, which show more events in central FL and the western panhandle, the relative risk plot seems suspicious.
Rescale units to kilometers.
T = affine(All.ppp, mat = diag(c(1/1000, 1/1000)))
unitname(T) = c("km", "kms")
summary(T)
T = All.ppp
Get data on the location of cities.
unzip("ci08au12.zip")
US = readOGR(dsn = ".", layer = "ci08au12", p4s = ll)
## OGR data source with driver: ESRI Shapefile
## Source: ".", layer: "ci08au12"
## with 43603 features and 23 fields
## Feature type: wkbPoint with 2 dimensions
UST = spTransform(US, CRS(proj4string(Torn2)))
USt = subset(UST, UST$POP_1990 > 1000)
US.ppp = as.ppp(USt)
window = as.owin(T)
CP.ppp = US.ppp[window]
plot(unmark(CP.ppp))
Create a map of distance from nearest city centers. This is a covariate in the ppm.
Zc = distmap(CP.ppp)
plot(Zc)
Examine the smoothed estimate of spatial density as a function of the covariate.
# H = unmark(T[T$marks == 1]) I = unmark(T[T$marks >= 2])
H = unmark(T[T$marks <= 2 & T$marks >= 1])
I = unmark(T[T$marks >= 3])
plot(rhohat(H, Zc, smoother = "kernel", method = "transform"), legend = FALSE,
main = "")
plot(rhohat(I, Zc, smoother = "kernel", method = "transform"), legend = FALSE,
main = "")
The farther you are from the city, the lower the density. There does appear to be some population bias.
Model the spatial density of the intense tornadoes using distance from city center as a covariate.
fit1 = ppm(I, ~Zc, covariates = list(Zc = Zc), correction = "border", rbord = 5)
summary(fit1)
## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation)
## Model was fitted using glm()
## Algorithm converged
## Call:
## ppm(Q = I, trend = ~Zc, covariates = list(Zc = Zc), correction = "border",
## rbord = 5)
## Edge correction: "border"
## [border correction distance r = 5 ]
##
## ----------------------------------------------------
## Quadrature scheme = data + dummy + weights
## Data pattern:
## Planar point pattern: 37 points
## Average intensity 2.12e-10 points per square meter
## Window: polygonal boundary
## 2 separate polygons (no holes)
## vertices area relative.area
## polygon 1 51 2.942e+08 0.00169
## polygon 2 19948 1.740e+11 0.99800
## enclosing rectangle: [802437, 1628516] x [-1514458, -799607] meters
## Window area = 1.74253e+11 square meters
## Unit of length: 1 meter
##
##
## Dummy quadrature points:
## ( 32 x 32 grid, plus 4 corner points)
## Planar point pattern: 357 points
## Average intensity 2.05e-09 points per square meter
## Window: polygonal boundary
## 2 separate polygons (no holes)
## vertices area relative.area
## polygon 1 51 2.942e+08 0.00169
## polygon 2 19948 1.740e+11 0.99800
## enclosing rectangle: [802437, 1628516] x [-1514458, -799607] meters
## Window area = 1.74253e+11 square meters
## Unit of length: 1 meter
##
##
## Quadrature weights:
## (counting weights based on 32 x 32 array of rectangular tiles)
## All weights:
## range: [3.6e+07, 5.77e+08] total: 1.74e+11
## Weights on data points:
## range: [1.26e+08, 2.88e+08] total: 9.56e+09
## Weights on dummy points:
## range: [3.6e+07, 5.77e+08] total: 1.65e+11
##
## ----------------------------------------------------
## FITTED MODEL:
##
## Nonstationary Poisson process
##
##
## ---- Intensity: ----
##
## Trend formula: ~Zc
## Model depends on external covariate 'Zc'
##
## Covariates provided:
## Zc: im
##
## Fitted coefficients for trend formula:
## (Intercept) Zc
## -2.156e+01 -6.848e-05
##
## Estimate S.E. Ztest CI95.lo CI95.hi
## (Intercept) -2.156e+01 2.615e-01 na -2.208e+01 -2.105e+01
## Zc -6.848e-05 2.497e-05 ** -1.174e-04 -1.954e-05
##
##
## ----------- gory details -----
##
## Fitted regular parameters (theta):
## (Intercept) Zc
## -2.156e+01 -6.848e-05
##
## Fitted exp(theta):
## (Intercept) Zc
## 4.319e-10 9.999e-01
Zc –> for every 1km distance away from a city, the spatial density drops by 6.85E-5.
Create another covariate (distance from nearest weak tornado).
Zh = distmap(H)
Model the spatial density using two covariates.
fit2 = ppm(I, ~Zc + Zh, covariates = list(Zc = Zc, Zh = Zh), correction = "border",
rbord = 5)
## Warning: glm.fit: algorithm did not converge
summary(fit2)
## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation)
## Model was fitted using glm()
## *** Algorithm did not converge ***
## Call:
## ppm(Q = I, trend = ~Zc + Zh, covariates = list(Zc = Zc, Zh = Zh),
## correction = "border", rbord = 5)
## Edge correction: "border"
## [border correction distance r = 5 ]
##
## ----------------------------------------------------
## Quadrature scheme = data + dummy + weights
## Data pattern:
## Planar point pattern: 37 points
## Average intensity 2.12e-10 points per square meter
## Window: polygonal boundary
## 2 separate polygons (no holes)
## vertices area relative.area
## polygon 1 51 2.942e+08 0.00169
## polygon 2 19948 1.740e+11 0.99800
## enclosing rectangle: [802437, 1628516] x [-1514458, -799607] meters
## Window area = 1.74253e+11 square meters
## Unit of length: 1 meter
##
##
## Dummy quadrature points:
## ( 32 x 32 grid, plus 4 corner points)
## Planar point pattern: 357 points
## Average intensity 2.05e-09 points per square meter
## Window: polygonal boundary
## 2 separate polygons (no holes)
## vertices area relative.area
## polygon 1 51 2.942e+08 0.00169
## polygon 2 19948 1.740e+11 0.99800
## enclosing rectangle: [802437, 1628516] x [-1514458, -799607] meters
## Window area = 1.74253e+11 square meters
## Unit of length: 1 meter
##
##
## Quadrature weights:
## (counting weights based on 32 x 32 array of rectangular tiles)
## All weights:
## range: [3.6e+07, 5.77e+08] total: 1.74e+11
## Weights on data points:
## range: [1.26e+08, 2.88e+08] total: 9.56e+09
## Weights on dummy points:
## range: [3.6e+07, 5.77e+08] total: 1.65e+11
##
## ----------------------------------------------------
## FITTED MODEL:
##
## Nonstationary Poisson process
##
##
## ---- Intensity: ----
##
## Trend formula: ~Zc + Zh
## Model depends on external covariates 'Zc' and 'Zh'
##
## Covariates provided:
## Zc: im
## Zh: im
##
## Fitted coefficients for trend formula:
## (Intercept) Zc Zh
## -2.043e+01 -3.300e-05 -2.484e-04
##
## Estimate S.E. Ztest CI95.lo CI95.hi
## (Intercept) -2.043e+01 3.063e-01 na -2.103e+01 -1.983e+01
## Zc -3.300e-05 2.029e-05 -7.278e-05 6.770e-06
## Zh -2.484e-04 5.658e-05 *** -3.593e-04 -1.375e-04
##
##
## ----------- gory details -----
##
## Fitted regular parameters (theta):
## (Intercept) Zc Zh
## -2.043e+01 -3.300e-05 -2.484e-04
##
## Fitted exp(theta):
## (Intercept) Zc Zh
## 1.337e-09 1.000e+00 9.998e-01
Both Zc (distance from nearest city) and Zh (distance from nearest weak tornado) the density decreases as distance increases; however, only distance from nearest weak tornado is significant.
Can you drop one of the covariates?
fit3 = ppm(I, ~Zh, covariates = list(Zh = Zh), correction = "border", rbord = 5)
## Warning: glm.fit: algorithm did not converge
AIC(fit1)
## [1] 1715
AIC(fit2)
## [1] 1686
AIC(fit3)
## [1] 1688
2 * (as.numeric(logLik(fit2)) - as.numeric(logLik(fit3)))/I$n
## [1] 0.09219
The AIC is minimized when both covariates are included in the model (Fit2)
Does adding a term for clustering improve things?
fit4 = kppm(I, ~Zc + Zh, covariates = list(Zc = Zc, Zh = Zh), interaction = "Thomas",
correction = "border", rbord = 5)
## Warning: glm.fit: algorithm did not converge
fit4
## Inhomogeneous cluster point process model
## Fitted to point pattern dataset 'I'
## Fitted by minimum contrast
## Summary statistic: inhomogeneous K-function
##
## Trend formula: ~Zc + Zh
##
## Fitted coefficients for trend formula:
## (Intercept) Zc Zh
## -2.043e+01 -3.300e-05 -2.485e-04
##
## Cluster model: Thomas process
## Fitted cluster parameters:
## kappa sigma2
## 6.491e-11 2.593e+09
## Mean cluster size: [pixel image]
Warning! The algorithm did not converge!
Prediction correlation.
M = quadrat.test(fit2, nx = 6, ny = 4)
## Warning: Some expected counts are small; chi^2 approximation may be
## inaccurate
cor(M$expected, M$observed)
## [1] 0.6481
The correlation between expected and observed is 0.648. However, quadrat.test returns a warning message that the chi squared approximation may be inaccurate.