Group term project for Applied Spatial Statistics: Spring 2013
South Dakota
Download the data and read it into R. Use the rgdal package for the readOGR() function. Print the projection information.
tmp = download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip",
"tornado.zip", mode = "wb")
unzip("tornado.zip")
require(rgdal)
## Warning: package 'rgdal' was built under R version 2.15.2
## Warning: package 'sp' was built under R version 2.15.2
Torn = readOGR(dsn = ".", layer = "tornado")
## OGR data source with driver: ESRI Shapefile
## Source: ".", layer: "tornado"
## with 56221 features and 28 fields
## Feature type: wkbPoint with 2 dimensions
names(Torn)
## [1] "OM" "YEAR" "MONTH" "DAY" "DATE"
## [6] "TIME" "TIMEZONE" "STATE" "FIPS" "STATENUMBE"
## [11] "FSCALE" "INJURIES" "FATALITIES" "LOSS" "CROPLOSS"
## [16] "SLAT" "SLON" "ELAT" "ELON" "LENGTH"
## [21] "WIDTH" "NS" "SN" "SG" "F1"
## [26] "F2" "F3" "F4"
proj4string(Torn)
## [1] "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
Subset on year and F scale and create a factor variable (Category) for the F-scale. Convert length from miles and the width from yards to meters. Compute path area.
Torn2 = subset(Torn, FSCALE > 0 & YEAR >= 1950)
Torn2$Category = factor(Torn2$FSCALE, ordered = TRUE)
Torn2$Length = Torn2$LENGTH * 1609.34
Torn2$Width = Torn2$WIDTH * 0.9144
Torn2$Area = Torn2$Width * Torn2$Length
Choose a domain. Here I'm interested in the state of South Dakota (46).
tmp = download.file("http://www.census.gov/geo/cob/bdy/st/st00shp/st46_d00_shp.zip",
"st46_d00_shp.zip", mode = "wb")
unzip("st46_d00_shp.zip")
STATE = readOGR(dsn = ".", layer = "st46_d00")
## OGR data source with driver: ESRI Shapefile
## Source: ".", layer: "st46_d00"
## with 1 features and 10 fields
## Feature type: wkbPolygon with 2 dimensions
Add projection information and transform the state boundaries.
ll = "+proj=longlat +ellps=GRS80 +datum=NAD83"
proj4string(STATE) = CRS(ll)
STATE = spTransform(STATE, CRS(proj4string(Torn2)))
require(rgeos)
## Warning: package 'rgeos' was built under R version 2.15.2
stArea = gArea(STATE)
Subset by state boundary.
st = overlay(Torn2, STATE)
st = !is.na(st)
Torn2 = Torn2[st, ]
Check the subsetting.
plot(STATE)
plot(Torn2, add = TRUE)
table(Torn2$STATE)
##
## AK AL AR AZ CA CO CT DC DE FL GA HI IA ID IL IN KS KY
## 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## LA MA MD ME MI MN MO MS MT NC ND NE NH NJ NM NV NY OH
## 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0
## OK OR PA PR RI SC SD TN TX UT VA VT WA WI WV WY
## 0 0 0 0 0 0 622 0 0 0 0 0 0 0 0 1
plot(Torn2[Torn2$STATE == "NE", ], add = TRUE, col = "red")
Compute total area of tornado paths and annual average area.
tArea = sum(Torn2$Area)
areaPerYear = tArea/length(1950:2011)
stArea/areaPerYear # Annual avg recurrence interval for any location
## [1] 11530
areaPerYear/stArea * 100 # Annual probability of getting struck by a tornado
## [1] 0.008673
Next, create and summarize a planar point pattern object.
require(spatstat)
## Warning: package 'spatstat' was built under R version 2.15.2
## Warning: package 'deldir' was built under R version 2.15.2
require(maptools)
## Warning: package 'maptools' was built under R version 2.15.2
W = as(STATE, "owin")
## Checking 1 polygon...[Checking polygon with 1700 edges...]
## done.
All.ppp = as.ppp(Torn2["Category"])
All.ppp = All.ppp[W]
unitname(All.ppp) = c("meter", "meters")
summary(All.ppp)
## Marked planar point pattern: 626 points
## Average intensity 3.14e-09 points per square meter
##
## *Pattern contains duplicated points*
## Multitype:
## frequency proportion intensity
## 1 337 0.5380 1.69e-09
## 2 225 0.3590 1.13e-09
## 3 55 0.0879 2.76e-10
## 4 8 0.0128 4.01e-11
## 5 1 0.0016 5.02e-12
##
## Window: polygonal boundary
## single connected closed polygon with 1700 vertices
## enclosing rectangle: [-653762, -35425]x[384624, 796798]meters
## Window area = 1.99367e+11 square meters
## Unit of length: 1 meter
F0-F1: weak
F2-F3: strong
F4-F5: violent
F2-F5: significant
F3-F5: intense (as an opposite we could use 'moderate')
EF0 = 65, 85
EF1 = 86, 110
EF2 = 111, 135
EF3 = 136, 165
EF4 = 166, 200
EF5 > 200
multiply by .44704 to get m/s
Create two ppp objects and join them. This is needed for the relative risk function.
T = All.ppp
plot(split(T))
H = unmark(T[T$marks <= 2 & T$marks >= 1])
I = unmark(T[T$marks >= 3])
T2 = superimpose(H = H, I = I)
## Warning: data contain duplicated points
summary(T2)
## Marked planar point pattern: 626 points
## Average intensity 3.14e-09 points per square meter
##
## *Pattern contains duplicated points*
## Multitype:
## frequency proportion intensity
## H 562 0.898 2.82e-09
## I 64 0.102 3.21e-10
##
## Window: polygonal boundary
## single connected closed polygon with 1700 vertices
## enclosing rectangle: [-653762, -35425]x[384624, 796798]meters
## Window area = 1.99367e+11 square meters
## Unit of length: 1 meter
Plot the intensy events.
plot(I, pch = 25, cols = "red", main = "")
Compute spatial densities and relative risk.
eps = 320
dAll = density(unmark(T2), dimyx = c(eps, eps))
mean(dAll$v, na.rm = TRUE)
## [1] 3.168e-09
dH = density(H, dimyx = c(eps, eps))
dI = density(I, dimyx = c(eps, eps))
bw = bw.relrisk(T2, hmax = 1e+10)
rr = relrisk(T2, dimyx = c(eps, eps), sigma = bw)
Statewide mean relative risk.
mean(rr$v, na.rm = TRUE)
## [1] 0.1018
sd(as.vector(rr$v), na.rm = TRUE)
## [1] 0.002445
range(rr$v, na.rm = TRUE)
## [1] 0.09611 0.10561
Map them on a 4-panel plot.
par(mfrow = c(2, 2))
plot(dAll, col = terrain.colors(10), ribbon = FALSE, main = "")
contour(dAll, add = TRUE)
plot(dH, col = terrain.colors(10), ribbon = FALSE, main = "")
contour(dH, add = TRUE)
plot(dI, col = terrain.colors(10), ribbon = FALSE, main = "")
contour(dI, add = TRUE)
plot(rr, ribbon = FALSE, main = "")
contour(rr, add = TRUE)
Create a SpatialGridDataFrame (S4 class).
rr.sgdf = as(rr, "SpatialGridDataFrame")
proj4string(rr.sgdf) = CRS(proj4string(Torn2))
Map the relative risk.
require(maps)
## Loading required package: maps
brd = map("county", "south dakota")
brd_ll = map2SpatialLines(brd, proj4string = CRS(ll))
brd_lcc = spTransform(brd_ll, CRS(proj4string(Torn2)))
require(RColorBrewer)
## Loading required package: RColorBrewer
## Warning: package 'RColorBrewer' was built under R version 2.15.2
range(rr.sgdf$v, na.rm = TRUE)
## [1] 0.09611 0.10561
rng = seq(0.095, 0.107, 0.002)
cls = brewer.pal(7, "YlOrRd")
l3 = list("sp.lines", brd_lcc, lwd = 0.5, first = FALSE)
p1 = spplot(rr.sgdf, "v", col.regions = cls, at = rng, sp.layout = l3, colorkey = list(space = "bottom",
labels = list(cex = 1.2)), sub = list("Relative probability that a tornado will be intense (EF3 or stronger)",
cex = 1.2, font = 1))
p1
Download the county boundaries (all US).
tmp = download.file("http://www.nws.noaa.gov/geodata/catalog/county/data/c_02ap13.zip",
"c_02ap13.zip", mode = "wb")
unzip("c_02ap13.zip")
COUNTY = readOGR(dsn = ".", layer = "c_02ap13")
## OGR data source with driver: ESRI Shapefile
## Source: ".", layer: "c_02ap13"
## with 3312 features and 11 fields
## Feature type: wkbPolygon with 2 dimensions
COUNTY = COUNTY[COUNTY$STATE == "SD", ]
COUNTY = spTransform(COUNTY, CRS(proj4string(Torn2)))
Extract the mean relative risk per county.
COUNTY$v = over(COUNTY, rr.sgdf, fn = mean)
spplot(COUNTY, "v")
county.df = as.data.frame(COUNTY)
county.df[rev(order(county.df$v))[1:6], ]
## OBJECTID_1 STATE CWA COUNTYNAME FIPS TIME_ZONE FE_AREA LON LAT
## 1872 1873 SD FSD Union 46127 C se -96.66 42.83
## 1832 1833 SD FSD Lincoln 46083 C se -96.72 43.28
## 1856 1857 SD FSD Clay 46027 C se -96.98 42.91
## 1825 1826 SD FSD Moody 46101 C ec -96.67 44.02
## 1826 1827 SD FSD Minnehaha 46099 C se -96.79 43.67
## 1851 1852 SD ABR Deuel 46039 C ne -96.67 44.76
## Shape_Leng Shape_Area v
## 1872 2.500 0.1332 0.1054
## 1832 2.007 0.1662 0.1052
## 1856 1.428 0.1190 0.1052
## 1825 1.567 0.1515 0.1051
## 1826 2.049 0.2352 0.1051
## 1851 1.730 0.1874 0.1050
Get data on the location of cities.
tmp = download.file("http://www.nws.noaa.gov/geodata/catalog/national/data/ci08au12.zip",
"ci08au12.zip", mode = "wb")
unzip("ci08au12.zip")
US = readOGR(dsn = ".", layer = "ci08au12", p4s = ll)
## OGR data source with driver: ESRI Shapefile
## Source: ".", layer: "ci08au12"
## with 43603 features and 23 fields
## Feature type: wkbPoint with 2 dimensions
UST = spTransform(US, CRS(proj4string(Torn2)))
st = overlay(UST, STATE)
st = !is.na(st)
Cities = UST[st, ]
plot(unmark(T), pch = 16, cex = 0.4)
plot(Cities, pch = 15, col = "green", cex = 0.4, add = TRUE)
C = as.ppp(Cities)
window = as.owin(T)
C = C[window]
Create a map of distance from nearest city centers. Distance from nearest city is a covariate in the point pattern model (ppm).
Zc = distmap(C)
plot(Zc)
Examine the smoothed estimate of spatial density as a function of the covariate.
H = unmark(T[T$marks <= 2 & T$marks >= 1])
I = unmark(T[T$marks >= 3])
plot(rhohat(H, Zc, smoother = "kernel", method = "transform"), legend = FALSE,
main = "")
plot(rhohat(I, Zc, smoother = "kernel", method = "transform"), legend = FALSE,
main = "")
rhat = rhohat(unmark(T), Zc, smoother = "kernel", method = "transform")
## Warning: Some tiles with zero area contain points
## Warning: Some weights are zero
rptsPerYear = max(rhat$rho)/length(1950:2011) * stArea
areaPerYear = rptsPerYear * mean(Torn2$Area)
stArea/areaPerYear # Annual avg recurrence interval for any location
## [1] 4387
areaPerYear/stArea * 100 # Annual probability of a tornado at any location
## [1] 0.02279
Model the spatial density of the intense tornadoes using distance from city center as a covariate.
fit1 = ppm(I, ~Zc, covariates = list(Zc = Zc), correction = "border", rbord = 5)
## Warning: Some tiles with zero area contain points
## Warning: Some weights are zero
summary(fit1)
## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation)
## Model was fitted using glm()
## Algorithm converged
## Call:
## ppm(Q = I, trend = ~Zc, covariates = list(Zc = Zc), correction = "border",
## rbord = 5)
## Edge correction: "border"
## [border correction distance r = 5 ]
##
## ----------------------------------------------------
## Quadrature scheme = data + dummy + weights
## Data pattern:
## Planar point pattern: 64 points
## Average intensity 3.21e-10 points per square meter
##
## Window: polygonal boundary
## single connected closed polygon with 1700 vertices
## enclosing rectangle: [-653762, -35425]x[384624, 796798]meters
## Window area = 1.99367e+11 square meters
## Unit of length: 1 meter
##
##
## Dummy quadrature points:
## ( 32 x 32 grid, plus 4 corner points)
## Planar point pattern: 841 points
## Average intensity 4.22e-09 points per square meter
##
## Window: polygonal boundary
## single connected closed polygon with 1700 vertices
## enclosing rectangle: [-653762, -35425]x[384624, 796798]meters
## Window area = 1.99367e+11 square meters
## Unit of length: 1 meter
##
##
## Quadrature weights:
## (counting weights based on 32 x 32 array of rectangular tiles)
## All weights:
## range: [0, 2.49e+08] total: 2e+11
## Weights on data points:
## range: [0, 1.24e+08] total: 7.41e+09
## Weights on dummy points:
## range: [15600000, 2.49e+08] total: 1.92e+11
##
## ----------------------------------------------------
## FITTED MODEL:
##
## Nonstationary Poisson process
##
##
## ---- Intensity: ----
##
## Trend formula: ~Zc
## Model depends on external covariate 'Zc'
##
## Covariates provided:
## Zc: im
##
## Fitted coefficients for trend formula:
## (Intercept) Zc
## -2.052e+01 -1.886e-04
##
## Estimate S.E. Ztest CI95.lo CI95.hi
## (Intercept) -2.052e+01 2.420e-01 na -2.100e+01 -2.005e+01
## Zc -1.886e-04 3.613e-05 *** -2.594e-04 -1.178e-04
##
##
## ----------- gory details -----
##
## Fitted regular parameters (theta):
## (Intercept) Zc
## -2.052e+01 -1.886e-04
##
## Fitted exp(theta):
## (Intercept) Zc
## 1.224e-09 9.998e-01
Create another covariate (distance from nearest weak tornado).
Zh = distmap(H)
Model the spatial density using two covariates.
fit2 = ppm(I, ~Zc + Zh, covariates = list(Zc = Zc, Zh = Zh), correction = "border",
rbord = 5)
## Warning: Some tiles with zero area contain points
## Warning: Some weights are zero
summary(fit2)
## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation)
## Model was fitted using glm()
## Algorithm converged
## Call:
## ppm(Q = I, trend = ~Zc + Zh, covariates = list(Zc = Zc, Zh = Zh),
## correction = "border", rbord = 5)
## Edge correction: "border"
## [border correction distance r = 5 ]
##
## ----------------------------------------------------
## Quadrature scheme = data + dummy + weights
## Data pattern:
## Planar point pattern: 64 points
## Average intensity 3.21e-10 points per square meter
##
## Window: polygonal boundary
## single connected closed polygon with 1700 vertices
## enclosing rectangle: [-653762, -35425]x[384624, 796798]meters
## Window area = 1.99367e+11 square meters
## Unit of length: 1 meter
##
##
## Dummy quadrature points:
## ( 32 x 32 grid, plus 4 corner points)
## Planar point pattern: 841 points
## Average intensity 4.22e-09 points per square meter
##
## Window: polygonal boundary
## single connected closed polygon with 1700 vertices
## enclosing rectangle: [-653762, -35425]x[384624, 796798]meters
## Window area = 1.99367e+11 square meters
## Unit of length: 1 meter
##
##
## Quadrature weights:
## (counting weights based on 32 x 32 array of rectangular tiles)
## All weights:
## range: [0, 2.49e+08] total: 2e+11
## Weights on data points:
## range: [0, 1.24e+08] total: 7.41e+09
## Weights on dummy points:
## range: [15600000, 2.49e+08] total: 1.92e+11
##
## ----------------------------------------------------
## FITTED MODEL:
##
## Nonstationary Poisson process
##
##
## ---- Intensity: ----
##
## Trend formula: ~Zc + Zh
## Model depends on external covariates 'Zc' and 'Zh'
##
## Covariates provided:
## Zc: im
## Zh: im
##
## Fitted coefficients for trend formula:
## (Intercept) Zc Zh
## -2.012e+01 -1.676e-04 -5.369e-05
##
## Estimate S.E. Ztest CI95.lo CI95.hi
## (Intercept) -2.012e+01 2.773e-01 na -2.067e+01 -1.958e+01
## Zc -1.676e-04 3.730e-05 *** -2.407e-04 -9.448e-05
## Zh -5.369e-05 2.064e-05 ** -9.415e-05 -1.323e-05
##
##
## ----------- gory details -----
##
## Fitted regular parameters (theta):
## (Intercept) Zc Zh
## -2.012e+01 -1.676e-04 -5.369e-05
##
## Fitted exp(theta):
## (Intercept) Zc Zh
## 1.825e-09 9.998e-01 9.999e-01
Can you drop one of the covariates? no
fit3 = ppm(I, ~Zh, covariates = list(Zh = Zh), correction = "border", rbord = 5)
## Warning: Some tiles with zero area contain points
## Warning: Some weights are zero
AIC(fit1)
## [1] 2852
AIC(fit2)
## [1] 2845
AIC(fit3)
## [1] 2867
2 * (as.numeric(logLik(fit2)) - as.numeric(logLik(fit3)))/I$n
## [1] 0.3733
Does adding a term for clustering improve things?
fit4 = kppm(I, ~Zc + Zh, covariates = list(Zc = Zc, Zh = Zh), interaction = "Thomas",
correction = "border", rbord = 5)
## Warning: Some tiles with zero area contain points
## Warning: Some weights are zero
fit4
## Inhomogeneous cluster point process model
## Fitted to point pattern dataset 'I'
## Fitted using the inhomogeneous K-function
##
## Trend formula: ~Zc + Zh
##
## Fitted coefficients for trend formula:
## (Intercept) Zc Zh
## -2.012e+01 -1.676e-04 -5.369e-05
##
## Cluster model: Thomas process
## Fitted cluster parameters:
## kappa sigma2
## 1.000e+00 2.816e+09
## Mean cluster size: [pixel image]
Prediction correlation.
M = quadrat.test(fit2, nx = 8, ny = 8)
## Warning: Some expected counts are small; chi^2 approximation may be
## inaccurate
cor.test(M$expected, M$observed)
##
## Pearson's product-moment correlation
##
## data: M$expected and M$observed
## t = 4.336, df = 56, p-value = 6.101e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2792 0.6726
## sample estimates:
## cor
## 0.5014