This lecture is about Spatial Point Pattern Analysis with application to environmental issues. The analysis of point patterns is focused on the spatial distribution of the observed events and making inference about the underlying process that generated them. In particular, there are two main issues of interest: the distribution of events in space and the existence of possible interactions between them.
First, we install and load most advanced R package for handling
spatial data: sf()
library(sf)
In the following we will consider locations of asthmatic and non-asthmatic cases of children in North Derbyshire, UK, 1992.
In 1992, a survey was conducted of 2133 school-children in 10 schools within the area, five of which were chosen because the head teacher had previously expressed concern about apparently high asthma levels among the children. Parents completed a questionnaire for each child, which included the question has the child ever suffered from asthma?
asthma_spdf <- st_read("sppa_bundle/spasthma.shp")
Reading layer `spasthma' from data source
`C:\Users\tkuan\DataAndInfo\Class 1 spatial point pattern analysis of pollution and disease-20241122\sppa_bundle\spasthma.shp'
using driver `ESRI Shapefile'
Simple feature collection with 1291 features and 9 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 0.00401944 ymin: 0.003024965 xmax: 0.9424529 ymax: 0.8257563
CRS: NA
summary(asthma_spdf)
Nsmokers Asthma HayFever Age
Min. :0.0000 Length:1291 Min. :0.0000 Min. : 0.000
1st Qu.:0.0000 Class :character 1st Qu.:0.0000 1st Qu.: 7.000
Median :0.0000 Mode :character Median :0.0000 Median : 8.900
Mean :0.5926 Mean :0.1387 Mean : 8.639
3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:10.400
Max. :3.0000 Max. :1.0000 Max. :15.000
Gender roaddist2 d2source1 d2source2
Min. :0.000 Min. : 47 Min. :0.000013 Min. :0.000257
1st Qu.:1.000 1st Qu.: 131615 1st Qu.:0.000615 1st Qu.:0.003706
Median :1.000 Median : 652486 Median :0.001789 Median :0.005584
Mean :1.479 Mean : 976526 Mean :0.006816 Mean :0.006291
3rd Qu.:2.000 3rd Qu.:1821533 3rd Qu.:0.015889 3rd Qu.:0.007848
Max. :9.000 Max. :5937999 Max. :0.031076 Max. :0.017082
d2source3 geometry
Min. :0.00000 POINT :1291
1st Qu.:0.01465 epsg:NA: 0
Median :0.01904
Mean :0.01835
3rd Qu.:0.02455
Max. :0.04134
plot(asthma_spdf)
library(tidyverse)
ggplot(asthma_spdf) +
geom_sf(aes(color = as.factor(Asthma)), alpha = 0.5, shape = 1) +
theme_minimal()
ggplot(asthma_spdf) +
geom_sf() +
facet_wrap(~Asthma) +
theme_minimal()
library(sp)
library(spatstat)
#asthma_sp <- as(asthma_spdf, "Spatial")
#asthma_sf <- st_as_sf(asthma_sp)
#asthma_sp <- as(asthma_spdf, "Spatial")
asthma_sp_case <- asthma_sp[asthma_sp$Asthma == "case", ]
asthma_sp_control <- asthma_sp[asthma_sp$Asthma == "control", ]
window<- owin(c(0, 1), c(0, 1))
head(asthma_sp_case)
asthma_ppp_case <- ppp(x = coordinates(asthma_sp_case)[,1],
y = coordinates(asthma_sp_case)[,2],
window = window)
Warning: data contain duplicated points
asthma_ppp_control <- ppp(x = coordinates(asthma_sp_control)[,1],
y = coordinates(asthma_sp_control)[,2],
window = window)
Warning: data contain duplicated points
head(asthma_ppp_case)
Planar point pattern: 6 points
window: rectangle = [0, 1] x [0, 1] units
set.seed(120109)
r<- seq(0, 0.5, by = 0.001)
envcase <- envelope(asthma_ppp_case , fun=Gest, r=r, nrank=2, nsim=99)
Generating 99 simulations of CSR ...
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
96, 97, 98,
99.
Done.
envcontrol <- envelope(asthma_ppp_control, fun=Gest, r=r, nrank=2, nsim=99)
Generating 99 simulations of CSR ...
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
96, 97, 98,
99.
Done.
Gasthma <- rbind(envcase, envcontrol)
Gasthma <- cbind(Gasthma,
y = rep(c("case", "control"), each=length(r)))
head(Gasthma)
Function value object (class ‘fv’)
for the function r -> G(r)
.....................................................................
.....................................................................
Default plot formula: .~.x
where “.” stands for ‘obs’, ‘theo’, ‘hi’, ‘lo’
Recommended range of argument r: [0, 0.005]
Available range of argument r: [0, 0.005]
ggplot(Gasthma, aes(theo, obs)) +
geom_line() +
geom_ribbon(aes(ymin = lo, ymax = hi), fill = "lightblue", alpha = 0.5) +
labs(x=expression(G(r)), y=expression(hat(G)(r))) +
facet_wrap(~y) +
theme_minimal()
asthma_industry <- st_read("sppa_bundle/spsrc.shp")
Reading layer `spsrc' from data source
`C:\Users\tkuan\DataAndInfo\Class 1 spatial point pattern analysis of pollution and disease-20241122\sppa_bundle\spsrc.shp'
using driver `ESRI Shapefile'
Simple feature collection with 3 features and 1 field
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 0.2190668 ymin: 0.2114311 xmax: 0.7489117 ymax: 0.5834719
CRS: NA
asthma_mroads <- st_read("sppa_bundle/sproads.shp")
Reading layer `sproads' from data source
`C:\Users\tkuan\DataAndInfo\Class 1 spatial point pattern analysis of pollution and disease-20241122\sppa_bundle\sproads.shp'
using driver `ESRI Shapefile'
Simple feature collection with 1 feature and 1 field
Geometry type: MULTILINESTRING
Dimension: XY
Bounding box: xmin: -0.006941459 ymin: 0.04139416 xmax: 0.9652718 ymax: 0.7954428
CRS: NA
ggplot(asthma_spdf) +
geom_sf(aes(color = Asthma)) +
geom_sf(data = asthma_industry, size = 5, aes(color = Source), alpha = 0.7) +
geom_sf(data = asthma_mroads) +
theme_minimal(base_size = 10) +
scale_color_brewer(palette = "Set1") +
theme(legend.position ="right")
library(GGally)
asthma_spdf$Asthma <- as.factor(asthma_spdf$Asthma) # convert into factor
asthma_spdf$asthma <- ifelse(asthma_spdf$Asthma == "case", 1, 0) # add a binary copy
asthma_spdf <- asthma_spdf[, c(2,11,1,3:10)] #reorder
summary(asthma_spdf)
Asthma asthma Nsmokers HayFever
case : 215 Min. :0.0000 Min. :0.0000 Min. :0.0000
control:1076 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
Median :0.0000 Median :0.0000 Median :0.0000
Mean :0.1665 Mean :0.5926 Mean :0.1387
3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
Max. :1.0000 Max. :3.0000 Max. :1.0000
Age Gender roaddist2 d2source1
Min. : 0.000 Min. :0.000 Min. : 47 Min. :0.000013
1st Qu.: 7.000 1st Qu.:1.000 1st Qu.: 131615 1st Qu.:0.000615
Median : 8.900 Median :1.000 Median : 652486 Median :0.001789
Mean : 8.639 Mean :1.479 Mean : 976526 Mean :0.006816
3rd Qu.:10.400 3rd Qu.:2.000 3rd Qu.:1821533 3rd Qu.:0.015889
Max. :15.000 Max. :9.000 Max. :5937999 Max. :0.031076
d2source2 d2source3 geometry
Min. :0.000257 Min. :0.00000 POINT :1291
1st Qu.:0.003706 1st Qu.:0.01465 epsg:NA: 0
Median :0.005584 Median :0.01904
Mean :0.006291 Mean :0.01835
3rd Qu.:0.007848 3rd Qu.:0.02455
Max. :0.017082 Max. :0.04134
pairs_asthma<- asthma_spdf %>%
st_drop_geometry() %>%
select(where(is.numeric)) %>%
na.omit()
ggpairs(pairs_asthma) + theme_minimal(base_size = 6)
asthma_spdf$Xcoord <- st_coordinates(asthma_spdf)[,“X”]
asthma_spdf$Ycoord <-
st_coordinates(asthma_spdf)[,“Y”]asthma_spdf$Xcoord <-
st_coordinates(asthma_spdf)[,“X”] asthma_spdf$Ycoord <-
st_coordinates(asthma_spdf)[,“Y”]
summary(asthma_spdf)
asthma_spdf <- asthma_spdf[!(asthma_spdf$Gender >2), ]
library(mgcv)
asthma_spdf$Xcoord <- st_coordinates(asthma_spdf)[,"X"]
asthma_spdf$Ycoord <- st_coordinates(asthma_spdf)[,"Y"]
asthma1 <- gam(asthma ~ s(Xcoord, Ycoord), asthma_spdf, family = binomial)
summary(asthma1)
Family: binomial
Link function: logit
Formula:
asthma ~ s(Xcoord, Ycoord)
Parametric coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.62331 0.07559 -21.47 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Approximate significance of smooth terms:
edf Ref.df Chi.sq p-value
s(Xcoord,Ycoord) 3.337 4.313 7.74 0.11
R-sq.(adj) = 0.00498 Deviance explained = 0.865%
UBRE = -0.099947 Scale est. = 1 n = 1290
asthma2 <- gam(asthma ~ d2source1 + s(Xcoord, Ycoord), asthma_spdf, family = binomial)
summary(asthma2)
Family: binomial
Link function: logit
Formula:
asthma ~ d2source1 + s(Xcoord, Ycoord)
Parametric coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.3643 0.1904 -7.164 7.85e-13 ***
d2source1 -38.7362 26.5084 -1.461 0.144
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Approximate significance of smooth terms:
edf Ref.df Chi.sq p-value
s(Xcoord,Ycoord) 2.595 3.115 2.695 0.486
R-sq.(adj) = 0.00539 Deviance explained = 0.92%
UBRE = -0.10004 Scale est. = 1 n = 1290
–> Suing industry.
asthma3 <- gam(asthma ~ d2source1 + d2source2 + s(Xcoord, Ycoord), asthma_spdf, family = binomial)
summary(asthma3)
Family: binomial
Link function: logit
Formula:
asthma ~ d2source1 + d2source2 + s(Xcoord, Ycoord)
Parametric coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.3848 0.1776 -7.795 6.42e-15 ***
d2source1 0.0000 0.0000 NaN NaN
d2source2 -38.7362 26.5084 -1.461 0.144
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Approximate significance of smooth terms:
edf Ref.df Chi.sq p-value
s(Xcoord,Ycoord) 2.595 3.115 8.93 0.0393 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Rank: 31/32
R-sq.(adj) = 0.00539 Deviance explained = 0.92%
UBRE = -0.10004 Scale est. = 1 n = 1290
asthma4 <- gam(asthma ~ sqrt(d2source1) + sqrt(d2source2) + sqrt(d2source3) + s(Xcoord, Ycoord), asthma_spdf, family = binomial)
summary(asthma4)
Family: binomial
Link function: logit
Formula:
asthma ~ sqrt(d2source1) + sqrt(d2source2) + sqrt(d2source3) +
s(Xcoord, Ycoord)
Parametric coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.3840 0.7767 -3.070 0.00214 **
sqrt(d2source1) -1.7035 5.7647 -0.295 0.76761
sqrt(d2source2) -9.1876 5.3833 -1.707 0.08788 .
sqrt(d2source3) 12.5627 7.5391 1.666 0.09565 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Approximate significance of smooth terms:
edf Ref.df Chi.sq p-value
s(Xcoord,Ycoord) 2 2.001 5.468 0.065 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
R-sq.(adj) = 0.00569 Deviance explained = 1.09%
UBRE = -0.099413 Scale est. = 1 n = 1290
asthma5 <- gam(asthma ~ sqrt(d2source1) + sqrt(d2source2) + sqrt(d2source3) + sqrt(roaddist2) + s(Xcoord, Ycoord), asthma_spdf, family = binomial)
summary(asthma5)
Family: binomial
Link function: logit
Formula:
asthma ~ sqrt(d2source1) + sqrt(d2source2) + sqrt(d2source3) +
sqrt(roaddist2) + s(Xcoord, Ycoord)
Parametric coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.4517921 0.7843198 -3.126 0.00177 **
sqrt(d2source1) -1.5765054 5.7675852 -0.273 0.78459
sqrt(d2source2) -9.7617630 5.4384735 -1.795 0.07266 .
sqrt(d2source3) 12.5369289 7.5638181 1.657 0.09742 .
sqrt(roaddist2) 0.0001251 0.0001687 0.742 0.45828
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Approximate significance of smooth terms:
edf Ref.df Chi.sq p-value
s(Xcoord,Ycoord) 2.001 2.001 6.013 0.0495 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
R-sq.(adj) = 0.00519 Deviance explained = 1.14%
UBRE = -0.098288 Scale est. = 1 n = 1290
asthma6 <- gam(asthma ~ sqrt(d2source1) + sqrt(d2source2) + sqrt(d2source3) + sqrt(roaddist2) + Nsmokers + Age + HayFever + I(Gender == 1) + s(Xcoord, Ycoord), asthma_spdf, family = binomial)
summary(asthma6)
Family: binomial
Link function: logit
Formula:
asthma ~ sqrt(d2source1) + sqrt(d2source2) + sqrt(d2source3) +
sqrt(roaddist2) + Nsmokers + Age + HayFever + I(Gender ==
1) + s(Xcoord, Ycoord)
Parametric coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.420e+00 9.116e-01 -2.654 0.00795 **
sqrt(d2source1) 1.285e+00 6.042e+00 0.213 0.83158
sqrt(d2source2) -1.040e+01 5.767e+00 -1.804 0.07125 .
sqrt(d2source3) 1.175e+01 7.851e+00 1.496 0.13456
sqrt(roaddist2) 1.479e-04 1.718e-04 0.861 0.38926
Nsmokers 4.740e-02 9.779e-02 0.485 0.62783
Age -6.204e-02 3.763e-02 -1.649 0.09917 .
HayFever 1.182e+00 1.872e-01 6.316 2.68e-10 ***
I(Gender == 1)TRUE 3.578e-01 1.562e-01 2.292 0.02193 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Approximate significance of smooth terms:
edf Ref.df Chi.sq p-value
s(Xcoord,Ycoord) 2.001 2.001 7.642 0.0219 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
R-sq.(adj) = 0.0398 Deviance explained = 4.91%
UBRE = -0.12607 Scale est. = 1 n = 1290
library(texreg)
screenreg(
list(asthma1, asthma2, asthma3, asthma4, asthma5, asthma6),
digits = 3
)
=======================================================================================================
Model 1 Model 2 Model 3 Model 4 Model 5 Model 6
-------------------------------------------------------------------------------------------------------
(Intercept) -1.623 *** -1.364 *** -1.385 *** -2.384 ** -2.452 ** -2.420 **
(0.076) (0.190) (0.178) (0.777) (0.784) (0.912)
EDF: s(Xcoord,Ycoord) 3.337 2.595 2.595 * 2.000 2.001 * 2.001 *
(4.313) (3.115) (3.115) (2.001) (2.001) (2.001)
d2source1 -38.736 0.000
(26.508) (0.000)
d2source2 -38.736
(26.508)
sqrt(d2source1) -1.703 -1.577 1.285
(5.765) (5.768) (6.042)
sqrt(d2source2) -9.188 -9.762 -10.402
(5.383) (5.438) (5.767)
sqrt(d2source3) 12.563 12.537 11.748
(7.539) (7.564) (7.851)
sqrt(roaddist2) 0.000 0.000
(0.000) (0.000)
Nsmokers 0.047
(0.098)
Age -0.062
(0.038)
HayFever 1.182 ***
(0.187)
Gender == 1TRUE 0.358 *
(0.156)
-------------------------------------------------------------------------------------------------------
AIC 1161.068 1160.945 1160.945 1161.757 1163.208 1127.367
BIC 1183.458 1184.666 1184.666 1192.734 1199.348 1184.157
Log Likelihood -576.197 -575.877 -575.877 -574.878 -574.604 -552.683
Deviance 1152.394 1151.755 1151.755 1149.756 1149.207 1105.366
Deviance explained 0.009 0.009 0.009 0.011 0.011 0.049
Dispersion 1.000 1.000 1.000 1.000 1.000 1.000
R^2 0.005 0.005 0.005 0.006 0.005 0.040
GCV score -0.100 -0.100 -0.100 -0.099 -0.098 -0.126
Num. obs. 1290 1290 1290 1290 1290 1290
Num. smooth terms 1 1 1 1 1 1
=======================================================================================================
*** p < 0.001; ** p < 0.01; * p < 0.05
library(viridis)
library(viridisLite)
asthma_spdf$predictionsM6 <- asthma6$fitted.values
ggplot(asthma_spdf) +
geom_sf(aes(color=predictionsM6)) +
geom_sf(data = asthma_industry, size = 5, aes(shape=Source), alpha=0.7)+
geom_sf(data = asthma_mroads)+
theme_minimal(base_size=10) +
scale_color_viridis_c(direction = -1) + # larger prob get darker values
theme(legend.position="right")