This Rpubs file was created for paper about Baddeley-Geyer process and its use in agglomeration modelling, for reproducibility of results
Paper uses simulated data based on Kopczewska et al. (2017) codes and real data obtained upon request from Sweeney & Gómez-Antonio (2016).
File is divided into two parts - simulation approach and real data application. In both cases, we start with data reading, then proceed to hyperparameter optimization, model estimation and, finally, to goodness-of-fit.
# set working directory – please set own path
setwd("C:/my_folder_with_data") # please set own path
# R version 4.5.2
library(spatstat) # version 3.5-0
library(sp) # version 2.2-0
library(spdep) # version 1.4-1
set.seed(1609211) # for reproducibility
# line pattern of 'roads'
L <- psp(runif(10, min=10, max=60),
runif(10, min=10, max=60),
runif(10, min=10, max=60), runif(10, min=10, max=60),
owin(c(10,60), c(10,60))) # line segment pattern
Y <- rpoisppOnLines(0.15, L) # Poisson points on lines
ex<-runif(Y$n, min=-2, max=2)
ey<-runif(Y$n, min=-2, max=2)
Y$x <- Y$x + ex # jittering the coordinates, so 'firms' are not directly on the 'roads'
Y$y <- Y$y + ey
n<-20 # creation of 'core'
x1<-runif(n, min=30, max=40)
y1<-runif(n, min=30, max=40)
punkty<-cbind(x1, y1)
xcord<-punkty[,1]
ycord<-punkty[,2]
centrum <- ppp(xcord, ycord, c(10,60), c(10,60)) # study window
cp <- superimpose(Y, centrum) # study pattern, 52 points
cp <- unmark(cp)
linie <- as.SpatialLines.psp(L) # does not work - maptools:: compatible with spatstat:: and R needed
# so we will load data saved earlier, containing pixel image of distance to 'roads'
load("simulated_roads_aic_ppl.RData")
# !!! If you do possess older (AND COMPATIBLE) versions of both R, maptools:: and spatstat::
# you may recreate 'roads' with the code below:
linie <- as.SpatialLines.psp(L)
cp <- unmark(cp)
pkt <- cbind(cp$x, cp$y)
pkt.sp<-SpatialPoints(pkt)
proj4string(pkt.sp)<-CRS("+proj=longlat +datum=NAD83") # spherical
pkt.sp<-spTransform(pkt.sp, CRS("+proj=longlat +datum=NAD83"))
proj4string(linie)<-CRS("+proj=longlat +datum=NAD83") # spherical
roads.sp<-spTransform(linie, CRS("+proj=longlat +datum=NAD83"))
dtl <- dist2Line(pkt.sp, roads.sp)
dtl[,1] <- dtl[,1]/10000*0.85
marks(cp) <- dtl[,1]
drogi <- Smooth.ppp(cp)
# and then simply plot drogi pixel image as on Fig. 1cPlot below represents study pattern and covariates:
plot(L, main="")
points(cp, pch="*", col="red") # Figure 1a
s1<-35
s2<-35
scord<-as.matrix(cbind(s1,s2))
srodek_ppp <- ppp(x = scord[1], y = scord[2], c(10,60), c(10,60)) # creation of 'centre'
do_sr <- crossdist(cp, srodek_ppp) # calculation of distances to 'centre'
marks(cp) <- do_sr
srodek <- idw(cp, at='pixels') # pixel image covariate
plot(srodek) # Figure 1b
plot(drogi) # Figure 1cFigure 1: Study pattern (a) and covariates for simulated data: b – distance to centre, c – distance to the closest line from line pattern of roads
## Source: own calculations in spatstat::
After creating pattern and covariates, we can start with hyperparameters optimisation; interaction graph shows possible radii intervals as (approximately) [0.1, 5] and [5,10]:
m1 <- ppm(cp ~ drogi+srodek, PairPiece(1:10)) # check for all possible radii
f1 <- fitin(m1)
plot(f1, main="") # Figure 2Figure 2: Interaction curve, simulated data
## Source: own calculations in spatstat::
We can now optimise profile pseudolikelihood on two abovementioned intervals, in order to find two sets of parameters for Baddeley-Geyer process to estimate: (r1, r2) - radii and (s1, s2) - saturation parameters:
df3 <- expand.grid(r=seq(1,5, by=0.25), sat=c(1,2,3,4))
pG30 <- profilepl(df3, Geyer, cp ~ drogi+srodek, aic=TRUE, rbord=5) # progress output omitted
r1 <- pG30$fit$interaction$par$r # first radius
s1 <- pG30$fit$interaction$par$sat # first saturation parameter
df4 <- expand.grid(r=seq(5, 10, by=0.25), sat=c(1,2,3,4))
pG31 <- profilepl(df4, Geyer, cp ~ drogi+srodek, aic=TRUE, rbord=5) # progress output omitted
r2 <- pG31$fit$interaction$par$r # second radius
s2 <- pG31$fit$interaction$par$sat # second saturation parameter
m1 <- ppm(cp ~ drogi+srodek, BadGey(c(r1,r2), c(s1,s2))) # pattern ~ covariates, interaction(param)After model is estimated, we can check its goodness-of-fit with QQplot and residual G function plot:
# Goodness-of-fit
qqplot.ppm(m1) # Figure 3a
Gr <- Gres(m1)
plot(Gr) # Figure 3bFigure 3: QQ-plot and residual G function
## Source: own calculations in spatstat::
After estimation of model 1 (object m1 from above), we performed a following simulation: we simulated 1000 realisation of estimated models which preserve the number of points in each simulated pattern (set with rmhcontrol(p=1)). Then, for each realisation, we estimated two models - one ‘true’, with two sets of parameters and one with r1 and s1 only. For each of models, we preserved values of AIC, pseudo-likelihood and coefficients (not used in the latter comparison):
sim_m1 <- simulate.ppm(m1, nsim=1000, control=rmhcontrol(p=1))
eksp1_2prom <- matrix(nrow=length(sim_m1), ncol=2)
eksp1_1prom <- matrix(nrow=length(sim_m1), ncol=2)
colnames(eksp1_2prom) <- c("roads", "center")
colnames(eksp1_1prom) <- c("roads", "center")
akaike_mtrx_eksp1 <- matrix(nrow=length(sim_m1), ncol=2)
colnames(akaike_mtrx_eksp1) <- c("2 rad", "1 rad")
plklhd_eksp <- matrix(nrow=length(sim_m1), ncol=2)
colnames(plklhd_eksp) <- c("2 rad", "1 rad")
for (i in 1:length(sim_m1)) {
pattern <- sim_m1[[i]]
eksp1_mod2prom <- ppm(pattern ~ drogi+srodek, BadGey(c(r1,r2), c(s1,s2)))
eksp1_2prom[i,1] <- eksp1_mod2prom$coef[2]
eksp1_2prom[i,2] <- eksp1_mod2prom$coef[3]
akaike_mtrx_eksp1[i,1] <- AIC(eksp1_mod2prom, takeuchi=F)
plklhd_eksp[i,1] <- logLik(eksp1_mod2prom)
eksp1_mod1prom <- ppm(pattern ~ drogi+srodek, Geyer(r1,s1))
eksp1_1prom[i,1] <- eksp1_mod1prom$coef[2]
eksp1_1prom[i,2] <- eksp1_mod1prom$coef[3]
akaike_mtrx_eksp1[i,2] <- AIC(eksp1_mod1prom, takeuchi=F)
plklhd_eksp[i,2] <- logLik(eksp1_mod1prom)
}Comparison of two specifications in terms of AIC and logLik is given below:
summary(akaike_mtrx_eksp1)## 2 rad 1 rad
## Min. :173.0 Min. :191.4
## 1st Qu.:234.9 1st Qu.:257.4
## Median :257.6 Median :280.0
## Mean :256.1 Mean :278.6
## 3rd Qu.:278.1 3rd Qu.:299.5
## Max. :329.8 Max. :354.3
summary(plklhd_eksp)## 2 rad 1 rad
## Min. :-159.88 Min. :-173.14
## 1st Qu.:-134.04 1st Qu.:-145.75
## Median :-123.81 Median :-135.98
## Mean :-123.07 Mean :-135.30
## 3rd Qu.:-112.44 3rd Qu.:-124.72
## Max. : -81.51 Max. : -91.72
Lastly, we can compare densities of AIC and logLik for both models:
plot(density(akaike_mtrx_eksp1[,2]), col="black", main="")
lines(density(akaike_mtrx_eksp1[,1]), col="red", add=T)
legend("topleft", c("1rad", "2rad"), col=c("black", "red"), lty=1)
plot(density(plklhd_eksp[,2]), col="green", lty=2, main="")
lines(density(plklhd_eksp[,1]), col="blue", add=T, lty=2)
legend("topleft", c("1rad", "2rad"), col=c("green", "blue"), lty=2)Figure 4:
## Source: own calculations in spatstat::
Data for this part of analysis was obtained uopn request from Sweeney & Gómez-Antonio, and unfortunately cannot be shown here. We will focus here on results only:
mod.d <- ppm(unmark(d), f0, covariates=e.var, PairPiece(0.1:10))
f.d <- fitin(mod.d)
plot(f.d, main="") # Figure 5Figure 5: Interaction plot – real data
## Source: own calculations in spatstat::
After analysis of interaction plot, two sets of radii was found through maximisation of profile pseudo-likelihood on two intervals ([0.2, 2], [2.6, 4]) with saturation values from {1, 2, …, 8}. Estimated coefficients and interaction parameters are given below:
# hyperperametr optimisation part is skipped
g0.new <- update(g0, BadGey(c(0.8, 2.8), c(3, 6)))
smr <-summary(g0.new)
smr$coefs.SE.CI[,c(1,5)]## Estimate Ztest
## (Intercept) -2.383866684 ***
## CTR -0.097469412 ***
## R.R -0.109367701 *
## R.A -0.062825865 **
## R.M40 0.046816344 *
## Interact.1 0.490020182 ***
## Interact.2 0.211466477 ***
## CTR:R.R 0.004918363
Figure 6 shows comparison of residual G functions for original and modified models:
par(mfrow=c(1,2))
plot(Gres(g0))
plot(Gres(g0.new))
par(mfrow=c(1,1)) # Figure 6Figure 6: Residual G function: left – original model, right – new model
## Source: own calculations in spatstat::
Figure 7 shows comparison of Pearson residuals for original and modified models:
par(mfrow=c(1,2))
diagnose.ppm(g0, which="smooth", type="pearson")
diagnose.ppm(g0.new, which="smooth", type="pearson")
par(mfrow=c(1,1)) # Figure 7Figure 7: Pearson residuals: left – original model, right – new model
## Source: own calculations in spatstat::
Figure 8 shows lurking plot of modified model’s coefficients:
# Code source: Miguel Gómez-Antonio, Stuart Sweeney
# CTR
# example show for CTR covariate
# one can substitute var for e.var$R.A, e.var$R.R and e.var$R.M40, respectively
# and varn for "Road A", "Road R" and "Road M40", respectively
# in order to reproduce other plots
obj<-g0.new # estimated model
var<-e.var$CTR # chosen covariate
varn<-"Center" # string varname
type_='raw'
l<-lurking(obj,var,plot.it=F,covname=varn,type=type_)
l$theoretical$sd<-lurking(update(obj,interaction=Poisson()),var,plot.it=F,covname=varn,type=type_)$theoretical$sd
plot(l[[2]]$covariate,l[[2]]$mean,lty=2,type="l",ylab="cumulative residuals",xlab=varn,ylim=c(min(l[[2]]$mean-2*l[[2]]$sd),max(l[[2]]$mean+2*l[[2]]$sd)))
lines(l[[2]]$covariate,l[[2]]$mean+1.96*l[[2]]$sd,lty=3)
lines(l[[2]]$covariate,l[[2]]$mean-1.96*l[[2]]$sd,lty=3)
lines(l[[1]]$covariate,l[[1]]$value)
title(main=varn,line=-1,cex=0.5) # Figure 8aFigure 8: Lurking plots – new model
## Source: own calculations in spatstat::
Figure 9 shows L function for a modified model:
L.g0 <- envelope(g0.new, fun=Lest, nsim=99, global=T)
plot(L.g0,.-r~r)Figure 9: L function – new model
## Source: own calculations in spatstat::