This Rpubs file was created for paper about Baddeley-Geyer process and its use in agglomeration modelling, for reproducibility of results

Paper uses simulated data based on Kopczewska et al. (2017) codes and real data obtained upon request from Sweeney & Gómez-Antonio (2016).

File is divided into two parts - simulation approach and real data application. In both cases, we start with data reading, then proceed to hyperparameter optimization, model estimation and, finally, to goodness-of-fit.

Simulation approach

Simulated study pattern and covariates

# set working directory – please set own path
setwd("C:/my_folder_with_data") # please set own path

# R version 4.5.2
library(spatstat) # version 3.5-0
library(sp) # version 2.2-0
library(spdep) # version 1.4-1
set.seed(1609211) # for reproducibility

# line pattern of 'roads'
L <- psp(runif(10, min=10, max=60), 
         runif(10, min=10, max=60), 
         runif(10, min=10, max=60), runif(10, min=10, max=60), 
         owin(c(10,60), c(10,60))) # line segment pattern
Y <- rpoisppOnLines(0.15, L) # Poisson points on lines

ex<-runif(Y$n, min=-2, max=2)
ey<-runif(Y$n, min=-2, max=2)
Y$x <- Y$x + ex # jittering the coordinates, so 'firms' are not directly on the 'roads'
Y$y <- Y$y + ey

n<-20 # creation of 'core'
x1<-runif(n, min=30, max=40)
y1<-runif(n, min=30, max=40)
punkty<-cbind(x1, y1)

xcord<-punkty[,1]
ycord<-punkty[,2]

centrum <- ppp(xcord, ycord, c(10,60), c(10,60)) # study window
cp <- superimpose(Y, centrum) # study pattern, 52 points
cp <- unmark(cp)

linie <- as.SpatialLines.psp(L) # does not work - maptools:: compatible with spatstat:: and R needed
# so we will load data saved earlier, containing pixel image of distance to 'roads'
load("simulated_roads_aic_ppl.RData")

# !!! If you do possess older (AND COMPATIBLE) versions of both R, maptools:: and spatstat::
# you may recreate 'roads' with the code below:
linie <- as.SpatialLines.psp(L) 
cp <- unmark(cp)
pkt <- cbind(cp$x, cp$y)
pkt.sp<-SpatialPoints(pkt)
proj4string(pkt.sp)<-CRS("+proj=longlat +datum=NAD83") # spherical
pkt.sp<-spTransform(pkt.sp, CRS("+proj=longlat +datum=NAD83"))
proj4string(linie)<-CRS("+proj=longlat +datum=NAD83") # spherical
roads.sp<-spTransform(linie, CRS("+proj=longlat +datum=NAD83"))
dtl <- dist2Line(pkt.sp, roads.sp)
dtl[,1] <- dtl[,1]/10000*0.85
marks(cp) <- dtl[,1]
drogi <- Smooth.ppp(cp)
# and then simply plot drogi pixel image as on Fig. 1c

Plot below represents study pattern and covariates:

plot(L, main="")
points(cp, pch="*", col="red") # Figure 1a

s1<-35
s2<-35
scord<-as.matrix(cbind(s1,s2))
srodek_ppp <- ppp(x = scord[1], y = scord[2], c(10,60), c(10,60)) # creation of 'centre'
do_sr <- crossdist(cp, srodek_ppp) # calculation of distances to 'centre'
marks(cp) <- do_sr
srodek <- idw(cp, at='pixels') # pixel image covariate
plot(srodek) # Figure 1b

plot(drogi) # Figure 1c

Figure 1: Study pattern (a) and covariates for simulated data: b – distance to centre, c – distance to the closest line from line pattern of roads

## Source: own calculations in spatstat::

Estimation results

After creating pattern and covariates, we can start with hyperparameters optimisation; interaction graph shows possible radii intervals as (approximately) [0.1, 5] and [5,10]:

m1 <- ppm(cp ~ drogi+srodek, PairPiece(1:10)) # check for all possible radii
f1 <- fitin(m1)
plot(f1, main="") # Figure 2

Figure 2: Interaction curve, simulated data

## Source: own calculations in spatstat::

We can now optimise profile pseudolikelihood on two abovementioned intervals, in order to find two sets of parameters for Baddeley-Geyer process to estimate: (r1, r2) - radii and (s1, s2) - saturation parameters:

df3 <- expand.grid(r=seq(1,5, by=0.25), sat=c(1,2,3,4))
pG30 <- profilepl(df3, Geyer, cp ~ drogi+srodek, aic=TRUE, rbord=5) # progress output omitted

r1 <- pG30$fit$interaction$par$r # first radius
s1 <- pG30$fit$interaction$par$sat # first saturation parameter

df4 <- expand.grid(r=seq(5, 10, by=0.25), sat=c(1,2,3,4))
pG31 <- profilepl(df4, Geyer, cp ~ drogi+srodek, aic=TRUE, rbord=5) # progress output omitted

r2 <- pG31$fit$interaction$par$r # second radius
s2 <- pG31$fit$interaction$par$sat # second saturation parameter

m1 <- ppm(cp ~ drogi+srodek, BadGey(c(r1,r2), c(s1,s2))) # pattern ~ covariates, interaction(param)

After model is estimated, we can check its goodness-of-fit with QQplot and residual G function plot:

# Goodness-of-fit
qqplot.ppm(m1) # Figure 3a

Gr <- Gres(m1)
plot(Gr) # Figure 3b

Figure 3: QQ-plot and residual G function

## Source: own calculations in spatstat::

Comparison of models with one and two radii

After estimation of model 1 (object m1 from above), we performed a following simulation: we simulated 1000 realisation of estimated models which preserve the number of points in each simulated pattern (set with rmhcontrol(p=1)). Then, for each realisation, we estimated two models - one ‘true’, with two sets of parameters and one with r1 and s1 only. For each of models, we preserved values of AIC, pseudo-likelihood and coefficients (not used in the latter comparison):

sim_m1 <- simulate.ppm(m1, nsim=1000, control=rmhcontrol(p=1))

eksp1_2prom <- matrix(nrow=length(sim_m1), ncol=2)
eksp1_1prom <- matrix(nrow=length(sim_m1), ncol=2)
colnames(eksp1_2prom) <- c("roads", "center")
colnames(eksp1_1prom) <- c("roads", "center")

akaike_mtrx_eksp1 <- matrix(nrow=length(sim_m1), ncol=2)
colnames(akaike_mtrx_eksp1) <- c("2 rad", "1 rad")

plklhd_eksp <- matrix(nrow=length(sim_m1), ncol=2)
colnames(plklhd_eksp) <- c("2 rad", "1 rad")

for (i in 1:length(sim_m1)) {
  pattern <- sim_m1[[i]]
  eksp1_mod2prom <- ppm(pattern ~ drogi+srodek, BadGey(c(r1,r2), c(s1,s2)))
  eksp1_2prom[i,1] <- eksp1_mod2prom$coef[2]
  eksp1_2prom[i,2] <- eksp1_mod2prom$coef[3]
  akaike_mtrx_eksp1[i,1] <- AIC(eksp1_mod2prom, takeuchi=F)
  plklhd_eksp[i,1] <- logLik(eksp1_mod2prom)
  eksp1_mod1prom <- ppm(pattern ~ drogi+srodek, Geyer(r1,s1))
  eksp1_1prom[i,1] <- eksp1_mod1prom$coef[2]
  eksp1_1prom[i,2] <- eksp1_mod1prom$coef[3]
  akaike_mtrx_eksp1[i,2] <- AIC(eksp1_mod1prom, takeuchi=F)
  plklhd_eksp[i,2] <- logLik(eksp1_mod1prom)
}

Comparison of two specifications in terms of AIC and logLik is given below:

summary(akaike_mtrx_eksp1)
##      2 rad           1 rad      
##  Min.   :173.0   Min.   :191.4  
##  1st Qu.:234.9   1st Qu.:257.4  
##  Median :257.6   Median :280.0  
##  Mean   :256.1   Mean   :278.6  
##  3rd Qu.:278.1   3rd Qu.:299.5  
##  Max.   :329.8   Max.   :354.3
summary(plklhd_eksp)
##      2 rad             1 rad        
##  Min.   :-159.88   Min.   :-173.14  
##  1st Qu.:-134.04   1st Qu.:-145.75  
##  Median :-123.81   Median :-135.98  
##  Mean   :-123.07   Mean   :-135.30  
##  3rd Qu.:-112.44   3rd Qu.:-124.72  
##  Max.   : -81.51   Max.   : -91.72

Lastly, we can compare densities of AIC and logLik for both models:

plot(density(akaike_mtrx_eksp1[,2]), col="black", main="")
lines(density(akaike_mtrx_eksp1[,1]), col="red", add=T)
legend("topleft", c("1rad", "2rad"), col=c("black", "red"), lty=1)

plot(density(plklhd_eksp[,2]), col="green", lty=2, main="")
lines(density(plklhd_eksp[,1]), col="blue", add=T, lty=2)
legend("topleft", c("1rad", "2rad"), col=c("green", "blue"), lty=2)

Figure 4:

## Source: own calculations in spatstat::

Real data example – on Sweeney and Gómez-Antonio (2016) data

Data for this part of analysis was obtained uopn request from Sweeney & Gómez-Antonio, and unfortunately cannot be shown here. We will focus here on results only:

Interaction plot

mod.d <- ppm(unmark(d), f0, covariates=e.var, PairPiece(0.1:10))
f.d <- fitin(mod.d)
plot(f.d, main="") # Figure 5

Figure 5: Interaction plot – real data

## Source: own calculations in spatstat::

Estimation results

After analysis of interaction plot, two sets of radii was found through maximisation of profile pseudo-likelihood on two intervals ([0.2, 2], [2.6, 4]) with saturation values from {1, 2, …, 8}. Estimated coefficients and interaction parameters are given below:

# hyperperametr optimisation part is skipped

g0.new <- update(g0, BadGey(c(0.8, 2.8), c(3, 6)))
smr <-summary(g0.new) 
smr$coefs.SE.CI[,c(1,5)]
##                 Estimate Ztest
## (Intercept) -2.383866684   ***
## CTR         -0.097469412   ***
## R.R         -0.109367701     *
## R.A         -0.062825865    **
## R.M40        0.046816344     *
## Interact.1   0.490020182   ***
## Interact.2   0.211466477   ***
## CTR:R.R      0.004918363

Goodness-of-fit:

Residual G function:

Figure 6 shows comparison of residual G functions for original and modified models:

par(mfrow=c(1,2))
plot(Gres(g0)) 
plot(Gres(g0.new)) 
par(mfrow=c(1,1)) # Figure 6

Figure 6: Residual G function: left – original model, right – new model

## Source: own calculations in spatstat::

Pearson residuals:

Figure 7 shows comparison of Pearson residuals for original and modified models:

par(mfrow=c(1,2))
diagnose.ppm(g0, which="smooth", type="pearson")
diagnose.ppm(g0.new, which="smooth", type="pearson")
par(mfrow=c(1,1)) # Figure 7

Figure 7: Pearson residuals: left – original model, right – new model

## Source: own calculations in spatstat::

Lurking plots:

Figure 8 shows lurking plot of modified model’s coefficients:

# Code source: Miguel Gómez-Antonio, Stuart Sweeney
# CTR
# example show for CTR covariate
# one can substitute var for e.var$R.A, e.var$R.R and e.var$R.M40, respectively
# and varn for "Road A", "Road R" and "Road M40", respectively
# in order to reproduce other plots
obj<-g0.new # estimated model
var<-e.var$CTR # chosen covariate
varn<-"Center" # string varname
type_='raw'
l<-lurking(obj,var,plot.it=F,covname=varn,type=type_)
l$theoretical$sd<-lurking(update(obj,interaction=Poisson()),var,plot.it=F,covname=varn,type=type_)$theoretical$sd
plot(l[[2]]$covariate,l[[2]]$mean,lty=2,type="l",ylab="cumulative residuals",xlab=varn,ylim=c(min(l[[2]]$mean-2*l[[2]]$sd),max(l[[2]]$mean+2*l[[2]]$sd)))
lines(l[[2]]$covariate,l[[2]]$mean+1.96*l[[2]]$sd,lty=3)
lines(l[[2]]$covariate,l[[2]]$mean-1.96*l[[2]]$sd,lty=3)
lines(l[[1]]$covariate,l[[1]]$value)
title(main=varn,line=-1,cex=0.5) # Figure 8a

Figure 8: Lurking plots – new model

## Source: own calculations in spatstat::

L function:

Figure 9 shows L function for a modified model:

L.g0 <- envelope(g0.new, fun=Lest, nsim=99, global=T)
plot(L.g0,.-r~r)

Figure 9: L function – new model

## Source: own calculations in spatstat::