R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Functions for Eigenvector Spatial Filtering

This R code uses two R packages to estimate Spatial Eigenvector Filtering: spatialreg and spfilteR. The functions SpatialFiltering() and fitted() are included in the spatialreg package, whereas the functions getEVs(), lmFilter(), and glmFilter() in the spfilteR package. The estimations are carried out based on the Columbus, OH dataset.

Creates a function called plot.map() that will display values in the ESRI shapefile
require(RColorBrewer); require(maptools); require(classInt)
plot.map <- function(theme, poly, color = NULL, ncl = 9, main = NULL){
  if(is.null(color)) color <- "YlGnBu"
  int <- classIntervals(theme, n = ncl, style = "quantile")$brks
  pal <- brewer.pal(ncl, color)
  cols <- pal[findInterval(theme, int, rightmost.closed = TRUE)]
  plot(poly, col = cols)
  title(main = main)
}
Open the ESRI shapefile and prepare the data
library(spdep)     ## this opens the spatial dependency package
library(maptools)
library(rgdal)
col.poly <- readOGR(system.file("etc/shapes/columbus.shp", package = "spdep")[1])
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\R\R-4.1.2\library\spdep\etc\shapes\columbus.shp", layer: "columbus"
## with 49 features
## It has 20 fields
## Integer64 fields read as strings:  COLUMBUS_ COLUMBUS_I POLYID
The readOGR() function opens polygon shapefiles
col.data <- slot(col.poly, "data")
attach(col.data)  ## directly call the variables in the data
colnames(col.data)
##  [1] "AREA"       "PERIMETER"  "COLUMBUS_"  "COLUMBUS_I" "POLYID"    
##  [6] "NEIG"       "HOVAL"      "INC"        "CRIME"      "OPEN"      
## [11] "PLUMB"      "DISCBD"     "X"          "Y"          "NSA"       
## [16] "NSB"        "EW"         "CP"         "THOUS"      "NEIGNO"
plot.map(CRIME, col.poly, main = "CRIME")

Create the neighborhood and spatial weight matrix objects
col.nb <- poly2nb(col.poly, queen = FALSE)
col.listw <- nb2listw(col.nb, style = "C")  ## a C-scheme SWM (globally standardized) 
plot(col.poly, col = "gray", border = "white")
coords <- coordinates(col.poly)
plot(col.nb, coords, col = "blue", add = TRUE)

OLS regression and the results
lm.ols <- lm(CRIME ~ INC + HOVAL)
summary(lm.ols)
## 
## Call:
## lm(formula = CRIME ~ INC + HOVAL)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.418  -6.388  -1.580   9.052  28.649 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  68.6190     4.7355  14.490  < 2e-16 ***
## INC          -1.5973     0.3341  -4.780 1.83e-05 ***
## HOVAL        -0.2739     0.1032  -2.654   0.0109 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.43 on 46 degrees of freedom
## Multiple R-squared:  0.5524, Adjusted R-squared:  0.5329 
## F-statistic: 28.39 on 2 and 46 DF,  p-value: 9.341e-09
lm.ols.pred <- predict(lm.ols)
lm.ols.res <- residuals(lm.ols)
Maps of spatial patterns of CRIME, predicted values, and residuals of OLS
par(mfrow = c(2,2))
par(mar = c(1,1,1,1))
plot.map(CRIME, col.poly, main = "CRIME")
plot.map(lm.ols.pred, col.poly, main = "Predicted Crime")
plot.map(lm.ols.res, col.poly, main = "Residuals")
plot(CRIME, lm.ols.pred, pch = 20)
abline(coef(lm(lm.ols.pred ~ CRIME)), col = "red")

Moran tests and tests for normality and homoscedasticity on OLS residuals
moran.test(CRIME, col.listw)
## 
##  Moran I test under randomisation
## 
## data:  CRIME  
## weights: col.listw    
## 
## Moran I statistic standard deviate = 5.6761, p-value = 6.891e-09
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.519389978      -0.020833333       0.009058413
moran.test(lm.ols.pred, col.listw)
## 
##  Moran I test under randomisation
## 
## data:  lm.ols.pred  
## weights: col.listw    
## 
## Moran I statistic standard deviate = 4.4671, p-value = 3.964e-06
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.397473514      -0.020833333       0.008768618
moran.test(lm.ols.res, col.listw)
## 
##  Moran I test under randomisation
## 
## data:  lm.ols.res  
## weights: col.listw    
## 
## Moran I statistic standard deviate = 2.9009, p-value = 0.001861
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.250567386      -0.020833333       0.008753236
shapiro.test(lm.ols.res)  ## Shapiro-Wilk test for normality
## 
##  Shapiro-Wilk normality test
## 
## data:  lm.ols.res
## W = 0.97708, p-value = 0.4497
library(lmtest)
bptest(lm.ols)            ## Breusch-Pagan test for homoscedasticity
## 
##  studentized Breusch-Pagan test
## 
## data:  lm.ols
## BP = 7.2166, df = 2, p-value = 0.0271

Generate a series of hypothetical potential spatial patterns

n <- length(col.nb)
C <- listw2mat(col.listw)  ## convert list file to the original C-scheme matrix
M <- diag(1,n) - 1/n  ## M = I - 11'/n
MCM <- M %*% C %*% M
E.MCM <- eigen(MCM)$vectors
L.MCM <- eigen(MCM)$values
Spatial patterns tied to the residuals of the model y = X\(\beta\) + \(\epsilon\)
y <- CRIME                 ## y dependent variable (col.data attached) 
X <- cbind(1, INC, HOVAL)  ## design matrix X of explanatory variables (col.data attached)
M <- diag(1,n) - tcrossprod(X %*% qr.solve(crossprod(X)), X)  ## M = I - X(X'X)^-1^X'

See My = Me = e

My <- M %*% CRIME
Me <- M %*% lm.ols.res
e <- lm.ols.res
combined <- cbind(My, Me, e)
colnames(combined) <- c("MY", "Me", "e" )
head(combined)  ## see My = Me = e
##            MY          Me           e
## 1   0.3465419   0.3465419   0.3465419
## 2  -3.6947990  -3.6947990  -3.6947990
## 3  -5.2873940  -5.2873940  -5.2873940
## 4 -19.9855151 -19.9855151 -19.9855151
## 5   6.4475490   6.4475490   6.4475490
## 6  -9.0734793  -9.0734793  -9.0734793
Eigenvectors and eigenvalues of MCM
MCM <- M %*% C %*% M  ## M = X(X'X)^-1^X'
eig <- eigen(MCM)
E <- eig$vectors
L <- eig$values
Plot of Moran's I values vs spatial patterns tied to the model
Ones <- rep(1,n)
mi <- L %*% (n/(Ones %*% C %*% Ones))
par(mar = c(4,4,3,2))
plot(mi, ylim = c(-1,1), pch = 20, xlab = "Eigenvector Spatial Pattern", ylab = "Moran's I")
abline(0, 0, lty = 3, lwd = 1.5, col = "red")

Display of the first 4 of n (= 49) spatial patterns tied to this model

If you want to press return key to go to the next plot, use the argument pause{}

library(sm)
par(mfrow = c(2,2))
par(mar = c(1,1,1,1))
for(i in 1:4){plot.map(E[,i], col.poly, main = paste("EV", i, ": Moran's I = ",
                                                     round(mi[i], 3)))}

1. spatialreg functions

A replication of Thayn and Simanis’s 2011 paper (Accounting for spatial autocorrelation in linear regression model using spatial filtering with eigenvectors, Annals of the Association of American Geography) is conducted. The function SpatialFiltering() generates MCM and calculates its eigenvectors and then select a subset of eigenvectors.

library(spatialreg)

1-1 Spatial filter based on the SDM Deta Generating Process [M = X(X’X)-1X’]

sf.err <- SpatialFiltering(CRIME ~ INC + HOVAL, nb = col.nb, style = "C", alpha = 0.25, ExactEV = TRUE)
E.sel <- fitted(sf.err)
Spatially filtered OLS regression and the results
lm.sf <- lm(CRIME ~ INC + HOVAL + E.sel)
summary(lm.sf)
## 
## Call:
## lm(formula = CRIME ~ INC + HOVAL + E.sel)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.186  -5.786  -1.705   5.825  19.811 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  68.61896    3.89239  17.629  < 2e-16 ***
## INC          -1.59731    0.27464  -5.816  7.3e-07 ***
## HOVAL        -0.27393    0.08483  -3.229  0.00241 ** 
## E.selvec3   -29.83284    9.39910  -3.174  0.00281 ** 
## E.selvec5   -24.67939    9.39910  -2.626  0.01201 *  
## E.selvec10  -24.27530    9.39910  -2.583  0.01338 *  
## E.selvec4    14.70110    9.39910   1.564  0.12530    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.399 on 42 degrees of freedom
## Multiple R-squared:  0.7239, Adjusted R-squared:  0.6844 
## F-statistic: 18.35 on 6 and 42 DF,  p-value: 2.513e-10
The spatial patterns of selected eigenvectors
par(mfrow = c(2,2))
par(mar = c(1,1,1,1))
for(i in 1:4) plot.map(E.sel[,i], col.poly, main = colnames(E.sel)[i])

par(mfrow = c(1,1))
The Moran test for the filter eigenvectors (Eq 5 in Thayn and Simanis 2011, p.6)
b <- matrix(lm.sf$coefficients[4:7])
Evec.sel_x_b <- E.sel %*% b
moran.test(Evec.sel_x_b, col.listw)
## 
##  Moran I test under randomisation
## 
## data:  Evec.sel_x_b  
## weights: col.listw    
## 
## Moran I statistic standard deviate = 7.3364, p-value = 1.097e-13
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.675919790      -0.020833333       0.009019662
Spatial patterns for ESF predicted values and residuals
lm.sf.pred <- predict(lm.sf)
lm.sf.res <- residuals(lm.sf)
par(mfrow = c(2,2))
par(mar = c(1,1,1,1))
plot.map(CRIME, col.poly, main = "CRIME")
plot.map(lm.sf.pred, col.poly, main = "SF Predicted Crime")
plot.map(lm.sf.res, col.poly, main = "SF Residuals")
plot(CRIME, lm.sf.pred, pch = 20)
abline(coef(lm(lm.sf.pred ~ CRIME)), col = "red")

par(mfrow = c(1,1))
Moran tests and tests for for normality and homoscedasticity on ESF residuals
moran.test(CRIME, col.listw)
## 
##  Moran I test under randomisation
## 
## data:  CRIME  
## weights: col.listw    
## 
## Moran I statistic standard deviate = 5.6761, p-value = 6.891e-09
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.519389978      -0.020833333       0.009058413
moran.test(lm.sf.pred, col.listw)
## 
##  Moran I test under randomisation
## 
## data:  lm.sf.pred  
## weights: col.listw    
## 
## Moran I statistic standard deviate = 6.1209, p-value = 4.653e-10
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.561184184      -0.020833333       0.009041537
moran.test(lm.sf.res, col.listw)
## 
##  Moran I test under randomisation
## 
## data:  lm.sf.res  
## weights: col.listw    
## 
## Moran I statistic standard deviate = 0.077036, p-value = 0.4693
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      -0.013613053      -0.020833333       0.008784613
shapiro.test(lm.sf.res)  ## Shapiro-Wilks test for normality
## 
##  Shapiro-Wilk normality test
## 
## data:  lm.sf.res
## W = 0.97436, p-value = 0.3577
bptest(lm.sf)            ## Breusch-Pagan test for homoscedasticity
## 
##  studentized Breusch-Pagan test
## 
## data:  lm.sf
## BP = 9.4701, df = 6, p-value = 0.1488

1-2 Spatial filter based on the 2 arguments Data Generating Process [M = I - 1’1/n]

sf.err.2arg <- SpatialFiltering(CRIME ~ 1, ~ INC + HOVAL, data = col.data, 
                                nb = col.nb, style = "C", alpha = 0.25, ExactEV = TRUE)
E.sel.2arg <- fitted(sf.err.2arg)
dim(E.sel.2arg)   ## 49 3
## [1] 49  3
cn <- dim(E.sel.2arg)[2]
par(mfrow = c(2,2))
par(mar = c(1,1,1,1))
for(i in 1:cn) plot.map(E.sel.2arg[,i], col.poly, main = colnames(E.sel.2arg)[i])
par(mfrow = c(1,1))

Spatially filtered OLS regression and the results
lm.sf.2arg <- lm(CRIME ~ INC + HOVAL + E.sel.2arg)
summary(lm.sf.2arg)
## 
## Call:
## lm(formula = CRIME ~ INC + HOVAL + E.sel.2arg)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -25.5483  -6.7367  -0.2414   6.7880  17.3290 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      70.48686    4.12822  17.074  < 2e-16 ***
## INC              -1.62811    0.29644  -5.492 1.99e-06 ***
## HOVAL            -0.31101    0.08752  -3.553 0.000938 ***
## E.sel.2argvec3  -34.07197    9.76398  -3.490 0.001131 ** 
## E.sel.2argvec6   19.53315    9.96586   1.960 0.056496 .  
## E.sel.2argvec15 -23.35338    9.72130  -2.402 0.020683 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.593 on 43 degrees of freedom
## Multiple R-squared:  0.7056, Adjusted R-squared:  0.6713 
## F-statistic: 20.61 on 5 and 43 DF,  p-value: 1.906e-10
lm.sf.pred.2arg <- predict(lm.sf.2arg)
lm.sf.res.2arg <- residuals(lm.sf.2arg)
par(mfrow = c(2,2))
par(mar = c(1,1,1,1))
plot.map(CRIME, col.poly, main = "CRIME")
plot.map(lm.sf.pred.2arg, col.poly, main = "SF Predicted Crime")
plot.map(lm.sf.res.2arg, col.poly, main = "SF Residuals")
plot(CRIME, lm.sf.pred.2arg, pch = 20)
abline(coef(lm(lm.sf.pred.2arg ~ CRIME)), col = "red")

Moran tests, Breusch-Pagan test for homoscedasticity and Shapiro-Wilk test for normality on ESF residuals
moran.test(CRIME, col.listw)
## 
##  Moran I test under randomisation
## 
## data:  CRIME  
## weights: col.listw    
## 
## Moran I statistic standard deviate = 5.6761, p-value = 6.891e-09
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.519389978      -0.020833333       0.009058413
moran.test(lm.sf.pred.2arg, col.listw)
## 
##  Moran I test under randomisation
## 
## data:  lm.sf.pred.2arg  
## weights: col.listw    
## 
## Moran I statistic standard deviate = 5.1985, p-value = 1.004e-07
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.471885353      -0.020833333       0.008983327
moran.test(lm.sf.res.2arg, col.listw)
## 
##  Moran I test under randomisation
## 
## data:  lm.sf.res.2arg  
## weights: col.listw    
## 
## Moran I statistic standard deviate = 0.38457, p-value = 0.3503
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.015551912      -0.020833333       0.008951394
shapiro.test(lm.sf.res.2arg)
## 
##  Shapiro-Wilk normality test
## 
## data:  lm.sf.res.2arg
## W = 0.97386, p-value = 0.3424
bptest(lm.sf.2arg)
## 
##  studentized Breusch-Pagan test
## 
## data:  lm.sf.2arg
## BP = 7.8562, df = 5, p-value = 0.1643

2. spfilteR functions

Juhl, Sebastian. 2021. An R package for Semiparametric Spatial Filtering with Eigenvectors in (Generalized) Linear Models, The R Journal, 13/2, pp.450-459

Load necessary packages and data
library(spfilteR)
library(spdep)
col.listw <- nb2listw(col.nb, style = "W")  ## a W-scheme SWM (row-standardized) 
W <- listw2mat(col.listw)                   ## a row-standardized SWM 
isSymmetric(W)         ## asymmetric SWM
## [1] FALSE
sum(colSums(W))        ## number of tracks
## [1] 49
LM tests for alternative spatial econometric models
col.LM.test <- lm.LMtests(lm(CRIME ~ INC + HOVAL), listw = col.listw, test = "all")   ## W-scheme for ML test
summary(col.LM.test)  ## the spatial lag model is robust
##  Lagrange multiplier diagnostics for spatial dependence
## data:  
## model: lm(formula = CRIME ~ INC + HOVAL)
## weights: col.listw
##  
##        statistic parameter  p.value   
## LMerr    5.81488         1 0.015891 * 
## LMlag    8.75991         1 0.003079 **
## RLMerr   0.12715         1 0.721409   
## RLMlag   3.07217         1 0.079643 . 
## SARMA    8.88705         2 0.011754 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

2-1 Supervised spatial filtering

The getEVs() function with projection matrix M = I - 11’/n
EVs <- getEVs(W = W, covars = NULL)  
E <- EVs$vectors
L <- EVs$values
MI <- EVs$moran
The getEVs() function with projection matrix M = I - X(X’X)-1X’
X.reduce <- cbind(INC, HOVAL)       ## X can exclude col of ones for getEVs()
X <- X.reduce
EVs.x <- getEVs(W = W, covars = X)  ## doesn't matter even if X includes col of ones
E.x <- EVs.x$vectors
L.x <- EVs.x$values
MI.x <- EVs.x$moran
Candidate set of eigenvalues
Ec <- EVs$moran/max(EVs$moran) >= .25
Ec.x <- EVs.x$moran/max(EVs.x$moran) >= .25
Estimations of ESF
E.select <- E[, Ec]
dim(E.select)  ## 49 13
## [1] 49 13
esf.E.select <- lm(CRIME ~ INC + HOVAL + E.select)
summary(esf.E.select)
## 
## Call:
## lm(formula = CRIME ~ INC + HOVAL + E.select)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -27.9796  -4.2583   0.3273   4.2377  17.6974 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  55.4513     5.6727   9.775 2.85e-11 ***
## INC          -0.6258     0.3955  -1.582 0.123121    
## HOVAL        -0.2947     0.1112  -2.650 0.012256 *  
## E.select1    27.9030     9.5292   2.928 0.006137 ** 
## E.select2   -23.1228    11.5052  -2.010 0.052690 .  
## E.select3   -12.6447    10.5242  -1.201 0.238113    
## E.select4    36.1789     9.6575   3.746 0.000687 ***
## E.select5    24.0001    10.3243   2.325 0.026392 *  
## E.select6    23.2988     9.4418   2.468 0.018958 *  
## E.select7    -2.0542     9.7090  -0.212 0.833737    
## E.select8    -7.7000     9.8670  -0.780 0.440730    
## E.select9     7.6087     9.5918   0.793 0.433298    
## E.select10  -20.3547     9.3289  -2.182 0.036341 *  
## E.select11   10.3514    10.0987   1.025 0.312806    
## E.select12    3.6173     9.2723   0.390 0.698951    
## E.select13   10.2829     9.1395   1.125 0.268659    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.097 on 33 degrees of freedom
## Multiple R-squared:  0.7968, Adjusted R-squared:  0.7044 
## F-statistic: 8.625 on 15 and 33 DF,  p-value: 1.552e-07
E.select.x <- E.x[, Ec.x]
dim(E.select.x)  ## 49 12
## [1] 49 12
esf.E.select.x <- lm(CRIME ~ INC + HOVAL + E.select.x)
summary(esf.E.select.x)
## 
## Call:
## lm(formula = CRIME ~ INC + HOVAL + E.select.x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -27.2903  -4.0291  -0.1805   6.4133  17.8118 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   68.61896    3.98069  17.238  < 2e-16 ***
## INC           -1.59731    0.28087  -5.687  2.2e-06 ***
## HOVAL         -0.27393    0.08675  -3.158  0.00333 ** 
## E.select.x1  -17.13543    9.61234  -1.783  0.08358 .  
## E.select.x2    7.76821    9.61234   0.808  0.42462    
## E.select.x3  -22.19057    9.61234  -2.309  0.02718 *  
## E.select.x4    8.23676    9.61234   0.857  0.39750    
## E.select.x5   28.52586    9.61234   2.968  0.00546 ** 
## E.select.x6    7.00405    9.61234   0.729  0.47120    
## E.select.x7    9.62792    9.61234   1.002  0.32360    
## E.select.x8   -0.41719    9.61234  -0.043  0.96564    
## E.select.x9   11.74651    9.61234   1.222  0.23010    
## E.select.x10 -24.60275    9.61234  -2.559  0.01510 *  
## E.select.x11 -15.96973    9.61234  -1.661  0.10583    
## E.select.x12   2.27931    9.61234   0.237  0.81398    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.612 on 34 degrees of freedom
## Multiple R-squared:  0.7662, Adjusted R-squared:   0.67 
## F-statistic:  7.96 on 14 and 34 DF,  p-value: 4.219e-07
Extract ESF residuals
esf.resid <- resid(esf.E.select)
esf.resid.x <- resid(esf.E.select.x)
Check for remaining spatial autocorrelation in model residuals
MI.resid(esf.resid, x = X, W = W, alternative = "greater")    ## W = MCM NOT work
##            I          EI      VarI         zI        pI  
## 1 -0.1593143 -0.03458911 0.5438445 -0.1691284 0.5671522
MI.resid(esf.resid.x, x = X, W = W, alternative = "greater")  ## W = MCM NOT work
##           I          EI      VarI         zI        pI  
## 1 -0.127422 -0.03458911 0.5438445 -0.1258823 0.5500875
Calculate partial R2 and VIF
round(partialR2(y = y, x = X, evecs = E.select), 4)
##  [1] 0.0757 0.0049 0.0007 0.1392 0.0237 0.1206 0.0000 0.0009 0.0176 0.0496
## [11] 0.0118 0.0084 0.0228
round(partialR2(y = y, x = X, evecs = E.select.x), 4)
##  [1] 0.0488 0.0100 0.0819 0.0113 0.1353 0.0082 0.0154 0.0000 0.0229 0.1006
## [11] 0.0424 0.0009
round(vif.ev(x = X, evecs = E.select, na.rm = TRUE), 4)
##  [1] 1.0455 1.3357 1.1709 1.0601 1.1351 1.0436 1.0827 1.0908 1.0604 1.0279
## [11] 1.1376 1.0174 1.0049
round(vif.ev(x = X, evecs = E.select.x, na.rm = TRUE), 4)
##  [1] 1 1 1 1 1 1 1 1 1 1 1 1

2-2 Unsupervised spatial filtering

X.reduce <- cbind(INC, HOVAL)  ## X should not include col of ones for lmFilter()
(esf.us.null <- lmFilter(y = y, x = X.reduce, MX = NULL, W = W, objfn = "p", sig = .1, 
                         bonferroni = TRUE, positive = TRUE, ideal.setsize = TRUE))  ## x excludes col of ones
## 1 out of 4 candidate eigenvectors selected
(esf.us.x <- lmFilter(y = y, x = X.reduce, MX = X.reduce, W = W, objfn = "p", sig = .1, 
                      bonferroni = TRUE, positive = TRUE, ideal.setsize = TRUE))  ## MX = X.exd
## 0 out of 4 candidate eigenvectors selected
summary(esf.us.null, EV = TRUE)
## 
##  - Spatial Filtering with Eigenvectors (Linear Model)  -
## 
## Coefficients (OLS):
##               Estimate         SE      p-value    
## (Intercept) 66.7795991 4.49418923 5.706837e-19 ***
## INC         -1.3903289 0.32269008 8.813167e-05 ***
## HOVAL       -0.3034867 0.09742432 3.196573e-03  **
## 
## Adjusted R-squared:
##   Initial  Filtered 
## 0.5329433 0.5890135 
## 
## Filtered for positive spatial autocorrelation
## 1 out of 4 candidate eigenvectors selected
## Objective Function: "p" (significance level = 0.1)
## Bonferroni correction: TRUE (adjusted significance level = 0.025)
## 
## Summary of selected eigenvectors:
##      Estimate       SE     p-value partialR2     VIF        MI   
## ev_4 29.78959 11.04402 0.009802012 0.1391793 1.06005 0.8854459 **
## 
## Moran's I (Residuals):
##           Observed    Expected  Variance         z   p-value  
## Initial  0.2498625 -0.03458911 0.5438445 0.3857190 0.3498524  
## Filtered 0.1901926 -0.05435537 0.5997218 0.3157833 0.3760835
summary(esf.us.x, EV = TRUE)
## 
##  - Spatial Filtering with Eigenvectors (Linear Model)  -
## 
## Coefficients (OLS):
##               Estimate        SE     p-value    
## (Intercept) 68.6189611 4.7354861 9.21089e-19 ***
## INC         -1.5973108 0.3341308 1.82896e-05 ***
## HOVAL       -0.2739315 0.1031987 1.08745e-02   *
## 
## Adjusted R-squared:
##   Initial  Filtered 
## 0.5329433 0.5329433 
## 
## Filtered for positive spatial autocorrelation
## 0 out of 4 candidate eigenvectors selected
## Objective Function: "p" (significance level = 0.1)
## Bonferroni correction: TRUE (adjusted significance level = 0.025)
## 
## No eigenvectors selected
## 
## Moran's I (Residuals):
##           Observed    Expected  Variance        z   p-value  
## Initial  0.2498625 -0.03458911 0.5438445 0.385719 0.3498524  
## Filtered 0.2498625 -0.03458911 0.5438445 0.385719 0.3498524

2-3 Spatial filtering in generalized linear models

Define dependent variables
y.bin <- col.data$EW
y.count <- col.data$NEIG
Set the seed (because of ‘boot.MI’)
set.seed(123)
logit model
(esf.logit <- glmFilter(y = y.bin, x = NULL, W = W, objfn = "p", model = "logit", 
                        optim.method = "BFGS", sig = .05, bonferroni = FALSE, 
                        resid.type = "pearson", boot.MI = 100))
## 3 out of 13 candidate eigenvectors selected
summary(esf.logit)
## 
##  - Spatial Filtering with Eigenvectors (Logit Model)  -
## 
## Coefficients (ML):
##              Estimate        SE   p-value  
## (Intercept) 0.1135685 0.4875494 0.8168663  
## 
## Model Fit:
##          logL      AIC      BIC     
## Initial  -33.13297 68.26594 70.15776
## Filtered -17.9432  43.8864  51.45368
## 
## Filtered for positive spatial autocorrelation
## 3 out of 13 candidate eigenvectors selected
## Condition Number (Multicollinearity): 1
## Objective Function: "p" (significance level = 0.05)
## Bonferroni correction: FALSE
## 
## Moran's I (PearsonResiduals):
##           Observed    Expected   Variance        z    p-value   
## Initial  0.7843555 -0.02083333 0.01154227 7.494662 0.00990099 **
## Filtered 0.7683664 -0.07855465 0.01060303 8.224849 0.00990099 **
probit model
(esf.probit <- glmFilter(y = y.bin, x = NULL, W = W, objfn = "BIC", model = "probit",
                         optim.method = "BFGS", min.reduction = 0.5, resid.type = "deviance",
                         boot.MI = 100))
## 0 out of 13 candidate eigenvectors selected
summary(esf.probit)
## 
##  - Spatial Filtering with Eigenvectors (Probit Model)  -
## 
## Coefficients (ML):
##              Estimate        SE   p-value  
## (Intercept) 0.2322727 0.1808111 0.2050918  
## 
## Model Fit:
##          logL      AIC      BIC     
## Initial  -33.13297 68.26594 70.15776
## Filtered -33.13297 68.26594 70.15776
## 
## Filtered for positive spatial autocorrelation
## 0 out of 13 candidate eigenvectors selected
## Objective Function: "BIC"
## 
## Moran's I (DevianceResiduals):
##           Observed    Expected   Variance        z    p-value   
## Initial  0.7850112 -0.02083333 0.01070850 7.787300 0.00990099 **
## Filtered 0.7850112 -0.02083333 0.01010504 8.016451 0.00990099 **
poisson model
(esf.poisson <- glmFilter(y = y.count, x = NULL, W = W, objfn = "pMI", model = "poisson",
                          optim.method = "BFGS", sig = .1, resid.type = "pearson",
                          boot.MI = 100))
## 1 out of 13 candidate eigenvectors selected
summary(esf.poisson)
## 
##  - Spatial Filtering with Eigenvectors (Poisson Model)  -
## 
## Coefficients (ML):
##             Estimate         SE      p-value    
## (Intercept) 3.172685 0.02968203 9.056683e-58 ***
## 
## Model Fit:
##          logL      AIC      BIC     
## Initial  -342.3614 686.7227 688.6146
## Filtered -274.5757 553.1513 556.935 
## 
## Filtered for positive spatial autocorrelation
## 1 out of 13 candidate eigenvectors selected
## Condition Number (Multicollinearity): 1
## Objective Function: "pMI"
## 
## Moran's I (PearsonResiduals):
##           Observed    Expected   Variance        z    p-value   
## Initial  0.7594687 -0.02083333 0.01099823 7.440489 0.00990099 **
## Filtered 0.5840610 -0.04322938 0.01221962 5.674660 0.00990099 **

3. Spatial Econometric Models

library(spatialreg)
SAR model (SLM)
sar.col <- lagsarlm(CRIME ~ INC + HOVAL, listw = col.listw)
summary(sar.col)
## 
## Call:lagsarlm(formula = CRIME ~ INC + HOVAL, listw = col.listw)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -36.92792  -5.56906  -0.92424   6.21002  24.68336 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##              Estimate Std. Error z value  Pr(>|z|)
## (Intercept) 45.264975   7.175796  6.3080 2.827e-10
## INC         -1.036346   0.305252 -3.3950 0.0006862
## HOVAL       -0.259418   0.088797 -2.9215 0.0034837
## 
## Rho: 0.42281, LR test value: 9.7192, p-value: 0.0018235
## Asymptotic standard error: 0.11558
##     z-value: 3.6582, p-value: 0.00025398
## Wald statistic: 13.383, p-value: 0.00025398
## 
## Log likelihood: -182.5176 for lag model
## ML residual variance (sigma squared): 95.723, (sigma: 9.7838)
## Number of observations: 49 
## Number of parameters estimated: 5 
## AIC: 375.04, (AIC for lm: 382.75)
## LM test for residual autocorrelation
## test value: 0.50522, p-value: 0.47722
sar2sls.col <- stsls(CRIME ~ INC + HOVAL, listw = col.listw)  ## 2 stage least square estimation
summary(sar2sls.col)
## 
## Call:stsls(formula = CRIME ~ INC + HOVAL, listw = col.listw)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -36.90706  -5.49372  -0.92969   6.21749  24.71632 
## 
## Coefficients: 
##              Estimate Std. Error t value  Pr(>|t|)
## Rho          0.419294   0.187585  2.2352  0.025403
## (Intercept) 45.459092  11.191512  4.0619 4.867e-05
## INC         -1.041009   0.388612 -2.6788  0.007389
## HOVAL       -0.259538   0.092406 -2.8087  0.004975
## 
## Residual variance (sigma squared): 104.33, (sigma: 10.214)
SEM model
errorsalm.col <- errorsarlm(CRIME ~ INC + HOVAL, listw = col.listw)
summary(errorsalm.col)
## 
## Call:errorsarlm(formula = CRIME ~ INC + HOVAL, listw = col.listw)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -34.4004  -6.0196  -1.3928   7.0105  24.0611 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##              Estimate Std. Error z value  Pr(>|z|)
## (Intercept) 60.375188   5.325070 11.3379 < 2.2e-16
## INC         -0.961044   0.331146 -2.9022  0.003706
## HOVAL       -0.303198   0.092641 -3.2728  0.001065
## 
## Lambda: 0.54847, LR test value: 8.1273, p-value: 0.0043603
## Asymptotic standard error: 0.13138
##     z-value: 4.1747, p-value: 2.9832e-05
## Wald statistic: 17.428, p-value: 2.9832e-05
## 
## Log likelihood: -183.3136 for error model
## ML residual variance (sigma squared): 94.968, (sigma: 9.7451)
## Number of observations: 49 
## Number of parameters estimated: 5 
## AIC: NA (not available for weighted model), (AIC for lm: 382.75)
Spatial Durban Model (y = \(\rho\)Wy + X\(\beta\) + WX\(\theta\) + \(\epsilon\))
durbin.col <- lagsarlm(CRIME ~ INC + HOVAL, listw = col.listw, type = "mixed")  ## SDM
summary(durbin.col, Nagelkerke = TRUE)
## 
## Call:lagsarlm(formula = CRIME ~ INC + HOVAL, listw = col.listw, type = "mixed")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -37.31038  -6.07084  -0.51813   6.58699  23.49170 
## 
## Type: mixed 
## Coefficients: (asymptotic standard errors) 
##              Estimate Std. Error z value  Pr(>|z|)
## (Intercept) 41.175254  12.086052  3.4068 0.0006572
## INC         -0.926376   0.333824 -2.7750 0.0055194
## HOVAL       -0.296256   0.091779 -3.2279 0.0012469
## lag.INC     -0.385647   0.553973 -0.6961 0.4863367
## lag.HOVAL    0.235127   0.186005  1.2641 0.2061962
## 
## Rho: 0.43826, LR test value: 6.0071, p-value: 0.014249
## Asymptotic standard error: 0.14885
##     z-value: 2.9444, p-value: 0.0032362
## Wald statistic: 8.6693, p-value: 0.0032362
## 
## Log likelihood: -181.7106 for mixed model
## ML residual variance (sigma squared): 92.238, (sigma: 9.6041)
## Nagelkerke pseudo-R-squared: 0.64483 
## Number of observations: 49 
## Number of parameters estimated: 7 
## AIC: 377.42, (AIC for lm: 381.43)
## LM test for residual autocorrelation
## test value: 0.77351, p-value: 0.37913
Spatial Durban Error Model (y = X\(\beta\) + WX\(\theta\) + u, u = \(\lambda\)Wu + \(\epsilon\))
durbin.err.col <- errorsarlm(CRIME ~ INC + HOVAL, listw = col.listw, etype = "emixed")  ## SDEM
summary(durbin.err.col, Nagelkerke = TRUE)
## 
## Call:errorsarlm(formula = CRIME ~ INC + HOVAL, listw = col.listw, 
##     etype = "emixed")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -37.15973  -6.26537  -0.38204   6.30980  23.83842 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##              Estimate Std. Error z value  Pr(>|z|)
## (Intercept) 72.696477   8.964201  8.1096 4.441e-16
## INC         -1.057194   0.320827 -3.2952 0.0009834
## HOVAL       -0.277484   0.091953 -3.0177 0.0025471
## lag.INC     -1.041591   0.576119 -1.8079 0.0706154
## lag.HOVAL    0.096160   0.201743  0.4766 0.6336149
## 
## Lambda: 0.44805, LR test value: 5.943, p-value: 0.014776
## Asymptotic standard error: 0.14878
##     z-value: 3.0115, p-value: 0.0025999
## Wald statistic: 9.0689, p-value: 0.0025999
## 
## Log likelihood: -181.7427 for error model
## ML residual variance (sigma squared): 92.106, (sigma: 9.5972)
## Nagelkerke pseudo-R-squared: 0.64436 
## Number of observations: 49 
## Number of parameters estimated: 7 
## AIC: NA (not available for weighted model), (AIC for lm: 381.43)