1 Introduction

As a final project, using the knowledge gained from the classes, we will examine the factors influencing voter turnout. We decided to choose Polish elections because of the access to complete election data on the communes level, knowledge of the local context and the interest in political trends in our country.

2 About the dataset

The dataset used in this project contains multiple location based (on the commune level) tables with information about the standard of living in a given commune, level of development, culture, expenses and many others obtained mostly from the Polish Central Statistical Office website.

3 Preparations

3.1 Load libraries

We load multiple libraries for data manipulation and visualization together with tools for data modelling.

# data manipulation
library(tidyverse)
library(tidyr)
library(dplyr)
library(stringr)
library(sf)
library(rmapshaper)
library(tigris)

#visualizations
library(ggplot2)
library(rgdal)
library(leaflet)
library(RColorBrewer)

#modeling and analysis
library(spdep)
library(lmtest)
library(spatialreg)
library(texreg) #for table comparison between different models

# library(rgeos)

3.2 Load Data

We load the following csv files and shape files:

data/bezrobocie.csv - unemployment in Poland (by communes) in 2019
dochody na mieszk 2016-2019.csv - income of the commune per capita in 2016-2019
feminizacja.csv - feminization rate (number of female inhabitants per 100 male inhabitants)
ludnosc 2016-2019.csv - population density in km² in 2016-2019
ludnosc ogol przed poprod.csv - total population, pre- and post-working age population in 2016-2019
saldo migracji stalych na 1000 osob.csv - permanent migration balance between municipalities per 1000 people in 2016-2019
swiadczenie wychowawcze 500 sty-czer.csv - average monthly number of families receiving the 500+ social benefit from January to June 2019
wodkangaz.csv - share of the population using water, sewage and gas
wydatki na mieszk 2016-2019.csv - expenses of the commune per capita in 2016-2019
data from 2019 elections (source):
- wyniki_gl_na_listy_po_obwodach_sejm.csv - data from elections in 2019 per electoral circuit
- wyniki_gl_na_listy_po_gminach_sejm.csv - data from elections in 2019 per commune
powierzchnia.csv - commune area in km²
Wojewodztwa - dataframe of the voivodeships containing spatial polygons with attributes
Powiaty - dataframe of the districts containing spatial polygons with attributes
Gminy - dataframe of the communes containing spatial polygons with attributes

# csv files from GUS
unempl <- read.csv("data/bezrobocie.csv", header=TRUE, dec=",", sep=";", encoding = "UTF-8")
inc <- read.csv("data/dochody na mieszk 2016-2019.csv", header=TRUE, dec=",", sep=";", encoding = "UTF-8")
fem <- read.csv("data/feminizacja.csv", header=TRUE, dec=",", sep=";", encoding = "UTF-8")
people <- read.csv("data/ludnosc 2016-2019.csv", header=TRUE, dec=",", sep=";", encoding = "UTF-8")
people_prod <- read.csv("data/ludnosc ogol przed poprod.csv", header=TRUE, dec=",", sep=";", encoding = "UTF-8")
migration <- read.csv("data/saldo migracji stalych na 1000 osob.csv", header=TRUE, dec=",", sep=";", encoding = "UTF-8")
soc_ben <-read.csv("data/swiadczenie wychowawcze 500 sty-czer.csv", header=TRUE, dec=",", sep=";", encoding = "UTF-8")
wat_sew_gas <- read.csv("data/wodkangaz.csv", header=TRUE, dec=",", sep=";", encoding = "UTF-8")
exp <- read.csv("data/wydatki na mieszk 2016-2019.csv", header=TRUE, dec=",", sep=";", encoding = "UTF-8")
area <- read.csv("data/powierzchnia.csv", header=TRUE, sep=",", encoding = "UTF-8")

# data from elections (source: https://sejmsenat2019.pkw.gov.pl/sejmsenat2019/pl/dane_w_arkuszach)
elect <- read.csv("data/wyniki_gl_na_listy_po_obwodach_sejm.csv", header=TRUE, dec=",", sep=";", encoding = "UTF-8")
elect_gm <- read.csv("data/wyniki_gl_na_listy_po_gminach_sejm.csv", header=TRUE, dec=",", sep=";", encoding = "UTF-8")

# shapefiles
voi <-readOGR("data", "Wojewodztwa", encoding = "UTF-8")
pov <-readOGR("data", "Powiaty", encoding = "UTF-8")
com <-readOGR("data", "Gminy", encoding = "UTF-8")
com_shp <- read_sf('data/Gminy.shp')

3.3 Data manipulation

# changing projections
voi<-spTransform(voi, CRS("+proj=longlat +datum=NAD83"))
pov<-spTransform(pov, CRS("+proj=longlat +datum=NAD83"))
com<-spTransform(com, CRS("+proj=longlat +datum=NAD83"))

# expenditures - we calculate a mean of the years since last elections
colnames(exp) <- c("Kod", "Nazwa", "gminy_2016_pln", "gminy_2017_pln",
                   "gminy_2018_pln",  "gminy_2019_pln", "X")
# there are some NA's but last year is full so we can omit NA's and won't have any lacks of data
exp$expenses<-rowMeans(exp[,3:6], na.rm = T) #mean of years 2016-2019
exp[2:7] <- NULL # we leave only code & created data

# feminization
colnames(fem)[3] <- "femin"
fem[c(2,4)] <- NULL

# income - we calculate a mean of the years since last elections
colnames(inc) <- c("Kod", "Nazwa", "gminy_2016_pln", "gminy_2017_pln",
                   "gminy_2018_pln",  "gminy_2019_pln", "X")
inc$income<-rowMeans(inc[,3:6], na.rm = T)
inc[2:7] <- NULL

# migration - we calculate a mean of the years since last elections
colnames(migration) <- c("Kod", "Nazwa", "saldo_migracji_2016", "saldo_migracji_2017",
                         "saldo_migracji_2018",  "saldo_migracji_2019", "X")
migration$migration<-rowMeans(migration[,3:6], na.rm = T)
migration[2:7] <- NULL

# people_density 
colnames(people)[6] <- "people_density2019"
people[c(2:5,7)] <- NULL

# people_prod - we divide number of pre- and post-working age population by total population
colnames(people_prod) <- c("Kod", "Nazwa", "All_2016", "All_2017", "All_2018",
                           "All_2019","przedprodukcyjny_2016", "przedprodukcyjny_2017",
                           "przedprodukcyjny_2018", "przedprodukcyjny_2019",
                           "poprodukcyjny_2016", "poprodukcyjny_2017", "poprodukcyjny_2018",
                           "poprodukcyjny_2019", "X")

people_prod$prework <- people_prod$przedprodukcyjny_2019 / people_prod$All_2019
people_prod$postwork <- people_prod$poprodukcyjny_2019 / people_prod$All_2019
people_prod[c(2:5, 7:15)] <- NULL

# soc_ben - we divide number of families receiving 500+ by total population of the commune
colnames(soc_ben)[3] <- "500+number_per_moth"
soc_ben$benefit500 <- soc_ben$`500+number_per_moth` / people_prod$All_2019
soc_ben[c(2:4)] <- NULL

# unemployment in 2019
colnames(unempl)[3] <- "unemployment"
unempl[c(2,4)] <- NULL


# wat_sew_gas: 3 separate variables
colnames(wat_sew_gas)[3:5] <- c("water", "sewage", "gas")
wat_sew_gas[c(2,6)] <- NULL

# area
colnames(area)[3] <- "area"
area[c(2,4)] <- NULL
colnames(area)[1] <- "Kod"

### merging all sets into one data by the code of commune
data <- Reduce(function(x,y) merge(x = x, y = y, by = "Kod"), 
               list(exp, fem, inc, migration, people, people_prod, soc_ben,
                    unempl, wat_sew_gas, area))

# The last digit in this code defines the type of gmina and is not needed. 
# In data from elections this digit isn't included so we'll remove it from here also.
data$Kod <- 
  data$Kod %>% as.character() %>% substr(1, nchar(data$Kod)-1) %>% as.numeric()

#election data on the commune level preperation
matched <- intersect(data$Kod, elect_gm$Kod.TERYT)
all <-  union(data$Kod, elect_gm$Kod.TERYT)
non_matched <- all[!all %in% matched]
# There are some numbers that don't match. Most of them results from differences in Warsaw.
# It is a city with code 146501, but in data from election it is divided by
# districts (codes 146502-146519). We need to merge this data together so we'll 
# have one data for Warsaw.
elect_gm[c(5, 7:25)] <- NULL # Removing redundant columns

# Renaming columns: column with number of people entitled to vote and 
# column with number of valid votes
colnames(elect_gm)[5:6] <- c("entitled", "votes") 

# summing Warsaw data to the 1 obs
elect_gm[elect_gm$Kod.TERYT == 146502, 5:16] <- 
  colSums(elect_gm[elect_gm$Kod.TERYT >= 146501 &
                     elect_gm$Kod.TERYT <= 146519,
                   5:16]) # Sum of district values for numerical columns to the 1st district
# Renaming 1st district to the code and name of Warsaw
elect_gm[elect_gm$Kod.TERYT == 146502, 1:2] <- c(146501, "Warszawa")

# removing rest of Warsaw districts and abroad/ships
elect_gm <- elect_gm[elect_gm$Kod.TERYT < 146502 | elect_gm$Kod.TERYT > 149901,]

## Adding new variables from election results
# frequency - our target variable - in %
elect_gm$freq <- elect_gm$votes / elect_gm$entitled * 100
# votes for oposition - as a fraction of all votes: 
# a will to change the current government as a possible factor to drive people to the elections
elect_gm$opos <- rowSums(elect_gm[c(7:11, 13:16)], na.rm = T) / elect_gm$votes
elect_gm[c(7:16)] <- NULL # removing redundant variables

# merging data community level
data <- merge(x = data, y = elect_gm, by.x = "Kod", by.y = "Kod.TERYT")

## places & districts
# There are some obs without teritorial codes, they are empty and from the 
# unique places outside the country like military bases that didn't participate 
# in voting -> we can remove them.
elect <- elect[!is.na(elect$Kod.TERYT),]

matched <- intersect(data$Kod, elect$Kod.TERYT)
all <-  union(data$Kod, elect$Kod.TERYT)
non_matched <- all[!all %in% matched] 
#Still we have the same problem with Warsaw divided by districts and with ships and abroad

#recoding Warsaw districts to general Warsaw code
elect[elect$Kod.TERYT >= 146502 & elect$Kod.TERYT <= 146519, 2] <- 146501

# WE will need only generally available locations and we will use them to count for 
# community the average of people enabled to vote at a one place (obwod) to see 
# if maybe possibility of bigger crowds and queues  makes people less interested in 
# voting in contrast to regions with more locations per number of enabled people. 
# We will also calculate the number of physical places which may be useful later.

elect <- elect %>% 
  filter(Typ.obwodu == "stały") %>% 
  group_by(Kod.TERYT) %>%
  summarise(places = length(unique(Siedziba)),
            mean_enabled = mean(as.numeric(Liczba.wyborców.uprawnionych.do.głosowania))) 
# Adding this data to our main one
data <- merge(x = data, y = elect, by.x = "Kod", by.y = "Kod.TERYT")

# now we can use area and this number of voting places to make a new variable
data$place_area <- data$area / data$places 
#how big area more or less is for one place of voting. Bigger areas may mean for example 
# that some people have farther from home and may be less willing to go there. 
# It is a simplification, but it allows to measure this aspect which comes from 
# the more detailed info and to average it on the level of gminy

# geo join for visualizations
com$JPT_KOD_JE <- str_sub(com$JPT_KOD_JE, 1, nchar(com$JPT_KOD_JE)-1)
com$JPT_KOD_JE <- gsub("^0", "",com$JPT_KOD_JE)
com_shp$JPT_KOD_JE <- str_sub(com_shp$JPT_KOD_JE, 1, nchar(com_shp$JPT_KOD_JE)-1)
com_shp$JPT_KOD_JE <- gsub("^0", "",com_shp$JPT_KOD_JE)
data$Kod <- as.character(data$Kod)

com_joined <- geo_join(com, data, 'JPT_KOD_JE', 'Kod', how = "left")
com_joined_vis <- geo_join(com_shp, data, 'JPT_KOD_JE', 'Kod', how = "left")
com_joined_vis <- st_as_sf(com_joined)

data <- as.data.frame(com_joined)
data <- data[30:54]

#simplify polygons for better performance
com_joined_light <- ms_simplify(com_joined_vis)

4 Elections data and unemployment rate by commune

palFreq <- colorNumeric("BuGn", NULL)
palOpos <- colorNumeric("Blues", NULL)
palUnemploy <- colorNumeric("OrRd", NULL)


com_joined_vis <- 
  com_joined_vis %>% 
  dplyr::mutate(lab_txt = paste0('<strong>Gmina</strong>: ',
                                 Gmina,
                                 '<br><strong>Powiat</strong>: ',
                                 Powiat,
                                 '<br><strong>Województwo</strong>: ',
                                 Województwo,
                                 '<br><strong>Election turnout</strong>: ',
                                 round(freq,2), '%',
                                 '<br><strong>Opposition results</strong>: ',
                                 round(opos*100,2), '%',
                                 '<br><strong>unemployment</strong>: ',
                                 unemployment, '%'
  ))

labs <- as.list(com_joined_vis$lab_txt)

leaflet(com_joined_light,
        options = leafletOptions(minZoom = 7)) %>%
  addPolygons(color = "#575757",
              weight = 1,
              smoothFactor = 0.8,
              opacity = 0.5,
              fillOpacity = 0.75,
              fillColor = ~palFreq(freq),
              label = lapply(labs, htmltools::HTML),
              highlightOptions = highlightOptions(color = "white",
                                                  weight = 2,
                                                  bringToFront = TRUE),
              group = "Turnout") %>%
  
  addPolygons(color = "#575757",
              weight = 1,
              smoothFactor = 0.8,
              opacity = 0.5,
              fillOpacity = 0.75,
              fillColor = ~palOpos(opos),
              label = lapply(labs, htmltools::HTML),
              highlightOptions = highlightOptions(color = "white",
                                                  weight = 2,
                                                  bringToFront = TRUE),
              group = "Oposition results") %>%
  
  addPolygons(color = "#575757",
              weight = 1,
              smoothFactor = 0.8,
              opacity = 0.5,
              fillOpacity = 0.75,
              fillColor = ~palUnemploy(unemployment),
              label = lapply(labs, htmltools::HTML),
              highlightOptions = highlightOptions(color = "white",
                                                  weight = 2,
                                                  bringToFront = TRUE),
              group = "Unemployment") %>%
  
  addLegend(position = "bottomleft" ,
            pal = palFreq,
            values = ~freq,
            opacity = .5,
            bins = 5,
            title = 'Election turnout',
            labFormat = labelFormat(suffix = '%'),
            group = "Turnout") %>% 
  
  addLegend(position = "bottomleft" ,
            pal = palOpos,
            values = ~opos*100,
            opacity = .5,
            bins = 5,
            title = 'Oposition results',
            labFormat = labelFormat(suffix = '%'),
            group = "Turnout") %>% 
  
  addLegend(position = "bottomleft" ,
            pal = palUnemploy,
            values = ~unemployment,
            opacity = .5,
            bins = 5,
            title = 'Unemployment rate',
            labFormat = labelFormat(suffix = '%'),
            group = "Turnout") %>% 
  
  addLayersControl(baseGroups = c("Turnout", "Oposition results", "Unemployment"),
                   options = layersControlOptions(collapsed = FALSE))

We can clearly see the higher voter turnout near the biggest Polish urban agglomerations: Warsaw, Poznan, Cracow, Wroclaw, Gdansk, Gdynia, Sopot. However, there are some non near the urban agglomerations communes with high turnout (e.g. Cisna, Godziszów, Kłodawa, Mielno.)

In terms of oposition results, there can be seen a clear difference between east and west of Poland.

Communes with high level of unemployment seem to get lower voter turnout, which may be a consequence of a sense of lack of agency.

5 Spatial weight matrices

We will create 5 different types of spatial weight matrices: contiguity matrix, symetrical matrix of nearest neighbors, matrix of neighbors within the constant range giving at least 1 neighbors for every commune, inverse distance matrix and inverse squared distance matrix.

Summary of the spatial weight matrix:

# Contiguity matrix
cont.nb <- poly2nb(as(com, "SpatialPolygons"))
cont.listw <- nb2listw(cont.nb, style="W")
cont.listw

## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 2477 
## Number of nonzero links: 14130 
## Percentage nonzero weights: 0.230298 
## Average number of links: 5.704481 
## 
## Weights style: W 
## Weights constants summary:
##      n      nn   S0       S1       S2
## W 2477 6135529 2477 951.8483 10338.64

# coordinates of nts4 units 
# (geometric center of gravity)
crds<-coordinates(com)
colnames(crds)<-c("cx", "cy")

# spatial weights matrix – k nearest neighbours
knn<-knearneigh(crds, k=1)
knn.nb<-knn2nb(knn)

We transform matrix to be symmetrical and we calculate at what distance all units have at least one neighbour.

sym.knn.nb <- make.sym.nb(knn.nb)

# knn as list class
knn.listw <- nb2listw(sym.knn.nb)

# matrix within the radius that gives at least 1 neighbour to every unit:
# at what distance all units have at least one neighbour?

# implicitly k=1, list of closests neighbours
kkk <- knn2nb(knearneigh(crds)) 

# max of distances between closests neighbours
all <- max(unlist(nbdists(kkk, crds))) 

# neighbours in radius of d km 
# guaranted that all regions have at least one neighbor
all.nb <- dnearneigh(crds, 0, all) 
radius.listw <- nb2listw(all.nb)

Additionally, we calculate inverse distance matrix and inverse squared distance matrix.

# spatial weights matrix - inverse distance matrix
# knn_inv<-knearneigh(crds, k=2476) # we have 2477 units on the map
knn_inv <- readRDS("data/knn_inv.rds")
knn_inv.nb <- knn2nb(knn_inv)
dist <- nbdists(knn_inv.nb, crds)  
dist1 <- lapply(dist, function(x) 1/x)  # object of listw class
knn_inv.dist.listw <- nb2listw(knn_inv.nb, glist=dist1)

# spatial weights matrix - inverse squared distance matrix
dist2<-lapply(dist, function(x) 1/x^2)  # object of listw class
knn_inv.dist2.listw <- nb2listw(knn_inv.nb, glist=dist2)

6 Initial spatial statistics for turnout

6.1 Global Moran I

We use global Moran I test with different listw objects to check spatial dependency of the neighbouring locations.

moran1<-moran.test(data$freq, cont.listw)
moran2<-moran.test(data$freq, knn.listw)
moran3<-moran.test(data$freq, radius.listw)
moran4<-moran.test(data$freq, knn_inv.dist.listw)
moran5<-moran.test(data$freq, knn_inv.dist2.listw)

listw_object <- c("cont.listw","knn.listw","radius.listw",
                  "knn_inv.dist.listw","knn_inv.dist2.listw")
Moran_standard_deviate <- c(moran1$statistic[[1]],moran2$statistic[[1]],moran3$statistic[[1]],
                            moran4$statistic[[1]],moran5$statistic[[1]])
Moran_p_value <- c(moran1$p.value[[1]],moran2$p.value[[1]],moran3$p.value[[1]],
                   moran4$p.value[[1]],moran5$p.value[[1]])
Moran_statistic <- c(moran1$estimate[[1]],moran2$estimate[[1]],moran3$estimate[[1]],
                     moran4$estimate[[1]],moran5$estimate[[1]])
Moran_expectation <- c(moran1$estimate[[2]],moran2$estimate[[2]],moran3$estimate[[2]],
                       moran4$estimate[[2]],moran5$estimate[[2]])
Moran_variance <- c(moran1$estimate[[3]],moran2$estimate[[3]],moran3$estimate[[3]],
                    moran4$estimate[[3]],moran5$estimate[[3]])
Moran_I_results <- data.frame(listw_object, Moran_statistic,Moran_standard_deviate,
                              Moran_p_value, Moran_expectation, Moran_variance)

knitr::kable(Moran_I_results,
 caption = "Moran tests results")

Moran tests results
listw_object	Moran_statistic	Moran_standard_deviate	Moran_expectation	Moran_variance
cont.listw	0.6867599	55.23474	-0.0004039	0.0001548
knn.listw	0.7076485	28.85822	-0.0004039	0.0006020
radius.listw	0.6366159	60.79280	-0.0004039	0.0001098
knn_inv.dist.listw	0.0927886	112.31017	-0.0004039	0.0000007
knn_inv.dist2.listw	0.3598668	56.03902	-0.0004039	0.0000413

Positive spatial autocorrelation in all test, however 4th test with knn_inv.dist.listw listw object presents the lowest Moran I statistics (0.093).

6.2 Moran scatterplot

We plot Moran I scatterplot using standardized turnout variable and coefficients from the linear regression. We also map the Moran I scatterplot quarts for the future analysis. In this part we used only standard contiguity matrix to limit the length of the output.

freq_s<-as.data.frame(scale(data$freq))

freq_l <- lag.listw(cont.listw, freq_s$V1) # spatial lag of x
morlm <- lm(freq_l~freq_s$V1) # linear regression            
summary(morlm)

## 
## Call:
## lm(formula = freq_l ~ freq_s$V1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.60576 -0.25135  0.03322  0.30488  1.77107 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.030481   0.009475  -3.217  0.00131 ** 
## freq_s$V1    0.686760   0.009477  72.465  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4716 on 2475 degrees of freedom
## Multiple R-squared:  0.6797, Adjusted R-squared:  0.6795 
## F-statistic:  5251 on 1 and 2475 DF,  p-value: < 2.2e-16

slope <- morlm$coefficients[2] # coefficient from regression
intercept <- morlm$coefficients[1] # constant term in regression

plot(freq_s$V1, freq_l, xlab="freq_s",ylab="spatial lag of freq_s", pch="*")  
abline(intercept, slope)  # regression line
abline(h=0, lty=2) # supplementary horizontal line y=0
abline(v=0, lty=2) # supplementary vertical line x=0

# Mapping of moranscatterplots quarts

cond1<-ifelse(freq_s>=0 & freq_l>=0, 1,0)
cond2<-ifelse(freq_s>=0 & freq_l<0, 2,0)
cond3<-ifelse(freq_s<0 & freq_l<0, 3,0)
cond4<-ifelse(freq_s<0 & freq_l>=0, 4,0)
cond.all<-cond1+cond2+cond3+cond4 # all quarters in one
cond<-as.data.frame(cond.all)

The map with all communes assigned to quarters is presented below:

brks<-c(1,2,3,4)
cols<-c("#22577A", "#38A3A5", "#57CC99", "#8AFFA5")
plot(com, col=cols[findInterval(cond$V1, brks)])
legend("bottomleft",
       legend=c("I Q - HH – high surrounded by high",
                "II Q - LH - low surrounded by high",
                "III Q - LL - low surrounded by low",
                "IV Q - HL - high surrounded by low"),
       fill=cols, bty="n", cex=0.80)
title(main="Mapping of Moran I scatterplot results
voter turnout in 2019 elections")

Now we use join-count test and plot the results on the charts below:

# join count test
var.factor<-factor(cut(data$freq, breaks=c(0, 50, 60, 100),
                       labels=c("low", "medium", "high")))


# parameters of graphics
brks1<-c(0, 50, 60, 100)
cols<-c("red", "blue", "green")

# scatterplot of values
plot(data$freq, bg=cols[findInterval(data$freq, brks1)], pch=21)
abline(h=c(50,60), lty=3)

# spatial distribution with three colours
plot(com, col=cols[findInterval(data$freq, brks1)])
plot(pov, add=TRUE, lwd=1)
title(main="Voter turnout in 2019 elections")
legend("bottomleft", legend=c("low", "medium", "high"),
       leglabs(brks1), fill=cols, bty="n")

Exemplary summary of the joincount.tests for first three basic matrices: contiguity, knn and radius:

joincount.test(var.factor, cont.listw)

## 
##  Join count test under nonfree sampling
## 
## data:  var.factor 
## weights: cont.listw 
## 
## Std. deviate for low = 35.297, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Same colour statistic           Expectation              Variance 
##            160.813690             60.975363              8.000348 
## 
## 
##  Join count test under nonfree sampling
## 
## data:  var.factor 
## weights: cont.listw 
## 
## Std. deviate for medium = 18.864, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Same colour statistic           Expectation              Variance 
##             434.19003             345.22536              22.24236 
## 
## 
##  Join count test under nonfree sampling
## 
## data:  var.factor 
## weights: cont.listw 
## 
## Std. deviate for high = 35.829, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Same colour statistic           Expectation              Variance 
##            188.281700             77.250000              9.603242

joincount.test(var.factor, knn.listw)

## 
##  Join count test under nonfree sampling
## 
## data:  var.factor 
## weights: knn.listw 
## 
## Std. deviate for low = 19.147, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Same colour statistic           Expectation              Variance 
##             164.66667              60.97536              29.32788 
## 
## 
##  Join count test under nonfree sampling
## 
## data:  var.factor 
## weights: knn.listw 
## 
## Std. deviate for medium = 10.889, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Same colour statistic           Expectation              Variance 
##             437.54167             345.22536              71.87856 
## 
## 
##  Join count test under nonfree sampling
## 
## data:  var.factor 
## weights: knn.listw 
## 
## Std. deviate for high = 21.613, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Same colour statistic           Expectation              Variance 
##             204.91667              77.25000              34.89112

joincount.test(var.factor, radius.listw)

## 
##  Join count test under nonfree sampling
## 
## data:  var.factor 
## weights: radius.listw 
## 
## Std. deviate for low = 38.784, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Same colour statistic           Expectation              Variance 
##            149.478203             60.975363              5.207157 
## 
## 
##  Join count test under nonfree sampling
## 
## data:  var.factor 
## weights: radius.listw 
## 
## Std. deviate for medium = 20.366, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Same colour statistic           Expectation              Variance 
##             415.62302             345.22536              11.94858 
## 
## 
##  Join count test under nonfree sampling
## 
## data:  var.factor 
## weights: radius.listw 
## 
## Std. deviate for high = 41.621, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Same colour statistic           Expectation              Variance 
##            180.621421             77.250000              6.168576

All results of the tests present spatial autocorrelation.

6.3 Local Moran I (LISA) statistics

locM <- localmoran(spNamedVec("freq", data), cont.listw)
data$id <- 1:nrow(data)
oid1 <- order(data$id)
locMorMat <- printCoefmat(data.frame(locM[oid1,], row.names=data$Kod[oid1]), check.names = FALSE)

We calculate Local Moran I statistics, which can be seen on the map below.

# map of the significance of Moran's local statistics
names(locMorMat)[5] <- "Prob"
brks <- c(min(locMorMat[,5]), 0.05000, 0.95000, max(locMorMat[,5]))
cols <- c("#66DE93", "#FFEAC9", "#FF616D")

plot(com, col=cols[findInterval(locMorMat[,5], brks)])
legend("bottomleft",
       legend=c("Surrounded by relatively high values, locM>0",
                "Insignificant",
                "Surrounded by relatively low values, locM<0"),
       fill=cols, bty="n", cex=0.75)
title(main=" Local Moran I statistics ", cex=0.7)
plot(pov, add=TRUE, lwd=2)

6.4 Moran I test for oposition results

This time we calculate Moran I test using one of the explanatory variables - oposition results - to compare autocorellation results.

moran1_opos <- moran.test(data$opos, cont.listw)
moran2_opos <- moran.test(data$opos, knn.listw)
moran3_opos <- moran.test(data$opos, radius.listw)
moran4_opos <- moran.test(data$opos, knn_inv.dist.listw)
moran5_opos <- moran.test(data$opos, knn_inv.dist2.listw)

listw_object <- c("cont.listw","knn.listw","radius.listw",
                  "knn_inv.dist.listw","knn_inv.dist2.listw")
Moran_standard_deviate <- 
  c(moran1_opos$statistic[[1]],moran2_opos$statistic[[1]],moran3_opos$statistic[[1]],
    moran4_opos$statistic[[1]],moran5_opos$statistic[[1]])
Moran_p_value <- 
  c(moran1_opos$p.value[[1]],moran2_opos$p.value[[1]],moran3_opos$p.value[[1]],
    moran4_opos$p.value[[1]],moran5_opos$p.value[[1]])
Moran_statistic <- 
  c(moran1_opos$estimate[[1]],moran2_opos$estimate[[1]],moran3_opos$estimate[[1]],
    moran4_opos$estimate[[1]],moran5_opos$estimate[[1]])
Moran_expectation <- 
  c(moran1_opos$estimate[[2]],moran2_opos$estimate[[2]],moran3_opos$estimate[[2]],
    moran4_opos$estimate[[2]],moran5_opos$estimate[[2]])
Moran_variance <- 
  c(moran1_opos$estimate[[3]],moran2_opos$estimate[[3]],moran3_opos$estimate[[3]],
    moran4_opos$estimate[[3]],moran5_opos$estimate[[3]])
Moran_I_opos_results <- data.frame(listw_object, Moran_statistic,Moran_standard_deviate,
                                   Moran_p_value, Moran_expectation, Moran_variance)

knitr::kable(Moran_I_opos_results,
 caption = "Moran I tests results for the oposition results")

Moran I tests results for the oposition results
listw_object	Moran_statistic	Moran_standard_deviate	Moran_expectation	Moran_variance
cont.listw	0.7705583	61.95724	-0.0004039	0.0001548
knn.listw	0.7780816	31.72207	-0.0004039	0.0006023
radius.listw	0.7327305	69.95032	-0.0004039	0.0001098
knn_inv.dist.listw	0.2335127	281.84257	-0.0004039	0.0000007
knn_inv.dist2.listw	0.5346506	83.20828	-0.0004039	0.0000413

The positive spatial autocorellation has been recognised in all tests, however the values of the statistics are significantly lower for knn_inv.dist.listw and knn_inv.dist2.listw objects.

7 Models

in this part of the report we will focus on the data modeling using different spatial models. Due to inconclusive results, we test the models on the changed data later in this chapter.

7.1 Initial model

At the beginning we use all available variables.

7.1.1 OLS

7.1.1.1 Regression and statistical tests

We start with classical linear regression model.

formula <- freq ~ expenses + femin + income + migration + people_density2019 + prework +
  postwork + benefit500 + unemployment + water + sewage + gas + opos + mean_enabled + place_area

model_lm <- lm(formula, data=data) 
summary(model_lm)

## 
## Call:
## lm(formula = formula, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.871  -2.473  -0.020   2.532  39.713 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         4.715e+00  3.497e+00   1.348    0.178    
## expenses            1.459e-05  3.837e-04   0.038    0.970    
## femin               1.242e-01  2.982e-02   4.163 3.25e-05 ***
## income              4.640e-04  3.705e-04   1.252    0.211    
## migration           4.087e-01  1.731e-02  23.618  < 2e-16 ***
## people_density2019  1.055e-03  2.590e-04   4.073 4.78e-05 ***
## prework             7.263e+01  9.389e+00   7.736 1.48e-14 ***
## postwork            8.655e+01  5.281e+00  16.388  < 2e-16 ***
## benefit500          1.422e+02  1.560e+01   9.115  < 2e-16 ***
## unemployment       -4.588e-01  3.770e-02 -12.170  < 2e-16 ***
## water              -3.664e-03  5.313e-03  -0.690    0.490    
## sewage              2.672e-02  4.191e-03   6.375 2.18e-10 ***
## gas                 3.219e-02  3.671e-03   8.767  < 2e-16 ***
## opos               -1.134e+01  8.571e-01 -13.228  < 2e-16 ***
## mean_enabled        2.201e-03  3.305e-04   6.660 3.37e-11 ***
## place_area         -6.708e-02  7.197e-03  -9.321  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.076 on 2461 degrees of freedom
## Multiple R-squared:  0.6505, Adjusted R-squared:  0.6484 
## F-statistic: 305.4 on 15 and 2461 DF,  p-value: < 2.2e-16

Expenses, income and water variables are insignificant but we’ll keep them for now because it may change for other models.

bptest(model_lm)

## 
##  studentized Breusch-Pagan test
## 
## data:  model_lm
## BP = 365.26, df = 15, p-value < 2.2e-16

According to Breusch-Pagan test, there is heteroscedasticity.

resettest(model_lm, power=2, type="regressor")

## 
##  RESET test
## 
## data:  model_lm
## RESET = 34.318, df1 = 15, df2 = 2446, p-value < 2.2e-16

Additionally, Ramsey’s RESET test for functional form indicates that the model is misspecified.

7.1.1.2 spatial distribution of OLS residuals

Summary of the model residuals:

summary(model_lm$residuals)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -24.8707  -2.4735  -0.0202   0.0000   2.5324  39.7129

OLS model residuals map:

res <- model_lm$residuals
brks <- c(min(res), mean(res)-sd(res), mean(res), mean(res)+sd(res), max(res))
cols <- c("steelblue4","lightskyblue","thistle1","plum3")
plot(com, col=cols[findInterval(res,brks)])
plot(pov, add=TRUE, lwd=2)
title(main="OLS model residuals")
legend("bottomleft",
       legend=c("<mean-sd", "(mean-sd, mean)", "(mean, mean+sd)", ">mean+sd"),
       leglabs(brks1), fill=cols, bty="n")

Global Moran I tests for autocorrelation of residuals from our OLS model for all matrices:

# contiguity matrix
moran1_lm <- lm.morantest(model_lm, cont.listw)

# resid<-factor(cut(model_lm$residuals, breaks=c(-100, 0, 100),
#                   labels=c("negative","positive")))
# joincount.test(resid, cont.listw)

# knn matrix
moran2_lm <- lm.morantest(model_lm, knn.listw)
# joincount.test(resid, knn.listw)

# radius matrix
moran3_lm <- lm.morantest(model_lm, radius.listw)
# joincount.test(resid, radius.listw)

# inverse distance matrix
moran4_lm <- lm.morantest(model_lm, knn_inv.dist.listw)

# squared inverse distance matrix
moran5_lm <- lm.morantest(model_lm, knn_inv.dist2.listw)
# joincount.test(resid, knn_inv.dist2.listw)


listw_object <- c("cont.listw","knn.listw","radius.listw","knn_inv.dist.listw","knn_inv.dist2.listw")

Moran_standard_deviate <- c(moran1_lm$statistic[[1]],moran2_lm$statistic[[1]],
                            moran3_lm$statistic[[1]],moran4_lm$statistic[[1]],
                            moran5_lm$statistic[[1]])
Moran_p_value <- c(moran1_lm$p.value[[1]],moran2_lm$p.value[[1]],moran3_lm$p.value[[1]],
                   moran4_lm$p.value[[1]],moran5_lm$p.value[[1]])
Moran_statistic <- c(moran1_lm$estimate[[1]],moran2_lm$estimate[[1]],moran3_lm$estimate[[1]],
                     moran4_lm$estimate[[1]],moran5_lm$estimate[[1]])
Moran_expectation <- c(moran1_lm$estimate[[2]],moran2_lm$estimate[[2]],moran3_lm$estimate[[2]],
                       moran4_lm$estimate[[2]],moran5_lm$estimate[[2]])
Moran_variance <- c(moran1_lm$estimate[[3]],moran2_lm$estimate[[3]],moran3_lm$estimate[[3]],
                    moran4_lm$estimate[[3]],moran5_lm$estimate[[3]])
Moran_I_lm_results <- data.frame(listw_object, Moran_statistic,
                                   Moran_standard_deviate, Moran_p_value,
                                   Moran_expectation, Moran_variance)

knitr::kable(Moran_I_lm_results,
 caption = "Moran tests results for the OLS model")

Moran tests results for the OLS model
listw_object	Moran_statistic	Moran_standard_deviate	Moran_expectation	Moran_variance
cont.listw	0.3825301	31.12357	-0.0026357	0.0001531
knn.listw	0.4116828	16.88732	-0.0026588	0.0006020
radius.listw	0.3350562	32.45285	-0.0024165	0.0001081
knn_inv.dist.listw	0.0326349	44.40208	-0.0007690	0.0000006
knn_inv.dist2.listw	0.1638443	25.98226	-0.0015918	0.0000405

Spatial autocorrelation is noticeable in each test result, which justifies using spatial methods, although Moran’s I statistics differ depending on the matrix.

7.2 Manski Model - GNS

This and some other models had problems when using standard formula of the code, ending in lacks of results for some components of the models. This was probably caused by some computational problems with Hessian matrix. Two methods allowed us to omit this problem:

Providing a vector of powered spatial weights matrix traces with trs argument. This vector is also highly recommended by authors of “spdep” package when using the numerical Hessian to get the standard error of lambda. It may be used to get around some problems raised when the numerical Hessian is poorly conditioned and generates NaNs. It did not always deal with our problem, hence we also had to use 2nd method.
Removing method = ‘LU’ argument, which resulted in much longer computational time needed to estimate the models but generally solved the problem when 1st method wasn’t enough.

Let’s start with the models using contiguity matrix.

GNS_1<-sacsarlm(formula, data=data, listw=cont.listw, type="sacmixed")
summary(GNS_1)

## 
## Call:sacsarlm(formula = formula, data = data, listw = cont.listw, 
##     type = "sacmixed")
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -15.891674  -1.772866  -0.019107   1.708025  28.061447 
## 
## Type: sacmixed 
## Coefficients: (asymptotic standard errors) 
##                           Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)            -9.5534e-01  3.7926e+00 -0.2519  0.801121
## expenses               -3.1407e-05  2.8621e-04 -0.1097  0.912620
## femin                   1.0231e-01  2.3699e-02  4.3170 1.582e-05
## income                  3.4560e-04  2.7617e-04  1.2514  0.210781
## migration               2.6141e-01  1.6205e-02 16.1321 < 2.2e-16
## people_density2019      8.5456e-05  2.2043e-04  0.3877  0.698256
## prework                 2.1939e+01  8.3932e+00  2.6138  0.008953
## postwork                4.1724e+01  5.1938e+00  8.0335 8.882e-16
## benefit500              3.2055e+01  1.5209e+01  2.1077  0.035058
## unemployment           -3.4309e-01  4.6521e-02 -7.3749 1.645e-13
## water                   8.4699e-03  5.4938e-03  1.5417  0.123143
## sewage                  2.1624e-02  3.4778e-03  6.2177 5.046e-10
## gas                     3.0057e-02  3.7928e-03  7.9247 2.220e-15
## opos                   -1.6905e+00  1.2509e+00 -1.3515  0.176549
## mean_enabled            7.1191e-04  2.6209e-04  2.7163  0.006602
## place_area             -4.9895e-02  6.3922e-03 -7.8056 5.995e-15
## lag.expenses           -1.0328e-03  5.4486e-04 -1.8955  0.058032
## lag.femin              -1.1087e-01  3.7652e-02 -2.9447  0.003233
## lag.income              9.6450e-04  5.2830e-04  1.8257  0.067899
## lag.migration          -1.1146e-01  2.6287e-02 -4.2403 2.232e-05
## lag.people_density2019  1.0971e-03  4.1487e-04  2.6445  0.008181
## lag.prework             7.2591e+00  1.2055e+01  0.6021  0.547083
## lag.postwork           -1.3951e+01  7.2652e+00 -1.9202  0.054829
## lag.benefit500         -4.7201e+00  2.0768e+01 -0.2273  0.820208
## lag.unemployment        2.9984e-01  5.8229e-02  5.1493 2.615e-07
## lag.water              -8.4619e-03  6.7496e-03 -1.2537  0.209952
## lag.sewage             -1.3304e-02  5.1977e-03 -2.5596  0.010480
## lag.gas                -2.7144e-02  5.0546e-03 -5.3701 7.871e-08
## lag.opos               -2.1750e+00  1.4413e+00 -1.5090  0.131292
## lag.mean_enabled        3.4868e-04  4.2863e-04  0.8135  0.415948
## lag.place_area          4.0750e-02  9.0682e-03  4.4937 7.000e-06
## 
## Rho: 0.78981
## Asymptotic standard error: 0.023853
##     z-value: 33.112, p-value: < 2.22e-16
## Lambda: -0.32952
## Asymptotic standard error: 0.056677
##     z-value: -5.814, p-value: 6.101e-09
## 
## LR test value: 1303.4, p-value: < 2.22e-16
## 
## Log likelihood: -6335.551 for sacmixed model
## ML residual variance (sigma squared): 8.2213, (sigma: 2.8673)
## Number of observations: 2477 
## Number of parameters estimated: 34 
## AIC: 12739, (AIC for lm: 14009)

Expenses, income, density, water and oposition results variables are statistically insignificant.
Spatial lags: prework population, benefit500, water, oposition results, mean_enabled are statistically insignificant. RHO and Lambda are statistically significant, but they have opposite signs and it looks they reduce each other, thus there are too many components in the model. AIC equals 12739 and is much smaller than linear model AIC (14009), but due to the mentioned opposite effects of the Rho and Lambda, we look for a solution in spatial models with 2 components.

7.2.1 Keejian-Prucha model - SAC

SAC_1<-sacsarlm(formula, data=data, listw=cont.listw, method = 'LU')
summary(SAC_1)

## 
## Call:sacsarlm(formula = formula, data = data, listw = cont.listw, 
##     method = "LU")
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -17.902695  -1.938867  -0.031427   1.886621  32.226287 
## 
## Type: sac 
## Coefficients: (numerical Hessian approximate standard errors) 
##                       Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)        -7.1793e+00  3.0415e+00 -2.3605   0.01825
## expenses           -7.1207e-05  2.8542e-04 -0.2495   0.80299
## femin               1.0361e-01  2.4165e-02  4.2877 1.805e-05
## income              4.2914e-04  2.7550e-04  1.5577   0.11931
## migration           2.6072e-01  1.5410e-02 16.9190 < 2.2e-16
## people_density2019  3.9368e-04  2.1313e-04  1.8471   0.06473
## prework             3.6092e+01  7.7959e+00  4.6296 3.664e-06
## postwork            5.7790e+01  4.5094e+00 12.8156 < 2.2e-16
## benefit500          9.0041e+01  1.3186e+01  6.8285 8.583e-12
## unemployment       -2.2555e-01  3.7896e-02 -5.9518 2.652e-09
## water               3.5233e-03  5.1295e-03  0.6869   0.49217
## sewage              2.7396e-02  3.4488e-03  7.9436 1.998e-15
## gas                 2.1984e-02  3.3041e-03  6.6535 2.862e-11
## opos               -5.4559e+00  7.8338e-01 -6.9645 3.295e-12
## mean_enabled        1.0655e-03  2.6571e-04  4.0099 6.075e-05
## place_area         -5.0569e-02  6.2041e-03 -8.1509 4.441e-16
## 
## Rho: 0.49463
## Approximate (numerical Hessian) standard error: 0.027921
##     z-value: 17.715, p-value: < 2.22e-16
## Lambda: 0.20518
## Approximate (numerical Hessian) standard error: 0.053339
##     z-value: 3.8466, p-value: 0.00011976
## 
## LR test value: 1094.9, p-value: < 2.22e-16
## 
## Log likelihood: -6439.823 for sac model
## ML residual variance (sigma squared): 10.023, (sigma: 3.166)
## Number of observations: 2477 
## Number of parameters estimated: 19 
## AIC: 12918, (AIC for lm: 14009)

There are still some insignificant variables (expenses, income, people density, water), but this time Rho (0.49463) and Lambda (0.20518) have effects in the same direction (although they are weaker than before). On the other hand AIC (12918) is higher than in Manski Model, but still much lower than in linear model.

moran.test(SAC_1$residuals, cont.listw)

## 
##  Moran I test under randomisation
## 
## data:  SAC_1$residuals  
## weights: cont.listw    
## 
## Moran I statistic standard deviate = -0.77203, p-value = 0.78
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##     -0.0099985324     -0.0004038772      0.0001544501

Moran I test shows that the problem of spatial autocorrelation of residuals is no longer present.

7.2.2 Spatial Durbin Model

SDM_1<-lagsarlm(formula, data=data, listw=cont.listw, type="mixed") 
summary(SDM_1)

## 
## Call:lagsarlm(formula = formula, data = data, listw = cont.listw, 
##     type = "mixed")
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -16.289088  -1.896114  -0.012192   1.762914  30.107762 
## 
## Type: mixed 
## Coefficients: (asymptotic standard errors) 
##                           Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)             9.9107e-02  4.8451e+00  0.0205  0.983680
## expenses               -7.4662e-05  2.8696e-04 -0.2602  0.794723
## femin                   1.0588e-01  2.3504e-02  4.5050 6.638e-06
## income                  3.9910e-04  2.7685e-04  1.4416  0.149415
## migration               2.6063e-01  1.6059e-02 16.2294 < 2.2e-16
## people_density2019     -5.8407e-05  2.2034e-04 -0.2651  0.790950
## prework                 2.3071e+01  8.2268e+00  2.8044  0.005041
## postwork                4.4292e+01  5.0848e+00  8.7107 < 2.2e-16
## benefit500              3.5387e+01  1.4964e+01  2.3648  0.018041
## unemployment           -3.3491e-01  4.6155e-02 -7.2562 3.981e-13
## water                   9.3997e-03  5.3808e-03  1.7469  0.080656
## sewage                  2.1851e-02  3.4371e-03  6.3573 2.053e-10
## gas                     2.8802e-02  3.7222e-03  7.7378 1.021e-14
## opos                   -1.6820e+00  1.2302e+00 -1.3673  0.171543
## mean_enabled            7.9281e-04  2.6062e-04  3.0420  0.002350
## place_area             -4.9828e-02  6.3166e-03 -7.8885 3.109e-15
## lag.expenses           -1.2980e-03  6.0761e-04 -2.1362  0.032660
## lag.femin              -1.2056e-01  4.1763e-02 -2.8869  0.003891
## lag.income              1.3198e-03  5.8803e-04  2.2444  0.024805
## lag.migration          -3.4660e-02  2.6758e-02 -1.2953  0.195201
## lag.people_density2019  1.8350e-03  4.6190e-04  3.9727 7.106e-05
## lag.prework             2.0620e+01  1.2896e+01  1.5989  0.109840
## lag.postwork            3.5105e-01  7.5028e+00  0.0468  0.962681
## lag.benefit500          1.4438e+01  2.2203e+01  0.6503  0.515524
## lag.unemployment        2.3931e-01  6.0708e-02  3.9421 8.078e-05
## lag.water              -1.0431e-02  7.1438e-03 -1.4602  0.144243
## lag.sewage             -9.7418e-03  5.6857e-03 -1.7134  0.086644
## lag.gas                -2.2034e-02  5.4215e-03 -4.0642 4.820e-05
## lag.opos               -4.1206e+00  1.4644e+00 -2.8137  0.004897
## lag.mean_enabled        8.4139e-04  4.7262e-04  1.7803  0.075034
## lag.place_area          3.2161e-02  9.8749e-03  3.2569  0.001126
## 
## Rho: 0.64511, LR test value: 821.37, p-value: < 2.22e-16
## Asymptotic standard error: 0.019432
##     z-value: 33.198, p-value: < 2.22e-16
## Wald statistic: 1102.1, p-value: < 2.22e-16
## 
## Log likelihood: -6345.8 for mixed model
## ML residual variance (sigma squared): 8.9774, (sigma: 2.9962)
## Number of observations: 2477 
## Number of parameters estimated: 33 
## AIC: 12758, (AIC for lm: 13577)
## LM test for residual autocorrelation
## test value: 15.121, p-value: 0.00010082

There are many many insignificant variables and lags, although only water has both of them insignificant (and it is close to being significant). Rho is significant and equal to 0.64511, so it’s almost as much as both effects from SAC jointly. Also AIC is much closer to the one from Manski model, but insignificant variables are still a problem.

moran.test(SDM_1$residuals, cont.listw)

## 
##  Moran I test under randomisation
## 
## data:  SDM_1$residuals  
## weights: cont.listw    
## 
## Moran I statistic standard deviate = -1.5748, p-value = 0.9423
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##     -0.0199754273     -0.0004038772      0.0001544614

Similar to the SAC model, Moran I test shows that the problem of spatial autocorrelation of residuals is no longer present.

7.2.3 Spatial Durbin Error Model

SDEM_1<-errorsarlm(formula, data=data, listw=cont.listw, etype="emixed", method="LU")
summary(SDEM_1)

## 
## Call:errorsarlm(formula = formula, data = data, listw = cont.listw, 
##     etype = "emixed", method = "LU")
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -16.415926  -1.859265   0.003355   1.820837  30.440312 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                           Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)            13.69149843  8.60279634  1.5915 0.1114932
## expenses               -0.00035786  0.00031853 -1.1235 0.2612369
## femin                   0.08697045  0.02536962  3.4281 0.0006077
## income                  0.00072741  0.00030835  2.3590 0.0183233
## migration               0.28512749  0.01646089 17.3215 < 2.2e-16
## people_density2019      0.00022990  0.00022435  1.0247 0.3054843
## prework                32.18287036  8.41859140  3.8228 0.0001319
## postwork               54.05511495  5.03709850 10.7314 < 2.2e-16
## benefit500             51.04013987 14.77578086  3.4543 0.0005517
## unemployment           -0.33058869  0.04320895 -7.6509 1.998e-14
## water                   0.00778959  0.00522940  1.4896 0.1363359
## sewage                  0.02174158  0.00363151  5.9869 2.139e-09
## gas                     0.02784346  0.00360732  7.7186 1.177e-14
## opos                   -3.07465001  1.13679940 -2.7047 0.0068375
## mean_enabled            0.00109234  0.00027741  3.9376 8.231e-05
## place_area             -0.04996866  0.00644155 -7.7572 8.660e-15
## lag.expenses           -0.00207122  0.00083046 -2.4941 0.0126287
## lag.femin              -0.12466785  0.06078356 -2.0510 0.0402657
## lag.income              0.00240668  0.00080497  2.9898 0.0027920
## lag.migration           0.22784524  0.03484717  6.5384 6.217e-11
## lag.people_density2019  0.00413944  0.00064536  6.4142 1.416e-10
## lag.prework            57.19489333 18.54866861  3.0835 0.0020458
## lag.postwork           46.14464073 10.41495638  4.4306 9.397e-06
## lag.benefit500         85.92281121 31.82908622  2.6995 0.0069442
## lag.unemployment       -0.00953519  0.07740618 -0.1232 0.9019615
## lag.water              -0.01431609  0.01049493 -1.3641 0.1725377
## lag.sewage              0.00231184  0.00840053  0.2752 0.7831616
## lag.gas                -0.00099799  0.00787379 -0.1267 0.8991399
## lag.opos               -9.88802476  1.83663826 -5.3838 7.294e-08
## lag.mean_enabled        0.00217559  0.00068879  3.1586 0.0015854
## lag.place_area         -0.00674622  0.01453365 -0.4642 0.6425191
## 
## Lambda: 0.66417, LR test value: 810.51, p-value: < 2.22e-16
## Approximate (numerical Hessian) standard error: 0.01903
##     z-value: 34.9, p-value: < 2.22e-16
## Wald statistic: 1218, p-value: < 2.22e-16
## 
## Log likelihood: -6351.226 for error model
## ML residual variance (sigma squared): 8.9566, (sigma: 2.9928)
## Number of observations: 2477 
## Number of parameters estimated: 33 
## AIC: 12768, (AIC for lm: 13577)

Water variable stays insignificant, but income is now significant and the rest of insignificant variables (expenses and people_density) have significant lags. However, there are still too many insignificant lags. Lambda is strong, similar to the Rho in SDM. Unfortunately the AIC is a little bit higher now than for SDM, but we have much more significant variables, so from this point of view it is probably better model, more free from redundant variables.

moran.test(SDEM_1$residuals, cont.listw)

## 
##  Moran I test under randomisation
## 
## data:  SDEM_1$residuals  
## weights: cont.listw    
## 
## Moran I statistic standard deviate = -2.0003, p-value = 0.9773
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##     -0.0252624851     -0.0004038772      0.0001544483

And according to the Moran I test it deals with spatial autocorrelation very well.

7.2.4 SAR, SLX and SEM models

Many insignificant lags may suggest that it is worth checking even more simple models.

7.2.4.1 SAR

SAR_1<-lagsarlm(formula, data=data, listw=cont.listw, method="LU")
summary(SAR_1)

## 
## Call:lagsarlm(formula = formula, data = data, listw = cont.listw, 
##     method = "LU")
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -17.641629  -1.943728  -0.034409   1.876567  31.628286 
## 
## Type: lag 
## Coefficients: (numerical Hessian approximate standard errors) 
##                       Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)        -9.73925118  2.74865664 -3.5433 0.0003952
## expenses           -0.00020362  0.00029251 -0.6961 0.4863614
## femin               0.08998938  0.02318106  3.8820 0.0001036
## income              0.00056039  0.00028110  1.9936 0.0461969
## migration           0.24254860  0.01406142 17.2492 < 2.2e-16
## people_density2019  0.00053357  0.00019888  2.6829 0.0072976
## prework            35.72024783  7.31360534  4.8841 1.039e-06
## postwork           55.75718850  4.16120191 13.3993 < 2.2e-16
## benefit500         86.11063150 12.13993854  7.0932 1.311e-12
## unemployment       -0.17171737  0.03009231 -5.7064 1.154e-08
## water               0.00329279  0.00402337  0.8184 0.4131195
## sewage              0.02783850  0.00324190  8.5871 < 2.2e-16
## gas                 0.01828685  0.00285955  6.3950 1.605e-10
## opos               -5.23367067  0.68130874 -7.6818 1.577e-14
## mean_enabled        0.00103451  0.00025475  4.0608 4.890e-05
## place_area         -0.04316315  0.00560917 -7.6951 1.421e-14
## 
## Rho: 0.57206, LR test value: 1079.9, p-value: < 2.22e-16
## Approximate (numerical Hessian) standard error: 0.014676
##     z-value: 38.98, p-value: < 2.22e-16
## Wald statistic: 1519.4, p-value: < 2.22e-16
## 
## Log likelihood: -6447.282 for lag model
## ML residual variance (sigma squared): 9.9664, (sigma: 3.157)
## Number of observations: 2477 
## Number of parameters estimated: 18 
## AIC: 12931, (AIC for lm: 14009)

moran.test(SAR_1$residuals, cont.listw)

## 
##  Moran I test under randomisation
## 
## data:  SAR_1$residuals  
## weights: cont.listw    
## 
## Moran I statistic standard deviate = 2.4047, p-value = 0.008094
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.0294822644     -0.0004038772      0.0001544661

Expenses and water are insignificant Rho is significant but AIC is quite high, comparing to the previous models so we may be lacking some information here. Moran I shows that indeed we still have spatial autocorrelation of residuals in this model so it is not enough to have only Rho.

7.2.4.2 SLX

SLX_1<-lmSLX(formula, data=data, listw=cont.listw)
summary(SLX_1)

## 
## Call:
## lm(formula = formula(paste("y ~ ", paste(colnames(x)[-1], collapse = "+"))), 
##     data = as.data.frame(x), weights = weights)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.703  -2.285  -0.024   2.280  37.748 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -9.179e+00  6.023e+00  -1.524 0.127663    
## expenses               -3.344e-04  3.565e-04  -0.938 0.348390    
## femin                   9.799e-02  2.922e-02   3.354 0.000809 ***
## income                  7.062e-04  3.439e-04   2.054 0.040109 *  
## migration               2.872e-01  1.995e-02  14.395  < 2e-16 ***
## people_density2019     -1.632e-04  2.739e-04  -0.596 0.551372    
## prework                 3.278e+01  1.022e+01   3.207 0.001360 ** 
## postwork                5.707e+01  6.310e+00   9.045  < 2e-16 ***
## benefit500              4.909e+01  1.860e+01   2.639 0.008357 ** 
## unemployment           -3.069e-01  5.737e-02  -5.349 9.68e-08 ***
## water                   6.154e-03  6.689e-03   0.920 0.357683    
## sewage                  2.578e-02  4.273e-03   6.034 1.84e-09 ***
## gas                     2.871e-02  4.627e-03   6.205 6.42e-10 ***
## opos                   -3.460e+00  1.528e+00  -2.264 0.023664 *  
## mean_enabled            1.120e-03  3.238e-04   3.461 0.000548 ***
## place_area             -4.862e-02  7.853e-03  -6.192 6.96e-10 ***
## lag.expenses           -1.663e-03  7.547e-04  -2.203 0.027680 *  
## lag.femin               5.611e-03  5.190e-02   0.108 0.913917    
## lag.income              2.049e-03  7.294e-04   2.810 0.005001 ** 
## lag.migration           2.574e-01  3.128e-02   8.231 2.99e-16 ***
## lag.people_density2019  3.189e-03  5.695e-04   5.600 2.38e-08 ***
## lag.prework             7.879e+01  1.589e+01   4.957 7.65e-07 ***
## lag.postwork            5.864e+01  9.013e+00   6.506 9.31e-11 ***
## lag.benefit500          9.951e+01  2.738e+01   3.634 0.000285 ***
## lag.unemployment       -3.896e-02  7.493e-02  -0.520 0.603206    
## lag.water              -7.421e-03  8.881e-03  -0.836 0.403463    
## lag.sewage              8.899e-03  7.036e-03   1.265 0.206117    
## lag.gas                -6.142e-03  6.704e-03  -0.916 0.359668    
## lag.opos               -1.211e+01  1.801e+00  -6.725 2.18e-11 ***
## lag.mean_enabled        2.899e-03  5.827e-04   4.976 6.95e-07 ***
## lag.place_area         -7.395e-03  1.220e-02  -0.606 0.544415    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.725 on 2446 degrees of freedom
## Multiple R-squared:   0.71,  Adjusted R-squared:  0.7064 
## F-statistic: 199.6 on 30 and 2446 DF,  p-value: < 2.2e-16

moran.test(SLX_1$residuals, cont.listw)

## 
##  Moran I test under randomisation
## 
## data:  SLX_1$residuals  
## weights: cont.listw    
## 
## Moran I statistic standard deviate = 32.595, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.4046348681     -0.0004038772      0.0001544189

Many variables and lags were insignificant up to now and they are still insignificant even with this simplest model with only Thetas as spatial components. Moreover this model absolutely doesn’t deal with autocorrelation.

7.2.4.3 SEM

SEM_1<-errorsarlm(formula, data=data, listw=cont.listw, method="LU")
summary(SEM_1)

## 
## Call:errorsarlm(formula = formula, data = data, listw = cont.listw, 
##     method = "LU")
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -15.066525  -1.869655  -0.044967   1.844696  28.214724 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                       Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)         2.6024e+01  2.9664e+00  8.7727 < 2.2e-16
## expenses            2.4886e-04  2.8494e-04  0.8734 0.3824526
## femin               1.1816e-01  2.3512e-02  5.0254 5.023e-07
## income              4.6648e-05  2.7452e-04  0.1699 0.8650658
## migration           2.6513e-01  1.5844e-02 16.7333 < 2.2e-16
## people_density2019  2.7781e-04  2.1881e-04  1.2696 0.2042227
## prework             2.7035e+01  8.2867e+00  3.2625 0.0011045
## postwork            4.2664e+01  5.0981e+00  8.3686 < 2.2e-16
## benefit500          5.1730e+01  1.4985e+01  3.4521 0.0005563
## unemployment       -3.8823e-01  4.4505e-02 -8.7233 < 2.2e-16
## water               6.5700e-03  5.4313e-03  1.2096 0.2264166
## sewage              2.1424e-02  3.4611e-03  6.1900 6.017e-10
## gas                 3.1381e-02  3.6972e-03  8.4879 < 2.2e-16
## opos               -2.7399e+00  1.1574e+00 -2.3672 0.0179230
## mean_enabled        7.1796e-04  2.6290e-04  2.7309 0.0063161
## place_area         -5.4007e-02  6.3794e-03 -8.4658 < 2.2e-16
## 
## Lambda: 0.78648, LR test value: 1035.2, p-value: < 2.22e-16
## Approximate (numerical Hessian) standard error: 0.015118
##     z-value: 52.022, p-value: < 2.22e-16
## Wald statistic: 2706.3, p-value: < 2.22e-16
## 
## Log likelihood: -6469.68 for error model
## ML residual variance (sigma squared): 9.3426, (sigma: 3.0566)
## Number of observations: 2477 
## Number of parameters estimated: 18 
## AIC: 12975, (AIC for lm: 14009)

moran.test(SEM_1$residuals, cont.listw)

## 
##  Moran I test under randomisation
## 
## data:  SEM_1$residuals  
## weights: cont.listw    
## 
## Moran I statistic standard deviate = -4.6134, p-value = 1
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##     -0.0577573605     -0.0004038772      0.0001545530

Spatial Error Model has quite many insignificant variables, not only expenses and water, but also income and people density. Its AIC is also very high, so it can not be considered as a good model either.

7.2.5 Comparison of the models

We use Likelihood ratio test to compare the models.

# LR.sarlm(GNS_1, SAC_1)
# LR.sarlm(GNS_1, SDEM_1)
# LR.sarlm(GNS_1, SDM_1)
# LR.sarlm(GNS_1, SAR_1)
# LR.sarlm(GNS_1, SLX_1) 
# LR.sarlm(GNS_1, SEM_1)
model_of_comparison <- c("SAC","SDEM","SDM","SAR","SLX","SEM")
Likelihood_ratio <- c(LR.sarlm(GNS_1, SAC_1)[[1]][[1]], LR.sarlm(GNS_1, SDEM_1)[[1]][[1]],
                      LR.sarlm(GNS_1, SDM_1)[[1]][[1]], LR.sarlm(GNS_1, SAR_1)[[1]][[1]],
                      LR.sarlm(GNS_1, SLX_1)[[1]][[1]], LR.sarlm(GNS_1, SEM_1)[[1]][[1]])
df <- c(LR.sarlm(GNS_1, SAC_1)[[2]][[1]],LR.sarlm(GNS_1, SDEM_1)[[2]][[1]],
        LR.sarlm(GNS_1, SDM_1)[[2]][[1]],LR.sarlm(GNS_1, SAR_1)[[2]][[1]],
        LR.sarlm(GNS_1, SLX_1)[[2]][[1]],LR.sarlm(GNS_1, SEM_1)[[2]][[1]])
p_value <- c(LR.sarlm(GNS_1, SAC_1)[[3]][[1]],LR.sarlm(GNS_1, SDEM_1)[[3]][[1]],
             LR.sarlm(GNS_1, SDM_1)[[3]][[1]], LR.sarlm(GNS_1, SAR_1)[[3]][[1]],
             LR.sarlm(GNS_1, SLX_1)[[3]][[1]],LR.sarlm(GNS_1, SEM_1)[[3]][[1]])
Log_likelihood_GNS <- c(LR.sarlm(GNS_1, SAC_1)[[4]][[1]],LR.sarlm(GNS_1, SDEM_1)[[4]][[1]],
                        LR.sarlm(GNS_1, SDM_1)[[4]][[1]],LR.sarlm(GNS_1, SAR_1)[[4]][[1]],
                        LR.sarlm(GNS_1, SLX_1)[[4]][[1]],LR.sarlm(GNS_1, SEM_1)[[4]][[1]])
Log_likelihood_comparison <- c(LR.sarlm(GNS_1, SAC_1)[[4]][[2]],LR.sarlm(GNS_1, SDEM_1)[[4]][[2]],
                        LR.sarlm(GNS_1, SDM_1)[[4]][[2]],LR.sarlm(GNS_1, SAR_1)[[4]][[2]],
                        LR.sarlm(GNS_1, SLX_1)[[4]][[2]],LR.sarlm(GNS_1, SEM_1)[[4]][[2]])

comparison_df <- data.frame(model_of_comparison, Likelihood_ratio, df, p_value,
                            Log_likelihood_GNS, Log_likelihood_comparison)

knitr::kable(comparison_df,
 caption = "Comparison of the models - GNS vs others")

Comparison of the models - GNS vs others
model_of_comparison	Likelihood_ratio	df	p_value	Log_likelihood_GNS	Log_likelihood_comparison
SAC	208.54547	15	0e+00	-6335.551	-6439.823
SDEM	31.35071	1	0e+00	-6335.551	-6351.226
SDM	20.49814	1	6e-06	-6335.551	-6345.800
SAR	223.46232	16	0e+00	-6335.551	-6447.282
SLX	841.86538	2	0e+00	-6335.551	-6756.483
SEM	268.25931	16	0e+00	-6335.551	-6469.680

Results of the likelihood ratio test suggest that GNS is the best model in the compared group of models.

Let’s compare SAR and SEM with the SDM and SDEM models.

LR.sarlm(SDM_1, SAR_1)

## 
##  Likelihood ratio for spatial linear models
## 
## data:  
## Likelihood ratio = 202.96, df = 15, p-value < 2.2e-16
## sample estimates:
## Log likelihood of SDM_1 Log likelihood of SAR_1 
##               -6345.800               -6447.282

LR.sarlm(SDEM_1, SEM_1)

## 
##  Likelihood ratio for spatial linear models
## 
## data:  
## Likelihood ratio = 236.91, df = 15, p-value < 2.2e-16
## sample estimates:
## Log likelihood of SDEM_1  Log likelihood of SEM_1 
##                -6351.226                -6469.680

screenreg(list(GNS_1, SAC_1, SDM_1, SDEM_1, SAR_1, SEM_1))

## 
## ==========================================================================================================
##                         Model 1       Model 2       Model 3       Model 4       Model 5       Model 6     
## ----------------------------------------------------------------------------------------------------------
## rho                         0.79 ***      0.49 ***      0.65 ***                    0.57 ***              
##                            (0.02)        (0.03)        (0.02)                      (0.01)                 
## (Intercept)                -0.96         -7.18 *        0.10         13.69         -9.74 ***     26.02 ***
##                            (3.79)        (3.04)        (4.85)        (8.60)        (2.75)        (2.97)   
## expenses                   -0.00         -0.00         -0.00         -0.00         -0.00          0.00    
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## femin                       0.10 ***      0.10 ***      0.11 ***      0.09 ***      0.09 ***      0.12 ***
##                            (0.02)        (0.02)        (0.02)        (0.03)        (0.02)        (0.02)   
## income                      0.00          0.00          0.00          0.00 *        0.00 *        0.00    
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## migration                   0.26 ***      0.26 ***      0.26 ***      0.29 ***      0.24 ***      0.27 ***
##                            (0.02)        (0.02)        (0.02)        (0.02)        (0.01)        (0.02)   
## people_density2019          0.00          0.00         -0.00          0.00          0.00 **       0.00    
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## prework                    21.94 **      36.09 ***     23.07 **      32.18 ***     35.72 ***     27.03 ** 
##                            (8.39)        (7.80)        (8.23)        (8.42)        (7.31)        (8.29)   
## postwork                   41.72 ***     57.79 ***     44.29 ***     54.06 ***     55.76 ***     42.66 ***
##                            (5.19)        (4.51)        (5.08)        (5.04)        (4.16)        (5.10)   
## benefit500                 32.06 *       90.04 ***     35.39 *       51.04 ***     86.11 ***     51.73 ***
##                           (15.21)       (13.19)       (14.96)       (14.78)       (12.14)       (14.99)   
## unemployment               -0.34 ***     -0.23 ***     -0.33 ***     -0.33 ***     -0.17 ***     -0.39 ***
##                            (0.05)        (0.04)        (0.05)        (0.04)        (0.03)        (0.04)   
## water                       0.01          0.00          0.01          0.01          0.00          0.01    
##                            (0.01)        (0.01)        (0.01)        (0.01)        (0.00)        (0.01)   
## sewage                      0.02 ***      0.03 ***      0.02 ***      0.02 ***      0.03 ***      0.02 ***
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## gas                         0.03 ***      0.02 ***      0.03 ***      0.03 ***      0.02 ***      0.03 ***
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## opos                       -1.69         -5.46 ***     -1.68         -3.07 **      -5.23 ***     -2.74 *  
##                            (1.25)        (0.78)        (1.23)        (1.14)        (0.68)        (1.16)   
## mean_enabled                0.00 **       0.00 ***      0.00 **       0.00 ***      0.00 ***      0.00 ** 
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## place_area                 -0.05 ***     -0.05 ***     -0.05 ***     -0.05 ***     -0.04 ***     -0.05 ***
##                            (0.01)        (0.01)        (0.01)        (0.01)        (0.01)        (0.01)   
## lag.expenses               -0.00                       -0.00 *       -0.00 *                              
##                            (0.00)                      (0.00)        (0.00)                               
## lag.femin                  -0.11 **                    -0.12 **      -0.12 *                              
##                            (0.04)                      (0.04)        (0.06)                               
## lag.income                  0.00                        0.00 *        0.00 **                             
##                            (0.00)                      (0.00)        (0.00)                               
## lag.migration              -0.11 ***                   -0.03          0.23 ***                            
##                            (0.03)                      (0.03)        (0.03)                               
## lag.people_density2019      0.00 **                     0.00 ***      0.00 ***                            
##                            (0.00)                      (0.00)        (0.00)                               
## lag.prework                 7.26                       20.62         57.19 **                             
##                           (12.06)                     (12.90)       (18.55)                               
## lag.postwork              -13.95                        0.35         46.14 ***                            
##                            (7.27)                      (7.50)       (10.41)                               
## lag.benefit500             -4.72                       14.44         85.92 **                             
##                           (20.77)                     (22.20)       (31.83)                               
## lag.unemployment            0.30 ***                    0.24 ***     -0.01                                
##                            (0.06)                      (0.06)        (0.08)                               
## lag.water                  -0.01                       -0.01         -0.01                                
##                            (0.01)                      (0.01)        (0.01)                               
## lag.sewage                 -0.01 *                     -0.01          0.00                                
##                            (0.01)                      (0.01)        (0.01)                               
## lag.gas                    -0.03 ***                   -0.02 ***     -0.00                                
##                            (0.01)                      (0.01)        (0.01)                               
## lag.opos                   -2.18                       -4.12 **      -9.89 ***                            
##                            (1.44)                      (1.46)        (1.84)                               
## lag.mean_enabled            0.00                        0.00          0.00 **                             
##                            (0.00)                      (0.00)        (0.00)                               
## lag.place_area              0.04 ***                    0.03 **      -0.01                                
##                            (0.01)                      (0.01)        (0.01)                               
## lambda                     -0.33 ***      0.21 ***                    0.66 ***                    0.79 ***
##                            (0.06)        (0.05)                      (0.02)                      (0.02)   
## ----------------------------------------------------------------------------------------------------------
## R^2                         0.83          0.79          0.81          0.81          0.79          0.81    
## AIC                     12739.10      12917.65      12757.60      12768.45      12930.56      12975.36    
## BIC                     12936.80      13028.13      12949.49      12960.34      13035.23      13080.03    
## Deviance                20364.16      24827.99      22236.96      22185.43      24686.78      23141.63    
## Log Likelihood          -6335.55      -6439.82      -6345.80      -6351.23      -6447.28      -6469.68    
## nobs                     2477          2477          2477          2477          2477          2477       
## ==========================================================================================================
## *** p < 0.001; ** p < 0.01; * p < 0.05

SDM and SDEM are better models in comparison to more simple SAR and SEM models.

To sum up, we should probably stick to models with two components. Based on likelihoood-ratio test Manski model is the best one, but it has drawbacks, as its Rho and Lambda effects have opposite directions, so it is probably misspecified in fact. SAC has positive effect of both Rho and Lambda but it seems to divide between those two components the effect that can be included by one of them. Also some of the thetas seem to be important, as they are significant and SDM and SDEM have lower AIC than SAC. Out of SDM and SDEM, SDEM is probably better, because it has more significant variables, although its AIC is a little bit higher that the one from SDM.

Unfortunately, we still have some insignificant variables, and some of them repeat constantly, in normal as well as in lagged forms. Those are especially expenses and water, which were also insignificant in linear model. We should omit them.

Let’s start with expenses, as it is usually the most insignificant one and it may also be related to income, making the latter insignificant sometimes as well.

7.3 Spatial models without insignificant variables

7.3.1 OLS

We create two formulas with excluding variable expenses in the first one and with excluding variables expenses and water in the second one.

7.3.1.1 OLS without expenses

formula2 <- freq ~ femin + migration + income + people_density2019 + prework + postwork + 
  benefit500 + unemployment + water + sewage + gas + opos + mean_enabled + place_area

model_lm2<-lm(formula2, data=data) 
summary(model_lm2)

## 
## Call:
## lm(formula = formula2, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.868  -2.471  -0.022   2.519  39.717 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         4.709e+00  3.492e+00   1.348    0.178    
## femin               1.242e-01  2.981e-02   4.166 3.20e-05 ***
## migration           4.088e-01  1.729e-02  23.644  < 2e-16 ***
## income              4.778e-04  7.312e-05   6.535 7.72e-11 ***
## people_density2019  1.055e-03  2.589e-04   4.076 4.72e-05 ***
## prework             7.266e+01  9.369e+00   7.755 1.29e-14 ***
## postwork            8.656e+01  5.278e+00  16.399  < 2e-16 ***
## benefit500          1.422e+02  1.559e+01   9.124  < 2e-16 ***
## unemployment       -4.588e-01  3.769e-02 -12.174  < 2e-16 ***
## water              -3.660e-03  5.311e-03  -0.689    0.491    
## sewage              2.672e-02  4.190e-03   6.376 2.16e-10 ***
## gas                 3.218e-02  3.669e-03   8.771  < 2e-16 ***
## opos               -1.134e+01  8.566e-01 -13.234  < 2e-16 ***
## mean_enabled        2.201e-03  3.304e-04   6.661 3.34e-11 ***
## place_area         -6.708e-02  7.195e-03  -9.324  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.075 on 2462 degrees of freedom
## Multiple R-squared:  0.6505, Adjusted R-squared:  0.6486 
## F-statistic: 327.4 on 14 and 2462 DF,  p-value: < 2.2e-16

Income variable is significant now. Water is still not significant, so we will calculate next model without this variable.

7.3.1.2 OLS without expenses and water

formula3 <- freq ~ femin + migration + income + people_density2019 + prework + postwork + 
  benefit500 + unemployment + sewage + gas + opos + mean_enabled + place_area

model_lm3<-lm(formula3, data=data) 
summary(model_lm3)

## 
## Call:
## lm(formula = formula3, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.905  -2.460  -0.012   2.522  39.919 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         4.421e+00  3.467e+00   1.275    0.202    
## femin               1.245e-01  2.980e-02   4.178 3.04e-05 ***
## migration           4.085e-01  1.728e-02  23.637  < 2e-16 ***
## income              4.771e-04  7.311e-05   6.525 8.20e-11 ***
## people_density2019  1.055e-03  2.589e-04   4.074 4.76e-05 ***
## prework             7.304e+01  9.352e+00   7.810 8.40e-15 ***
## postwork            8.648e+01  5.276e+00  16.390  < 2e-16 ***
## benefit500          1.415e+02  1.555e+01   9.099  < 2e-16 ***
## unemployment       -4.575e-01  3.763e-02 -12.156  < 2e-16 ***
## sewage              2.632e-02  4.149e-03   6.343 2.68e-10 ***
## gas                 3.262e-02  3.614e-03   9.025  < 2e-16 ***
## opos               -1.148e+01  8.314e-01 -13.805  < 2e-16 ***
## mean_enabled        2.190e-03  3.300e-04   6.637 3.92e-11 ***
## place_area         -6.679e-02  7.182e-03  -9.300  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.075 on 2463 degrees of freedom
## Multiple R-squared:  0.6505, Adjusted R-squared:  0.6486 
## F-statistic: 352.6 on 13 and 2463 DF,  p-value: < 2.2e-16

Every variable in the model is statistically significant. It may give better results in spatial models. Let’s investigate it.

7.3.2 Spatial autocorrelation

Let’s check the spatial autocorrelation of residuals for this limited model. We’ll do it again for all matrices in advance.

moran1_limit <- lm.morantest(model_lm3, cont.listw)
moran2_limit <- lm.morantest(model_lm3, knn.listw)
moran3_limit <- lm.morantest(model_lm3, radius.listw)
moran4_limit <- lm.morantest(model_lm3, knn_inv.dist.listw)
moran5_limit <- lm.morantest(model_lm3, knn_inv.dist2.listw)

listw_object <- c("cont.listw","knn.listw","radius.listw","knn_inv.dist.listw","knn_inv.dist2.listw")

Moran_standard_deviate <- c(moran1_limit$statistic[[1]],moran2_limit$statistic[[1]],
                            moran3_limit$statistic[[1]],moran4_limit$statistic[[1]],
                            moran5_limit$statistic[[1]])
Moran_p_value <- c(moran1_limit$p.value[[1]],moran2_limit$p.value[[1]],moran3_limit$p.value[[1]],
                   moran4_limit$p.value[[1]],moran5_limit$p.value[[1]])
Moran_statistic <- c(moran1_limit$estimate[[1]],moran2_limit$estimate[[1]],moran3_limit$estimate[[1]],
                     moran4_limit$estimate[[1]],moran5_limit$estimate[[1]])
Moran_expectation <- c(moran1_limit$estimate[[2]],moran2_limit$estimate[[2]],moran3_limit$estimate[[2]],
                       moran4_limit$estimate[[2]],moran5_limit$estimate[[2]])
Moran_variance <- c(moran1_limit$estimate[[3]],moran2_limit$estimate[[3]],moran3_limit$estimate[[3]],
                    moran4_limit$estimate[[3]],moran5_limit$estimate[[3]])
Moran_I_limit_results <- data.frame(listw_object, Moran_statistic,
                                   Moran_standard_deviate, Moran_p_value,
                                   Moran_expectation, Moran_variance)

knitr::kable(Moran_I_limit_results,
 caption = "Moran I tests results for the limited model")

Moran I tests results for the limited model
listw_object	Moran_statistic	Moran_standard_deviate	Moran_expectation	Moran_variance
cont.listw	0.3826347	31.09554	-0.0024031	0.0001533
knn.listw	0.4115153	16.87045	-0.0024358	0.0006021
radius.listw	0.3352275	32.42526	-0.0022283	0.0001083
knn_inv.dist.listw	0.0326608	44.22744	-0.0007440	0.0000006
knn_inv.dist2.listw	0.1638993	25.95389	-0.0014838	0.0000406

There is still autocorrelation in each model. We will stay with contiguency matrix and try models for this reduced form.

7.3.3 Spatial models without expenses and water variables

7.3.3.1 Manski model - GNS

GNS_3 <- sacsarlm(formula3, data=data, listw=cont.listw, type="sacmixed", method = 'LU')
summary(GNS_3)

## 
## Call:sacsarlm(formula = formula3, data = data, listw = cont.listw, 
##     type = "sacmixed", method = "LU")
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -15.977876  -1.761640  -0.026351   1.692398  27.741915 
## 
## Type: sacmixed 
## Coefficients: (numerical Hessian approximate standard errors) 
##                           Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)            -5.8145e-01  8.3430e-01 -0.6969 0.4858430
## femin                   1.0388e-01  2.3638e-02  4.3947 1.109e-05
## migration               2.6209e-01  1.7049e-02 15.3729 < 2.2e-16
## income                  3.1648e-04  5.2105e-05  6.0739 1.249e-09
## people_density2019      8.2069e-05  2.2671e-04  0.3620 0.7173460
## prework                 2.1749e+01  9.7022e+00  2.2417 0.0249814
## postwork                4.1572e+01  5.5252e+00  7.5241 5.307e-14
## benefit500              3.1306e+01  1.4153e+01  2.2121 0.0269630
## unemployment           -3.4010e-01  4.6370e-02 -7.3345 2.225e-13
## sewage                  2.2639e-02  3.4089e-03  6.6411 3.114e-11
## gas                     2.9960e-02  3.7569e-03  7.9748 1.554e-15
## opos                   -1.6440e+00  1.1799e+00 -1.3934 0.1634981
## mean_enabled            7.2525e-04  2.3361e-04  3.1046 0.0019055
## place_area             -5.0224e-02  6.2474e-03 -8.0393 8.882e-16
## lag.femin              -1.1600e-01  3.0087e-02 -3.8555 0.0001155
## lag.migration          -1.1767e-01  2.8244e-02 -4.1661 3.099e-05
## lag.income             -1.9885e-05  4.9047e-05 -0.4054 0.6851691
## lag.people_density2019  1.0207e-03  4.1311e-04  2.4707 0.0134847
## lag.prework             5.3710e+00  1.3247e+01  0.4054 0.6851554
## lag.postwork           -1.3970e+01  8.1992e+00 -1.7038 0.0884205
## lag.benefit500         -4.3646e+00  1.7287e+01 -0.2525 0.8006718
## lag.unemployment        2.9548e-01  5.7381e-02  5.1495 2.612e-07
## lag.sewage             -1.3998e-02  4.9986e-03 -2.8003 0.0051060
## lag.gas                -2.6388e-02  4.9067e-03 -5.3780 7.532e-08
## lag.opos               -2.2243e+00  1.3386e+00 -1.6616 0.0965909
## lag.mean_enabled        3.5590e-04  2.5028e-04  1.4220 0.1550192
## lag.place_area          4.1405e-02  7.6574e-03  5.4072 6.402e-08
## 
## Rho: 0.79221
## Approximate (numerical Hessian) standard error: 0.026585
##     z-value: 29.798, p-value: < 2.22e-16
## Lambda: -0.33628
## Approximate (numerical Hessian) standard error: 0.064938
##     z-value: -5.1784, p-value: 2.238e-07
## 
## LR test value: 1297.1, p-value: < 2.22e-16
## 
## Log likelihood: -6338.961 for sacmixed model
## ML residual variance (sigma squared): 8.2274, (sigma: 2.8683)
## Number of observations: 2477 
## Number of parameters estimated: 30 
## AIC: 12738, (AIC for lm: 14005)

Now that we removed expenses and water, we have less insignificant variables and income is significant while in previous Manski model it was not. However, some lags are insignificant and benefit is insignificant in both forms. On the other hand thetas of postwork, oposition and mean_enabled is close to being significant. For the Rho and Lambda situation looks similar, so they are countereffective to each other. AIC dropped only by one, but it is the same overcomplicated model.

7.3.3.2 Kelejian-Prucha model

SAC_3 <- sacsarlm(formula3, data=data, listw=cont.listw, method = 'LU')
summary(SAC_3)

## 
## Call:sacsarlm(formula = formula3, data = data, listw = cont.listw, 
##     method = "LU")
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -17.903876  -1.935527  -0.029126   1.899407  32.043669 
## 
## Type: sac 
## Coefficients: (numerical Hessian approximate standard errors) 
##                       Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)        -6.85195300  3.02110654 -2.2680   0.02333
## femin               0.10357495  0.02397265  4.3205 1.556e-05
## migration           0.26111669  0.01537831 16.9795 < 2.2e-16
## income              0.00036250  0.00005771  6.2815 3.353e-10
## people_density2019  0.00039245  0.00021059  1.8636   0.06238
## prework            35.78838510  7.88915640  4.5364 5.722e-06
## postwork           57.85384901  4.51984429 12.8000 < 2.2e-16
## benefit500         90.40319826 13.16599998  6.8664 6.584e-12
## unemployment       -0.22747918  0.03724860 -6.1071 1.015e-09
## sewage              0.02775636  0.00337518  8.2237 2.220e-16
## gas                 0.02170763  0.00326462  6.6494 2.944e-11
## opos               -5.34717818  0.75621746 -7.0710 1.539e-12
## mean_enabled        0.00107504  0.00026525  4.0530 5.057e-05
## place_area         -0.05084294  0.00619375 -8.2087 2.220e-16
## 
## Rho: 0.4933
## Approximate (numerical Hessian) standard error: 0.027659
##     z-value: 17.835, p-value: < 2.22e-16
## Lambda: 0.20667
## Approximate (numerical Hessian) standard error: 0.0528
##     z-value: 3.9143, p-value: 9.066e-05
## 
## LR test value: 1094.7, p-value: < 2.22e-16
## 
## Log likelihood: -6440.145 for sac model
## ML residual variance (sigma squared): 10.028, (sigma: 3.1667)
## Number of observations: 2477 
## Number of parameters estimated: 17 
## AIC: 12914, (AIC for lm: 14005)

SAC model has only one variable on the border of significance now (people_density with pvalue=0.06). Effects of the model didn’t change so much, but the AIC dropped by 4.

7.3.3.3 Spatial Durbin Model

SDM_3 <- lagsarlm(formula3, data=data, listw=cont.listw, type="mixed") 
summary(SDM_3)

## 
## Call:lagsarlm(formula = formula3, data = data, listw = cont.listw, 
##     type = "mixed")
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -16.407562  -1.843088  -0.015728   1.747218  29.813186 
## 
## Type: mixed 
## Coefficients: (asymptotic standard errors) 
##                           Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)             4.3453e-01  4.7822e+00  0.0909 0.9276011
## femin                   1.0773e-01  2.3506e-02  4.5831 4.581e-06
## migration               2.6125e-01  1.6072e-02 16.2544 < 2.2e-16
## income                  3.2851e-04  5.5258e-05  5.9449 2.766e-09
## people_density2019     -7.1312e-05  2.2063e-04 -0.3232 0.7465297
## prework                 2.2800e+01  8.2310e+00  2.7701 0.0056046
## postwork                4.4222e+01  5.0886e+00  8.6905 < 2.2e-16
## benefit500              3.4500e+01  1.4977e+01  2.3035 0.0212511
## unemployment           -3.3052e-01  4.6136e-02 -7.1641 7.829e-13
## sewage                  2.2998e-02  3.3973e-03  6.7696 1.291e-11
## gas                     2.8787e-02  3.7255e-03  7.7271 1.110e-14
## opos                   -1.6527e+00  1.2318e+00 -1.3418 0.1796720
## mean_enabled            8.0907e-04  2.6086e-04  3.1016 0.0019251
## place_area             -5.0002e-02  6.3201e-03 -7.9116 2.442e-15
## lag.femin              -1.2604e-01  4.1682e-02 -3.0239 0.0024956
## lag.migration          -4.0592e-02  2.6677e-02 -1.5217 0.1280967
## lag.income              8.5855e-05  1.1472e-04  0.7484 0.4542240
## lag.people_density2019  1.7555e-03  4.6134e-04  3.8052 0.0001417
## lag.prework             1.8878e+01  1.2783e+01  1.4768 0.1397173
## lag.postwork            4.4629e-01  7.5083e+00  0.0594 0.9526017
## lag.benefit500          1.4668e+01  2.2066e+01  0.6647 0.5062138
## lag.unemployment        2.3305e-01  6.0648e-02  3.8426 0.0001217
## lag.sewage             -1.0622e-02  5.6291e-03 -1.8870 0.0591618
## lag.gas                -2.0905e-02  5.3319e-03 -3.9207 8.830e-05
## lag.opos               -4.2405e+00  1.4411e+00 -2.9425 0.0032558
## lag.mean_enabled        8.4926e-04  4.7215e-04  1.7987 0.0720656
## lag.place_area          3.2724e-02  9.8535e-03  3.3210 0.0008969
## 
## Rho: 0.64507, LR test value: 820.8, p-value: < 2.22e-16
## Asymptotic standard error: 0.01945
##     z-value: 33.166, p-value: < 2.22e-16
## Wald statistic: 1100, p-value: < 2.22e-16
## 
## Log likelihood: -6349.848 for mixed model
## ML residual variance (sigma squared): 9.0069, (sigma: 3.0012)
## Number of observations: 2477 
## Number of parameters estimated: 29 
## AIC: 12758, (AIC for lm: 13576)
## LM test for residual autocorrelation
## test value: 16.49, p-value: 4.8904e-05

moran.test(SDM_3$residuals, cont.listw)

## 
##  Moran I test under randomisation
## 
## data:  SDM_3$residuals  
## weights: cont.listw    
## 
## Moran I statistic standard deviate = -1.6389, p-value = 0.9494
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##     -0.0207732356     -0.0004038772      0.0001544717

There are only two insignificant variables, but many lags are insignificant. AIC is the same as for the SDM on the full model, so there is no big change here.

7.3.3.4 Spatial Durbin Error Model

SDEM_3 <- errorsarlm(formula3, data=data, listw=cont.listw, etype="emixed", method="LU")
summary(SDEM_3)

## 
## Call:errorsarlm(formula = formula3, data = data, listw = cont.listw, 
##     etype = "emixed", method = "LU")
## 
## Residuals:
##         Min          1Q      Median          3Q         Max 
## -16.5829126  -1.8121098  -0.0070059   1.7759786  30.1804384 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                           Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)             1.3761e+01  8.5442e+00  1.6105 0.1072874
## femin                   8.9151e-02  2.5392e-02  3.5110 0.0004464
## migration               2.8524e-01  1.6483e-02 17.3053 < 2.2e-16
## income                  3.8628e-04  6.0135e-05  6.4235 1.332e-10
## people_density2019      2.0592e-04  2.2468e-04  0.9165 0.3594002
## prework                 3.1523e+01  8.4163e+00  3.7455 0.0001800
## postwork                5.3789e+01  5.0430e+00 10.6661 < 2.2e-16
## benefit500              4.9551e+01  1.4787e+01  3.3509 0.0008054
## unemployment           -3.2441e-01  4.3180e-02 -7.5130 5.773e-14
## sewage                  2.2701e-02  3.5928e-03  6.3184 2.643e-10
## gas                     2.8242e-02  3.5968e-03  7.8520 3.997e-15
## opos                   -3.1415e+00  1.1360e+00 -2.7654 0.0056853
## mean_enabled            1.0948e-03  2.7770e-04  3.9425 8.064e-05
## place_area             -4.9792e-02  6.4415e-03 -7.7299 1.066e-14
## lag.femin              -1.2823e-01  6.0846e-02 -2.1075 0.0350776
## lag.migration           2.1950e-01  3.4797e-02  6.3079 2.829e-10
## lag.income              4.3930e-04  1.5776e-04  2.7846 0.0053598
## lag.people_density2019  4.0295e-03  6.4540e-04  6.2434 4.281e-10
## lag.prework             5.4379e+01  1.8481e+01  2.9425 0.0032559
## lag.postwork            4.5306e+01  1.0422e+01  4.3470 1.380e-05
## lag.benefit500          8.5533e+01  3.1794e+01  2.6903 0.0071395
## lag.unemployment       -1.5250e-02  7.7335e-02 -0.1972 0.8436794
## lag.sewage              8.8366e-04  8.2793e-03  0.1067 0.9150021
## lag.gas                 6.2142e-04  7.7620e-03  0.0801 0.9361900
## lag.opos               -1.0059e+01  1.8022e+00 -5.5815 2.384e-08
## lag.mean_enabled        2.1369e-03  6.8899e-04  3.1015 0.0019256
## lag.place_area         -6.2687e-03  1.4525e-02 -0.4316 0.6660415
## 
## Lambda: 0.66324, LR test value: 807.53, p-value: < 2.22e-16
## Approximate (numerical Hessian) standard error: 0.019143
##     z-value: 34.648, p-value: < 2.22e-16
## Wald statistic: 1200.5, p-value: < 2.22e-16
## 
## Log likelihood: -6356.482 for error model
## ML residual variance (sigma squared): 8.9977, (sigma: 2.9996)
## Number of observations: 2477 
## Number of parameters estimated: 29 
## AIC: 12771, (AIC for lm: 13576)

moran.test(SDEM_3$residuals, cont.listw)

## 
##  Moran I test under randomisation
## 
## data:  SDEM_3$residuals  
## weights: cont.listw    
## 
## Moran I statistic standard deviate = -2.0035, p-value = 0.9774
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##     -0.0253038500     -0.0004038772      0.0001544589

People density is not significant here at all, but has significant lag and 4 insignificant lags. AIC surprisingly increased by 3 points, but we removed two insignificant variables, so it is a positive change and this model is the best one so far.

7.3.3.5 Spatial Lag Model

SAR_3 <- lagsarlm(formula3, data=data, listw=cont.listw, method="LU")
summary(SAR_3)

## 
## Call:lagsarlm(formula = formula3, data = data, listw = cont.listw, 
##     method = "LU")
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -17.656769  -1.939843  -0.038423   1.850106  31.407217 
## 
## Type: lag 
## Coefficients: (numerical Hessian approximate standard errors) 
##                       Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)        -9.3795e+00  2.6990e+00 -3.4752 0.0005105
## femin               8.9333e-02  2.3212e-02  3.8485 0.0001188
## migration           2.4261e-01  1.4066e-02 17.2478 < 2.2e-16
## income              3.6845e-04  5.6703e-05  6.4979 8.145e-11
## people_density2019  5.3121e-04  2.0228e-04  2.6262 0.0086354
## prework             3.5126e+01  7.2955e+00  4.8148 1.474e-06
## postwork            5.5786e+01  4.1675e+00 13.3861 < 2.2e-16
## benefit500          8.6480e+01  1.2142e+01  7.1226 1.059e-12
## unemployment       -1.7363e-01  3.0074e-02 -5.7735 7.762e-09
## sewage              2.8214e-02  3.2150e-03  8.7758 < 2.2e-16
## gas                 1.7975e-02  2.8226e-03  6.3682 1.913e-10
## opos               -5.1278e+00  6.6511e-01 -7.7097 1.266e-14
## mean_enabled        1.0461e-03  2.5936e-04  4.0336 5.493e-05
## place_area         -4.3413e-02  5.6170e-03 -7.7289 1.088e-14
## 
## Rho: 0.57134, LR test value: 1079.3, p-value: < 2.22e-16
## Approximate (numerical Hessian) standard error: 0.014676
##     z-value: 38.93, p-value: < 2.22e-16
## Wald statistic: 1515.6, p-value: < 2.22e-16
## 
## Log likelihood: -6447.823 for lag model
## ML residual variance (sigma squared): 9.9727, (sigma: 3.158)
## Number of observations: 2477 
## Number of parameters estimated: 16 
## AIC: 12928, (AIC for lm: 14005)

moran.test(SAR_3$residuals, cont.listw)

## 
##  Moran I test under randomisation
## 
## data:  SAR_3$residuals  
## weights: cont.listw    
## 
## Moran I statistic standard deviate = 2.455, p-value = 0.007045
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.0301085717     -0.0004038772      0.0001544743

7.3.3.6 SLX model

SLX_3 <- lmSLX(formula3, data=data, listw=cont.listw)
summary(SLX_3)

## 
## Call:
## lm(formula = formula(paste("y ~ ", paste(colnames(x)[-1], collapse = "+"))), 
##     data = as.data.frame(x), weights = weights)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.543  -2.277  -0.020   2.324  37.470 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -8.594e+00  5.939e+00  -1.447 0.148044    
## femin                   9.887e-02  2.919e-02   3.387 0.000718 ***
## migration               2.871e-01  1.995e-02  14.392  < 2e-16 ***
## income                  3.888e-04  6.858e-05   5.669 1.60e-08 ***
## people_density2019     -1.752e-04  2.740e-04  -0.639 0.522602    
## prework                 3.221e+01  1.022e+01   3.152 0.001644 ** 
## postwork                5.691e+01  6.309e+00   9.019  < 2e-16 ***
## benefit500              4.790e+01  1.860e+01   2.576 0.010066 *  
## unemployment           -3.039e-01  5.729e-02  -5.305 1.23e-07 ***
## sewage                  2.666e-02  4.219e-03   6.320 3.11e-10 ***
## gas                     2.875e-02  4.627e-03   6.214 6.07e-10 ***
## opos                   -3.468e+00  1.529e+00  -2.269 0.023378 *  
## mean_enabled            1.135e-03  3.238e-04   3.506 0.000464 ***
## place_area             -4.867e-02  7.850e-03  -6.201 6.57e-10 ***
## lag.femin              -1.181e-03  5.175e-02  -0.023 0.981794    
## lag.migration           2.500e-01  3.118e-02   8.019 1.63e-15 ***
## lag.income              4.677e-04  1.414e-04   3.308 0.000953 ***
## lag.people_density2019  3.086e-03  5.686e-04   5.428 6.26e-08 ***
## lag.prework             7.628e+01  1.574e+01   4.847 1.33e-06 ***
## lag.postwork            5.886e+01  9.014e+00   6.530 7.99e-11 ***
## lag.benefit500          9.987e+01  2.719e+01   3.673 0.000245 ***
## lag.unemployment       -4.468e-02  7.480e-02  -0.597 0.550333    
## lag.sewage              8.432e-03  6.960e-03   1.211 0.225838    
## lag.gas                -4.724e-03  6.582e-03  -0.718 0.472948    
## lag.opos               -1.223e+01  1.770e+00  -6.908 6.23e-12 ***
## lag.mean_enabled        2.921e-03  5.816e-04   5.022 5.48e-07 ***
## lag.place_area         -6.749e-03  1.216e-02  -0.555 0.579024    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.728 on 2450 degrees of freedom
## Multiple R-squared:  0.7091, Adjusted R-squared:  0.706 
## F-statistic: 229.7 on 26 and 2450 DF,  p-value: < 2.2e-16

moran.test(SLX_3$residuals, cont.listw)

## 
##  Moran I test under randomisation
## 
## data:  SLX_3$residuals  
## weights: cont.listw    
## 
## Moran I statistic standard deviate = 32.544, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.4040212787     -0.0004038772      0.0001544309

7.3.3.7 Spatial Error Model

SEM_3 <- errorsarlm(formula3, data=data, listw=cont.listw, method="LU")
summary(SEM_3)

## 
## Call:errorsarlm(formula = formula3, data = data, listw = cont.listw, 
##     method = "LU")
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -15.049623  -1.870898  -0.047211   1.858228  28.064388 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                       Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)         2.6338e+01  2.9454e+00  8.9421 < 2.2e-16
## femin               1.1951e-01  2.3512e-02  5.0829 3.716e-07
## migration           2.6606e-01  1.5847e-02 16.7899 < 2.2e-16
## income              2.8297e-04  5.5188e-05  5.1273 2.939e-07
## people_density2019  2.7631e-04  2.1895e-04  1.2620 0.2069421
## prework             2.7344e+01  8.2867e+00  3.2997 0.0009679
## postwork            4.2837e+01  5.0974e+00  8.4038 < 2.2e-16
## benefit500          5.2031e+01  1.4984e+01  3.4725 0.0005157
## unemployment       -3.8633e-01  4.4472e-02 -8.6870 < 2.2e-16
## sewage              2.2120e-02  3.4213e-03  6.4655 1.010e-10
## gas                 3.1239e-02  3.6971e-03  8.4494 < 2.2e-16
## opos               -2.6723e+00  1.1557e+00 -2.3123 0.0207627
## mean_enabled        7.2811e-04  2.6299e-04  2.7686 0.0056300
## place_area         -5.4218e-02  6.3815e-03 -8.4961 < 2.2e-16
## 
## Lambda: 0.78518, LR test value: 1033.4, p-value: < 2.22e-16
## Approximate (numerical Hessian) standard error: 0.015236
##     z-value: 51.534, p-value: < 2.22e-16
## Wald statistic: 2655.7, p-value: < 2.22e-16
## 
## Log likelihood: -6470.802 for error model
## ML residual variance (sigma squared): 9.3574, (sigma: 3.059)
## Number of observations: 2477 
## Number of parameters estimated: 16 
## AIC: 12974, (AIC for lm: 14005)

SAR and SLX models do not remove the autocorrelation of residuals and SEM is still not good enough. It would be better to have some of those insignificant variables and lags, but also some significant ones.

7.3.4 Comparison of the models

# LR.sarlm(GNS_3, SAC_3)
# LR.sarlm(GNS_3, SDEM_3)
# LR.sarlm(GNS_3, SDM_3)
# LR.sarlm(GNS_3, SAR_3)
# LR.sarlm(GNS_3, SLX_3)
# LR.sarlm(GNS_3, SEM_3)
model_of_comparison <- c("SAC","SDEM","SDM","SAR","SLX","SEM")
Likelihood_ratio <- c(LR.sarlm(GNS_3, SAC_3)[[1]][[1]], LR.sarlm(GNS_3, SDEM_3)[[1]][[1]],
                      LR.sarlm(GNS_3, SDM_3)[[1]][[1]], LR.sarlm(GNS_3, SAR_3)[[1]][[1]],
                      LR.sarlm(GNS_3, SLX_3)[[1]][[1]], LR.sarlm(GNS_3, SEM_3)[[1]][[1]])
df <- c(LR.sarlm(GNS_3, SAC_3)[[2]][[1]],LR.sarlm(GNS_3, SDEM_3)[[2]][[1]],
        LR.sarlm(GNS_3, SDM_3)[[2]][[1]], LR.sarlm(GNS_3, SAR_3)[[2]][[1]],
        LR.sarlm(GNS_3, SLX_3)[[2]][[1]], LR.sarlm(GNS_3, SEM_3)[[2]][[1]])
p_value <- c(LR.sarlm(GNS_3, SAC_3)[[3]][[1]],LR.sarlm(GNS_3, SDEM_3)[[3]][[1]],
             LR.sarlm(GNS_3, SDM_3)[[3]][[1]], LR.sarlm(GNS_3, SAR_3)[[3]][[1]],
             LR.sarlm(GNS_3, SLX_3)[[3]][[1]],LR.sarlm(GNS_3, SEM_3)[[3]][[1]])
Log_likelihood_GNS <- c(LR.sarlm(GNS_3, SAC_3)[[4]][[1]],LR.sarlm(GNS_3, SDEM_3)[[4]][[1]],
                        LR.sarlm(GNS_3, SDM_3)[[4]][[1]],LR.sarlm(GNS_3, SAR_3)[[4]][[1]],
                        LR.sarlm(GNS_3, SLX_3)[[4]][[1]],LR.sarlm(GNS_3, SEM_3)[[4]][[1]])
Log_likelihood_comparison <- c(LR.sarlm(GNS_3, SAC_3)[[4]][[2]],LR.sarlm(GNS_3, SDEM_3)[[4]][[2]],
                        LR.sarlm(GNS_3, SDM_3)[[4]][[2]],LR.sarlm(GNS_3, SAR_3)[[4]][[2]],
                        LR.sarlm(GNS_3, SLX_3)[[4]][[2]],LR.sarlm(GNS_3, SEM_3)[[4]][[2]])

comparison_df <- data.frame(model_of_comparison, Likelihood_ratio, df, p_value,
                            Log_likelihood_GNS, Log_likelihood_comparison)

knitr::kable(comparison_df,
 caption = "Comparison of the models - GNS vs others")

Comparison of the models - GNS vs others
model_of_comparison	Likelihood_ratio	df	p_value	Log_likelihood_GNS	Log_likelihood_comparison
SAC	202.36641	13	0.0e+00	-6338.961	-6440.145
SDEM	35.04181	1	0.0e+00	-6338.961	-6356.482
SDM	21.77322	1	3.1e-06	-6338.961	-6349.848
SAR	217.72372	14	0.0e+00	-6338.961	-6447.823
SLX	842.56837	2	0.0e+00	-6338.961	-6760.246
SEM	263.68180	14	0.0e+00	-6338.961	-6470.802

GNS is better in all cases. The same like before excluding insignificant variables.

Let’s compare SAR and SEM with the SDM and SDEM models.

LR.sarlm(SDM_3, SAR_3)

## 
##  Likelihood ratio for spatial linear models
## 
## data:  
## Likelihood ratio = 195.95, df = 13, p-value < 2.2e-16
## sample estimates:
## Log likelihood of SDM_3 Log likelihood of SAR_3 
##               -6349.848               -6447.823

LR.sarlm(SDEM_3, SEM_3)

## 
##  Likelihood ratio for spatial linear models
## 
## data:  
## Likelihood ratio = 228.64, df = 13, p-value < 2.2e-16
## sample estimates:
## Log likelihood of SDEM_3  Log likelihood of SEM_3 
##                -6356.482                -6470.802

screenreg(list(GNS_3, SAC_3, SDM_3, SDEM_3, SAR_3, SEM_3))

## 
## ==========================================================================================================
##                         Model 1       Model 2       Model 3       Model 4       Model 5       Model 6     
## ----------------------------------------------------------------------------------------------------------
## rho                         0.79 ***      0.49 ***      0.65 ***                    0.57 ***              
##                            (0.03)        (0.03)        (0.02)                      (0.01)                 
## (Intercept)                -0.58         -6.85 *        0.43         13.76         -9.38 ***     26.34 ***
##                            (0.83)        (3.02)        (4.78)        (8.54)        (2.70)        (2.95)   
## femin                       0.10 ***      0.10 ***      0.11 ***      0.09 ***      0.09 ***      0.12 ***
##                            (0.02)        (0.02)        (0.02)        (0.03)        (0.02)        (0.02)   
## migration                   0.26 ***      0.26 ***      0.26 ***      0.29 ***      0.24 ***      0.27 ***
##                            (0.02)        (0.02)        (0.02)        (0.02)        (0.01)        (0.02)   
## income                      0.00 ***      0.00 ***      0.00 ***      0.00 ***      0.00 ***      0.00 ***
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## people_density2019          0.00          0.00         -0.00          0.00          0.00 **       0.00    
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## prework                    21.75 *       35.79 ***     22.80 **      31.52 ***     35.13 ***     27.34 ***
##                            (9.70)        (7.89)        (8.23)        (8.42)        (7.30)        (8.29)   
## postwork                   41.57 ***     57.85 ***     44.22 ***     53.79 ***     55.79 ***     42.84 ***
##                            (5.53)        (4.52)        (5.09)        (5.04)        (4.17)        (5.10)   
## benefit500                 31.31 *       90.40 ***     34.50 *       49.55 ***     86.48 ***     52.03 ***
##                           (14.15)       (13.17)       (14.98)       (14.79)       (12.14)       (14.98)   
## unemployment               -0.34 ***     -0.23 ***     -0.33 ***     -0.32 ***     -0.17 ***     -0.39 ***
##                            (0.05)        (0.04)        (0.05)        (0.04)        (0.03)        (0.04)   
## sewage                      0.02 ***      0.03 ***      0.02 ***      0.02 ***      0.03 ***      0.02 ***
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## gas                         0.03 ***      0.02 ***      0.03 ***      0.03 ***      0.02 ***      0.03 ***
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## opos                       -1.64         -5.35 ***     -1.65         -3.14 **      -5.13 ***     -2.67 *  
##                            (1.18)        (0.76)        (1.23)        (1.14)        (0.67)        (1.16)   
## mean_enabled                0.00 **       0.00 ***      0.00 **       0.00 ***      0.00 ***      0.00 ** 
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## place_area                 -0.05 ***     -0.05 ***     -0.05 ***     -0.05 ***     -0.04 ***     -0.05 ***
##                            (0.01)        (0.01)        (0.01)        (0.01)        (0.01)        (0.01)   
## lag.femin                  -0.12 ***                   -0.13 **      -0.13 *                              
##                            (0.03)                      (0.04)        (0.06)                               
## lag.migration              -0.12 ***                   -0.04          0.22 ***                            
##                            (0.03)                      (0.03)        (0.03)                               
## lag.income                 -0.00                        0.00          0.00 **                             
##                            (0.00)                      (0.00)        (0.00)                               
## lag.people_density2019      0.00 *                      0.00 ***      0.00 ***                            
##                            (0.00)                      (0.00)        (0.00)                               
## lag.prework                 5.37                       18.88         54.38 **                             
##                           (13.25)                     (12.78)       (18.48)                               
## lag.postwork              -13.97                        0.45         45.31 ***                            
##                            (8.20)                      (7.51)       (10.42)                               
## lag.benefit500             -4.36                       14.67         85.53 **                             
##                           (17.29)                     (22.07)       (31.79)                               
## lag.unemployment            0.30 ***                    0.23 ***     -0.02                                
##                            (0.06)                      (0.06)        (0.08)                               
## lag.sewage                 -0.01 **                    -0.01          0.00                                
##                            (0.00)                      (0.01)        (0.01)                               
## lag.gas                    -0.03 ***                   -0.02 ***      0.00                                
##                            (0.00)                      (0.01)        (0.01)                               
## lag.opos                   -2.22                       -4.24 **     -10.06 ***                            
##                            (1.34)                      (1.44)        (1.80)                               
## lag.mean_enabled            0.00                        0.00          0.00 **                             
##                            (0.00)                      (0.00)        (0.00)                               
## lag.place_area              0.04 ***                    0.03 ***     -0.01                                
##                            (0.01)                      (0.01)        (0.01)                               
## lambda                     -0.34 ***      0.21 ***                    0.66 ***                    0.79 ***
##                            (0.06)        (0.05)                      (0.02)                      (0.02)   
## ----------------------------------------------------------------------------------------------------------
## R^2                         0.83          0.79          0.81          0.81          0.79          0.81    
## AIC                     12737.92      12914.29      12757.70      12770.96      12927.65      12973.60    
## BIC                     12912.37      13013.14      12926.33      12939.59      13020.68      13066.64    
## Deviance                20379.34      24838.90      22310.10      22287.24      24702.49      23178.36    
## Log Likelihood          -6338.96      -6440.14      -6349.85      -6356.48      -6447.82      -6470.80    
## nobs                     2477          2477          2477          2477          2477          2477       
## ==========================================================================================================
## *** p < 0.001; ** p < 0.01; * p < 0.05

SDM and SDEM are better models in comparison to more simple SAR and SEM models.

To sum up, reduction of expenses and water didn’t have as big impact as one could have hoped, but it is enough that now there are less insignificant variables, so SDEM is a little bit better now than before. It is not perfect though, so we can try now another matrix and see how that changes results.

7.4 Models with radius matrix

We’ll start again from the full form of models because with new matrix it might as well lead to different conclusions.

7.4.1 Manski Model - GNS

GNS2_1<-sacsarlm(formula, data=data, listw=radius.listw, type="sacmixed")
summary(GNS2_1)

## 
## Call:sacsarlm(formula = formula, data = data, listw = radius.listw, 
##     type = "sacmixed")
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -16.368609  -1.757362  -0.012962   1.713339  26.182574 
## 
## Type: sacmixed 
## Coefficients: (asymptotic standard errors) 
##                           Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)            -6.0226e+00  4.5942e+00 -1.3109 0.1898826
## expenses               -3.9615e-04  2.9246e-04 -1.3546 0.1755563
## femin                   9.7539e-02  2.4274e-02  4.0182 5.865e-05
## income                  7.0449e-04  2.8150e-04  2.5026 0.0123282
## migration               2.5503e-01  1.6562e-02 15.3990 < 2.2e-16
## people_density2019      3.4216e-04  2.0876e-04  1.6390 0.1012143
## prework                 3.1614e+01  8.5034e+00  3.7178 0.0002009
## postwork                4.2466e+01  5.1632e+00  8.2248 2.220e-16
## benefit500              2.8596e+01  1.5362e+01  1.8615 0.0626728
## unemployment           -3.7739e-01  4.6019e-02 -8.2008 2.220e-16
## water                   9.6392e-03  5.4272e-03  1.7761 0.0757150
## sewage                  2.1709e-02  3.5558e-03  6.1051 1.028e-09
## gas                     3.0840e-02  3.7336e-03  8.2600 2.220e-16
## opos                   -1.6008e+00  1.2503e+00 -1.2803 0.2004314
## mean_enabled            7.4277e-04  2.6531e-04  2.7997 0.0051156
## place_area             -4.7249e-02  6.2785e-03 -7.5255 5.262e-14
## lag.expenses            1.5578e-04  6.3469e-04  0.2454 0.8061186
## lag.femin              -7.3988e-02  4.3873e-02 -1.6864 0.0917152
## lag.income             -9.4760e-05  6.1900e-04 -0.1531 0.8783312
## lag.migration          -4.2669e-02  3.2085e-02 -1.3299 0.1835552
## lag.people_density2019 -8.4079e-05  4.1492e-04 -0.2026 0.8394177
## lag.prework             9.3380e+00  1.3658e+01  0.6837 0.4941535
## lag.postwork           -8.0561e+00  8.0545e+00 -1.0002 0.3172168
## lag.benefit500         -1.1314e+01  2.2765e+01 -0.4970 0.6192117
## lag.unemployment        3.6564e-01  6.1419e-02  5.9533 2.629e-09
## lag.water              -7.2990e-03  7.6994e-03 -0.9480 0.3431296
## lag.sewage             -1.0470e-02  6.4146e-03 -1.6322 0.1026296
## lag.gas                -2.7266e-02  5.3489e-03 -5.0975 3.442e-07
## lag.opos               -3.5928e+00  1.5570e+00 -2.3076 0.0210233
## lag.mean_enabled        4.8930e-04  4.9246e-04  0.9936 0.3204213
## lag.place_area          3.7644e-02  1.0123e-02  3.7188 0.0002002
## 
## Rho: 0.76203
## Asymptotic standard error: 0.02908
##     z-value: 26.205, p-value: < 2.22e-16
## Lambda: -0.24199
## Asymptotic standard error: 0.070651
##     z-value: -3.4251, p-value: 0.00061456
## 
## LR test value: 1207, p-value: < 2.22e-16
## 
## Log likelihood: -6383.738 for sacmixed model
## ML residual variance (sigma squared): 9.0059, (sigma: 3.001)
## Number of observations: 2477 
## Number of parameters estimated: 34 
## AIC: 12835, (AIC for lm: 14009)

In comparision to first Manski model on contiguency matrix, this one has almost all thetas insignificant. Situation with Rho and Lambda is more or less the same and AIC is slightly higher. We have to reduce the model.

7.4.2 SAC, SDM and SDEM models

#SAC

SAC2_1 <- sacsarlm(formula, data=data, listw=radius.listw, method = 'LU')
summary(SAC2_1)

## 
## Call:sacsarlm(formula = formula, data = data, listw = radius.listw, 
##     method = "LU")
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -17.601397  -1.922382  -0.010023   1.901154  29.359056 
## 
## Type: sac 
## Coefficients: (numerical Hessian approximate standard errors) 
##                       Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)        -7.07966428  3.34955075 -2.1136  0.034548
## expenses           -0.00046275  0.00030096 -1.5376  0.124149
## femin               0.10662524  0.02468937  4.3187 1.570e-05
## income              0.00080241  0.00029038  2.7633  0.005722
## migration           0.26530977  0.01548876 17.1292 < 2.2e-16
## people_density2019  0.00059047  0.00021390  2.7604  0.005772
## prework            39.42844259  8.03661387  4.9061 9.290e-07
## postwork           58.26076108  4.66147956 12.4983 < 2.2e-16
## benefit500         83.64215426 13.68421666  6.1123 9.820e-10
## unemployment       -0.26492044  0.04007184 -6.6111 3.814e-11
## water               0.00571764  0.00481482  1.1875  0.235027
## sewage              0.02622187  0.00351560  7.4587 8.726e-14
## gas                 0.02603174  0.00345838  7.5271 5.196e-14
## opos               -5.58147696  0.82095822 -6.7987 1.055e-11
## mean_enabled        0.00107001  0.00027380  3.9080 9.305e-05
## place_area         -0.05162951  0.00630089 -8.1940 2.220e-16
## 
## Rho: 0.48122
## Approximate (numerical Hessian) standard error: 0.030861
##     z-value: 15.593, p-value: < 2.22e-16
## Lambda: 0.28189
## Approximate (numerical Hessian) standard error: 0.06033
##     z-value: 4.6725, p-value: 2.9753e-06
## 
## LR test value: 1027.2, p-value: < 2.22e-16
## 
## Log likelihood: -6473.652 for sac model
## ML residual variance (sigma squared): 10.395, (sigma: 3.2241)
## Number of observations: 2477 
## Number of parameters estimated: 19 
## AIC: 12985, (AIC for lm: 14009)

moran.test(SAC2_1$residuals, radius.listw)

## 
##  Moran I test under randomisation
## 
## data:  SAC2_1$residuals  
## weights: radius.listw    
## 
## Moran I statistic standard deviate = -1.2325, p-value = 0.8911
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##     -0.0133078513     -0.0004038772      0.0001096245

W.c_r <- as(as_dgRMatrix_listw(radius.listw), "CsparseMatrix") 
trMat_r <- trW(W.c_r, type="mult") 

# SDM
SDM2_1 <- lagsarlm(formula, data=data, listw=radius.listw, type="mixed",
                   method = 'LU', trs = trMat_r) 
summary(SDM2_1)

## 
## Call:lagsarlm(formula = formula, data = data, listw = radius.listw, 
##     type = "mixed", method = "LU", trs = trMat_r)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -16.534784  -1.836429  -0.013886   1.731861  26.171652 
## 
## Type: mixed 
## Coefficients: (numerical Hessian approximate standard errors) 
##                           Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)            -7.47524714  5.48223926 -1.3635  0.172713
## expenses               -0.00042026  0.00029286 -1.4350  0.151280
## femin                   0.09655469  0.02416278  3.9960 6.442e-05
## income                  0.00073505  0.00028192  2.6073  0.009126
## migration               0.25747788  0.01639051 15.7090 < 2.2e-16
## people_density2019      0.00030913  0.00020808  1.4856  0.137387
## prework                31.61209764  8.38103117  3.7719  0.000162
## postwork               43.89590736  5.08405121  8.6340 < 2.2e-16
## benefit500             31.03687756 15.18579500  2.0438  0.040972
## unemployment           -0.38105275  0.04584852 -8.3111 < 2.2e-16
## water                   0.01034562  0.00537847  1.9235  0.054414
## sewage                  0.02164424  0.00353728  6.1189 9.423e-10
## gas                     0.03061149  0.00368352  8.3104 < 2.2e-16
## opos                   -1.51448138  1.23471340 -1.2266  0.219978
## mean_enabled            0.00079070  0.00026348  3.0009  0.002692
## place_area             -0.04685600  0.00611795 -7.6588 1.887e-14
## lag.expenses            0.00012762  0.00069835  0.1827  0.854999
## lag.femin              -0.05866184  0.04845052 -1.2108  0.225988
## lag.income              0.00001807  0.00067972  0.0266  0.978791
## lag.migration           0.01980872  0.03063255  0.6467  0.517855
## lag.people_density2019  0.00001617  0.00046157  0.0350  0.972053
## lag.prework            22.39698128 14.43002186  1.5521  0.120636
## lag.postwork            2.88316890  8.09883873  0.3560  0.721842
## lag.benefit500         -1.22929479 24.34693850 -0.0505  0.959731
## lag.unemployment        0.34121020  0.06504225  5.2460 1.555e-07
## lag.water              -0.00874309  0.00825510 -1.0591  0.289548
## lag.sewage             -0.00678702  0.00700149 -0.9694  0.332361
## lag.gas                -0.02501357  0.00565899 -4.4201 9.863e-06
## lag.opos               -5.35739186  1.58236337 -3.3857  0.000710
## lag.mean_enabled        0.00085808  0.00053796  1.5951  0.110698
## lag.place_area          0.03127431  0.01077050  2.9037  0.003688
## 
## Rho: 0.65865, LR test value: 696.29, p-value: < 2.22e-16
## Approximate (numerical Hessian) standard error: 0.021147
##     z-value: 31.147, p-value: < 2.22e-16
## Wald statistic: 970.11, p-value: < 2.22e-16
## 
## Log likelihood: -6388.014 for mixed model
## ML residual variance (sigma squared): 9.4294, (sigma: 3.0707)
## Number of observations: 2477 
## Number of parameters estimated: 33 
## AIC: 12842, (AIC for lm: 13536)

moran.test(SDM2_1$residuals, radius.listw)

## 
##  Moran I test under randomisation
## 
## data:  SDM2_1$residuals  
## weights: radius.listw    
## 
## Moran I statistic standard deviate = -1.1385, p-value = 0.8725
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##     -0.0123260097     -0.0004038772      0.0001096576

#SDEM
SDEM2_1 <- errorsarlm(formula, data=data, listw=radius.listw,
                      etype="emixed", method="LU")
summary(SDEM2_1)

## 
## Call:errorsarlm(formula = formula, data = data, listw = radius.listw, 
##     etype = "emixed", method = "LU")
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -16.865496  -1.848171  -0.035066   1.734110  25.428190 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                           Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)            -8.6000e+00  9.8283e+00 -0.8750 0.3815583
## expenses               -4.0983e-04  3.3136e-04 -1.2368 0.2161613
## femin                   9.8797e-02  2.5631e-02  3.8545 0.0001160
## income                  7.5850e-04  3.2207e-04  2.3550 0.0185207
## migration               2.9355e-01  1.6357e-02 17.9461 < 2.2e-16
## people_density2019      3.9149e-04  2.1864e-04  1.7905 0.0733664
## prework                 4.0717e+01  8.4988e+00  4.7909 1.660e-06
## postwork                5.2281e+01  5.0405e+00 10.3722 < 2.2e-16
## benefit500              4.1949e+01  1.4963e+01  2.8035 0.0050555
## unemployment           -3.7220e-01  4.3411e-02 -8.5740 < 2.2e-16
## water                   9.0730e-03  5.1967e-03  1.7459 0.0808228
## sewage                  2.2031e-02  3.6138e-03  6.0964 1.085e-09
## gas                     2.9333e-02  3.5914e-03  8.1674 2.220e-16
## opos                   -2.7751e+00  1.1484e+00 -2.4165 0.0156713
## mean_enabled            1.0646e-03  2.7964e-04  3.8070 0.0001406
## place_area             -4.7773e-02  6.5467e-03 -7.2973 2.935e-13
## lag.expenses            2.7591e-04  9.7010e-04  0.2844 0.7760901
## lag.femin               3.0550e-02  7.3504e-02  0.4156 0.6776889
## lag.income              2.4528e-04  9.3897e-04  0.2612 0.7939252
## lag.migration           3.2571e-01  4.0681e-02  8.0064 1.110e-15
## lag.people_density2019  8.8194e-04  6.9242e-04  1.2737 0.2027708
## lag.prework             8.3700e+01  2.1742e+01  3.8498 0.0001182
## lag.postwork            5.6434e+01  1.1991e+01  4.7062 2.524e-06
## lag.benefit500          5.4287e+01  3.6798e+01  1.4753 0.1401422
## lag.unemployment        1.4902e-01  9.0005e-02  1.6556 0.0977949
## lag.water              -1.5216e-02  1.2628e-02 -1.2049 0.2282494
## lag.sewage              1.2130e-02  1.0698e-02  1.1339 0.2568431
## lag.gas                -7.6302e-03  8.3617e-03 -0.9125 0.3614985
## lag.opos               -1.3384e+01  2.1126e+00 -6.3353 2.368e-10
## lag.mean_enabled        2.1499e-03  8.2949e-04  2.5919 0.0095452
## lag.place_area         -3.8420e-03  1.5470e-02 -0.2484 0.8038581
## 
## Lambda: 0.66943, LR test value: 679.71, p-value: < 2.22e-16
## Approximate (numerical Hessian) standard error: 0.02117
##     z-value: 31.621, p-value: < 2.22e-16
## Wald statistic: 999.9, p-value: < 2.22e-16
## 
## Log likelihood: -6396.303 for error model
## ML residual variance (sigma squared): 9.4628, (sigma: 3.0762)
## Number of observations: 2477 
## Number of parameters estimated: 33 
## AIC: 12859, (AIC for lm: 13536)

moran.test(SDEM2_1$residuals, radius.listw)

## 
##  Moran I test under randomisation
## 
## data:  SDEM2_1$residuals  
## weights: radius.listw    
## 
## Moran I statistic standard deviate = -1.9597, p-value = 0.975
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##     -0.0209260628     -0.0004038772      0.0001096690

It seems that we should stick to one of those models again. Moreover we should probably try to remove expenses and water, as they are insignificant. We know we had spatial autocorrelation of residuals in reduced model for radius matrix as well.

7.4.3 SAR, SLX and SEM models

#SAR
SAR2_1<-lagsarlm(formula, data=data, listw=radius.listw, method="LU")
summary(SAR2_1)

## 
## Call:lagsarlm(formula = formula, data = data, listw = radius.listw, 
##     method = "LU")
## 
## Residuals:
##         Min          1Q      Median          3Q         Max 
## -17.4879624  -1.8828480   0.0035344   1.9488814  29.2673972 
## 
## Type: lag 
## Coefficients: (numerical Hessian approximate standard errors) 
##                       Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)        -1.2153e+01  2.7924e+00 -4.3522 1.348e-05
## expenses           -5.1090e-04  3.1307e-04 -1.6319 0.1026984
## femin               9.8894e-02  2.3508e-02  4.2068 2.590e-05
## income              8.5664e-04  3.0199e-04  2.8366 0.0045593
## migration           2.4910e-01  1.4356e-02 17.3516 < 2.2e-16
## people_density2019  6.8357e-04  2.0556e-04  3.3255 0.0008827
## prework             4.0413e+01  7.4970e+00  5.3905 7.025e-08
## postwork            5.7477e+01  4.2548e+00 13.5086 < 2.2e-16
## benefit500          8.1705e+01  1.2458e+01  6.5582 5.445e-11
## unemployment       -1.8944e-01  3.0673e-02 -6.1761 6.569e-10
## water               6.5924e-03  4.2823e-03  1.5394 0.1236955
## sewage              2.7101e-02  3.3261e-03  8.1478 4.441e-16
## gas                 2.0867e-02  2.9194e-03  7.1478 8.815e-13
## opos               -5.6227e+00  6.9314e-01 -8.1119 4.441e-16
## mean_enabled        1.0359e-03  2.6445e-04  3.9173 8.956e-05
## place_area         -4.2117e-02  5.7316e-03 -7.3482 2.010e-13
## 
## Rho: 0.57899, LR test value: 1005.1, p-value: < 2.22e-16
## Approximate (numerical Hessian) standard error: 0.015576
##     z-value: 37.172, p-value: < 2.22e-16
## Wald statistic: 1381.8, p-value: < 2.22e-16
## 
## Log likelihood: -6484.727 for lag model
## ML residual variance (sigma squared): 10.406, (sigma: 3.2258)
## Number of observations: 2477 
## Number of parameters estimated: 18 
## AIC: 13005, (AIC for lm: 14009)

moran.test(SAR2_1$residuals, radius.listw)

## 
##  Moran I test under randomisation
## 
## data:  SAR2_1$residuals  
## weights: radius.listw    
## 
## Moran I statistic standard deviate = 3.0778, p-value = 0.001043
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.0318223119     -0.0004038772      0.0001096294

#SLX
SLX2_1<-lmSLX(formula, data=data, listw=radius.listw)
summary(SLX2_1)

## 
## Call:
## lm(formula = formula(paste("y ~ ", paste(colnames(x)[-1], collapse = "+"))), 
##     data = as.data.frame(x), weights = weights)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.107  -2.285   0.030   2.226  33.360 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -1.626e+01  6.587e+00  -2.469 0.013634 *  
## expenses               -5.278e-04  3.523e-04  -1.498 0.134245    
## femin                   9.708e-02  2.907e-02   3.339 0.000853 ***
## income                  8.741e-04  3.391e-04   2.577 0.010014 *  
## migration               2.905e-01  1.968e-02  14.762  < 2e-16 ***
## people_density2019      3.171e-04  2.504e-04   1.266 0.205483    
## prework                 3.676e+01  1.008e+01   3.646 0.000272 ***
## postwork                5.158e+01  6.110e+00   8.442  < 2e-16 ***
## benefit500              4.207e+01  1.827e+01   2.303 0.021354 *  
## unemployment           -3.304e-01  5.513e-02  -5.993 2.36e-09 ***
## water                   7.861e-03  6.470e-03   1.215 0.224517    
## sewage                  2.329e-02  4.255e-03   5.473 4.87e-08 ***
## gas                     3.013e-02  4.432e-03   6.799 1.32e-11 ***
## opos                   -2.651e+00  1.485e+00  -1.785 0.074372 .  
## mean_enabled            1.107e-03  3.168e-04   3.493 0.000485 ***
## place_area             -4.699e-02  7.361e-03  -6.384 2.06e-10 ***
## lag.expenses            4.636e-04  8.401e-04   0.552 0.581138    
## lag.femin               2.680e-02  5.820e-02   0.460 0.645251    
## lag.income              1.943e-04  8.178e-04   0.238 0.812184    
## lag.migration           3.419e-01  3.469e-02   9.854  < 2e-16 ***
## lag.people_density2019  3.284e-04  5.552e-04   0.591 0.554273    
## lag.prework             8.949e+01  1.717e+01   5.213 2.01e-07 ***
## lag.postwork            7.480e+01  9.340e+00   8.009 1.77e-15 ***
## lag.benefit500          9.053e+01  2.908e+01   3.114 0.001870 ** 
## lag.unemployment        3.191e-02  7.734e-02   0.413 0.679919    
## lag.water              -4.452e-03  9.931e-03  -0.448 0.653981    
## lag.sewage              1.607e-02  8.377e-03   1.918 0.055209 .  
## lag.gas                -8.041e-03  6.777e-03  -1.186 0.235544    
## lag.opos               -1.482e+01  1.868e+00  -7.935 3.18e-15 ***
## lag.mean_enabled        3.470e-03  6.393e-04   5.427 6.28e-08 ***
## lag.place_area         -1.483e-02  1.284e-02  -1.156 0.247893    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.694 on 2446 degrees of freedom
## Multiple R-squared:  0.7147, Adjusted R-squared:  0.7112 
## F-statistic: 204.2 on 30 and 2446 DF,  p-value: < 2.2e-16

moran.test(SLX2_1$residuals, radius.listw)

## 
##  Moran I test under randomisation
## 
## data:  SLX2_1$residuals  
## weights: radius.listw    
## 
## Moran I statistic standard deviate = 32.725, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.3422345303     -0.0004038772      0.0001096237

#SEM
SEM2_1<-errorsarlm(formula, data=data, listw=radius.listw, method="LU")
summary(SEM2_1)

## 
## Call:errorsarlm(formula = formula, data = data, listw = radius.listw, 
##     method = "LU")
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -16.121129  -1.898954   0.013264   1.852818  26.142786 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                       Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)        26.46877299  3.03610791  8.7180 < 2.2e-16
## expenses           -0.00021874  0.00028768 -0.7604  0.447036
## femin               0.10183533  0.02419657  4.2087 2.569e-05
## income              0.00053270  0.00027615  1.9290  0.053730
## migration           0.25612639  0.01621171 15.7989 < 2.2e-16
## people_density2019  0.00046979  0.00020920  2.2457  0.024725
## prework            33.00225862  8.42723564  3.9161 8.998e-05
## postwork           43.99901199  5.10042465  8.6265 < 2.2e-16
## benefit500         43.45901388 15.18096740  2.8627  0.004200
## unemployment       -0.41980197  0.04416799 -9.5047 < 2.2e-16
## water               0.00827486  0.00537675  1.5390  0.123802
## sewage              0.02190782  0.00356460  6.1459 7.949e-10
## gas                 0.03221510  0.00368568  8.7406 < 2.2e-16
## opos               -2.67827706  1.16890473 -2.2913  0.021948
## mean_enabled        0.00078551  0.00026492  2.9651  0.003026
## place_area         -0.05165104  0.00613177 -8.4235 < 2.2e-16
## 
## Lambda: 0.8108, LR test value: 986.24, p-value: < 2.22e-16
## Approximate (numerical Hessian) standard error: 0.015341
##     z-value: 52.853, p-value: < 2.22e-16
## Wald statistic: 2793.4, p-value: < 2.22e-16
## 
## Log likelihood: -6494.134 for error model
## ML residual variance (sigma squared): 9.7096, (sigma: 3.116)
## Number of observations: 2477 
## Number of parameters estimated: 18 
## AIC: 13024, (AIC for lm: 14009)

moran.test(SEM2_1$residuals, radius.listw)

## 
##  Moran I test under randomisation
## 
## data:  SEM2_1$residuals  
## weights: radius.listw    
## 
## Moran I statistic standard deviate = -3.5399, p-value = 0.9998
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##     -0.0374762346     -0.0004038772      0.0001096763

7.4.4 Models comparison

# LR.sarlm(GNS2_1, SAC2_1)
# LR.sarlm(GNS2_1, SDEM2_1)
# LR.sarlm(GNS2_1, SDM2_1)
# LR.sarlm(GNS2_1, SAR2_1)
# LR.sarlm(GNS2_1, SLX2_1)
# LR.sarlm(GNS2_1, SEM2_1)

model_of_comparison <- c("SAC","SDEM","SDM","SAR","SLX","SEM")
Likelihood_ratio <- c(LR.sarlm(GNS2_1, SAC2_1)[[1]][[1]], LR.sarlm(GNS2_1, SDEM2_1)[[1]][[1]],
                      LR.sarlm(GNS2_1, SDM2_1)[[1]][[1]], LR.sarlm(GNS2_1, SAR2_1)[[1]][[1]],
                      LR.sarlm(GNS2_1, SLX2_1)[[1]][[1]], LR.sarlm(GNS2_1, SEM2_1)[[1]][[1]])
df <- c(LR.sarlm(GNS2_1, SAC2_1)[[2]][[1]],LR.sarlm(GNS2_1, SDEM2_1)[[2]][[1]],
        LR.sarlm(GNS2_1, SDM2_1)[[2]][[1]],
        LR.sarlm(GNS2_1, SAR2_1)[[2]][[1]],LR.sarlm(GNS2_1, SLX2_1)[[2]][[1]],
        LR.sarlm(GNS2_1, SEM2_1)[[2]][[1]])
p_value <- c(LR.sarlm(GNS2_1, SAC2_1)[[3]][[1]],LR.sarlm(GNS2_1, SDEM2_1)[[3]][[1]],
             LR.sarlm(GNS2_1, SDM2_1)[[3]][[1]], LR.sarlm(GNS2_1, SAR2_1)[[3]][[1]],
             LR.sarlm(GNS2_1, SLX2_1)[[3]][[1]],LR.sarlm(GNS2_1, SEM2_1)[[3]][[1]])
Log_likelihood_GNS <- c(LR.sarlm(GNS2_1, SAC2_1)[[4]][[1]],LR.sarlm(GNS2_1, SDEM2_1)[[4]][[1]],
                        LR.sarlm(GNS2_1, SDM2_1)[[4]][[1]],LR.sarlm(GNS2_1, SAR2_1)[[4]][[1]],
                        LR.sarlm(GNS2_1, SLX2_1)[[4]][[1]],LR.sarlm(GNS2_1, SEM2_1)[[4]][[1]])
Log_likelihood_comparison <- c(LR.sarlm(GNS2_1, SAC2_1)[[4]][[2]],LR.sarlm(GNS2_1, SDEM2_1)[[4]][[2]],
                        LR.sarlm(GNS2_1, SDM2_1)[[4]][[2]],LR.sarlm(GNS2_1, SAR2_1)[[4]][[2]],
                        LR.sarlm(GNS2_1, SLX2_1)[[4]][[2]],LR.sarlm(GNS2_1, SEM2_1)[[4]][[2]])

comparison_df <- data.frame(model_of_comparison, Likelihood_ratio, df, p_value,
                            Log_likelihood_GNS, Log_likelihood_comparison)

knitr::kable(comparison_df,
 caption = "Comparison of the models - GNS vs others")

Comparison of the models - GNS vs others
model_of_comparison	Likelihood_ratio	df	p_value	Log_likelihood_GNS	Log_likelihood_comparison
SAC	179.826606	15	0.0000000	-6383.738	-6473.652
SDEM	25.129283	1	0.0000005	-6383.738	-6396.303
SDM	8.551301	1	0.0034528	-6383.738	-6388.014
SAR	201.976862	16	0.0000000	-6383.738	-6484.727
SLX	704.839948	2	0.0000000	-6383.738	-6736.158
SEM	220.791344	16	0.0000000	-6383.738	-6494.134

GNS model is still the best model according to likelihood ratio test.

LR.sarlm(SDEM2_1, SEM2_1)

## 
##  Likelihood ratio for spatial linear models
## 
## data:  
## Likelihood ratio = 195.66, df = 15, p-value < 2.2e-16
## sample estimates:
## Log likelihood of SDEM2_1  Log likelihood of SEM2_1 
##                 -6396.303                 -6494.134

LR.sarlm(SAC2_1, SEM2_1)

## 
##  Likelihood ratio for spatial linear models
## 
## data:  
## Likelihood ratio = 40.965, df = 1, p-value = 1.55e-10
## sample estimates:
## Log likelihood of SAC2_1 Log likelihood of SEM2_1 
##                -6473.652                -6494.134

screenreg(list(GNS2_1, SAC2_1, SDM2_1, SDEM2_1, SAR2_1, SEM2_1))

## 
## ==========================================================================================================
##                         Model 1       Model 2       Model 3       Model 4       Model 5       Model 6     
## ----------------------------------------------------------------------------------------------------------
## rho                         0.76 ***      0.48 ***      0.66 ***                    0.58 ***              
##                            (0.03)        (0.03)        (0.02)                      (0.02)                 
## (Intercept)                -6.02         -7.08 *       -7.48         -8.60        -12.15 ***     26.47 ***
##                            (4.59)        (3.35)        (5.48)        (9.83)        (2.79)        (3.04)   
## expenses                   -0.00         -0.00         -0.00         -0.00         -0.00         -0.00    
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## femin                       0.10 ***      0.11 ***      0.10 ***      0.10 ***      0.10 ***      0.10 ***
##                            (0.02)        (0.02)        (0.02)        (0.03)        (0.02)        (0.02)   
## income                      0.00 *        0.00 **       0.00 **       0.00 *        0.00 **       0.00    
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## migration                   0.26 ***      0.27 ***      0.26 ***      0.29 ***      0.25 ***      0.26 ***
##                            (0.02)        (0.02)        (0.02)        (0.02)        (0.01)        (0.02)   
## people_density2019          0.00          0.00 **       0.00          0.00          0.00 ***      0.00 *  
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## prework                    31.61 ***     39.43 ***     31.61 ***     40.72 ***     40.41 ***     33.00 ***
##                            (8.50)        (8.04)        (8.38)        (8.50)        (7.50)        (8.43)   
## postwork                   42.47 ***     58.26 ***     43.90 ***     52.28 ***     57.48 ***     44.00 ***
##                            (5.16)        (4.66)        (5.08)        (5.04)        (4.25)        (5.10)   
## benefit500                 28.60         83.64 ***     31.04 *       41.95 **      81.70 ***     43.46 ** 
##                           (15.36)       (13.68)       (15.19)       (14.96)       (12.46)       (15.18)   
## unemployment               -0.38 ***     -0.26 ***     -0.38 ***     -0.37 ***     -0.19 ***     -0.42 ***
##                            (0.05)        (0.04)        (0.05)        (0.04)        (0.03)        (0.04)   
## water                       0.01          0.01          0.01          0.01          0.01          0.01    
##                            (0.01)        (0.00)        (0.01)        (0.01)        (0.00)        (0.01)   
## sewage                      0.02 ***      0.03 ***      0.02 ***      0.02 ***      0.03 ***      0.02 ***
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## gas                         0.03 ***      0.03 ***      0.03 ***      0.03 ***      0.02 ***      0.03 ***
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## opos                       -1.60         -5.58 ***     -1.51         -2.78 *       -5.62 ***     -2.68 *  
##                            (1.25)        (0.82)        (1.23)        (1.15)        (0.69)        (1.17)   
## mean_enabled                0.00 **       0.00 ***      0.00 **       0.00 ***      0.00 ***      0.00 ** 
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## place_area                 -0.05 ***     -0.05 ***     -0.05 ***     -0.05 ***     -0.04 ***     -0.05 ***
##                            (0.01)        (0.01)        (0.01)        (0.01)        (0.01)        (0.01)   
## lag.expenses                0.00                        0.00          0.00                                
##                            (0.00)                      (0.00)        (0.00)                               
## lag.femin                  -0.07                       -0.06          0.03                                
##                            (0.04)                      (0.05)        (0.07)                               
## lag.income                 -0.00                        0.00          0.00                                
##                            (0.00)                      (0.00)        (0.00)                               
## lag.migration              -0.04                        0.02          0.33 ***                            
##                            (0.03)                      (0.03)        (0.04)                               
## lag.people_density2019     -0.00                        0.00          0.00                                
##                            (0.00)                      (0.00)        (0.00)                               
## lag.prework                 9.34                       22.40         83.70 ***                            
##                           (13.66)                     (14.43)       (21.74)                               
## lag.postwork               -8.06                        2.88         56.43 ***                            
##                            (8.05)                      (8.10)       (11.99)                               
## lag.benefit500            -11.31                       -1.23         54.29                                
##                           (22.77)                     (24.35)       (36.80)                               
## lag.unemployment            0.37 ***                    0.34 ***      0.15                                
##                            (0.06)                      (0.07)        (0.09)                               
## lag.water                  -0.01                       -0.01         -0.02                                
##                            (0.01)                      (0.01)        (0.01)                               
## lag.sewage                 -0.01                       -0.01          0.01                                
##                            (0.01)                      (0.01)        (0.01)                               
## lag.gas                    -0.03 ***                   -0.03 ***     -0.01                                
##                            (0.01)                      (0.01)        (0.01)                               
## lag.opos                   -3.59 *                     -5.36 ***    -13.38 ***                            
##                            (1.56)                      (1.58)        (2.11)                               
## lag.mean_enabled            0.00                        0.00          0.00 **                             
##                            (0.00)                      (0.00)        (0.00)                               
## lag.place_area              0.04 ***                    0.03 **      -0.00                                
##                            (0.01)                      (0.01)        (0.02)                               
## lambda                     -0.24 ***      0.28 ***                    0.67 ***                    0.81 ***
##                            (0.07)        (0.06)                      (0.02)                      (0.02)   
## ----------------------------------------------------------------------------------------------------------
## R^2                         0.81          0.78          0.80          0.80          0.78          0.80    
## AIC                     12835.48      12985.30      12842.03      12858.61      13005.45      13024.27    
## BIC                     13033.18      13095.78      13033.92      13050.49      13110.12      13128.93    
## Deviance                22307.61      25747.55      23356.62      23439.41      25774.67      24050.71    
## Log Likelihood          -6383.74      -6473.65      -6388.01      -6396.30      -6484.73      -6494.13    
## nobs                     2477          2477          2477          2477          2477          2477       
## ==========================================================================================================
## *** p < 0.001; ** p < 0.01; * p < 0.05

Models with one component are again not enough and have higher AIC as well.

7.5 Spatial models (with radius matrix) without insignificant variables

#GNS
GNS2_3<-sacsarlm(formula3, data=data, listw=radius.listw, type="sacmixed")
#SAC
SAC2_3<-sacsarlm(formula3, data=data, listw=radius.listw, method = 'LU')
#SDM
SDM2_3<-lagsarlm(formula3, data=data, listw=radius.listw, type="mixed", method = 'LU', trs = trMat_r)
#SDEM
SDEM2_3<-errorsarlm(formula3, data=data, listw=radius.listw, etype="emixed", method="LU")
#SAR
SAR2_3<-lagsarlm(formula3, data=data, listw=radius.listw, method="LU")
moran.test(SAR2_3$residuals, radius.listw)

## 
##  Moran I test under randomisation
## 
## data:  SAR2_3$residuals  
## weights: radius.listw    
## 
## Moran I statistic standard deviate = 3.1828, p-value = 0.0007294
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.0329222897     -0.0004038772      0.0001096380

SLX2_3<-lmSLX(formula3, data=data, listw=radius.listw)
moran.test(SLX2_3$residuals, radius.listw)

## 
##  Moran I test under randomisation
## 
## data:  SLX2_3$residuals  
## weights: radius.listw    
## 
## Moran I statistic standard deviate = 32.674, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.3417019626     -0.0004038772      0.0001096272

#SEM
SEM2_3<-errorsarlm(formula3, data=data, listw=radius.listw, method="LU")

screenreg(list(GNS2_3, SAC2_3, SDM2_3, SDEM2_3, SAR2_3, SEM2_3))

## 
## ==========================================================================================================
##                         Model 1       Model 2       Model 3       Model 4       Model 5       Model 6     
## ----------------------------------------------------------------------------------------------------------
## rho                         0.76 ***      0.47 ***      0.66 ***                    0.58 ***              
##                            (0.03)        (0.03)        (0.02)                      (0.02)                 
## (Intercept)                -5.67         -6.14         -7.37        -10.13        -11.35 ***     27.08 ***
##                            (4.48)        (3.32)        (5.37)        (9.70)        (2.81)        (3.01)   
## femin                       0.10 ***      0.11 ***      0.10 ***      0.10 ***      0.10 ***      0.10 ***
##                            (0.02)        (0.02)        (0.02)        (0.03)        (0.02)        (0.02)   
## migration                   0.26 ***      0.27 ***      0.26 ***      0.29 ***      0.25 ***      0.26 ***
##                            (0.02)        (0.02)        (0.02)        (0.02)        (0.01)        (0.02)   
## income                      0.00 ***      0.00 ***      0.00 ***      0.00 ***      0.00 ***      0.00 ***
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## people_density2019          0.00          0.00 **       0.00          0.00          0.00 **       0.00 *  
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## prework                    31.15 ***     38.53 ***     31.08 ***     40.57 ***     39.15 ***     32.83 ***
##                            (8.50)        (7.96)        (8.38)        (8.49)        (7.51)        (8.42)   
## postwork                   41.97 ***     58.15 ***     43.44 ***     51.88 ***     57.56 ***     43.83 ***
##                            (5.16)        (4.67)        (5.08)        (5.04)        (4.27)        (5.10)   
## benefit500                 27.00         83.36 ***     29.36         39.73 **      82.45 ***     42.68 ** 
##                           (15.36)       (13.66)       (15.18)       (14.94)       (12.47)       (15.17)   
## unemployment               -0.38 ***     -0.27 ***     -0.38 ***     -0.38 ***     -0.19 ***     -0.42 ***
##                            (0.05)        (0.04)        (0.05)        (0.04)        (0.03)        (0.04)   
## sewage                      0.02 ***      0.03 ***      0.02 ***      0.02 ***      0.03 ***      0.02 ***
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## gas                         0.03 ***      0.03 ***      0.03 ***      0.03 ***      0.02 ***      0.03 ***
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## opos                       -1.64         -5.45 ***     -1.55         -2.84 *       -5.43 ***     -2.67 *  
##                            (1.25)        (0.81)        (1.24)        (1.15)        (0.68)        (1.17)   
## mean_enabled                0.00 **       0.00 ***      0.00 **       0.00 ***      0.00 ***      0.00 ** 
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## place_area                 -0.05 ***     -0.05 ***     -0.05 ***     -0.05 ***     -0.04 ***     -0.05 ***
##                            (0.01)        (0.01)        (0.01)        (0.01)        (0.01)        (0.01)   
## lag.femin                  -0.08                       -0.06          0.04                                
##                            (0.04)                      (0.05)        (0.07)                               
## lag.migration              -0.05                        0.02          0.32 ***                            
##                            (0.03)                      (0.03)        (0.04)                               
## lag.income                  0.00                        0.00          0.00 *                              
##                            (0.00)                      (0.00)        (0.00)                               
## lag.people_density2019     -0.00                        0.00          0.00                                
##                            (0.00)                      (0.00)        (0.00)                               
## lag.prework                 8.11                       21.88         85.76 ***                            
##                           (13.41)                     (14.17)       (21.55)                               
## lag.postwork               -7.69                        3.45         56.64 ***                            
##                            (8.04)                      (8.10)       (11.98)                               
## lag.benefit500             -7.83                        2.05         54.58                                
##                           (22.50)                     (24.10)       (36.54)                               
## lag.unemployment            0.37 ***                    0.34 ***      0.16                                
##                            (0.06)                      (0.06)        (0.09)                               
## lag.sewage                 -0.01                       -0.01          0.01                                
##                            (0.01)                      (0.01)        (0.01)                               
## lag.gas                    -0.03 ***                   -0.03 ***     -0.01                                
##                            (0.01)                      (0.01)        (0.01)                               
## lag.opos                   -3.41 *                     -5.26 ***    -13.58 ***                            
##                            (1.53)                      (1.55)        (2.05)                               
## lag.mean_enabled            0.00                        0.00          0.00 *                              
##                            (0.00)                      (0.00)        (0.00)                               
## lag.place_area              0.04 ***                    0.03 **      -0.00                                
##                            (0.01)                      (0.01)        (0.02)                               
## lambda                     -0.25 ***      0.29 ***                    0.67 ***                    0.81 ***
##                            (0.07)        (0.06)                      (0.02)                      (0.02)   
## ----------------------------------------------------------------------------------------------------------
## R^2                         0.81          0.78          0.80          0.80          0.78          0.80    
## AIC                     12832.60      12984.94      12839.67      12857.24      13006.59      13023.13    
## BIC                     13007.04      13083.80      13008.30      13025.87      13099.62      13116.17    
## Deviance                22332.07      25794.03      23413.51      23511.06      25845.03      24101.49    
## Log Likelihood          -6386.30      -6475.47      -6390.83      -6399.62      -6487.29      -6495.57    
## nobs                     2477          2477          2477          2477          2477          2477       
## ==========================================================================================================
## *** p < 0.001; ** p < 0.01; * p < 0.05

# LR.sarlm(GNS2_3, SAC2_3)
# LR.sarlm(GNS2_3, SDEM2_3)
# LR.sarlm(GNS2_3, SDM2_3)
# LR.sarlm(GNS2_3, SAR2_3)
# LR.sarlm(GNS2_3, SLX2_3)
# LR.sarlm(GNS2_3, SEM2_3)

model_of_comparison <- c("SAC","SDEM","SDM","SAR","SLX","SEM")
Likelihood_ratio <- c(LR.sarlm(GNS2_3, SAC2_3)[[1]][[1]], LR.sarlm(GNS2_3, SDEM2_3)[[1]][[1]],
                      LR.sarlm(GNS2_3, SDM2_3)[[1]][[1]], LR.sarlm(GNS2_3, SAR2_3)[[1]][[1]],
                      LR.sarlm(GNS2_3, SLX2_3)[[1]][[1]], LR.sarlm(GNS2_3, SEM2_3)[[1]][[1]])
df <- c(LR.sarlm(GNS2_3, SAC2_3)[[2]][[1]],LR.sarlm(GNS2_3, SDEM2_3)[[2]][[1]],
        LR.sarlm(GNS2_3, SDM2_3)[[2]][[1]],
        LR.sarlm(GNS2_3, SAR2_3)[[2]][[1]],LR.sarlm(GNS2_3, SLX2_3)[[2]][[1]],
        LR.sarlm(GNS2_3, SEM2_3)[[2]][[1]])
p_value <- c(LR.sarlm(GNS2_3, SAC2_3)[[3]][[1]],LR.sarlm(GNS2_3, SDEM2_3)[[3]][[1]],
             LR.sarlm(GNS2_3, SDM2_3)[[3]][[1]], LR.sarlm(GNS2_3, SAR2_3)[[3]][[1]],
             LR.sarlm(GNS2_3, SLX2_3)[[3]][[1]],LR.sarlm(GNS2_3, SEM2_3)[[3]][[1]])
Log_likelihood_GNS <- c(LR.sarlm(GNS2_3, SAC2_3)[[4]][[1]],LR.sarlm(GNS2_3, SDEM2_3)[[4]][[1]],
                        LR.sarlm(GNS2_3, SDM2_3)[[4]][[1]],LR.sarlm(GNS2_3, SAR2_3)[[4]][[1]],
                        LR.sarlm(GNS2_3, SLX2_3)[[4]][[1]],LR.sarlm(GNS2_3, SEM2_3)[[4]][[1]])
Log_likelihood_comparison <- c(LR.sarlm(GNS2_3, SAC2_3)[[4]][[2]],LR.sarlm(GNS2_3, SDEM2_3)[[4]][[2]],
                        LR.sarlm(GNS2_3, SDM2_3)[[4]][[2]],LR.sarlm(GNS2_3, SAR2_3)[[4]][[2]],
                        LR.sarlm(GNS2_3, SLX2_3)[[4]][[2]],LR.sarlm(GNS2_3, SEM2_3)[[4]][[2]])

comparison_df <- data.frame(model_of_comparison, Likelihood_ratio, df, p_value,
                            Log_likelihood_GNS, Log_likelihood_comparison)

knitr::kable(comparison_df,
 caption = "Comparison of the models - GNS vs others")

Comparison of the models - GNS vs others
model_of_comparison	Likelihood_ratio	df	p_value	Log_likelihood_GNS	Log_likelihood_comparison
SAC	178.346067	13	0.0000000	-6386.299	-6475.472
SDEM	26.643995	1	0.0000002	-6386.299	-6399.621
SDM	9.070707	1	0.0025974	-6386.299	-6390.835
SAR	201.988266	14	0.0000000	-6386.299	-6487.294
SLX	703.715744	2	0.0000000	-6386.299	-6738.157
SEM	218.535670	14	0.0000000	-6386.299	-6495.567

GNS model is still the best model according to the likelihood ratio test.

LR.sarlm(SDEM2_3, SEM2_3)

## 
##  Likelihood ratio for spatial linear models
## 
## data:  
## Likelihood ratio = 191.89, df = 13, p-value < 2.2e-16
## sample estimates:
## Log likelihood of SDEM2_3  Log likelihood of SEM2_3 
##                 -6399.621                 -6495.567

LR.sarlm(SAC2_3, SEM2_3)

## 
##  Likelihood ratio for spatial linear models
## 
## data:  
## Likelihood ratio = 40.19, df = 1, p-value = 2.305e-10
## sample estimates:
## Log likelihood of SAC2_3 Log likelihood of SEM2_3 
##                -6475.472                -6495.567

Moran I shows that again we should not think about the simplest models like SAR or SLX.

Generally SAC and SDEM look better than GNS and SDM because the latter have so many insignificant variables here. But again GNS and SDM have lower AIC. SAC (2) is looking very good with no insignificant variables at all, but it may be lacking some of the info from those thetas which in SDEM are significant and moreover they have all same direction of effect as lambda. Also one may wonder if the effects of Rho and Lambda aren’t just the effect of lambda divided between two components, especially that its AIC is much higher.

Comparing to reduced model with contingency matrix, this SDEM is probably worse anyway. Density is almost significant here, but its lag is not and there are more lags insignificant in this model with radius matrix. AIC is also higher here.

Let’s try knn matrix.

7.6 KNN matrix models

7.6.1 Spatial models on the full dataset

# GNS (Manski)
GNS3_1<-sacsarlm(formula, data=data, listw=knn.listw, type="sacmixed")
#SAC
SAC3_1<-sacsarlm(formula, data=data, listw=knn.listw)
#SDM
SDM3_1<-lagsarlm(formula, data=data, listw=knn.listw, type="mixed") 
#SDEM
SDEM3_1<-errorsarlm(formula, data=data, listw=knn.listw, etype="emixed", method="LU")

W.c_k<-as(as_dgRMatrix_listw(knn.listw), "CsparseMatrix") 
trMat_k<-trW(W.c_k, type="mult") 

#SAR
SAR3_1<-lagsarlm(formula, data=data, listw=knn.listw, method="LU", trs = trMat_k)
#SLX
SLX3_1<-lmSLX(formula, data=data, listw=knn.listw)
#SEM
SEM3_1<-errorsarlm(formula, data=data, listw=knn.listw, method="LU")

screenreg(list(GNS3_1, SAC3_1, SDM3_1, SDEM3_1, SAR3_1, SEM3_1))

## 
## ==========================================================================================================
##                         Model 1       Model 2       Model 3       Model 4       Model 5       Model 6     
## ----------------------------------------------------------------------------------------------------------
## rho                         0.51 ***      0.30 ***      0.34 ***                    0.31 ***              
##                            (0.03)        (0.02)        (0.01)                      (0.01)                 
## (Intercept)                -4.47         -5.80         -5.55         -6.32         -6.41 *       13.37 ***
##                            (3.10)        (3.12)        (3.96)        (5.57)        (3.03)        (3.23)   
## expenses                   -0.00         -0.00         -0.00         -0.00         -0.00          0.00    
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## femin                       0.13 ***      0.13 ***      0.13 ***      0.13 ***      0.13 ***      0.14 ***
##                            (0.03)        (0.03)        (0.03)        (0.03)        (0.03)        (0.03)   
## income                      0.00          0.00          0.00          0.00          0.00 *        0.00    
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## migration                   0.29 ***      0.30 ***      0.31 ***      0.34 ***      0.29 ***      0.33 ***
##                            (0.02)        (0.02)        (0.02)        (0.02)        (0.02)        (0.02)   
## people_density2019          0.00          0.00 *        0.00          0.00 *        0.00 *        0.00 ** 
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## prework                    35.75 ***     52.99 ***     41.41 ***     53.81 ***     53.22 ***     47.75 ***
##                            (8.99)        (8.19)        (8.72)        (8.84)        (8.10)        (8.95)   
## postwork                   51.46 ***     71.79 ***     57.37 ***     68.20 ***     71.59 ***     63.84 ***
##                            (5.33)        (4.64)        (5.08)        (5.06)        (4.57)        (5.15)   
## benefit500                 64.02 ***    117.51 ***     72.76 ***     87.32 ***    115.67 ***    121.47 ***
##                           (16.15)       (13.70)       (15.52)       (15.27)       (13.44)       (15.43)   
## unemployment               -0.36 ***     -0.32 ***     -0.36 ***     -0.38 ***     -0.31 ***     -0.46 ***
##                            (0.05)        (0.03)        (0.04)        (0.04)        (0.03)        (0.04)   
## water                       0.00          0.00          0.00          0.00          0.00         -0.00    
##                            (0.01)        (0.00)        (0.01)        (0.01)        (0.00)        (0.01)   
## sewage                      0.02 ***      0.03 ***      0.02 ***      0.02 ***      0.03 ***      0.02 ***
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## gas                         0.03 ***      0.03 ***      0.03 ***      0.03 ***      0.02 ***      0.03 ***
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## opos                       -3.43 **      -8.20 ***     -4.34 ***     -6.40 ***     -8.14 ***     -8.38 ***
##                            (1.26)        (0.77)        (1.16)        (1.03)        (0.75)        (0.94)   
## mean_enabled                0.00 ***      0.00 ***      0.00 ***      0.00 ***      0.00 ***      0.00 ***
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## place_area                 -0.05 ***     -0.06 ***     -0.06 ***     -0.06 ***     -0.06 ***     -0.07 ***
##                            (0.01)        (0.01)        (0.01)        (0.01)        (0.01)        (0.01)   
## lag.expenses                0.00                       -0.00         -0.00                                
##                            (0.00)                      (0.00)        (0.00)                               
## lag.femin                  -0.07 *                     -0.04          0.00                                
##                            (0.03)                      (0.03)        (0.03)                               
## lag.income                 -0.00                       -0.00          0.00                                
##                            (0.00)                      (0.00)        (0.00)                               
## lag.migration              -0.06 **                     0.00          0.12 ***                            
##                            (0.02)                      (0.02)        (0.02)                               
## lag.people_density2019      0.00                        0.00          0.00                                
##                            (0.00)                      (0.00)        (0.00)                               
## lag.prework                17.95                       29.41 **      48.86 ***                            
##                            (9.91)                      (9.54)        (9.86)                               
## lag.postwork                5.16                       17.56 **      40.81 ***                            
##                            (6.23)                      (5.62)        (5.65)                               
## lag.benefit500              4.16                       18.81         50.92 **                             
##                           (17.47)                     (16.74)       (16.90)                               
## lag.unemployment            0.16 **                     0.10 *       -0.03                                
##                            (0.05)                      (0.05)        (0.04)                               
## lag.water                  -0.00                       -0.00         -0.00                                
##                            (0.01)                      (0.01)        (0.01)                               
## lag.sewage                 -0.01                       -0.00          0.01                                
##                            (0.00)                      (0.00)        (0.00)                               
## lag.gas                    -0.02 ***                   -0.01 **      -0.00                                
##                            (0.00)                      (0.00)        (0.00)                               
## lag.opos                   -3.51 **                    -4.86 ***     -7.17 ***                            
##                            (1.32)                      (1.21)        (1.10)                               
## lag.mean_enabled            0.00                        0.00 *        0.00 ***                            
##                            (0.00)                      (0.00)        (0.00)                               
## lag.place_area              0.02 *                      0.01         -0.01                                
##                            (0.01)                      (0.01)        (0.01)                               
## lambda                     -0.21 ***      0.03                        0.34 ***                    0.35 ***
##                            (0.04)        (0.03)                      (0.01)                      (0.02)   
## ----------------------------------------------------------------------------------------------------------
## R^2                         0.79          0.74          0.76          0.75          0.74          0.73    
## AIC                     13345.23      13448.86      13357.24      13379.89      13447.68      13628.62    
## BIC                     13542.93      13559.35      13549.13      13571.78      13552.35      13733.29    
## Deviance                24432.62      30564.12      28513.43      28841.32      30365.59      32041.70    
## Log Likelihood          -6638.62      -6705.43      -6645.62      -6656.94      -6705.84      -6796.31    
## nobs                     2477          2477          2477          2477          2477          2477       
## ==========================================================================================================
## *** p < 0.001; ** p < 0.01; * p < 0.05

We can notice that permanently expenses and water are variables to omit, so we’ll do it before making any further decisions.

7.6.2 Spatial models without expenses and water variables

# GNS
GNS3_3<-sacsarlm(formula3, data=data, listw=knn.listw, type="sacmixed")
#SAC
SAC3_3<-sacsarlm(formula3, data=data, listw=knn.listw, method = 'LU')
#SDM
SDM3_3<-lagsarlm(formula3, data=data, listw=knn.listw, type="mixed", method="LU", trs = trMat_k)
#SDEM
SDEM3_3<-errorsarlm(formula3, data=data, listw=knn.listw, etype="emixed", method="LU")
#SAR
SAR3_3<-lagsarlm(formula3, data=data, listw=knn.listw, method="LU")
#SLX
SLX3_3<-lmSLX(formula3, data=data, listw=knn.listw)
#SEM
SEM3_3<-errorsarlm(formula3, data=data, listw=knn.listw, method="LU")

# autocorrelation
moran_model1 <- moran.test(SAC3_3$residuals, knn.listw) 
moran_model2 <- moran.test(SDM3_3$residuals, knn.listw) 
moran_model3 <- moran.test(SDEM3_3$residuals, knn.listw) 
moran_model4 <- moran.test(SAR3_3$residuals, knn.listw) 
moran_model5 <- moran.test(SLX3_3$residuals, knn.listw)
moran_model6 <- moran.test(SEM3_3$residuals, knn.listw) 

model <- c("SAC","SDM","SDEM","SAR","SLX", "SEM")
Moran_standard_deviate <- 
  c(moran_model1$statistic[[1]],moran_model2$statistic[[1]],moran_model3$statistic[[1]],
    moran_model4$statistic[[1]],moran_model5$statistic[[1]],moran_model6$statistic[[1]])
Moran_p_value <- 
  c(moran_model1$p.value[[1]],moran_model2$p.value[[1]],moran_model3$p.value[[1]],
    moran_model4$p.value[[1]],moran_model5$p.value[[1]],moran_model6$p.value[[1]])
Moran_statistic <- 
  c(moran_model1$estimate[[1]],moran_model2$estimate[[1]],moran_model3$estimate[[1]],
    moran_model4$estimate[[1]],moran_model5$estimate[[1]],moran_model6$estimate[[1]])
Moran_expectation <- 
  c(moran_model1$estimate[[2]],moran_model2$estimate[[2]],moran_model3$estimate[[2]],
    moran_model4$estimate[[2]],moran_model5$estimate[[2]],moran_model6$estimate[[2]])
Moran_variance <- 
  c(moran_model1$estimate[[3]],moran_model2$estimate[[3]],moran_model3$estimate[[3]],
    moran_model4$estimate[[3]],moran_model5$estimate[[3]],moran_model6$estimate[[3]])
Moran_I_results <- data.frame(model, Moran_statistic, Moran_standard_deviate, Moran_p_value, Moran_expectation, Moran_variance)

knitr::kable(Moran_I_results,
 caption = "Moran I test for each model results")

Moran I test for each model results
model	Moran_statistic	Moran_standard_deviate	Moran_p_value	Moran_expectation	Moran_variance
SAC	-0.0009660	-0.0229275	0.5091459	-0.0004039	0.0006010
SDM	-0.0191059	-0.7627999	0.7772086	-0.0004039	0.0006011
SDEM	-0.0077808	-0.3008871	0.6182497	-0.0004039	0.0006011
SAR	0.0122083	0.5144508	0.3034684	-0.0004039	0.0006010
SLX	0.4682443	19.1172802	0.0000000	-0.0004039	0.0006010
SEM	-0.0168938	-0.6725971	0.7493982	-0.0004039	0.0006011

For KNN matrix autocorrelation stays only for SLX model. SAR and SEM deal with this problem correctly.

# LR.sarlm(GNS3_3, SAC3_3)
# LR.sarlm(GNS3_3, SDEM3_3)
# LR.sarlm(GNS3_3, SDM3_3)
# LR.sarlm(GNS3_3, SAR3_3)
# LR.sarlm(GNS3_3, SLX3_3) 
# LR.sarlm(GNS3_3, SEM3_3)

model_of_comparison <- c("SAC","SDEM","SDM","SAR","SLX","SEM")
Likelihood_ratio <- c(LR.sarlm(GNS3_3, SAC3_3)[[1]][[1]], LR.sarlm(GNS3_3, SDEM3_3)[[1]][[1]],
                      LR.sarlm(GNS3_3, SDM3_3)[[1]][[1]], LR.sarlm(GNS3_3, SAR3_3)[[1]][[1]],
                      LR.sarlm(GNS3_3, SLX3_3)[[1]][[1]], LR.sarlm(GNS3_3, SEM3_3)[[1]][[1]])
df <- c(LR.sarlm(GNS3_3, SAC3_3)[[2]][[1]],LR.sarlm(GNS3_3, SDEM3_3)[[2]][[1]],
        LR.sarlm(GNS3_3, SDM3_3)[[2]][[1]],
        LR.sarlm(GNS3_3, SAR3_3)[[2]][[1]],LR.sarlm(GNS3_3, SLX3_3)[[2]][[1]],
        LR.sarlm(GNS3_3, SEM3_3)[[2]][[1]])
p_value <- c(LR.sarlm(GNS3_3, SAC3_3)[[3]][[1]],LR.sarlm(GNS3_3, SDEM3_3)[[3]][[1]],
             LR.sarlm(GNS3_3, SDM3_3)[[3]][[1]], LR.sarlm(GNS3_3, SAR3_3)[[3]][[1]],
             LR.sarlm(GNS3_3, SLX3_3)[[3]][[1]],LR.sarlm(GNS3_3, SEM3_3)[[3]][[1]])
Log_likelihood_GNS <- c(LR.sarlm(GNS3_3, SAC3_3)[[4]][[1]],LR.sarlm(GNS3_3, SDEM3_3)[[4]][[1]],
                        LR.sarlm(GNS3_3, SDM3_3)[[4]][[1]],LR.sarlm(GNS3_3, SAR3_3)[[4]][[1]],
                        LR.sarlm(GNS3_3, SLX3_3)[[4]][[1]],LR.sarlm(GNS3_3, SEM3_3)[[4]][[1]])
Log_likelihood_comparison <- c(LR.sarlm(GNS3_3, SAC3_3)[[4]][[2]],LR.sarlm(GNS3_3, SDEM3_3)[[4]][[2]],
                        LR.sarlm(GNS3_3, SDM3_3)[[4]][[2]],LR.sarlm(GNS3_3, SAR3_3)[[4]][[2]],
                        LR.sarlm(GNS3_3, SLX3_3)[[4]][[2]],LR.sarlm(GNS3_3, SEM3_3)[[4]][[2]])

comparison_df <- data.frame(model_of_comparison, Likelihood_ratio, df, p_value,
                            Log_likelihood_GNS, Log_likelihood_comparison)

knitr::kable(comparison_df,
 caption = "Comparison of the models - GNS vs others")

Comparison of the models - GNS vs others
model_of_comparison	Likelihood_ratio	df	p_value	Log_likelihood_GNS	Log_likelihood_comparison
SAC	133.41365	13	0.0000000	-6638.894	-6705.601
SDEM	36.63133	1	0.0000000	-6638.894	-6657.210
SDM	14.07341	1	0.0001758	-6638.894	-6645.931
SAR	134.28748	14	0.0000000	-6638.894	-6706.038
SLX	465.72509	2	0.0000000	-6638.894	-6871.757
SEM	315.35857	14	0.0000000	-6638.894	-6796.573

GNS is still the best model according to the likelihood ratio test.

LR.sarlm(SDM3_3, SAR3_3)

## 
##  Likelihood ratio for spatial linear models
## 
## data:  
## Likelihood ratio = 120.21, df = 13, p-value < 2.2e-16
## sample estimates:
## Log likelihood of SDM3_3 Log likelihood of SAR3_3 
##                -6645.931                -6706.038

LR.sarlm(SDEM3_3, SEM3_3)

## 
##  Likelihood ratio for spatial linear models
## 
## data:  
## Likelihood ratio = 278.73, df = 13, p-value < 2.2e-16
## sample estimates:
## Log likelihood of SDEM3_3  Log likelihood of SEM3_3 
##                 -6657.210                 -6796.573

screenreg(list(GNS3_3, SAC3_3, SDM3_3, SDEM3_3, SAR3_3, SEM3_3))

## 
## ==========================================================================================================
##                         Model 1       Model 2       Model 3       Model 4       Model 5       Model 6     
## ----------------------------------------------------------------------------------------------------------
## rho                         0.51 ***      0.30 ***      0.34 ***                    0.31 ***              
##                            (0.03)        (0.02)        (0.02)                      (0.01)                 
## (Intercept)                -4.26         -5.63         -5.32         -6.16         -6.25 *       13.11 ***
##                            (3.06)        (3.11)        (3.91)        (5.51)        (3.06)        (3.21)   
## femin                       0.13 ***      0.13 ***      0.13 ***      0.13 ***      0.13 ***      0.14 ***
##                            (0.03)        (0.03)        (0.03)        (0.03)        (0.03)        (0.03)   
## migration                   0.29 ***      0.30 ***      0.31 ***      0.34 ***      0.29 ***      0.33 ***
##                            (0.02)        (0.02)        (0.02)        (0.02)        (0.02)        (0.02)   
## income                      0.00 ***      0.00 ***      0.00 ***      0.00 ***      0.00 ***      0.00 ***
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## people_density2019          0.00          0.00 *        0.00          0.00 *        0.00 *        0.00 ** 
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## prework                    35.53 ***     52.65 ***     41.19 ***     53.66 ***     52.85 ***     47.95 ***
##                            (8.97)        (8.12)        (8.70)        (8.81)        (8.12)        (8.93)   
## postwork                   51.37 ***     71.75 ***     57.29 ***     68.13 ***     71.55 ***     63.82 ***
##                            (5.33)        (4.62)        (5.08)        (5.05)        (4.59)        (5.15)   
## benefit500                 63.80 ***    117.44 ***     72.56 ***     87.08 ***    115.56 ***    121.48 ***
##                           (16.12)       (13.70)       (15.48)       (15.21)       (13.43)       (15.43)   
## unemployment               -0.36 ***     -0.32 ***     -0.36 ***     -0.38 ***     -0.31 ***     -0.46 ***
##                            (0.05)        (0.04)        (0.04)        (0.04)        (0.03)        (0.04)   
## sewage                      0.02 ***      0.03 ***      0.02 ***      0.02 ***      0.03 ***      0.02 ***
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## gas                         0.03 ***      0.03 ***      0.03 ***      0.03 ***      0.02 ***      0.03 ***
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## opos                       -3.40 **      -8.19 ***     -4.31 ***     -6.36 ***     -8.12 ***     -8.48 ***
##                            (1.26)        (0.74)        (1.16)        (1.02)        (0.73)        (0.92)   
## mean_enabled                0.00 ***      0.00 ***      0.00 ***      0.00 ***      0.00 ***      0.00 ***
##                            (0.00)        (0.00)        (0.00)        (0.00)        (0.00)        (0.00)   
## place_area                 -0.05 ***     -0.06 ***     -0.06 ***     -0.06 ***     -0.06 ***     -0.07 ***
##                            (0.01)        (0.01)        (0.01)        (0.01)        (0.01)        (0.01)   
## lag.femin                  -0.07 *                     -0.04          0.00                                
##                            (0.03)                      (0.03)        (0.03)                               
## lag.migration              -0.06 **                     0.00          0.12 ***                            
##                            (0.02)                      (0.02)        (0.02)                               
## lag.income                 -0.00                       -0.00          0.00                                
##                            (0.00)                      (0.00)        (0.00)                               
## lag.people_density2019      0.00                        0.00          0.00                                
##                            (0.00)                      (0.00)        (0.00)                               
## lag.prework                17.61                       29.04 **      48.58 ***                            
##                            (9.87)                      (9.50)        (9.80)                               
## lag.postwork                5.20                       17.62 **      40.85 ***                            
##                            (6.22)                      (5.63)        (5.65)                               
## lag.benefit500              4.81                       19.47         51.33 **                             
##                           (17.44)                     (16.70)       (16.84)                               
## lag.unemployment            0.16 **                     0.10 *       -0.03                                
##                            (0.05)                      (0.05)        (0.04)                               
## lag.sewage                 -0.01                       -0.00          0.01                                
##                            (0.00)                      (0.00)        (0.00)                               
## lag.gas                    -0.02 ***                   -0.01 **      -0.00                                
##                            (0.00)                      (0.00)        (0.00)                               
## lag.opos                   -3.47 **                    -4.83 ***     -7.15 ***                            
##                            (1.31)                      (1.20)        (1.08)                               
## lag.mean_enabled            0.00                        0.00 *        0.00 ***                            
##                            (0.00)                      (0.00)        (0.00)                               
## lag.place_area              0.02 **                     0.01         -0.01                                
##                            (0.01)                      (0.01)        (0.01)                               
## lambda                     -0.21 ***      0.03                        0.34 ***                    0.35 ***
##                            (0.04)        (0.03)                      (0.01)                      (0.02)   
## ----------------------------------------------------------------------------------------------------------
## R^2                         0.79          0.74          0.76          0.75          0.74          0.73    
## AIC                     13337.79      13445.20      13349.86      13372.42      13444.08      13625.15    
## BIC                     13512.23      13544.05      13518.49      13541.05      13537.11      13718.18    
## Deviance                24428.86      30577.66      28520.40      28847.07      30374.30      32045.05    
## Log Likelihood          -6638.89      -6705.60      -6645.93      -6657.21      -6706.04      -6796.57    
## nobs                     2477          2477          2477          2477          2477          2477       
## ==========================================================================================================
## *** p < 0.001; ** p < 0.01; * p < 0.05

Conclusions among the different types of models for knn matrix are similar to the previous ones. Manski model has the lowest AIC but it is probably misspecified. SAC has the insignificant Lambda here, so it is not the right model as well. Those models have lower number of insignificant predictors but more insignificant lags. Finally, all models have significantly higher AIC than models made with two remaining matrices. To conclude, we should rather stick to contingency matrix or even to the radius one. But changing the matrices doesn’t result in any big changes in the selection of models or in predictors significance.

Technically, we could try the rest of our matrices but we see that changing them doesn’t change that much in our problem. Moreover, calculating spatial models on level of communes with matrices of inverse distances can be outstandingly demanding computationally, especially when method ‘LU’ cannot be used because of the calculating problems described before. Hence it exceeded our computational capabilities and we will focus on some other ideas.

Now we will try to add some unconventional modifications. Let’s start with spatio-temporal lag of our dependent variable, as it is quite easy to obtain. However it won’t be based on a standard variable from a year before, because previous elections were of course 4 years before, in 2015. We’ll use turnout from that elections to create our lag and check if it improves the model.

7.7 Voter turnout in previous year

7.7.1 Data preparation

elect_old<-read.csv("data/2015-gl-lis-gm.csv", header=TRUE, dec=",", sep=";", encoding = "UTF-8")

matched <- intersect(data$Kod, elect_old$TERYT)
all <-  union(data$Kod, elect_old$TERYT)
non_matched <- all[!all %in% matched]
non_matched

##  [1] "146501" "146502" "146503" "146504" "146505" "146506" "146507" "146508"
##  [9] "146509" "146510" "146511" "146512" "146513" "146514" "146515" "146516"
## [17] "146517" "146518" "146519" "149901" "229801" "229901" "320304"

elect_old[elect_old$TERYT == 146502, c(5,26)] <- 
  colSums(elect_old[elect_old$TERYT >= 146501 & elect_old$TERYT <= 146519, c(5,26)])

elect_old[elect_old$TERYT == 146502, 2:3] <- c(146501, "Warszawa")

# removing rest of warsaw districts and abroad/ships
elect_old <- elect_old[elect_old$TERYT < 146502 | elect_old$TERYT > 149901,]

# 320304 is code of Ostrowice commune which bancrupted in 2019 and was divided between two other communes. There isn't an easy way
# to track those division and to match it with our data so we simply remove this commune from the data as well
elect_old <- 
  elect_old[elect_old$TERYT != 229801 &
              elect_old$TERYT != 229901 &
              elect_old$TERYT != 320304,]

# turnout in 2015 elections
elect_old$freq15 <- elect_old$Głosy.ważne / elect_old$Liczba.wyborców *100

elect_old <- elect_old[c(2, 44)]

data <- left_join(x = data, y = elect_old, by = c("Kod" = "TERYT"))

7.7.2 Model with spatio-temporal interaction of turnout (previous election) without insignificant variables

7.7.2.1 First model with interaction

The first model is the model in which we have our spatio-temporal lag of turnout and Rho to see which has the stronger effect and how it comes out.

data$freq_st<-lag.listw(cont.listw, data$freq15)

formula_freq_st <- freq ~ femin + migration + income + people_density2019 +
  prework + postwork + benefit500 + unemployment + sewage + gas + opos + 
  mean_enabled + place_area + freq_st

SAR_freq15<-lagsarlm(formula_freq_st, data=data, cont.listw, tol.solve=3e-30)
summary(SAR_freq15)

## 
## Call:lagsarlm(formula = formula_freq_st, data = data, listw = cont.listw, 
##     tol.solve = 3e-30)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -16.817051  -2.007704  -0.011523   1.900415  30.927379 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##                       Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)        -9.3148e+00  2.7335e+00 -3.4076 0.0006553
## femin               7.9536e-02  2.3311e-02  3.4119 0.0006451
## migration           2.2167e-01  1.4478e-02 15.3111 < 2.2e-16
## income              3.6343e-04  5.7138e-05  6.3606 2.010e-10
## people_density2019  4.7739e-04  2.0270e-04  2.3551 0.0185171
## prework             3.2250e+01  7.3772e+00  4.3715 1.234e-05
## postwork            5.6772e+01  4.1986e+00 13.5216 < 2.2e-16
## benefit500          9.8403e+01  1.2226e+01  8.0486 8.882e-16
## unemployment       -1.6079e-01  3.0540e-02 -5.2649 1.402e-07
## sewage              2.8001e-02  3.2396e-03  8.6433 < 2.2e-16
## gas                 1.2821e-02  2.8965e-03  4.4264 9.580e-06
## opos               -2.8719e+00  7.1185e-01 -4.0344 5.474e-05
## mean_enabled        1.0420e-03  2.5915e-04  4.0208 5.799e-05
## place_area         -3.9939e-02  5.6741e-03 -7.0388 1.939e-12
## freq_st             2.4403e-01  2.2073e-02 11.0555 < 2.2e-16
## 
## Rho: 0.37076, LR test value: 134.76, p-value: < 2.22e-16
## Asymptotic standard error: 0.022466
##     z-value: 16.503, p-value: < 2.22e-16
## Wald statistic: 272.35, p-value: < 2.22e-16
## 
## Log likelihood: -6413.555 for lag model
## ML residual variance (sigma squared): 10.118, (sigma: 3.1809)
## Number of observations: 2477 
## Number of parameters estimated: 17 
## AIC: 12861, (AIC for lm: 12994)
## LM test for residual autocorrelation
## test value: 15.8, p-value: 7.0418e-05

moran.test(SAR_freq15$residuals, cont.listw)

## 
##  Moran I test under randomisation
## 
## data:  SAR_freq15$residuals  
## weights: cont.listw    
## 
## Moran I statistic standard deviate = 2.3183, p-value = 0.01022
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.0284125526     -0.0004038772      0.0001545008

Freq_st is significant, but Rho is also significant and has a bit stronger effect. AIC (12861) is lower than it was for simple SAR, but higher than it was for normal models with 2 components. It still doesn’t remove spatial component though.

7.7.2.2 Second model with interaction

The second model is SEM model with spatio-temporal lag of freq added, so that we get model similar to SAC model, but with spatio-temporal instead of the usual spatial lag of turnout.

SEM_freq15 <-
  errorsarlm(formula_freq_st, data=data, cont.listw, tol.solve=3e-30, method = "LU")
summary(SEM_freq15)

## 
## Call:errorsarlm(formula = formula_freq_st, data = data, listw = cont.listw, 
##     method = "LU", tol.solve = 3e-30)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -16.997870  -2.008528  -0.018887   1.873065  31.834720 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                       Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)        -1.1970e+00  3.0056e+00 -0.3983 0.6904307
## femin               1.0735e-01  2.4491e-02  4.3832 1.170e-05
## migration           2.5417e-01  1.5662e-02 16.2285 < 2.2e-16
## income              3.4627e-04  5.8545e-05  5.9147 3.325e-09
## people_density2019  2.7650e-04  2.1936e-04  1.2605 0.2075062
## prework             3.5429e+01  8.1851e+00  4.3285 1.502e-05
## postwork            6.1098e+01  4.8040e+00 12.7181 < 2.2e-16
## benefit500          1.0367e+02  1.4055e+01  7.3763 1.628e-13
## unemployment       -2.8248e-01  3.8000e-02 -7.4337 1.057e-13
## sewage              2.6698e-02  3.5006e-03  7.6266 2.420e-14
## gas                 1.9552e-02  3.4186e-03  5.7193 1.069e-08
## opos               -2.6469e+00  8.9873e-01 -2.9452 0.0032276
## mean_enabled        1.0578e-03  2.7320e-04  3.8717 0.0001081
## place_area         -5.2951e-02  6.2951e-03 -8.4114 < 2.2e-16
## freq_st             4.3995e-01  1.9476e-02 22.5893 < 2.2e-16
## 
## Lambda: 0.38703, LR test value: 108.21, p-value: < 2.22e-16
## Approximate (numerical Hessian) standard error: 0.03907
##     z-value: 9.9061, p-value: < 2.22e-16
## Wald statistic: 98.13, p-value: < 2.22e-16
## 
## Log likelihood: -6426.831 for error model
## ML residual variance (sigma squared): 10.201, (sigma: 3.194)
## Number of observations: 2477 
## Number of parameters estimated: 17 
## AIC: 12888, (AIC for lm: 12994)

Freq_st and lambda are significant, but density was more significant for normal SAC. On the other hand this model has lower AIC.

Some of our variables are aggregated from the outset, while we also have data from 2019 only. There are variables that are probably not spatially influenced by spatio-temporal lags either from the previous period and those related to the election, so they are not available from 2018, but we consider adding some of those that are possible as lags in this way.

7.7.3 2018 data addition

# gas and sewage
wat_sew_gas18<-read.csv("data/wodkangaz18.csv", header=TRUE, dec=",", sep=";")
colnames(wat_sew_gas18)[4:5] <- c("sewage18", "gas18")
wat_sew_gas18[c(2,6)] <- NULL

#removing last letter meaning type of region (like before)
wat_sew_gas18$Kod <- wat_sew_gas18$Kod %>% as.character() %>% substr(1, nchar(wat_sew_gas18$Kod)-1) %>% as.numeric()

#adding this variable to data
data2 <- data
data2$Kod <- as.numeric(data2$Kod)
data2 <- left_join(x = data2, y = wat_sew_gas18, by = 'Kod')

# people density once again
people<-read.csv("data/ludnosc 2016-2019.csv", header=TRUE, dec=",", sep=";")
colnames(people)[5] <- "people_density2018"
people[c(2:4,6:7)] <- NULL
people$Kod <- 
  people$Kod %>% as.character() %>% substr(1, nchar(people$Kod)-1) %>% as.numeric()

data2 <- left_join(x = data2, y = people, by = 'Kod')

# prework, postwork
people_prod<-read.csv("data/ludnosc ogol przed poprod.csv", 
                      header=TRUE, dec=",", sep=";", encoding = "UTF-8")
colnames(people_prod)[5] <- "ogolem18"
people_prod$prework18 <- 
  people_prod$w.wieku.przedprodukcyjnym.ogółem.2018..osoba. / people_prod$ogolem18
people_prod$postwork18 <- 
  people_prod$w.wieku.poprodukcyjnym.ogółem.2018..osoba. / people_prod$ogolem18
people_prod[c(2:4, 6:15)] <- NULL
people_prod$Kod <- 
  people_prod$Kod %>% as.character() %>% substr(1, nchar(people_prod$Kod)-1) %>% as.numeric()

data2 <- left_join(x = data2, y = people_prod, by = 'Kod')

#fem
fem18<-read.csv("data/feminizacja18.csv", header=TRUE, dec=",", sep=";")
colnames(fem18)[3] <- "femin18"
fem18[c(2,4)] <- NULL

fem18$Kod <- fem18$Kod %>% as.character() %>% substr(1, nchar(fem18$Kod)-1) %>% as.numeric()

data2 <- left_join(x = data2, y = fem18, by = 'Kod')

#soc_ben - 500+ (za caly rok miesiecznie srednio)
soc_ben18<-read.csv("data/swiadczenie wychowawcze 500 18.csv", header=TRUE, dec=",", sep=";", encoding = "UTF-8")
soc_ben18$Kod <- soc_ben18$Kod %>% as.character() %>% substr(1, nchar(soc_ben18$Kod)-1) %>% as.numeric()

matched <- intersect(soc_ben18$Kod, people_prod$Kod)
all <-  union(soc_ben18$Kod, people_prod$Kod)
non_matched <- all[!all %in% matched] 

soc_ben18 <- soc_ben18[soc_ben18$Kod != 320304,]
# calculating on the number of people living there
soc_ben18$benefit50018 <- 
  soc_ben18$przeciętna.miesięczna.liczba.rodzin.pobierających.świadczenie.2018.... / people_prod$ogolem18
soc_ben18[c(2:4)] <- NULL

data2 <- left_join(x = data2, y = soc_ben18, by = 'Kod')

#unempl
unempl18<-read.csv("data/bezrobocie18.csv", header=TRUE, dec=",", sep=";")
colnames(unempl18)[3] <- "unemployment18"
unempl18[c(2,4)] <- NULL
unempl18$Kod <- 
  unempl18$Kod %>% as.character() %>% substr(1, nchar(unempl18$Kod)-1) %>% as.numeric()

data2 <- left_join(x = data2, y = unempl18, by = 'Kod')

# some na's, let's leave them with their values for 2019
data2[is.na(data2$people_density2018),]$people_density2018 <-
  data2[is.na(data2$people_density2018),]$people_density2019
data2[is.na(data2$prework18),]$prework18 <- data2[is.na(data2$prework18),]$prework
data2[is.na(data2$postwork18),]$postwork18 <- data2[is.na(data2$postwork18),]$postwork
data2[is.na(data2$benefit50018),]$benefit50018 <- data2[is.na(data2$benefit50018),]$benefit500
data2$sewage_st<-lag.listw(cont.listw, data2$sewage18)
data2$gas_st<-lag.listw(cont.listw, data2$gas18)
data2$people_density_st<-lag.listw(cont.listw, data2$people_density2018)
data2$prework_st<-lag.listw(cont.listw, data2$prework18) 
data2$postwork_st<-lag.listw(cont.listw, data2$postwork18)
data2$femin_st<-lag.listw(cont.listw, data2$femin18)
data2$unemployment_st<-lag.listw(cont.listw, data2$unemployment18)
data2$benefit500_st<-lag.listw(cont.listw, data2$benefit50018)

7.7.4 Models with 2018 data added

We will keep also our spatio-temporal lag of frequency as it is so significant.

formula_x_st <- freq ~ femin + migration + income + people_density2019 + prework + 
  postwork + benefit500 + unemployment + sewage + gas + opos + mean_enabled + 
  place_area + sewage_st + gas_st + people_density_st + prework_st + postwork_st + 
  femin_st + unemployment_st + benefit500_st + freq_st

7.7.4.1 First model

The first model is a SAR with spatio-temporal lags of x added.

SAR_x_st<-lagsarlm(formula_x_st, data=data2, cont.listw, tol.solve=3e-30)
summary(SAR_x_st)

## 
## Call:lagsarlm(formula = formula_x_st, data = data2, listw = cont.listw, 
##     tol.solve = 3e-30)
## 
## Residuals:
##         Min          1Q      Median          3Q         Max 
## -16.0944846  -1.8940430   0.0026537   1.7860547  28.9791854 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##                       Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)         2.1238e+01  4.5045e+00  4.7149 2.419e-06
## femin               1.0092e-01  2.3446e-02  4.3044 1.675e-05
## migration           2.3619e-01  1.5073e-02 15.6704 < 2.2e-16
## income              3.1442e-04  5.4910e-05  5.7262 1.027e-08
## people_density2019 -3.0588e-04  2.1578e-04 -1.4176  0.156314
## prework             2.3699e+01  8.2321e+00  2.8788  0.003992
## postwork            4.3081e+01  5.1390e+00  8.3833 < 2.2e-16
## benefit500          3.5376e+01  1.5097e+01  2.3432  0.019119
## unemployment       -1.8349e-01  3.0679e-02 -5.9809 2.220e-09
## sewage              2.2338e-02  3.3910e-03  6.5875 4.474e-11
## gas                 3.2602e-02  3.7081e-03  8.7921 < 2.2e-16
## opos               -2.0614e-01  7.7118e-01 -0.2673  0.789234
## mean_enabled        7.6591e-04  2.4891e-04  3.0771  0.002090
## place_area         -3.6400e-02  5.5366e-03 -6.5744 4.885e-11
## sewage_st          -1.0377e-02  5.3325e-03 -1.9461  0.051645
## gas_st             -3.8898e-02  5.2913e-03 -7.3512 1.963e-13
## people_density_st   9.3044e-04  5.2436e-04  1.7744  0.075989
## prework_st         -1.8578e+01  1.2881e+01 -1.4423  0.149231
## postwork_st        -1.1025e+01  7.6051e+00 -1.4497  0.147147
## femin_st           -2.6264e-01  3.7570e-02 -6.9908 2.733e-12
## unemployment_st     2.6752e-04  1.6183e-04  1.6531  0.098304
## benefit500_st       5.1015e+01  2.0644e+01  2.4711  0.013468
## freq_st             3.8511e-01  2.4732e-02 15.5717 < 2.2e-16
## 
## Rho: 0.35337, LR test value: 122.42, p-value: < 2.22e-16
## Asymptotic standard error: 0.024379
##     z-value: 14.495, p-value: < 2.22e-16
## Wald statistic: 210.1, p-value: < 2.22e-16
## 
## Log likelihood: -6297.875 for lag model
## ML residual variance (sigma squared): 9.2397, (sigma: 3.0397)
## Number of observations: 2477 
## Number of parameters estimated: 25 
## AIC: 12646, (AIC for lm: 12766)
## LM test for residual autocorrelation
## test value: 28.15, p-value: 1.1224e-07

The result is not bad, but density, oposition and many lags are insignificant. Rho and freq_st are significant and have the same sign, but they may actually reduce each other. AIC is pretty low, the lowest by far.

7.7.4.2 Second model

SEM + spatio-temporal lags of x & y

SEM_x_st<-errorsarlm(formula_x_st, data=data2, cont.listw, tol.solve=3e-30, 
                     method = 'LU')
summary(SEM_x_st)

## 
## Call:errorsarlm(formula = formula_x_st, data = data2, listw = cont.listw, 
##     method = "LU", tol.solve = 3e-30)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -16.347559  -1.958498   0.015671   1.874006  30.601070 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                       Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)        18.14126480  5.39025472  3.3656 0.0007639
## femin               0.09626507  0.02395883  4.0179 5.871e-05
## migration           0.26093659  0.01591211 16.3986 < 2.2e-16
## income              0.00033606  0.00005715  5.8804 4.094e-09
## people_density2019 -0.00021647  0.00021964 -0.9856 0.3243445
## prework            22.68315301  8.26322945  2.7451 0.0060498
## postwork           47.23182428  5.13028814  9.2065 < 2.2e-16
## benefit500         41.92138683 15.18096469  2.7614 0.0057546
## unemployment       -0.25873790  0.03474855 -7.4460 9.615e-14
## sewage              0.02234525  0.00344351  6.4891 8.636e-11
## gas                 0.02869635  0.00368585  7.7855 6.883e-15
## opos               -0.11686663  0.88920011 -0.1314 0.8954360
## mean_enabled        0.00088881  0.00026354  3.3726 0.0007446
## place_area         -0.04291556  0.00599980 -7.1528 8.500e-13
## sewage_st          -0.00649512  0.00587012 -1.1065 0.2685224
## gas_st             -0.03482782  0.00577684 -6.0289 1.651e-09
## people_density_st   0.00138378  0.00059116  2.3408 0.0192423
## prework_st         -0.31744117 14.09309632 -0.0225 0.9820295
## postwork_st         2.25995321  8.18888284  0.2760 0.7825648
## femin_st           -0.21935853  0.04192788 -5.2318 1.679e-07
## unemployment_st     0.00026406  0.00017520  1.5072 0.1317506
## benefit500_st      87.62080724 22.50057583  3.8942 9.854e-05
## freq_st             0.59499312  0.02302526 25.8409 < 2.2e-16
## 
## Lambda: 0.21524, LR test value: 30.642, p-value: 3.1037e-08
## Approximate (numerical Hessian) standard error: 0.036858
##     z-value: 5.8397, p-value: 5.2302e-09
## Wald statistic: 34.102, p-value: 5.2302e-09
## 
## Log likelihood: -6343.765 for error model
## ML residual variance (sigma squared): 9.7363, (sigma: 3.1203)
## Number of observations: 2477 
## Number of parameters estimated: 25 
## AIC: 12738, (AIC for lm: 12766)

The result is not bad, but many lags are insignificant. This time both Labda and freq_st which somehow substitutes Rho are significant. AIC is exactly like the one for GNS.

We will remove one by one lags that are insignificant.

# removing prework_st & postwork_st
formula_x_st2 <- freq ~ femin + migration + income + people_density2019 + prework + 
  postwork + benefit500 + unemployment + sewage + gas + opos + mean_enabled + place_area + 
  sewage_st + gas_st + people_density_st + femin_st + unemployment_st + benefit500_st + freq_st

SAR_x_st2<-lagsarlm(formula_x_st2, data=data2, cont.listw, tol.solve=3e-30, method = 'LU')
summary(SAR_x_st2)

## 
## Call:lagsarlm(formula = formula_x_st2, data = data2, listw = cont.listw, 
##     method = "LU", tol.solve = 3e-30)
## 
## Residuals:
##         Min          1Q      Median          3Q         Max 
## -16.0950625  -1.8587473  -0.0039796   1.7998163  29.0608442 
## 
## Type: lag 
## Coefficients: (numerical Hessian approximate standard errors) 
##                       Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)         1.8590e+01  4.1925e+00  4.4341 9.245e-06
## femin               1.0607e-01  2.3356e-02  4.5416 5.583e-06
## migration           2.3834e-01  1.4706e-02 16.2067 < 2.2e-16
## income              3.1810e-04  5.4792e-05  5.8054 6.419e-09
## people_density2019 -2.5945e-04  2.1491e-04 -1.2072  0.227341
## prework             1.8057e+01  7.1912e+00  2.5109  0.012041
## postwork            3.9396e+01  4.2404e+00  9.2906 < 2.2e-16
## benefit500          3.6717e+01  1.4363e+01  2.5563  0.010579
## unemployment       -1.7908e-01  3.0615e-02 -5.8493 4.937e-09
## sewage              2.2525e-02  3.4679e-03  6.4953 8.288e-11
## gas                 3.3037e-02  3.6726e-03  8.9955 < 2.2e-16
## opos               -4.4445e-01  8.1374e-01 -0.5462  0.584942
## mean_enabled        7.6999e-04  2.4641e-04  3.1249  0.001779
## place_area         -3.6904e-02  5.5564e-03 -6.6417 3.101e-11
## sewage_st          -9.6847e-03  4.9981e-03 -1.9377  0.052662
## gas_st             -3.9314e-02  5.2586e-03 -7.4761 7.661e-14
## people_density_st   8.1152e-04  5.6282e-04  1.4419  0.149340
## femin_st           -2.6882e-01  3.6783e-02 -7.3083 2.707e-13
## unemployment_st     2.8257e-04  1.7774e-04  1.5898  0.111876
## benefit500_st       3.9937e+01  1.4468e+01  2.7603  0.005775
## freq_st             3.7732e-01  3.2777e-02 11.5118 < 2.2e-16
## 
## Rho: 0.35256, LR test value: 121.93, p-value: < 2.22e-16
## Approximate (numerical Hessian) standard error: 0.030296
##     z-value: 11.637, p-value: < 2.22e-16
## Wald statistic: 135.42, p-value: < 2.22e-16
## 
## Log likelihood: -6299.15 for lag model
## ML residual variance (sigma squared): 9.2503, (sigma: 3.0414)
## Number of observations: 2477 
## Number of parameters estimated: 23 
## AIC: 12644, (AIC for lm: 12764)

SEM_x_st2<-errorsarlm(formula_x_st2, data=data2, cont.listw, tol.solve=3e-30, method = 'LU')
summary(SEM_x_st2)

## 
## Call:errorsarlm(formula = formula_x_st2, data = data2, listw = cont.listw, 
##     method = "LU", tol.solve = 3e-30)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -16.312509  -1.956125   0.020037   1.884177  30.556307 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                       Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)         1.8309e+01  4.9308e+00  3.7132 0.0002047
## femin               9.5683e-02  2.3798e-02  4.0206 5.805e-05
## migration           2.6159e-01  1.5638e-02 16.7277 < 2.2e-16
## income              3.3570e-04  5.7111e-05  5.8781 4.151e-09
## people_density2019 -2.3102e-04  2.1626e-04 -1.0682 0.2854156
## prework             2.2491e+01  7.8119e+00  2.8790 0.0039888
## postwork            4.7955e+01  4.6635e+00 10.2831 < 2.2e-16
## benefit500          4.3210e+01  1.4738e+01  2.9319 0.0033694
## unemployment       -2.5730e-01  3.4288e-02 -7.5041 6.195e-14
## sewage              2.2261e-02  3.4369e-03  6.4770 9.358e-11
## gas                 2.8537e-02  3.6582e-03  7.8008 6.217e-15
## opos               -1.4122e-01  8.5617e-01 -0.1649 0.8689900
## mean_enabled        8.8628e-04  2.6339e-04  3.3649 0.0007658
## place_area         -4.2749e-02  5.9895e-03 -7.1374 9.512e-13
## sewage_st          -7.1813e-03  5.5957e-03 -1.2834 0.1993689
## gas_st             -3.5061e-02  5.7456e-03 -6.1023 1.046e-09
## people_density_st   1.4176e-03  5.7951e-04  2.4462 0.0144370
## femin_st           -2.1587e-01  4.0813e-02 -5.2893 1.228e-07
## unemployment_st     2.6150e-04  1.7482e-04  1.4958 0.1347085
## benefit500_st       8.3335e+01  1.5046e+01  5.5386 3.048e-08
## freq_st             5.9648e-01  2.2131e-02 26.9523 < 2.2e-16
## 
## Lambda: 0.21359, LR test value: 32.548, p-value: 1.1629e-08
## Approximate (numerical Hessian) standard error: 0.040471
##     z-value: 5.2777, p-value: 1.3081e-07
## Wald statistic: 27.854, p-value: 1.3081e-07
## 
## Log likelihood: -6343.84 for error model
## ML residual variance (sigma squared): 9.7382, (sigma: 3.1206)
## Number of observations: 2477 
## Number of parameters estimated: 23 
## AIC: 12734, (AIC for lm: 12764)

Maybe we can add also some simple spatial lags to the variables that doesn’t have lags anymore or have them insignificant.

#data$femin_s<-lag.listw(cont.listw, data$femin) 
data2$migration_s<-lag.listw(cont.listw, data2$migration)
data2$income_s<-lag.listw(cont.listw, data2$income) 
data2$people_density2019_s<-lag.listw(cont.listw, data2$people_density2019) 
data2$prework_s<-lag.listw(cont.listw, data2$prework) 
data2$postwork_s<-lag.listw(cont.listw, data2$postwork)
data2$opos_s<-lag.listw(cont.listw, data2$opos)
data2$mean_enabled_s<-lag.listw(cont.listw, data2$mean_enabled)
data2$place_area_s<-lag.listw(cont.listw, data2$place_area)

formula_x_s <- freq ~ femin + migration + income + people_density2019 + prework + 
  postwork + benefit500 + unemployment + sewage + gas + opos + 
  mean_enabled + place_area + femin_st + migration_s + income_s + 
  people_density2019_s + prework_s + postwork_s + benefit500_st + unemployment_st +
  sewage_st + gas_st + opos_s + mean_enabled_s + place_area_s + freq_st


SAR_x_s<-lagsarlm(formula_x_s, data=data2, cont.listw, tol.solve=3e-30)
summary(SAR_x_s)

## 
## Call:lagsarlm(formula = formula_x_s, data = data2, listw = cont.listw, 
##     tol.solve = 3e-30)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -15.890712  -1.927084   0.018795   1.818501  28.905972 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##                         Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)           9.0995e+00  4.7976e+00  1.8967  0.057869
## femin                 1.0270e-01  2.3457e-02  4.3782 1.197e-05
## migration             2.6267e-01  1.6031e-02 16.3852 < 2.2e-16
## income                3.2087e-04  5.5281e-05  5.8043 6.465e-09
## people_density2019   -4.0864e-05  2.2155e-04 -0.1844  0.853662
## prework               2.0095e+01  8.2266e+00  2.4427  0.014577
## postwork              4.2021e+01  5.1049e+00  8.2315 2.220e-16
## benefit500            2.9801e+01  1.4985e+01  1.9887  0.046732
## unemployment         -1.8602e-01  3.0752e-02 -6.0490 1.458e-09
## sewage                2.1135e-02  3.3963e-03  6.2229 4.881e-10
## gas                   2.9540e-02  3.7340e-03  7.9111 2.442e-15
## opos                 -1.0961e+00  1.2339e+00 -0.8883  0.374361
## mean_enabled          8.0116e-04  2.6098e-04  3.0698  0.002142
## place_area           -5.1528e-02  6.3018e-03 -8.1767 2.220e-16
## femin_st             -1.7272e-01  4.1943e-02 -4.1181 3.821e-05
## migration_s          -1.2829e-01  2.7813e-02 -4.6127 3.975e-06
## income_s              3.4759e-05  1.1499e-04  0.3023  0.762445
## people_density2019_s  4.2313e-04  5.5660e-04  0.7602  0.447132
## prework_s            -1.6393e+01  1.2621e+01 -1.2989  0.193973
## postwork_s           -1.4640e+01  7.6516e+00 -1.9133  0.055706
## benefit500_st         5.6525e+01  2.0716e+01  2.7286  0.006361
## unemployment_st       2.5665e-04  1.6270e-04  1.5774  0.114701
## sewage_st            -1.5488e-02  5.6329e-03 -2.7495  0.005968
## gas_st               -3.3880e-02  5.3769e-03 -6.3011 2.956e-10
## opos_s                1.1079e+00  1.4937e+00  0.7417  0.458273
## mean_enabled_s        7.2235e-04  4.6759e-04  1.5448  0.122383
## place_area_s          4.6374e-02  9.8436e-03  4.7111 2.464e-06
## freq_st               4.2594e-01  2.6252e-02 16.2251 < 2.2e-16
## 
## Rho: 0.3678, LR test value: 126.37, p-value: < 2.22e-16
## Asymptotic standard error: 0.025494
##     z-value: 14.427, p-value: < 2.22e-16
## Wald statistic: 208.13, p-value: < 2.22e-16
## 
## Log likelihood: -6270.075 for lag model
## ML residual variance (sigma squared): 9.0155, (sigma: 3.0026)
## Number of observations: 2477 
## Number of parameters estimated: 30 
## AIC: 12600, (AIC for lm: 12725)
## LM test for residual autocorrelation
## test value: 175.08, p-value: < 2.22e-16

SEM_x_s<-errorsarlm(formula_x_s, data=data2, cont.listw, tol.solve=3e-30, method = 'LU')
summary(SEM_x_s)

## 
## Call:errorsarlm(formula = formula_x_s, data = data2, listw = cont.listw, 
##     method = "LU", tol.solve = 3e-30)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -15.971495  -1.952487   0.037461   1.888176  30.682852 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                         Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)           1.0928e+01  5.5039e+00  1.9854 0.0470948
## femin                 1.0085e-01  2.4037e-02  4.1954 2.723e-05
## migration             2.6405e-01  1.6339e-02 16.1604 < 2.2e-16
## income                3.3302e-04  5.6863e-05  5.8565 4.727e-09
## people_density2019   -1.0792e-04  2.2569e-04 -0.4782 0.6325210
## prework               2.0439e+01  8.3471e+00  2.4487 0.0143378
## postwork              4.4985e+01  5.1641e+00  8.7111 < 2.2e-16
## benefit500            3.8293e+01  1.5238e+01  2.5130 0.0119700
## unemployment         -2.3477e-01  3.4004e-02 -6.9044 5.042e-12
## sewage                2.1537e-02  3.4648e-03  6.2160 5.101e-10
## gas                   2.8679e-02  3.7667e-03  7.6139 2.665e-14
## opos                 -9.5050e-01  1.2442e+00 -0.7640 0.4448854
## mean_enabled          8.9710e-04  2.6680e-04  3.3624 0.0007725
## place_area           -5.0158e-02  6.4044e-03 -7.8318 4.885e-15
## femin_st             -1.7944e-01  4.5644e-02 -3.9314 8.447e-05
## migration_s          -5.8675e-02  2.9324e-02 -2.0009 0.0454010
## income_s              1.3917e-04  1.2485e-04  1.1147 0.2649783
## people_density2019_s  6.5290e-04  6.0855e-04  1.0729 0.2833322
## prework_s            -5.8342e+00  1.3551e+01 -0.4305 0.6667988
## postwork_s            2.8660e-02  8.0467e+00  0.0036 0.9971581
## benefit500_st         8.8023e+01  2.2247e+01  3.9567 7.599e-05
## unemployment_st       2.3686e-04  1.7455e-04  1.3570 0.1747878
## sewage_st            -1.4156e-02  6.1139e-03 -2.3154 0.0205926
## gas_st               -3.4484e-02  5.7760e-03 -5.9702 2.369e-09
## opos_s                1.0792e+00  1.5614e+00  0.6912 0.4894544
## mean_enabled_s        1.4927e-03  5.0583e-04  2.9511 0.0031666
## place_area_s          3.6631e-02  1.0630e-02  3.4460 0.0005690
## freq_st               6.5732e-01  2.5548e-02 25.7291 < 2.2e-16
## 
## Lambda: 0.14783, LR test value: 13.255, p-value: 0.00027186
## Approximate (numerical Hessian) standard error: 0.023292
##     z-value: 6.347, p-value: 2.1959e-10
## Wald statistic: 40.284, p-value: 2.1959e-10
## 
## Log likelihood: -6326.634 for error model
## ML residual variance (sigma squared): 9.646, (sigma: 3.1058)
## Number of observations: 2477 
## Number of parameters estimated: 30 
## AIC: 12713, (AIC for lm: 12725)

In both models benefit500 and oposition are not significant, neither are their lags. Removing only lags did not change anything so we’ll remove them totally.

formula_x_s2 <- freq ~ femin + migration + income + prework + 
  postwork + benefit500 + unemployment + sewage + gas + 
  mean_enabled + place_area + femin_st + migration_s + income_s + prework_s + 
  postwork_s + benefit500_st + unemployment_st +
  sewage_st + gas_st + mean_enabled_s + place_area_s + freq_st


SAR_x_s2<-lagsarlm(formula_x_s2, data=data2, cont.listw, tol.solve=3e-30)
summary(SAR_x_s2)

## 
## Call:lagsarlm(formula = formula_x_s2, data = data2, listw = cont.listw, 
##     tol.solve = 3e-30)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -15.936119  -1.912773   0.011356   1.826774  28.751621 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##                    Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)      8.7880e+00  4.6292e+00  1.8984  0.057646
## femin            9.7835e-02  2.2715e-02  4.3070 1.655e-05
## migration        2.5992e-01  1.4724e-02 17.6529 < 2.2e-16
## income           3.1896e-04  5.5192e-05  5.7791 7.508e-09
## prework          2.0666e+01  8.1585e+00  2.5331  0.011306
## postwork         4.1781e+01  4.9992e+00  8.3575 < 2.2e-16
## benefit500       3.1637e+01  1.4823e+01  2.1343  0.032817
## unemployment    -1.8897e-01  3.0286e-02 -6.2394 4.393e-10
## sewage           2.0635e-02  3.3220e-03  6.2117 5.242e-10
## gas              2.9119e-02  3.6963e-03  7.8777 3.331e-15
## mean_enabled     7.7660e-04  2.4964e-04  3.1109  0.001865
## place_area      -5.1133e-02  6.1439e-03 -8.3226 < 2.2e-16
## femin_st        -1.6786e-01  4.0361e-02 -4.1589 3.198e-05
## migration_s     -1.2758e-01  2.4516e-02 -5.2039 1.952e-07
## income_s         4.9448e-05  1.1421e-04  0.4330  0.665040
## prework_s       -1.6238e+01  1.2156e+01 -1.3358  0.181614
## postwork_s      -1.3705e+01  7.4190e+00 -1.8473  0.064698
## benefit500_st    5.3565e+01  2.0155e+01  2.6577  0.007868
## unemployment_st  3.2861e-04  1.3296e-04  2.4715  0.013455
## sewage_st       -1.4679e-02  5.2941e-03 -2.7727  0.005560
## gas_st          -3.3449e-02  5.3444e-03 -6.2586 3.884e-10
## mean_enabled_s   8.3716e-04  4.4973e-04  1.8615  0.062676
## place_area_s     4.4595e-02  9.5516e-03  4.6688 3.029e-06
## freq_st          4.2776e-01  2.3507e-02 18.1972 < 2.2e-16
## 
## Rho: 0.36689, LR test value: 129.81, p-value: < 2.22e-16
## Asymptotic standard error: 0.025471
##     z-value: 14.404, p-value: < 2.22e-16
## Wald statistic: 207.48, p-value: < 2.22e-16
## 
## Log likelihood: -6270.777 for lag model
## ML residual variance (sigma squared): 9.0219, (sigma: 3.0036)
## Number of observations: 2477 
## Number of parameters estimated: 26 
## AIC: 12594, (AIC for lm: 12721)
## LM test for residual autocorrelation
## test value: 176.51, p-value: < 2.22e-16

SEM_x_s2<-errorsarlm(formula_x_s2, data=data2, cont.listw, tol.solve=3e-30, method = 'LU')
summary(SEM_x_s2)

## 
## Call:errorsarlm(formula = formula_x_s2, data = data2, listw = cont.listw, 
##     method = "LU", tol.solve = 3e-30)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -16.060446  -1.964443   0.028847   1.895048  30.544141 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                    Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)      1.0177e+01  5.3002e+00  1.9201 0.0548428
## femin            9.5853e-02  2.3293e-02  4.1151 3.870e-05
## migration        2.6358e-01  1.5049e-02 17.5144 < 2.2e-16
## income           3.3120e-04  5.6777e-05  5.8335 5.428e-09
## prework          2.0859e+01  8.2787e+00  2.5196 0.0117483
## postwork         4.4382e+01  5.0580e+00  8.7746 < 2.2e-16
## benefit500       3.9799e+01  1.5083e+01  2.6386 0.0083248
## unemployment    -2.3873e-01  3.3438e-02 -7.1396 9.359e-13
## sewage           2.1140e-02  3.3846e-03  6.2459 4.214e-10
## gas              2.8162e-02  3.7333e-03  7.5434 4.574e-14
## mean_enabled     8.5773e-04  2.5538e-04  3.3586 0.0007833
## place_area      -4.9415e-02  6.2392e-03 -7.9200 2.442e-15
## femin_st        -1.7122e-01  4.3817e-02 -3.9075 9.324e-05
## migration_s     -6.1359e-02  2.5775e-02 -2.3805 0.0172877
## income_s         1.5550e-04  1.2385e-04  1.2555 0.2092851
## prework_s       -5.0233e+00  1.3021e+01 -0.3858 0.6996560
## postwork_s       1.4918e+00  7.7799e+00  0.1917 0.8479420
## benefit500_st    8.3961e+01  2.1531e+01  3.8994 9.641e-05
## unemployment_st  3.3937e-04  1.4272e-04  2.3778 0.0174150
## sewage_st       -1.3122e-02  5.7258e-03 -2.2917 0.0219217
## gas_st          -3.4096e-02  5.7356e-03 -5.9447 2.770e-09
## mean_enabled_s   1.6537e-03  4.8535e-04  3.4073 0.0006562
## place_area_s     3.4774e-02  1.0288e-02  3.3799 0.0007251
## freq_st          6.6013e-01  2.1468e-02 30.7494 < 2.2e-16
## 
## Lambda: 0.14222, LR test value: 16.266, p-value: 5.5055e-05
## Approximate (numerical Hessian) standard error: 0.035951
##     z-value: 3.9559, p-value: 7.6239e-05
## Wald statistic: 15.649, p-value: 7.6239e-05
## 
## Log likelihood: -6327.549 for error model
## ML residual variance (sigma squared): 9.6559, (sigma: 3.1074)
## Number of observations: 2477 
## Number of parameters estimated: 26 
## AIC: 12707, (AIC for lm: 12721)

Having all basic variables significant, we continue with our lags reduction and stay with SAR with different Durbin components. In this model, which gives better results despite having both spatial and spatio-temporal lags of frequency, income is now first one to omit among lags.

formula_x_s3 <- freq ~ femin + migration + income + prework + 
  postwork + benefit500 + unemployment + sewage + gas + 
  mean_enabled + place_area + femin_st + migration_s + prework_s + postwork_s + benefit500_st + unemployment_st +
  sewage_st + gas_st + mean_enabled_s + place_area_s + freq_st


SAR_x_s3<-lagsarlm(formula_x_s3, data=data2, cont.listw, tol.solve=3e-30)
summary(SAR_x_s3)

## 
## Call:lagsarlm(formula = formula_x_s3, data = data2, listw = cont.listw, 
##     tol.solve = 3e-30)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -16.075791  -1.906678   0.023202   1.825936  28.738872 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##                    Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)      8.7844e+00  4.6291e+00  1.8977  0.057740
## femin            9.7500e-02  2.2702e-02  4.2948 1.749e-05
## migration        2.6015e-01  1.4714e-02 17.6805 < 2.2e-16
## income           3.2353e-04  5.4186e-05  5.9708 2.361e-09
## prework          2.0560e+01  8.1551e+00  2.5211  0.011699
## postwork         4.1730e+01  4.9979e+00  8.3496 < 2.2e-16
## benefit500       3.1380e+01  1.4810e+01  2.1189  0.034102
## unemployment    -1.8909e-01  3.0287e-02 -6.2433 4.286e-10
## sewage           2.0621e-02  3.3218e-03  6.2077 5.377e-10
## gas              2.9033e-02  3.6915e-03  7.8649 3.775e-15
## mean_enabled     7.7974e-04  2.4954e-04  3.1248  0.001780
## place_area      -5.1191e-02  6.1426e-03 -8.3338 < 2.2e-16
## femin_st        -1.6678e-01  4.0273e-02 -4.1412 3.454e-05
## migration_s     -1.2662e-01  2.4439e-02 -5.1812 2.205e-07
## prework_s       -1.5848e+01  1.2128e+01 -1.3068  0.191292
## postwork_s      -1.3637e+01  7.4185e+00 -1.8382  0.066032
## benefit500_st    5.4032e+01  2.0131e+01  2.6840  0.007274
## unemployment_st  3.3694e-04  1.3162e-04  2.5599  0.010470
## sewage_st       -1.4243e-02  5.1987e-03 -2.7396  0.006151
## gas_st          -3.3511e-02  5.3415e-03 -6.2737 3.525e-10
## mean_enabled_s   8.1688e-04  4.4704e-04  1.8273  0.067653
## place_area_s     4.5388e-02  9.3637e-03  4.8473 1.251e-06
## freq_st          4.2811e-01  2.3503e-02 18.2150 < 2.2e-16
## 
## Rho: 0.36752, LR test value: 130.58, p-value: < 2.22e-16
## Asymptotic standard error: 0.025381
##     z-value: 14.48, p-value: < 2.22e-16
## Wald statistic: 209.68, p-value: < 2.22e-16
## 
## Log likelihood: -6270.871 for lag model
## ML residual variance (sigma squared): 9.0217, (sigma: 3.0036)
## Number of observations: 2477 
## Number of parameters estimated: 25 
## AIC: 12592, (AIC for lm: 12720)
## LM test for residual autocorrelation
## test value: 173, p-value: < 2.22e-16

Removing prework_s

formula_x_s4 <- freq ~ femin + migration + income + prework + 
  postwork + benefit500 + unemployment + sewage + gas + 
  mean_enabled + place_area + femin_st + migration_s + postwork_s + benefit500_st + 
  unemployment_st + sewage_st + gas_st + mean_enabled_s + place_area_s + freq_st


SAR_x_s4<-lagsarlm(formula_x_s4, data=data2, cont.listw, tol.solve=3e-30)
summary(SAR_x_s4)

## 
## Call:lagsarlm(formula = formula_x_s4, data = data2, listw = cont.listw, 
##     tol.solve = 3e-30)
## 
## Residuals:
##         Min          1Q      Median          3Q         Max 
## -15.9764079  -1.8953197   0.0066848   1.8615697  28.7043188 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##                    Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)      7.1111e+00  4.4550e+00  1.5962  0.110440
## femin            9.9697e-02  2.2652e-02  4.4013 1.076e-05
## migration        2.6207e-01  1.4649e-02 17.8898 < 2.2e-16
## income           3.2581e-04  5.4179e-05  6.0136 1.814e-09
## prework          1.5176e+01  7.0580e+00  2.1502  0.031541
## postwork         3.9957e+01  4.8175e+00  8.2941 < 2.2e-16
## benefit500       3.5484e+01  1.4475e+01  2.4514  0.014232
## unemployment    -1.8275e-01  2.9956e-02 -6.1004 1.058e-09
## sewage           2.0604e-02  3.3233e-03  6.1998 5.654e-10
## gas              2.9178e-02  3.6920e-03  7.9030 2.665e-15
## mean_enabled     7.7857e-04  2.4964e-04  3.1187  0.001816
## place_area      -5.1359e-02  6.1442e-03 -8.3590 < 2.2e-16
## femin_st        -1.7042e-01  4.0185e-02 -4.2409 2.227e-05
## migration_s     -1.3024e-01  2.4336e-02 -5.3516 8.718e-08
## postwork_s      -7.7314e+00  5.8258e+00 -1.3271  0.184475
## benefit500_st    3.9789e+01  1.7062e+01  2.3320  0.019703
## unemployment_st  3.4043e-04  1.3165e-04  2.5860  0.009711
## sewage_st       -1.4996e-02  5.1704e-03 -2.9003  0.003728
## gas_st          -3.3967e-02  5.3346e-03 -6.3674 1.922e-10
## mean_enabled_s   7.5913e-04  4.4531e-04  1.7047  0.088243
## place_area_s     4.4925e-02  9.3592e-03  4.8001 1.586e-06
## freq_st          4.2584e-01  2.3449e-02 18.1604 < 2.2e-16
## 
## Rho: 0.36583, LR test value: 129.5, p-value: < 2.22e-16
## Asymptotic standard error: 0.025259
##     z-value: 14.483, p-value: < 2.22e-16
## Wald statistic: 209.76, p-value: < 2.22e-16
## 
## Log likelihood: -6271.73 for lag model
## ML residual variance (sigma squared): 9.0303, (sigma: 3.005)
## Number of observations: 2477 
## Number of parameters estimated: 24 
## AIC: 12591, (AIC for lm: 12719)
## LM test for residual autocorrelation
## test value: 150.1, p-value: < 2.22e-16

Now also postwork_s is probably unnecesary .

formula_x_s5 <- freq ~ femin + migration + income + prework + 
  postwork + benefit500 + unemployment + sewage + gas + 
  mean_enabled + place_area + femin_st + migration_s + benefit500_st + unemployment_st +
  sewage_st + gas_st + mean_enabled_s + place_area_s + freq_st


SAR_x_s5<-lagsarlm(formula_x_s5, data=data2, cont.listw, tol.solve=3e-30)
summary(SAR_x_s5)

## 
## Call:lagsarlm(formula = formula_x_s5, data = data2, listw = cont.listw, 
##     tol.solve = 3e-30)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -16.093831  -1.890310   0.019569   1.827830  28.914285 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##                    Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)      7.0178e+00  4.4578e+00  1.5743 0.1154242
## femin            1.0430e-01  2.2423e-02  4.6515 3.295e-06
## migration        2.5743e-01  1.4253e-02 18.0617 < 2.2e-16
## income           3.2671e-04  5.4208e-05  6.0270 1.671e-09
## prework          1.5571e+01  7.0543e+00  2.2072 0.0272972
## postwork         3.6706e+01  4.2094e+00  8.7200 < 2.2e-16
## benefit500       3.0144e+01  1.3949e+01  2.1609 0.0306997
## unemployment    -1.8399e-01  2.9937e-02 -6.1459 7.951e-10
## sewage           2.0827e-02  3.3215e-03  6.2704 3.602e-10
## gas              3.0061e-02  3.6410e-03  8.2562 2.220e-16
## mean_enabled     7.8703e-04  2.4973e-04  3.1516 0.0016240
## place_area      -5.1675e-02  6.1437e-03 -8.4110 < 2.2e-16
## femin_st        -1.8665e-01  3.8299e-02 -4.8734 1.097e-06
## migration_s     -1.2056e-01  2.3088e-02 -5.2219 1.771e-07
## benefit500_st    5.2057e+01  1.4254e+01  3.6521 0.0002601
## unemployment_st  3.2974e-04  1.3155e-04  2.5067 0.0121880
## sewage_st       -1.3239e-02  5.0042e-03 -2.6456 0.0081553
## gas_st          -3.4172e-02  5.3383e-03 -6.4012 1.541e-10
## mean_enabled_s   8.0833e-04  4.4371e-04  1.8218 0.0684906
## place_area_s     4.4225e-02  9.3451e-03  4.7325 2.218e-06
## freq_st          4.2405e-01  2.3399e-02 18.1228 < 2.2e-16
## 
## Rho: 0.36164, LR test value: 127.72, p-value: < 2.22e-16
## Asymptotic standard error: 0.02484
##     z-value: 14.559, p-value: < 2.22e-16
## Wald statistic: 211.96, p-value: < 2.22e-16
## 
## Log likelihood: -6272.632 for lag model
## ML residual variance (sigma squared): 9.0424, (sigma: 3.0071)
## Number of observations: 2477 
## Number of parameters estimated: 23 
## AIC: 12591, (AIC for lm: 12717)
## LM test for residual autocorrelation
## test value: 110.06, p-value: < 2.22e-16

moran.test(SAR_x_s5$residuals, cont.listw)

## 
##  Moran I test under randomisation
## 
## data:  SAR_x_s5$residuals  
## weights: cont.listw    
## 
## Moran I statistic standard deviate = -4.5472, p-value = 1
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##     -0.0569258972     -0.0004038772      0.0001545077

We have probably done as much as we could to make this quite complicated model with Rho component, spatio-temporal lag of frequency and a combination of chosen spatial (migration, mean_enabled, place_area) and spatio-temporal (femin, benefit500, unemployment, sewage, gas) lags of independent variables.

screenreg(list(GNS_3, SDM_3, SDEM_3, SAR_x_s5, model_lm3))

## 
## ===========================================================================================
##                         Model 1       Model 2       Model 3       Model 4       Model 5    
## -------------------------------------------------------------------------------------------
## rho                         0.79 ***      0.65 ***                    0.36 ***             
##                            (0.03)        (0.02)                      (0.02)                
## (Intercept)                -0.58          0.43         13.76          7.02         4.42    
##                            (0.83)        (4.78)        (8.54)        (4.46)       (3.47)   
## femin                       0.10 ***      0.11 ***      0.09 ***      0.10 ***     0.12 ***
##                            (0.02)        (0.02)        (0.03)        (0.02)       (0.03)   
## migration                   0.26 ***      0.26 ***      0.29 ***      0.26 ***     0.41 ***
##                            (0.02)        (0.02)        (0.02)        (0.01)       (0.02)   
## income                      0.00 ***      0.00 ***      0.00 ***      0.00 ***     0.00 ***
##                            (0.00)        (0.00)        (0.00)        (0.00)       (0.00)   
## people_density2019          0.00         -0.00          0.00                       0.00 ***
##                            (0.00)        (0.00)        (0.00)                     (0.00)   
## prework                    21.75 *       22.80 **      31.52 ***     15.57 *      73.04 ***
##                            (9.70)        (8.23)        (8.42)        (7.05)       (9.35)   
## postwork                   41.57 ***     44.22 ***     53.79 ***     36.71 ***    86.48 ***
##                            (5.53)        (5.09)        (5.04)        (4.21)       (5.28)   
## benefit500                 31.31 *       34.50 *       49.55 ***     30.14 *     141.53 ***
##                           (14.15)       (14.98)       (14.79)       (13.95)      (15.55)   
## unemployment               -0.34 ***     -0.33 ***     -0.32 ***     -0.18 ***    -0.46 ***
##                            (0.05)        (0.05)        (0.04)        (0.03)       (0.04)   
## sewage                      0.02 ***      0.02 ***      0.02 ***      0.02 ***     0.03 ***
##                            (0.00)        (0.00)        (0.00)        (0.00)       (0.00)   
## gas                         0.03 ***      0.03 ***      0.03 ***      0.03 ***     0.03 ***
##                            (0.00)        (0.00)        (0.00)        (0.00)       (0.00)   
## opos                       -1.64         -1.65         -3.14 **                  -11.48 ***
##                            (1.18)        (1.23)        (1.14)                     (0.83)   
## mean_enabled                0.00 **       0.00 **       0.00 ***      0.00 **      0.00 ***
##                            (0.00)        (0.00)        (0.00)        (0.00)       (0.00)   
## place_area                 -0.05 ***     -0.05 ***     -0.05 ***     -0.05 ***    -0.07 ***
##                            (0.01)        (0.01)        (0.01)        (0.01)       (0.01)   
## lag.femin                  -0.12 ***     -0.13 **      -0.13 *                             
##                            (0.03)        (0.04)        (0.06)                              
## lag.migration              -0.12 ***     -0.04          0.22 ***                           
##                            (0.03)        (0.03)        (0.03)                              
## lag.income                 -0.00          0.00          0.00 **                            
##                            (0.00)        (0.00)        (0.00)                              
## lag.people_density2019      0.00 *        0.00 ***      0.00 ***                           
##                            (0.00)        (0.00)        (0.00)                              
## lag.prework                 5.37         18.88         54.38 **                            
##                           (13.25)       (12.78)       (18.48)                              
## lag.postwork              -13.97          0.45         45.31 ***                           
##                            (8.20)        (7.51)       (10.42)                              
## lag.benefit500             -4.36         14.67         85.53 **                            
##                           (17.29)       (22.07)       (31.79)                              
## lag.unemployment            0.30 ***      0.23 ***     -0.02                               
##                            (0.06)        (0.06)        (0.08)                              
## lag.sewage                 -0.01 **      -0.01          0.00                               
##                            (0.00)        (0.01)        (0.01)                              
## lag.gas                    -0.03 ***     -0.02 ***      0.00                               
##                            (0.00)        (0.01)        (0.01)                              
## lag.opos                   -2.22         -4.24 **     -10.06 ***                           
##                            (1.34)        (1.44)        (1.80)                              
## lag.mean_enabled            0.00          0.00          0.00 **                            
##                            (0.00)        (0.00)        (0.00)                              
## lag.place_area              0.04 ***      0.03 ***     -0.01                               
##                            (0.01)        (0.01)        (0.01)                              
## lambda                     -0.34 ***                    0.66 ***                           
##                            (0.06)                      (0.02)                              
## femin_st                                                             -0.19 ***             
##                                                                      (0.04)                
## migration_s                                                          -0.12 ***             
##                                                                      (0.02)                
## benefit500_st                                                        52.06 ***             
##                                                                     (14.25)                
## unemployment_st                                                       0.00 *               
##                                                                      (0.00)                
## sewage_st                                                            -0.01 **              
##                                                                      (0.01)                
## gas_st                                                               -0.03 ***             
##                                                                      (0.01)                
## mean_enabled_s                                                        0.00                 
##                                                                      (0.00)                
## place_area_s                                                          0.04 ***             
##                                                                      (0.01)                
## freq_st                                                               0.42 ***             
##                                                                      (0.02)                
## -------------------------------------------------------------------------------------------
## R^2                         0.83          0.81          0.81          0.81         0.65    
## AIC                     12737.92      12757.70      12770.96      12591.26                 
## BIC                     12912.37      12926.33      12939.59      12725.00                 
## Deviance                20379.34      22310.10      22287.24      22398.12                 
## Log Likelihood          -6338.96      -6349.85      -6356.48      -6272.63                 
## nobs                     2477          2477          2477          2477                    
## Adj. R^2                                                                           0.65    
## Num. obs.                                                                       2477       
## ===========================================================================================
## *** p < 0.001; ** p < 0.01; * p < 0.05

Our model finally has more or less only significant variables, only lag of mean_enabled is on a border, but it can probably stay in the model. We have rho equal to 0.36164 and also a spatio-temporal lag of frequency (from four years before). Some lags have negative effect so one could think about removing them as they may be somehow reacting with rho. But our AIC now is the lowest from all models made for this project, so we managed to combine low AIC with significance of variables which was not possible with standard types of models. We have spatial and spatio-temporal diffusion of random shocks and different lags of independent variables. Our spatial lag model with spatial and spatio-temporal lags is no longer a typical Spatial Durbin model, so in some cases it would be much better to use one of simpler models to have much better explainability.

Distribution of total impact:

W.c <- as(as_dgRMatrix_listw(cont.listw), "CsparseMatrix") 
trMat <- trW(W.c, type="mult") 

SAR_x_s5_imp <- impacts(SAR_x_s5, tr=trMat, R=2000)
summary(SAR_x_s5_imp, zstats=TRUE, short=TRUE)

## Impact measures (lag, trace):
##                        Direct      Indirect         Total
## femin            0.1070746825  0.0563130210  0.1633877035
## migration        0.2642820837  0.1389919836  0.4032740673
## income           0.0003353981  0.0001763935  0.0005117916
## prework         15.9847464054  8.4067432040 24.3914896094
## postwork        37.6828103409 19.8182505813 57.5010609222
## benefit500      30.9452763303 16.2748275692 47.2201038995
## unemployment    -0.1888854259 -0.0993391594 -0.2882245852
## sewage           0.0213809471  0.0112447284  0.0326256755
## gas              0.0308605949  0.0162302917  0.0470908866
## mean_enabled     0.0008079610  0.0004249251  0.0012328861
## place_area      -0.0530497292 -0.0279000641 -0.0809497933
## femin_st        -0.1916104987 -0.1007723374 -0.2923828360
## migration_s     -0.1237682626 -0.0650925560 -0.1888608187
## benefit500_st   53.4415040719 28.1061075211 81.5476115930
## unemployment_st  0.0003385080  0.0001780291  0.0005165371
## sewage_st       -0.0135912232 -0.0071479347 -0.0207391579
## gas_st          -0.0350804206 -0.0184495944 -0.0535300149
## mean_enabled_s   0.0008298350  0.0004364292  0.0012662642
## place_area_s     0.0454015839  0.0238777299  0.0692793137
## freq_st          0.4353326153  0.2289513648  0.6642839801
## ========================================================
## Simulation results ( variance matrix):
## ========================================================
## Simulated standard errors
##                       Direct     Indirect        Total
## femin           2.273635e-02 1.364384e-02 3.546833e-02
## migration       1.467596e-02 1.666173e-02 2.701411e-02
## income          5.480710e-05 3.467731e-05 8.602757e-05
## prework         7.165646e+00 3.831341e+00 1.091800e+01
## postwork        4.229526e+00 2.827414e+00 6.503261e+00
## benefit500      1.412253e+01 7.603846e+00 2.157291e+01
## unemployment    3.057434e-02 1.789308e-02 4.629513e-02
## sewage          3.398461e-03 2.203768e-03 5.386422e-03
## gas             3.660949e-03 2.678586e-03 5.971436e-03
## mean_enabled    2.615719e-04 1.472076e-04 4.039563e-04
## place_area      6.261812e-03 4.624933e-03 1.027113e-02
## femin_st        3.843049e-02 2.343269e-02 6.018464e-02
## migration_s     2.379986e-02 1.543587e-02 3.823855e-02
## benefit500_st   1.404343e+01 7.596394e+00 2.123397e+01
## unemployment_st 1.371360e-04 7.498810e-05 2.103056e-04
## sewage_st       5.115713e-03 2.836912e-03 7.880952e-03
## gas_st          5.308198e-03 3.514093e-03 8.453358e-03
## mean_enabled_s  4.589107e-04 2.431000e-04 6.984325e-04
## place_area_s    9.730539e-03 5.885983e-03 1.526749e-02
## freq_st         2.322901e-02 1.909467e-02 2.841290e-02
## 
## Simulated z-values:
##                    Direct  Indirect     Total
## femin            4.761385  4.192511  4.664964
## migration       18.007415  8.375894 14.948967
## income           6.135271  5.120795  5.972874
## prework          2.202315  2.169904  2.206873
## postwork         8.880533  7.002531  8.820114
## benefit500       2.200627  2.153869  2.199801
## unemployment    -6.198258 -5.577415 -6.249140
## sewage           6.262679  5.103158  6.039194
## gas              8.408055  6.073518  7.879160
## mean_enabled     3.074739  2.886107  3.042711
## place_area      -8.466535 -6.059537 -7.890158
## femin_st        -5.024177 -4.354686 -4.903636
## migration_s     -5.213308 -4.259928 -4.964406
## benefit500_st    3.811135  3.705124  3.846055
## unemployment_st  2.485576  2.396831  2.475425
## sewage_st       -2.613312 -2.491777 -2.593330
## gas_st          -6.607446 -5.275220 -6.342006
## mean_enabled_s   1.802378  1.785466  1.805725
## place_area_s     4.620583  4.040725  4.502666
## freq_st         18.726714 11.988363 23.366736
## 
## Simulated p-values:
##                 Direct     Indirect   Total     
## femin           1.9227e-06 2.7588e-05 3.0867e-06
## migration       < 2.22e-16 < 2.22e-16 < 2.22e-16
## income          8.5014e-10 3.0425e-07 2.3311e-09
## prework         0.02764305 0.03001414 0.02732290
## postwork        < 2.22e-16 2.5138e-12 < 2.22e-16
## benefit500      0.02776244 0.03125044 0.02782101
## unemployment    5.7092e-10 2.4412e-08 4.1272e-10
## sewage          3.7842e-10 3.3403e-07 1.5489e-09
## gas             < 2.22e-16 1.2514e-09 3.3307e-15
## mean_enabled    0.00210687 0.00390040 0.00234458
## place_area      < 2.22e-16 1.3651e-09 3.1086e-15
## femin_st        5.0560e-07 1.3326e-05 9.4079e-07
## migration_s     1.8550e-07 2.0449e-05 6.8912e-07
## benefit500_st   0.00013833 0.00021129 0.00012004
## unemployment_st 0.01293421 0.01653754 0.01330777
## sewage_st       0.00896694 0.01271059 0.00950516
## gas_st          3.9101e-11 1.3260e-07 2.2679e-10
## mean_enabled_s  0.07148590 0.07418580 0.07096124
## place_area_s    3.8266e-06 5.3286e-05 6.7106e-06
## freq_st         < 2.22e-16 < 2.22e-16 < 2.22e-16

8 Summary

Direct and indirect effects are significant generally everywere but in a model made in a way this one was, it is hard to interpret them normally. We have direct efects of normal variables and some of them have also spatial lags, which are like indirect effects of those variables, although indirect efects of main variables are also significant and given. Similarly, direct effects of spatio-temporal lags are in fact indirect from definition, as they describe effects of those lags, which are in the model instead of normal spatial lags.

2019 Polish parliamentary elections - voter turnout spatial analysis

Mateusz Majak, Przemysław Mazurek

07 09 2021