1 Import data

library(sf)
WIfinal = st_read('wi_final_census2_random4.shp')

## Reading layer `wi_final_census2_random4' from data source 
##   `/Users/admin/Desktop/Linh Data Studio/Transport modelling/wi_final_census2_random4.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 417 features and 34 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -88.5423 ymin: 42.84136 xmax: -87.79183 ymax: 43.54352
## CRS:           NA

class(WIfinal)

## [1] "sf"         "data.frame"

2 Project spatial data

st_crs(WIfinal) = 4326 #set the crs to WGS84
WIfinal = st_transform(WIfinal, 3857) #project the data 

## Without this set up, at the step "Quantile Classification in GIS" > Warning: Currect projection of shape WIfinal unknown. Long-lat (WGS84) is assumed.

2.1 Quantile Classification in GIS

# Distribution of Hispanic people with map
summary(WIfinal$HISP_)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0    44.0    91.0   225.1   192.0  3613.0

library(tmap)
tm_shape(WIfinal) +
  tm_polygons(style = "quantile", col = "HISP_", title = "Hispanic people", legend.hist = TRUE) +
  tm_legend(outside = TRUE, text.size = .8)

tmap_mode("view")

tm_shape(WIfinal) +
  tm_polygons(stype = "quantile", col = "HISP_", title = "Hispanic people")

3 For Polygon geometry

3.1 Global spatial autocorrelation

3.1.1 Neighbors

library(spdep)
neighbors <- poly2nb(WIfinal, queen = TRUE) #contiguous polygon that shares at least on vertex
neighbors

## Neighbour list object:
## Number of regions: 417 
## Number of nonzero links: 2628 
## Percentage nonzero weights: 1.511309 
## Average number of links: 6.302158

neighbors <- poly2nb(WIfinal, queen = FALSE) #at least one edge be shared between polygons (rook neighbors)
neighbors

## Neighbour list object:
## Number of regions: 417 
## Number of nonzero links: 2170 
## Percentage nonzero weights: 1.247923 
## Average number of links: 5.203837

3.1.2 Weights

# Assign weights to each neighboaring polygon
weightsW <- nb2listw(neighbors, style = "W")
weightsW

## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 417 
## Number of nonzero links: 2170 
## Percentage nonzero weights: 1.247923 
## Average number of links: 5.203837 
## 
## Weights style: W 
## Weights constants summary:
##     n     nn  S0       S1      S2
## W 417 173889 417 167.8758 1720.73

3.1.3 Moran’s I test

3.1.3.1 Moran test function

moran.test(WIfinal$HISP_, weightsW)

## 
##  Moran I test under randomisation
## 
## data:  WIfinal$HISP_  
## weights: weightsW    
## 
## Moran I statistic standard deviate = 26.95, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.8084882617     -0.0024038462      0.0009053312

3.1.3.2 Moran test with Monte Carlo simulation

moran.mc(WIfinal$HISP_, weightsW, nsim = 599)

## 
##  Monte-Carlo simulation of Moran I
## 
## data:  WIfinal$HISP_ 
## weights: weightsW  
## number of simulations + 1: 600 
## 
## statistic = 0.80849, observed rank = 600, p-value = 0.001667
## alternative hypothesis: greater

3.1.3.3 Density plot - distribution

plot(moran.mc(WIfinal$HISP_, weightsW, nsim=599), main="", las=1) #density plot

3.2 Local spatial autocorrelation

3.2.1 Local Moran

3.2.1.1 Moran scatterplot

moran.plot(WIfinal$HISP_, listw = weightsW)

Notice how the plot is split in 4 quadrants.

The top right corner belongs to areas that have high level of Hispanic people and are surrounded by other areas that have above the average level of Hispanic people. This are the high-high locations. The bottom left corner belongs to the low-low areas. These are areas with low level of Hispanic people and surrounded by areas with below average levels of Hispanic people.

Both the high-high and low-low represent clusters.A high-high cluster is what you may refer to as a hot spot. And the low-low clusters represent cold spots.

In the opposite diagonal we have spatial outliers. They are not outliers in the standard sense, extreme observations, they are outliers in that they are surrounded by areas that are very unlike them: So you could have high-low spatial outliers, areas with high levels of Hispanic people and low levels of surrounding Hispanic people, or low-high spatial outliers, areas that have themselves low levels of Hispanic people (or whatever else you are mapping) and that are surrounded by areas with above average levels of Hispanic people.

3.2.1.2 Local Moran statistics

localmoranstats <- localmoran(WIfinal$HISP_, weightsW)
summary(localmoranstats)

##        Ii                E.Ii                Var.Ii              Z.Ii        
##  Min.   :-0.23179   Min.   :-0.1330661   Min.   :0.000000   Min.   :-1.9903  
##  1st Qu.: 0.02988   1st Qu.:-0.0004505   1st Qu.:0.007475   1st Qu.: 0.3780  
##  Median : 0.09180   Median :-0.0002826   Median :0.022154   Median : 0.6538  
##  Mean   : 0.80849   Mean   :-0.0024038   Mean   :0.198742   Mean   : 0.9330  
##  3rd Qu.: 0.14791   3rd Qu.:-0.0001005   3rd Qu.:0.038485   3rd Qu.: 0.8765  
##  Max.   :38.66655   Max.   : 0.0000000   Max.   :9.528253   Max.   :12.9702  
##  Pr(z != E(Ii))  
##  Min.   :0.0000  
##  1st Qu.:0.3788  
##  Median :0.5028  
##  Mean   :0.5143  
##  3rd Qu.:0.6694  
##  Max.   :0.9975

moranmap <- cbind(WIfinal, localmoranstats) #first, bind the statistics to the original data
names(moranmap)[39] <- "Pvalue" #change the name of this variable to make it easier to call it

tm_shape(moranmap) +
  tm_polygons(col = "Ii", style = "pretty", title = "local Moran's statistic")

tm_shape(moranmap) +
  tm_polygons(
    col = "Pvalue",
    breaks = c(-Inf, 0.01, 0.05, 0.1, 0.15, Inf),
    palette = "-Blues",
    title = "local Moran's I p-values")

3.3 LISA Clusters

3.3.1 Scale variable

# scale the variable of interest and save it to a new column
moranmap$HISP_scale <- as.vector(scale(moranmap$HISP_))
summary(moranmap$HISP_scale)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.49380 -0.39729 -0.29420  0.00000 -0.07266  7.43120

3.3.2 Create a spatial lag variable

moranmap$HISP_lag <- lag.listw(weightsW, moranmap$HISP_scale)
summary(moranmap$HISP_lag)

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.475708 -0.351229 -0.263490 -0.004672 -0.086916  5.628609

3.3.3 Create a variable to distinguish in which quadrant each observation is

# recall the Moran scatterplot
library(tidyverse)
siglevel = 0.15 #we can change to different levels
moranmap <- moranmap %>%  mutate(quad_sig = ifelse(moranmap$HISP_scale > 0 & 
                              moranmap$HISP_lag > 0 & 
                              moranmap$Pvalue <= siglevel, 
                     "high-high",
                     ifelse(moranmap$HISP_scale <= 0 & 
                              moranmap$HISP_lag <= 0 & 
                              moranmap$Pvalue <= siglevel, 
                     "low-low", 
                     ifelse(moranmap$HISP_scale > 0 & 
                              moranmap$HISP_lag <= 0 & 
                              moranmap$Pvalue <= siglevel, 
                     "high-low",
                     ifelse(moranmap$HISP_scale <= 0 & 
                              moranmap$HISP_lag > 0 & 
                              moranmap$Pvalue <= siglevel,
                     "low-high", 
                     "non-significant")))))
moranmap$quad_sig = factor(moranmap$quad_sig)
table(moranmap$quad_sig)

## 
##       high-high        low-high non-significant 
##              32               2             383

palcolor = c("red", "white") #the color palette for only 2 categories
tmap_mode("plot")
tm_shape(moranmap)+
  tm_polygons(col = "quad_sig", palette = palcolor, title = "local moran statistic")+
  tm_legend(outside = TRUE)

4 Reference

https://github.com/U-Shift/Transport-Demand-Modelling/blob/master/6-SpatialModels.md

Spatial Regression Model - Part 1: Spatial Model

Apr 2, 2022