library(sf)
WIfinal = st_read('wi_final_census2_random4.shp')## Reading layer `wi_final_census2_random4' from data source
## `/Users/admin/Desktop/Linh Data Studio/Transport modelling/wi_final_census2_random4.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 417 features and 34 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -88.5423 ymin: 42.84136 xmax: -87.79183 ymax: 43.54352
## CRS: NA
class(WIfinal)## [1] "sf" "data.frame"
st_crs(WIfinal) = 4326 #set the crs to WGS84
WIfinal = st_transform(WIfinal, 3857) #project the data
## Without this set up, at the step "Quantile Classification in GIS" > Warning: Currect projection of shape WIfinal unknown. Long-lat (WGS84) is assumed.# Distribution of Hispanic people with map
summary(WIfinal$HISP_)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 44.0 91.0 225.1 192.0 3613.0
library(tmap)
tm_shape(WIfinal) +
tm_polygons(style = "quantile", col = "HISP_", title = "Hispanic people", legend.hist = TRUE) +
tm_legend(outside = TRUE, text.size = .8)tmap_mode("view")
tm_shape(WIfinal) +
tm_polygons(stype = "quantile", col = "HISP_", title = "Hispanic people")library(spdep)
neighbors <- poly2nb(WIfinal, queen = TRUE) #contiguous polygon that shares at least on vertex
neighbors## Neighbour list object:
## Number of regions: 417
## Number of nonzero links: 2628
## Percentage nonzero weights: 1.511309
## Average number of links: 6.302158
neighbors <- poly2nb(WIfinal, queen = FALSE) #at least one edge be shared between polygons (rook neighbors)
neighbors## Neighbour list object:
## Number of regions: 417
## Number of nonzero links: 2170
## Percentage nonzero weights: 1.247923
## Average number of links: 5.203837
# Assign weights to each neighboaring polygon
weightsW <- nb2listw(neighbors, style = "W")
weightsW ## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 417
## Number of nonzero links: 2170
## Percentage nonzero weights: 1.247923
## Average number of links: 5.203837
##
## Weights style: W
## Weights constants summary:
## n nn S0 S1 S2
## W 417 173889 417 167.8758 1720.73
moran.test(WIfinal$HISP_, weightsW)##
## Moran I test under randomisation
##
## data: WIfinal$HISP_
## weights: weightsW
##
## Moran I statistic standard deviate = 26.95, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.8084882617 -0.0024038462 0.0009053312
moran.mc(WIfinal$HISP_, weightsW, nsim = 599)##
## Monte-Carlo simulation of Moran I
##
## data: WIfinal$HISP_
## weights: weightsW
## number of simulations + 1: 600
##
## statistic = 0.80849, observed rank = 600, p-value = 0.001667
## alternative hypothesis: greater
plot(moran.mc(WIfinal$HISP_, weightsW, nsim=599), main="", las=1) #density plotmoran.plot(WIfinal$HISP_, listw = weightsW)Notice how the plot is split in 4 quadrants.
The top right corner belongs to areas that have high level of Hispanic people and are surrounded by other areas that have above the average level of Hispanic people. This are the high-high locations. The bottom left corner belongs to the low-low areas. These are areas with low level of Hispanic people and surrounded by areas with below average levels of Hispanic people.
Both the high-high and low-low represent clusters.A high-high cluster is what you may refer to as a hot spot. And the low-low clusters represent cold spots.
In the opposite diagonal we have spatial outliers. They are not outliers in the standard sense, extreme observations, they are outliers in that they are surrounded by areas that are very unlike them: So you could have high-low spatial outliers, areas with high levels of Hispanic people and low levels of surrounding Hispanic people, or low-high spatial outliers, areas that have themselves low levels of Hispanic people (or whatever else you are mapping) and that are surrounded by areas with above average levels of Hispanic people.
localmoranstats <- localmoran(WIfinal$HISP_, weightsW)
summary(localmoranstats)## Ii E.Ii Var.Ii Z.Ii
## Min. :-0.23179 Min. :-0.1330661 Min. :0.000000 Min. :-1.9903
## 1st Qu.: 0.02988 1st Qu.:-0.0004505 1st Qu.:0.007475 1st Qu.: 0.3780
## Median : 0.09180 Median :-0.0002826 Median :0.022154 Median : 0.6538
## Mean : 0.80849 Mean :-0.0024038 Mean :0.198742 Mean : 0.9330
## 3rd Qu.: 0.14791 3rd Qu.:-0.0001005 3rd Qu.:0.038485 3rd Qu.: 0.8765
## Max. :38.66655 Max. : 0.0000000 Max. :9.528253 Max. :12.9702
## Pr(z != E(Ii))
## Min. :0.0000
## 1st Qu.:0.3788
## Median :0.5028
## Mean :0.5143
## 3rd Qu.:0.6694
## Max. :0.9975
moranmap <- cbind(WIfinal, localmoranstats) #first, bind the statistics to the original data
names(moranmap)[39] <- "Pvalue" #change the name of this variable to make it easier to call it
tm_shape(moranmap) +
tm_polygons(col = "Ii", style = "pretty", title = "local Moran's statistic")tm_shape(moranmap) +
tm_polygons(
col = "Pvalue",
breaks = c(-Inf, 0.01, 0.05, 0.1, 0.15, Inf),
palette = "-Blues",
title = "local Moran's I p-values") # scale the variable of interest and save it to a new column
moranmap$HISP_scale <- as.vector(scale(moranmap$HISP_))
summary(moranmap$HISP_scale)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.49380 -0.39729 -0.29420 0.00000 -0.07266 7.43120
moranmap$HISP_lag <- lag.listw(weightsW, moranmap$HISP_scale)
summary(moranmap$HISP_lag)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.475708 -0.351229 -0.263490 -0.004672 -0.086916 5.628609
# recall the Moran scatterplot
library(tidyverse)
siglevel = 0.15 #we can change to different levels
moranmap <- moranmap %>% mutate(quad_sig = ifelse(moranmap$HISP_scale > 0 &
moranmap$HISP_lag > 0 &
moranmap$Pvalue <= siglevel,
"high-high",
ifelse(moranmap$HISP_scale <= 0 &
moranmap$HISP_lag <= 0 &
moranmap$Pvalue <= siglevel,
"low-low",
ifelse(moranmap$HISP_scale > 0 &
moranmap$HISP_lag <= 0 &
moranmap$Pvalue <= siglevel,
"high-low",
ifelse(moranmap$HISP_scale <= 0 &
moranmap$HISP_lag > 0 &
moranmap$Pvalue <= siglevel,
"low-high",
"non-significant")))))
moranmap$quad_sig = factor(moranmap$quad_sig)
table(moranmap$quad_sig)##
## high-high low-high non-significant
## 32 2 383
palcolor = c("red", "white") #the color palette for only 2 categories
tmap_mode("plot")
tm_shape(moranmap)+
tm_polygons(col = "quad_sig", palette = palcolor, title = "local moran statistic")+
tm_legend(outside = TRUE)