library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(spData)
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(ggplot2)
library(ggthemes)
library(spdep)
#Let's load in some packages and data
setwd("/Users/erlisakabashi/Desktop/dida 370/homework/chicago_airbnb")
airbnb <- st_read("airbnb_Chicago 2015.shp")
## Reading layer `airbnb_Chicago 2015' from data source
## `/Users/erlisakabashi/Desktop/dida 370/homework/chicago_airbnb/airbnb_Chicago 2015.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 77 features and 20 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -87.94011 ymin: 41.64454 xmax: -87.52414 ymax: 42.02304
## Geodetic CRS: WGS 84
#Let's plot the data first
ggplot()+
geom_sf(data = airbnb, aes(fill=poverty))+
scale_fill_steps(
name = "Percent Households /nBelow Poverty",
low = "lightsteelblue1",
high = "tomato1",
n.breaks = 4,
show.limits = T)+
theme_void()

#This code creates the weights
airbnb_list <- airbnb %>%
poly2nb(st_geometry(airbnb)) %>%
nb2listw(zero.policy = TRUE)
#Now we can calculate the "Global Moran's I" for the data
airbnb_list %>%
moran.test(airbnb$poverty, ., zero.policy = TRUE)
##
## Moran I test under randomisation
##
## data: airbnb$poverty
## weights: .
##
## Moran I statistic standard deviate = 6.8813, p-value = 2.965e-12
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.480660751 -0.013157895 0.005149779
moran.plot(airbnb$poverty,
airbnb_list,
zero.policy = TRUE,
labels = F,
xlab = 'Percent Households /nBelow Poverty',
ylab = 'Lagged Poor Households (of Neighbors)',
pch=20)

lisaRslt <- localmoran(airbnb$poverty, airbnb_list,
zero.policy = TRUE, na.action = na.omit)
# Now we can derive the cluster/outlier types (COType in ArcGIS term) for each spatial feature in the data
library(magrittr)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
##
## extract
significanceLevel <- 0.05; # 95% confidence
meanVal <- mean(airbnb$poverty);
lisaRslt %<>% as_tibble() %>%
set_colnames(c("Ii","E.Ii","Var.Ii","Z.Ii","Pr(z > 0)")) %>%
#Create a new categorical value
#This variable will call insignificant units "insignificant"
#Outliers are determined by LI <0 - units higher than the mean are HL and units lower than the mean are LH
#Clusters are units with LI >= 0, and classified as HH or LL based on whether they are above or below the mean
mutate(coType = case_when(
`Pr(z > 0)` > 0.05 ~ "Insignificant",
`Pr(z > 0)` <= 0.05 & Ii >= 0 & airbnb$poverty >= meanVal ~ "HH",
`Pr(z > 0)` <= 0.05 & Ii >= 0 & airbnb$poverty < meanVal ~ "LL",
`Pr(z > 0)` <= 0.05 & Ii < 0 & airbnb$poverty >= meanVal ~ "HL",
`Pr(z > 0)` <= 0.05 & Ii < 0 & airbnb$poverty < meanVal ~ "LH"))
# Now add this coType to original sf data
airbnb$coType <- lisaRslt$coType %>% replace_na("Insignificant")
#Now we'll plot it!
ggplot(airbnb) +
geom_sf(aes(fill=coType),color = 'lightgrey') +
scale_fill_manual(values = c('red','NA','blue','cyan'), name='Clusters & \nOutliers') +
labs(title = "Number of Poor Families by Neighborhood")+
theme_minimal()
