# Load in packages
library(sf)     
## Warning: package 'sf' was built under R version 4.3.2
## Linking to GEOS 3.11.2, GDAL 3.7.2, PROJ 9.3.0; sf_use_s2() is TRUE
library(dplyr)   
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(spData) 
## Warning: package 'spData' was built under R version 4.3.2
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(ggplot2)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.3.2
# Load in data
setwd("C:/Users/kajsa/OneDrive/Documents/Binghamton/Senior - Spring/DIDA 370/chicago_airbnb")
chicago <- st_read("airbnb_Chicago 2015.shp")
## Reading layer `airbnb_Chicago 2015' from data source 
##   `C:\Users\kajsa\OneDrive\Documents\Binghamton\Senior - Spring\DIDA 370\chicago_airbnb\airbnb_Chicago 2015.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 77 features and 20 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -87.94011 ymin: 41.64454 xmax: -87.52414 ymax: 42.02304
## Geodetic CRS:  WGS 84
library(spdep)
## Warning: package 'spdep' was built under R version 4.3.3
#This code creates the weights
chicago_list <- chicago %>% 
  poly2nb(st_geometry(chicago)) %>% 
  nb2listw(zero.policy = TRUE) 

#Now we can calculate the "Global Moran's I" for the data
#start with the weights, then give R the corresponding variable
chicago_list %>% 
  moran.test(chicago$poverty, ., zero.policy = TRUE)
## 
##  Moran I test under randomisation
## 
## data:  chicago$poverty  
## weights: .    
## 
## Moran I statistic standard deviate = 6.8813, p-value = 2.965e-12
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.480660751      -0.013157895       0.005149779
moran.plot(chicago$poverty, 
           chicago_list, 
           zero.policy = TRUE, 
           labels = F,
           xlab = 'Number of Poor Households',
           ylab = 'Lagged Poor Households (of Neighbors)',
                   pch=20)

lisaRslt <- localmoran(chicago$poverty, chicago_list, 
            zero.policy = TRUE, na.action = na.omit)
# Now we can derive the cluster/outlier types (COType in ArcGIS term) for each spatial feature in the data
library(magrittr)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
## 
##     extract
significanceLevel <- 0.05; # 95% confidence
meanVal <- mean(chicago$poverty);

lisaRslt %<>% as_tibble() %>%
  set_colnames(c("Ii","E.Ii","Var.Ii","Z.Ii","Pr(z > 0)")) %>%
  mutate(coType = case_when(
  `Pr(z > 0)` > 0.05 ~ "Insignificant",
  `Pr(z > 0)` <= 0.05 & Ii >= 0 & chicago$poverty >= meanVal ~ "HH",
  `Pr(z > 0)` <= 0.05 & Ii >= 0 & chicago$poverty < meanVal ~ "LL",
  `Pr(z > 0)` <= 0.05 & Ii < 0 & chicago$poverty >= meanVal ~ "HL",
  `Pr(z > 0)` <= 0.05 & Ii < 0 & chicago$poverty < meanVal ~ "LH"))

# Now add this coType to original sf data
chicago$coType <- lisaRslt$coType %>% replace_na("Insignificant")

#Now we'll plot it!
ggplot(chicago) +
  geom_sf(aes(fill=coType),color = 'lightgrey') +
  scale_fill_manual(values = c('red','NA','blue','cyan'), name='Clusters & \nOutliers') +
  labs(title = "Percentage of Poverty by Chiacgo Neighborhood")+
  theme_minimal()

# cyan region - area where the poverty level is lower than the average of all of chicago 
# red region - area where the poverty level is higher than the average of all the chicago - low values are surronded by other low values 
# white region - there is no significance/ dependence between these and the other region 
# LH dark blue- these are outliers