correlation_practice

library(sf)

## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(spData)

## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`

library(ggplot2)
library(ggthemes)
library(spdep)
#Let's load in some packages and data
setwd("/Users/erlisakabashi/Desktop/dida 370/homework/chicago_airbnb")
airbnb <- st_read("airbnb_Chicago 2015.shp")

## Reading layer `airbnb_Chicago 2015' from data source 
##   `/Users/erlisakabashi/Desktop/dida 370/homework/chicago_airbnb/airbnb_Chicago 2015.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 77 features and 20 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -87.94011 ymin: 41.64454 xmax: -87.52414 ymax: 42.02304
## Geodetic CRS:  WGS 84

#Let's plot the data first

ggplot()+
  geom_sf(data = airbnb, aes(fill=poverty))+
  scale_fill_steps(
    name = "Percent Households /nBelow Poverty",
    low = "lightsteelblue1",
    high = "tomato1",
    n.breaks = 4,
    show.limits = T)+
  theme_void()

#This code creates the weights
airbnb_list <- airbnb %>% 
  poly2nb(st_geometry(airbnb)) %>% 
  nb2listw(zero.policy = TRUE) 

#Now we can calculate the "Global Moran's I" for the data
airbnb_list %>% 
  moran.test(airbnb$poverty, ., zero.policy = TRUE)

## 
##  Moran I test under randomisation
## 
## data:  airbnb$poverty  
## weights: .    
## 
## Moran I statistic standard deviate = 6.8813, p-value = 2.965e-12
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.480660751      -0.013157895       0.005149779

moran.plot(airbnb$poverty, 
           airbnb_list, 
           zero.policy = TRUE, 
           labels = F,
           xlab = 'Percent Households /nBelow Poverty',
           ylab = 'Lagged Poor Households (of Neighbors)',
           pch=20)

lisaRslt <- localmoran(airbnb$poverty, airbnb_list, 
                       zero.policy = TRUE, na.action = na.omit)

# Now we can derive the cluster/outlier types (COType in ArcGIS term) for each spatial feature in the data
library(magrittr)
library(tidyr)

## 
## Attaching package: 'tidyr'

## The following object is masked from 'package:magrittr':
## 
##     extract

significanceLevel <- 0.05; # 95% confidence
meanVal <- mean(airbnb$poverty);

lisaRslt %<>% as_tibble() %>%
  set_colnames(c("Ii","E.Ii","Var.Ii","Z.Ii","Pr(z > 0)")) %>%
  #Create a new categorical value
  #This variable will call insignificant units "insignificant"
  #Outliers are determined by LI <0 - units higher than the mean are HL and units lower than the mean are LH
  #Clusters are units with LI >= 0, and classified as HH or LL based on whether they are above or below the mean
  mutate(coType = case_when(
    `Pr(z > 0)` > 0.05 ~ "Insignificant",
    `Pr(z > 0)` <= 0.05 & Ii >= 0 & airbnb$poverty >= meanVal ~ "HH",
    `Pr(z > 0)` <= 0.05 & Ii >= 0 & airbnb$poverty < meanVal ~ "LL",
    `Pr(z > 0)` <= 0.05 & Ii < 0 & airbnb$poverty >= meanVal ~ "HL",
    `Pr(z > 0)` <= 0.05 & Ii < 0 & airbnb$poverty < meanVal ~ "LH"))

# Now add this coType to original sf data
airbnb$coType <- lisaRslt$coType %>% replace_na("Insignificant")

#Now we'll plot it!
ggplot(airbnb) +
  geom_sf(aes(fill=coType),color = 'lightgrey') +
  scale_fill_manual(values = c('red','NA','blue','cyan'), name='Clusters & \nOutliers') +
  labs(title = "Number of Poor Families by Neighborhood")+
  theme_minimal()

correlation_practice

2024-04-10