library(sf)
## Linking to GEOS 3.11.2, GDAL 3.7.2, PROJ 9.3.0; sf_use_s2() is TRUE
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(spData)
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(ggplot2)
library(ggthemes)
library(spdep)
## Warning: package 'spdep' was built under R version 4.3.3
library(spatialreg)
## Warning: package 'spatialreg' was built under R version 4.3.3
## Loading required package: Matrix
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption
library(GWmodel)
## Warning: package 'GWmodel' was built under R version 4.3.3
## Loading required package: robustbase
## Warning: package 'robustbase' was built under R version 4.3.3
## Loading required package: sp
## Loading required package: Rcpp
## Welcome to GWmodel version 2.3-2.
library(tidyr)
##
## Attaching package: 'tidyr'
## The following objects are masked from 'package:Matrix':
##
## expand, pack, unpack
# Let's load in some packages and data
setwd("C:/Users/lizzi/OneDrive/Desktop/DIDA 370/chicago_airbnb/chicago_airbnb")
chicago <- st_read("airbnb_Chicago 2015.shp")
## Reading layer `airbnb_Chicago 2015' from data source
## `C:\Users\lizzi\OneDrive\Desktop\DIDA 370\chicago_airbnb\chicago_airbnb\airbnb_Chicago 2015.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 77 features and 20 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -87.94011 ymin: 41.64454 xmax: -87.52414 ymax: 42.02304
## Geodetic CRS: WGS 84
ggplot() +
  geom_sf(data = chicago, aes(fill = poverty)) +
  scale_fill_steps(
    name = "",
    low = "lightsteelblue1",
    high = "tomato1",
    n.breaks = 5,
    show.limits = TRUE) +
  theme_void()

# First we build the dataset for our regression: we regress poverty on unemployment, lack of a high school education, percent crime, and the hardship index.
model <- chicago %>%
  mutate(perc_crime = (num_crimes / population) * 100) %>%
  select(poverty, unemployed, harship_in, without_hs, perc_crime, AREAID, geometry) %>%
  filter(!is.na(poverty))   # drop any neighborhoods with missing poverty values
# We fit a standard (non-spatial) linear regression on our model.
summary(lm1 <- lm(poverty ~ unemployed + without_hs + perc_crime + harship_in, data = model))
##
## Call:
## lm(formula = poverty ~ unemployed + without_hs + perc_crime +
## harship_in, data = model)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.098 -3.361 -1.179 2.807 15.886
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.57924 2.00928 2.777 0.006994 **
## unemployed -0.15730 0.19645 -0.801 0.425941
## without_hs -0.36544 0.14750 -2.478 0.015571 *
## perc_crime 0.26125 0.06644 3.932 0.000192 ***
## harship_in 0.41375 0.08890 4.654 1.45e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.014 on 72 degrees of freedom
## Multiple R-squared: 0.8209, Adjusted R-squared: 0.8109
## F-statistic: 82.48 on 4 and 72 DF, p-value: < 2.2e-16
# Build queen-contiguity neighbours from the neighborhood polygons and convert
# them to row-standardised spatial weights.
chicago_list <- model %>%
  poly2nb() %>%
  nb2listw(zero.policy = TRUE)
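# As a sanity check (a minimal sketch, not part of the original write-up), the
# contiguity structure behind these weights can be drawn over the polygons to
# confirm that every neighborhood has at least one neighbour.
plot(st_geometry(model), border = "grey70")
plot(chicago_list$neighbours, st_coordinates(st_centroid(st_geometry(model))),
  add = TRUE, col = "tomato1")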
# We carry out the Moran test on the OLS residuals using the weights we just created.
lm.morantest(lm1, chicago_list)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = poverty ~ unemployed + without_hs + perc_crime +
## harship_in, data = model)
## weights: chicago_list
##
## Moran I statistic standard deviate = 6.2879, p-value = 1.609e-10
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.400000849 -0.035552525 0.004798177
#Based on our Moran test, the Moran's I statistic for the residuals is 0.400 with a p-value well below 0.001, so the residuals were not generated by a spatially random process: poverty levels in Chicago are not spatially independent, and the OLS model leaves spatial structure unexplained.
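# To visualise the dependence the test detects, a Moran scatterplot of the OLS
# residuals against their spatially lagged values can be drawn (a minimal sketch,
# not shown in the original output).
moran.plot(residuals(lm1), chicago_list, zero.policy = TRUE,
  xlab = "OLS residuals", ylab = "Spatially lagged residuals")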
#We run the Lagrange multiplier tests to see which spatial model would be best.
LM <- lm.LMtests(lm1, chicago_list, test = "all")
## Please update scripts to use lm.RStests in place of lm.LMtests
LM
##
## Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
## dependence
##
## data:
## model: lm(formula = poverty ~ unemployed + without_hs + perc_crime +
## harship_in, data = model)
## test weights: listw
##
## RSerr = 28.907, df = 1, p-value = 7.594e-08
##
##
## Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
## dependence
##
## data:
## model: lm(formula = poverty ~ unemployed + without_hs + perc_crime +
## harship_in, data = model)
## test weights: listw
##
## RSlag = 4.2715, df = 1, p-value = 0.03876
##
##
## Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
## dependence
##
## data:
## model: lm(formula = poverty ~ unemployed + without_hs + perc_crime +
## harship_in, data = model)
## test weights: listw
##
## adjRSerr = 26.138, df = 1, p-value = 3.178e-07
##
##
## Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
## dependence
##
## data:
## model: lm(formula = poverty ~ unemployed + without_hs + perc_crime +
## harship_in, data = model)
## test weights: listw
##
## adjRSlag = 1.5026, df = 1, p-value = 0.2203
##
##
## Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
## dependence
##
## data:
## model: lm(formula = poverty ~ unemployed + without_hs + perc_crime +
## harship_in, data = model)
## test weights: listw
##
## SARMA = 30.41, df = 2, p-value = 2.492e-07
#Based on our Lagrange multiplier tests, the robust (adjusted) error statistic is highly significant (p = 3.18e-07) while the robust lag statistic is not (p = 0.22), so a spatial error model should do the better job of controlling for the spatial dependence in the data.
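# For completeness (a minimal sketch, not part of the original analysis), the
# competing spatial lag model could be fit with the same formula and weights and
# its AIC compared against the error model fitted below; the robust LM tests
# already favour the error specification.
lm_lag <- lagsarlm(poverty ~ unemployed + without_hs + perc_crime + harship_in,
  data = model, listw = chicago_list, zero.policy = TRUE)
AIC(lm_lag)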
lm_error <- errorsarlm(poverty ~ unemployed + without_hs + perc_crime + harship_in,
                       data = model,
                       listw = chicago_list,
                       zero.policy = TRUE,
                       na.action = na.omit)
summary(lm_error)
##
## Call:errorsarlm(formula = poverty ~ unemployed + without_hs + perc_crime +
## harship_in, data = model, listw = chicago_list, na.action = na.omit,
## zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.47435 -2.64435 -0.60737 1.87572 15.41638
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.194394 2.254425 1.8605 0.062813
## unemployed -0.068804 0.151192 -0.4551 0.649055
## without_hs -0.382032 0.128163 -2.9808 0.002875
## perc_crime 0.155251 0.062691 2.4765 0.013269
## harship_in 0.468091 0.067962 6.8875 5.678e-12
##
## Lambda: 0.71069, LR test value: 28.77, p-value: 8.1515e-08
## Asymptotic standard error: 0.08969
## z-value: 7.9239, p-value: 2.2204e-15
## Wald statistic: 62.788, p-value: 2.3315e-15
##
## Log likelihood: -216.4258 for error model
## ML residual variance (sigma squared): 14.097, (sigma: 3.7546)
## Number of observations: 77
## Number of parameters estimated: 7
## AIC: 446.85, (AIC for lm: 473.62)
#The error model's AIC of 446.85 is lower than the linear model's 473.62, so the spatial specification is an improvement. Lambda is 0.71069 and highly significant (LR test p-value 8.15e-08), confirming spatial autocorrelation in the residuals that the error term now accounts for. Among the predictors, without_hs and the hardship index are the most significant and may have the biggest effect on poverty in Chicago.
#Education had a negative and significant relationship with poverty, while hardship had a positive and significant one. This is intuitive: more education generally improves the ability to get a job, lowering poverty, while a higher hardship index indicates difficulty accessing resources and is associated with higher poverty.
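# As a final check (a minimal sketch, assuming the fitted model and weights above),
# a Moran test on the error model's residuals should show that the spatial
# autocorrelation left behind by the OLS fit has largely been absorbed.
moran.test(residuals(lm_error), chicago_list, zero.policy = TRUE)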