library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(spData)
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(ggplot2)
library(ggthemes)
library(spdep)
library(spatialreg)
## Loading required package: Matrix
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption
library(GWmodel)
## Loading required package: robustbase
## Loading required package: sp
## Loading required package: Rcpp
## Welcome to GWmodel version 2.3-2.
library(tidyr)
##
## Attaching package: 'tidyr'
## The following objects are masked from 'package:Matrix':
##
## expand, pack, unpack
setwd("/Users/erlisakabashi/Desktop/dida 370/homework/chicago_airbnb")
airbnb <- st_read("airbnb_Chicago 2015.shp")
## Reading layer `airbnb_Chicago 2015' from data source
## `/Users/erlisakabashi/Desktop/dida 370/homework/chicago_airbnb/airbnb_Chicago 2015.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 77 features and 20 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -87.94011 ymin: 41.64454 xmax: -87.52414 ymax: 42.02304
## Geodetic CRS: WGS 84
#percent unemployed over 16 years old
ggplot()+
geom_sf(data = airbnb, aes(fill=unemployed))+
scale_fill_steps(
name = "Unemployment",
low = "lightsteelblue1",
high = "tomato1",
n.breaks = 5,
show.limits = T)
RESEARCH QUESTION: Does hardship, crime, or without high school rates effect unemployment rates in Chicago?
summary(lm1 <- lm(unemployed~ harship_in + num_crimes + without_hs, data = airbnb))
##
## Call:
## lm(formula = unemployed ~ harship_in + num_crimes + without_hs,
## data = airbnb)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.2956 -1.8252 0.0558 2.1518 8.7022
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.024e+00 7.921e-01 8.867 3.31e-13 ***
## harship_in 3.741e-01 2.071e-02 18.060 < 2e-16 ***
## num_crimes 5.310e-06 6.032e-05 0.088 0.93
## without_hs -5.018e-01 4.958e-02 -10.121 1.53e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.995 on 73 degrees of freedom
## Multiple R-squared: 0.8486, Adjusted R-squared: 0.8424
## F-statistic: 136.4 on 3 and 73 DF, p-value: < 2.2e-16
airbnb_list <- airbnb %>%
poly2nb(st_geometry(airbnb)) %>%
nb2listw(zero.policy = TRUE)
lm.morantest(lm1, airbnb_list)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = unemployed ~ harship_in + num_crimes + without_hs,
## data = airbnb)
## weights: airbnb_list
##
## Moran I statistic standard deviate = 1.4754, p-value = 0.07005
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.068772397 -0.033672615 0.004821243
The p-value in this Moran’s test is weakly significant, meaning that there is evidence of some spatial autocorrelation in the residuals.
lm_error <- errorsarlm(unemployed ~ harship_in + num_crimes + without_hs, data = airbnb,
listw = airbnb_list,
zero.policy = TRUE,
na.action = na.omit)
summary(lm_error)
##
## Call:errorsarlm(formula = unemployed ~ harship_in + num_crimes + without_hs,
## data = airbnb, listw = airbnb_list, na.action = na.omit,
## zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.29031 -1.95061 0.12897 1.98196 8.72179
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.1103e+00 8.5477e-01 8.3184 <2e-16
## harship_in 3.6582e-01 2.2205e-02 16.4743 <2e-16
## num_crimes 2.0412e-05 6.1780e-05 0.3304 0.7411
## without_hs -4.9218e-01 5.3272e-02 -9.2389 <2e-16
##
## Lambda: 0.18754, LR test value: 1.0091, p-value: 0.31511
## Asymptotic standard error: 0.16167
## z-value: 1.1601, p-value: 0.24602
## Wald statistic: 1.3458, p-value: 0.24602
##
## Log likelihood: -191.1616 for error model
## ML residual variance (sigma squared): 8.3313, (sigma: 2.8864)
## Number of observations: 77
## Number of parameters estimated: 6
## AIC: 394.32, (AIC for lm: 393.33)
Lamda is insignificant, we dont use this model! We move on.
seResiduals <- rep(0, length(airbnb$unemployed))
#create an index based on the order of the residuals in the lm_error object
resIndex <- lm_error$residuals %>% names() %>% as.integer();
#use the index to fill in the seResiduals object with corresponding residual values
seResiduals[resIndex] <- lm_error$residuals
#perform the Moran Test
airbnb_list %>%
moran.test(seResiduals, ., zero.policy = TRUE)
##
## Moran I test under randomisation
##
## data: seResiduals
## weights: .
##
## Moran I statistic standard deviate = 0.17189, p-value = 0.4318
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.0008557562 -0.0131578947 0.0051222529
The linear error model is not significant, so now we look at the lag model.
lm_lag <- lagsarlm(unemployed~ harship_in + num_crimes + without_hs, data = airbnb,
listw = airbnb_list,
zero.policy = TRUE,
na.action = na.omit)
summary(lm_lag)
##
## Call:lagsarlm(formula = unemployed ~ harship_in + num_crimes + without_hs,
## data = airbnb, listw = airbnb_list, na.action = na.omit,
## zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.77334 -1.76733 0.40202 1.91087 8.59803
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.5771e+00 1.1495e+00 3.9817 6.843e-05
## harship_in 3.2293e-01 2.6957e-02 11.9795 < 2.2e-16
## num_crimes 1.6666e-05 5.6040e-05 0.2974 0.7662
## without_hs -4.3007e-01 5.3134e-02 -8.0941 6.661e-16
##
## Rho: 0.22034, LR test value: 6.5236, p-value: 0.010645
## Asymptotic standard error: 0.081665
## z-value: 2.6981, p-value: 0.0069731
## Wald statistic: 7.2799, p-value: 0.0069731
##
## Log likelihood: -188.4044 for lag model
## ML residual variance (sigma squared): 7.7331, (sigma: 2.7809)
## Number of observations: 77
## Number of parameters estimated: 6
## AIC: 388.81, (AIC for lm: 393.33)
## LM test for residual autocorrelation
## test value: 0.16343, p-value: 0.68602
This model already looks promising with a p-value of 0.01
seResiduals <- rep(0, length(airbnb$unemployed))
resIndex <- lm_lag$residuals %>% names() %>% as.integer();
seResiduals[resIndex] <- lm_lag$residuals
#perform the Moran Test
airbnb_list %>%
moran.test(seResiduals, ., zero.policy = TRUE)
##
## Moran I test under randomisation
##
## data: seResiduals
## weights: .
##
## Moran I statistic standard deviate = -0.17724, p-value = 0.5703
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.02583888 -0.01315789 0.00511869
The p-value for the lag model is obviously not significant, so there is no evidence here of spatial autocorrelation in the residuals. We continue.
LM <- lm.LMtests(lm1, airbnb_list, test = "all")
## Please update scripts to use lm.RStests in place of lm.LMtests
LM
##
## Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
## dependence
##
## data:
## model: lm(formula = unemployed ~ harship_in + num_crimes + without_hs,
## data = airbnb)
## test weights: listw
##
## RSerr = 0.8545, df = 1, p-value = 0.3553
##
##
## Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
## dependence
##
## data:
## model: lm(formula = unemployed ~ harship_in + num_crimes + without_hs,
## data = airbnb)
## test weights: listw
##
## RSlag = 6.3731, df = 1, p-value = 0.01159
##
##
## Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
## dependence
##
## data:
## model: lm(formula = unemployed ~ harship_in + num_crimes + without_hs,
## data = airbnb)
## test weights: listw
##
## adjRSerr = 0.18143, df = 1, p-value = 0.6701
##
##
## Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
## dependence
##
## data:
## model: lm(formula = unemployed ~ harship_in + num_crimes + without_hs,
## data = airbnb)
## test weights: listw
##
## adjRSlag = 5.7, df = 1, p-value = 0.01696
##
##
## Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
## dependence
##
## data:
## model: lm(formula = unemployed ~ harship_in + num_crimes + without_hs,
## data = airbnb)
## test weights: listw
##
## SARMA = 6.5545, df = 2, p-value = 0.03773
The RSlag and the adjRSlag tests are the only ones that are significant here, so this reinforced the idea that the lag model is the best fit for this data and these variables.