library(sf)
## Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(spData)
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(ggplot2)
library(ggthemes)
library(spdep)
library(spatialreg)
## Loading required package: Matrix
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption
library(GWmodel)
## Loading required package: robustbase
## Loading required package: sp
## Loading required package: Rcpp
## Welcome to GWmodel version 2.3-1.
library(tidyr)
##
## Attaching package: 'tidyr'
## The following objects are masked from 'package:Matrix':
##
## expand, pack, unpack
# Point the session at the project folder, then read the Chicago Airbnb
# shapefile from it.
# NOTE(review): this absolute path is machine-specific, and the st_read() log
# reports the file as "airbnb_Chicago 2015.shp" (capital C) -- confirm the
# file name before running on a case-sensitive filesystem.
setwd("/Users/briannasexton/Desktop/bing/Spring 2024/DiDa 370/chicago_airbnb")
chicago <- st_read(dsn = "airbnb_chicago 2015.shp")
## Reading layer `airbnb_Chicago 2015' from data source
## `/Users/briannasexton/Desktop/bing/Spring 2024/DiDa 370/chicago_airbnb/airbnb_Chicago 2015.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 77 features and 20 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -87.94011 ymin: 41.64454 xmax: -87.52414 ymax: 42.02304
## Geodetic CRS: WGS 84
#1) Research Question: What is the impact of education, unemployment, and poverty on crime rates?
# Build the modelling data set: perc_crimes is num_crimes / population * 100,
# kept alongside the three predictors and the geometry column.
model <- chicago %>%
  mutate(perc_crimes = (num_crimes / population) * 100) %>%
  select(poverty, unemployed, without_hs, perc_crimes, geometry) %>%
  # Drop areas with no poverty value. `%in% "NA"` only matches the literal
  # string "NA", so real missing values need an explicit is.na() check.
  filter(!is.na(poverty), !poverty %in% "NA") %>%
  # Numeric copy of poverty, retained for backward compatibility; the
  # regression below uses `poverty` itself.
  mutate(povertyi = as.numeric(poverty))

# OLS baseline: crime rate regressed on unemployment, education and poverty.
# Fit first, then summarise, so the assignment is not hidden inside a call.
lm1 <- lm(perc_crimes ~ unemployed + without_hs + poverty, data = model)
summary(lm1)
##
## Call:
## lm(formula = perc_crimes ~ unemployed + without_hs + poverty,
## data = model)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.606 -3.197 -0.140 2.946 37.245
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.01793 2.32807 1.726 0.08860 .
## unemployed 0.55456 0.20497 2.705 0.00848 **
## without_hs -0.41875 0.08664 -4.833 7.23e-06 ***
## poverty 0.79207 0.13842 5.722 2.17e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.083 on 73 degrees of freedom
## Multiple R-squared: 0.6801, Adjusted R-squared: 0.667
## F-statistic: 51.74 on 3 and 73 DF, p-value: < 2.2e-16
#2) Moran’s I Test
# Build the neighbour list for the areas in `model` and convert it to a
# spatial weights list. poly2nb() takes the polygons as its first argument;
# the earlier form also piped st_geometry(model) in as a second positional
# argument, which poly2nb() reads as `row.names`, so it is dropped here.
chicago_list <- model %>%
  poly2nb() %>%
  nb2listw(zero.policy = TRUE)
# Run the Moran's I test for regression residuals
lm.morantest(lm1, chicago_list)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = perc_crimes ~ unemployed + without_hs + poverty,
## data = model)
## weights: chicago_list
##
## Moran I statistic standard deviate = 4.6163, p-value = 1.953e-06
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.285460603 -0.033812978 0.004783458
Interpretation: With a Moran’s I statistic of 0.285 and a p-value < 0.001, we can reject the null hypothesis and conclude that the residuals of the crime-rate regression were not generated by a spatially random process. In other words, the variation in crime rates left unexplained by unemployment, education, and poverty is not spatially independent - the residuals are positively spatially autocorrelated, so areas that are close together tend to have more similar residuals than areas that are further apart.
#3) Spatial Regression Model:
# Lagrange multiplier diagnostics on the OLS fit, requesting all tests so the
# spatial error and spatial lag alternatives can be compared.
# NOTE(review): recent spdep releases appear to deprecate lm.LMtests() in
# favour of lm.RStests() -- confirm against the installed version.
LM <- lm.LMtests(model = lm1, listw = chicago_list, test = "all")
print(LM)
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = perc_crimes ~ unemployed + without_hs + poverty,
## data = model)
## weights: chicago_list
##
## LMerr = 14.722, df = 1, p-value = 0.0001246
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = perc_crimes ~ unemployed + without_hs + poverty,
## data = model)
## weights: chicago_list
##
## LMlag = 8.7994, df = 1, p-value = 0.003013
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = perc_crimes ~ unemployed + without_hs + poverty,
## data = model)
## weights: chicago_list
##
## RLMerr = 6.0726, df = 1, p-value = 0.01373
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = perc_crimes ~ unemployed + without_hs + poverty,
## data = model)
## weights: chicago_list
##
## RLMlag = 0.14979, df = 1, p-value = 0.6987
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = perc_crimes ~ unemployed + without_hs + poverty,
## data = model)
## weights: chicago_list
##
## SARMA = 14.872, df = 2, p-value = 0.0005896
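Justification: Both LMerr (p = 0.0001) and LMlag (p = 0.003) are significant, so the robust versions are used to decide between them: RLMerr remains significant (p = 0.014), while RLMlag does not (p = 0.699). I would therefore choose the spatial error model; LMerr also has the lowest p-value of the tests.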
#4) How I would interpret the results of the model: From the model, we can see that poverty, education level, and unemployment all have a statistically significant impact on the crime rate. Poverty (0.79) and unemployment (0.55) have positive coefficients, so as they increase, the crime rate also increases. The coefficient on without_hs (people with no high school education) is negative (-0.42), so, holding the other variables constant, a higher value is associated with a lower crime rate.