library(sf)     
## Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(dplyr)   
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(spData) 
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(ggplot2)
library(ggthemes)
library(spdep)
library(spatialreg)
## Loading required package: Matrix
## 
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
## 
##     get.ClusterOption, get.coresOption, get.mcOption,
##     get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
##     set.coresOption, set.mcOption, set.VerboseOption,
##     set.ZeroPolicyOption
library(GWmodel)
## Loading required package: robustbase
## Loading required package: sp
## Loading required package: Rcpp
## Welcome to GWmodel version 2.3-1.
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following objects are masked from 'package:Matrix':
## 
##     expand, pack, unpack
# Point the session at the folder that holds the Chicago Airbnb shapefile.
# NOTE(review): this absolute path only exists on the author's machine; anyone
# else running the script has to edit it (or open the project from that folder).
setwd("/Users/briannasexton/Desktop/bing/Spring 2024/DiDa 370/chicago_airbnb")

# Read the shapefile into a simple feature collection.
chicago <- st_read(dsn = "airbnb_chicago 2015.shp")
## Reading layer `airbnb_Chicago 2015' from data source 
##   `/Users/briannasexton/Desktop/bing/Spring 2024/DiDa 370/chicago_airbnb/airbnb_Chicago 2015.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 77 features and 20 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -87.94011 ymin: 41.64454 xmax: -87.52414 ymax: 42.02304
## Geodetic CRS:  WGS 84

#1) Research Question: What is the impact of education, unemployment, and poverty on crime rates?

# Build the modelling data set:
#   * perc_crimes - crimes per 100 residents (num_crimes / population * 100)
#   * povertyi    - poverty coerced to numeric (kept for backward compatibility)
# Rows with a missing poverty value are dropped. The previous test,
# `!poverty %in% "NA"`, only matched the literal string "NA", so real NA values
# slipped through. Those rows would then stay in `model` (and in any neighbour
# list built from it) while lm() silently drops them.
model <- chicago %>%
  mutate(perc_crimes = (num_crimes / population) * 100) %>%
  select(poverty, unemployed, without_hs, perc_crimes, geometry) %>%
  mutate(povertyi = as.numeric(poverty)) %>%
  filter(!is.na(povertyi))


# Baseline OLS model: crime rate explained by unemployment, share without a
# high-school diploma, and poverty. The fit is stored in lm1 because the
# spatial diagnostics further down reuse it.
lm1 <- lm(
  formula = perc_crimes ~ unemployed + without_hs + poverty,
  data = model
)
summary(lm1)
## 
## Call:
## lm(formula = perc_crimes ~ unemployed + without_hs + poverty, 
##     data = model)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -21.606  -3.197  -0.140   2.946  37.245 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.01793    2.32807   1.726  0.08860 .  
## unemployed   0.55456    0.20497   2.705  0.00848 ** 
## without_hs  -0.41875    0.08664  -4.833 7.23e-06 ***
## poverty      0.79207    0.13842   5.722 2.17e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.083 on 73 degrees of freedom
## Multiple R-squared:  0.6801, Adjusted R-squared:  0.667 
## F-statistic: 51.74 on 3 and 73 DF,  p-value: < 2.2e-16

#2) Moran’s I Test

# Build the spatial weights used by the diagnostics below:
#   1. poly2nb()  - contiguity neighbours derived from the polygon geometry
#   2. nb2listw() - convert the neighbour list to a weights list;
#                   zero.policy = TRUE tolerates polygons with no neighbours
# The geometry is piped in as the only polygon input. The earlier form
# `model %>% poly2nb(st_geometry(model))` expanded to
# `poly2nb(model, st_geometry(model))`, which handed the geometry column to
# poly2nb's second positional parameter (row.names) by accident.
chicago_list <- model %>%
  st_geometry() %>%
  poly2nb() %>%
  nb2listw(zero.policy = TRUE)

# Moran's I test on the OLS residuals of lm1, using the weights in chicago_list
lm.morantest(model = lm1, listw = chicago_list)
## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = perc_crimes ~ unemployed + without_hs + poverty,
## data = model)
## weights: chicago_list
## 
## Moran I statistic standard deviate = 4.6163, p-value = 1.953e-06
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      0.285460603     -0.033812978      0.004783458

Interpretation: Here, with a Moran’s I statistic of 0.285 and a p-value of <0.001, we can reject the null hypothesis and conclude that the residuals of our OLS model were not generated by a spatially random process. In other words, the part of the crime rate that the model leaves unexplained is not spatially independent - areas that are close together tend to have more similar residuals than areas that are further apart, which violates the OLS assumption of independent errors.

#3) Spatial Regression Model:

# Lagrange multiplier diagnostics on the OLS fit; test = "all" requests
# LMerr, LMlag, their robust versions (RLMerr, RLMlag) and SARMA.
# NOTE(review): newer spdep releases appear to offer lm.RStests() as the
# successor to lm.LMtests() - confirm against the installed version.
LM <- lm.LMtests(model = lm1, listw = chicago_list, test = "all")
print(LM)
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = perc_crimes ~ unemployed + without_hs + poverty,
## data = model)
## weights: chicago_list
## 
## LMerr = 14.722, df = 1, p-value = 0.0001246
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = perc_crimes ~ unemployed + without_hs + poverty,
## data = model)
## weights: chicago_list
## 
## LMlag = 8.7994, df = 1, p-value = 0.003013
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = perc_crimes ~ unemployed + without_hs + poverty,
## data = model)
## weights: chicago_list
## 
## RLMerr = 6.0726, df = 1, p-value = 0.01373
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = perc_crimes ~ unemployed + without_hs + poverty,
## data = model)
## weights: chicago_list
## 
## RLMlag = 0.14979, df = 1, p-value = 0.6987
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = perc_crimes ~ unemployed + without_hs + poverty,
## data = model)
## weights: chicago_list
## 
## SARMA = 14.872, df = 2, p-value = 0.0005896

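Justification: Both LMerr (p = 0.0001) and LMlag (p = 0.003) are significant, and LMerr has the lower p-value of the two. Because both are significant, the robust versions decide between them: RLMerr remains significant (p = 0.014) while RLMlag does not (p = 0.699). I would therefore choose the spatial error model.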

#4) How I would interpret the results of the model: From the model, we can see that poverty, low education level, and unemployment all have a statistically significant impact on crime rate. Poverty (0.79) and unemployment (0.55) have positive coefficients: as they increase, the crime rate also increases. The coefficient on without_hs is negative (-0.42), so, holding poverty and unemployment constant, areas with more people with no high school education tend to have a lower crime rate.