Homework 2

#Packages
 library(nortest)
library(car)

## Loading required package: carData

library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(classInt)
library(sandwich)
library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.3     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x dplyr::recode() masks car::recode()
## x purrr::some()   masks car::some()

library(spdep)

## Loading required package: sp

## Loading required package: spData

## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`

## Loading required package: sf

## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1

library(spatialreg)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## 
## Attaching package: 'spatialreg'

## The following objects are masked from 'package:spdep':
## 
##     as.spam.listw, as_dgRMatrix_listw, as_dsCMatrix_I,
##     as_dsCMatrix_IrW, as_dsTMatrix_listw, can.be.simmed, cheb_setup,
##     create_WX, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
##     errorsarlm, get.ClusterOption, get.coresOption, get.mcOption,
##     get.VerboseOption, get.ZeroPolicyOption, GMargminImage, GMerrorsar,
##     griffith_sone, gstsls, Hausman.test, impacts, intImpacts,
##     Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
##     lextrS, lextrW, lmSLX, LU_prepermutate_setup, LU_setup,
##     Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
##     mom_calc_int2, moments_setup, powerWeights, sacsarlm,
##     SE_classic_setup, SE_interp_setup, SE_whichMin_setup,
##     set.ClusterOption, set.coresOption, set.mcOption,
##     set.VerboseOption, set.ZeroPolicyOption, similar.listw, spam_setup,
##     spam_update_setup, SpatialFiltering, spautolm, spBreg_err,
##     spBreg_lag, spBreg_sac, stsls, subgraph_eigenw, trW

library(tidycensus)

Create a series of spatial weights

#Queen Weight Matrix
nbs<-poly2nb(sa_acs2, queen = T)
wts<-nb2listw(nbs, style = "W")
wts

## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 362 
## Number of nonzero links: 2296 
## Percentage nonzero weights: 1.752083 
## Average number of links: 6.342541 
## 
## Weights style: W 
## Weights constants summary:
##     n     nn  S0      S1       S2
## W 362 131044 362 118.439 1480.304

#Rook Weight Matrix
nbs2<-poly2nb(sa_acs2, queen = F)
wts2<-nb2listw(nbs2, style = "W")
wts2

## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 362 
## Number of nonzero links: 1904 
## Percentage nonzero weights: 1.452947 
## Average number of links: 5.259669 
## 
## Weights style: W 
## Weights constants summary:
##     n     nn  S0       S1       S2
## W 362 131044 362 142.2161 1482.595

#k=4 Weight Matrix
knn<-knearneigh(x = st_centroid(sa_acs2)[, 1:2], k = 4)

## Warning in st_centroid.sf(sa_acs2): st_centroid assumes attributes are constant
## over geometries of x

knn4<-knn2nb(knn = knn)
knn4lw<-nb2listw(knn4)
knn4lw

## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 362 
## Number of nonzero links: 1448 
## Percentage nonzero weights: 1.104972 
## Average number of links: 4 
## Non-symmetric neighbours list
## 
## Weights style: W 
## Weights constants summary:
##     n     nn  S0      S1       S2
## W 362 131044 362 160.375 1487.875

Outcome variable is the % of language spoken at home among population 5 years and over speaking Spanish. Predictor variables are % of Hispanics, % of Hispanic Whites, and % of Hispanic Blacks.
Fit the OLS

#Regression Model
fit <- lm(Spanish ~ White + Hispanic + Black,
           data=sa_acs2)
summary(fit)

## 
## Call:
## lm(formula = Spanish ~ White + Hispanic + Black, data = sa_acs2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.5206  -4.5646   0.0382   4.1289  25.2215 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -11.84129    5.10952  -2.317    0.021 *  
## White         0.02812    0.05689   0.494    0.621    
## Hispanic      0.74589    0.01587  47.002   <2e-16 ***
## Black         0.11101    0.07937   1.399    0.163    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.42 on 358 degrees of freedom
## Multiple R-squared:  0.8701, Adjusted R-squared:  0.8691 
## F-statistic: 799.7 on 3 and 358 DF,  p-value: < 2.2e-16

summary(sa_acs2$olsresid)

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -2.604999 -0.713075  0.005974  0.001651  0.646049  4.030059

## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = Spanish ~ White + Hispanic + Black, data = sa_acs2)
## weights: wts
## 
## Moran I statistic standard deviate = 11.573, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##     0.3328432930    -0.0076330198     0.0008654833

## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = Spanish ~ White + Hispanic + Black, data = sa_acs2)
## weights: wts2
## 
## Moran I statistic standard deviate = 11.345, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      0.359614571     -0.007729595      0.001048462

## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = Spanish ~ White + Hispanic + Black, data = sa_acs2)
## weights: knn4lw
## 
## Moran I statistic standard deviate = 10.422, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      0.351291276     -0.007875888      0.001187728

#Examine which alternative SAR model specification 
library(spatialreg)
lm.LMtests(model = fit,
           listw=wts,
           test = c("LMerr", "LMlag", "RLMerr", "RLMlag"))

## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Spanish ~ White + Hispanic + Black, data = sa_acs2)
## weights: wts
## 
## LMerr = 122.58, df = 1, p-value < 2.2e-16
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Spanish ~ White + Hispanic + Black, data = sa_acs2)
## weights: wts
## 
## LMlag = 55.534, df = 1, p-value = 9.182e-14
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Spanish ~ White + Hispanic + Black, data = sa_acs2)
## weights: wts
## 
## RLMerr = 71.857, df = 1, p-value < 2.2e-16
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Spanish ~ White + Hispanic + Black, data = sa_acs2)
## weights: wts
## 
## RLMlag = 4.8162, df = 1, p-value = 0.02819

#Selected Specification (Rook)
library(spatialreg)
lm.LMtests(model = fit,
           listw=wts2,
           test = c("LMerr", "LMlag", "RLMerr", "RLMlag"))

## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Spanish ~ White + Hispanic + Black, data = sa_acs2)
## weights: wts2
## 
## LMerr = 119.16, df = 1, p-value < 2.2e-16
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Spanish ~ White + Hispanic + Black, data = sa_acs2)
## weights: wts2
## 
## LMlag = 59.917, df = 1, p-value = 9.881e-15
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Spanish ~ White + Hispanic + Black, data = sa_acs2)
## weights: wts2
## 
## RLMerr = 64.134, df = 1, p-value = 1.11e-15
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Spanish ~ White + Hispanic + Black, data = sa_acs2)
## weights: wts2
## 
## RLMlag = 4.8885, df = 1, p-value = 0.02704

library(spatialreg)
lm.LMtests(model = fit,
           listw=knn4lw,
           test = c("LMerr", "LMlag", "RLMerr", "RLMlag"))

## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Spanish ~ White + Hispanic + Black, data = sa_acs2)
## weights: knn4lw
## 
## LMerr = 100.84, df = 1, p-value < 2.2e-16
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Spanish ~ White + Hispanic + Black, data = sa_acs2)
## weights: knn4lw
## 
## LMlag = 50.916, df = 1, p-value = 9.639e-13
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Spanish ~ White + Hispanic + Black, data = sa_acs2)
## weights: knn4lw
## 
## RLMerr = 53.631, df = 1, p-value = 2.419e-13
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Spanish ~ White + Hispanic + Black, data = sa_acs2)
## weights: knn4lw
## 
## RLMlag = 3.7114, df = 1, p-value = 0.05404

#Spatial Error model
fit.errqueen <- errorsarlm(Spanish ~ White + Hispanic + Black,
           data=sa_acs2, listw= wts)
summary(fit.errqueen)

## 
## Call:errorsarlm(formula = Spanish ~ White + Hispanic + Black, data = sa_acs2, 
##     listw = wts)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.8190  -3.7401  -0.2691   3.4637  20.1870 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.269129   5.348988 -0.9851   0.3246
## White       -0.018790   0.056989 -0.3297   0.7416
## Hispanic     0.711908   0.023269 30.5949   <2e-16
## Black        0.036611   0.084679  0.4323   0.6655
## 
## Lambda: 0.60862, LR test value: 85.265, p-value: < 2.22e-16
## Asymptotic standard error: 0.058094
##     z-value: 10.476, p-value: < 2.22e-16
## Wald statistic: 109.76, p-value: < 2.22e-16
## 
## Log likelihood: -1142.099 for error model
## ML residual variance (sigma squared): 29.914, (sigma: 5.4694)
## Number of observations: 362 
## Number of parameters estimated: 6 
## AIC: 2296.2, (AIC for lm: 2379.5)

#Spatial Error model with Selected Specification (Rook) "Best" Model
fit.errrook <- errorsarlm(Spanish ~ White + Hispanic + Black,
           data=sa_acs2, listw= wts2)
summary(fit.errrook)

## 
## Call:errorsarlm(formula = Spanish ~ White + Hispanic + Black, data = sa_acs2, 
##     listw = wts2)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -15.623350  -3.660886  -0.059265   3.586628  18.869015 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.6808209  5.3076415 -1.2587   0.2081
## White       -0.0014577  0.0567626 -0.0257   0.9795
## Hispanic     0.7108553  0.0229834 30.9291   <2e-16
## Black        0.0384777  0.0840286  0.4579   0.6470
## 
## Lambda: 0.59949, LR test value: 91.187, p-value: < 2.22e-16
## Asymptotic standard error: 0.055234
##     z-value: 10.854, p-value: < 2.22e-16
## Wald statistic: 117.8, p-value: < 2.22e-16
## 
## Log likelihood: -1139.137 for error model
## ML residual variance (sigma squared): 29.141, (sigma: 5.3983)
## Number of observations: 362 
## Number of parameters estimated: 6 
## AIC: 2290.3, (AIC for lm: 2379.5)

fit.errNN <- errorsarlm(Spanish ~ White + Hispanic + Black,
           data=sa_acs2, listw= knn4lw)
summary(fit.errNN)

## 
## Call:errorsarlm(formula = Spanish ~ White + Hispanic + Black, data = sa_acs2, 
##     listw = knn4lw)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -15.941764  -3.732651  -0.078679   3.583692  18.964301 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.8486582  5.2604170 -1.3019   0.1929
## White       -0.0033264  0.0570219 -0.0583   0.9535
## Hispanic     0.7086873  0.0224410 31.5800   <2e-16
## Black        0.0457771  0.0830670  0.5511   0.5816
## 
## Lambda: 0.53457, LR test value: 77.66, p-value: < 2.22e-16
## Asymptotic standard error: 0.055238
##     z-value: 9.6775, p-value: < 2.22e-16
## Wald statistic: 93.654, p-value: < 2.22e-16
## 
## Log likelihood: -1145.901 for error model
## ML residual variance (sigma squared): 30.768, (sigma: 5.5469)
## Number of observations: 362 
## Number of parameters estimated: 6 
## AIC: 2303.8, (AIC for lm: 2379.5)

Results: After assessing the Moran’s I across three neighbor specifications, the rook specification showed the largest degree of dependency (the residuals is .36, with a z-test of 11.35 and a very small p values, p < .0001).I further assessed the alternative SAR model specifications by their respected AICs. The error model was the best selection. Compared to the OLS model, % Hispanics is still statistically significant but the direction for % White became negative. The spatial auto-regression coefficient is 91 with a SE of 0.06, p < 0.001. The residual mean square is 5.4 which is smaller than the OLS RSE 6.42; thus, the error model is explaining more of the variation. Lastly, the AIC of the error model is 2290.3 compared to the OLS model AIC of 2379.5, which indicates that error model is better measure of model fit.