library(haven)
library(foreign) 
library(readr)
library(dplyr)
library(ggplot2)
library(broom)
library(car)
library(MASS) 
library(lmtest)
library(zoo)
library(nortest)
library(plotrix)
library(scales)
library(tableone)
library(Weighted.Desc.Stat)
library(mitools)
library(survey)
library(VGAM)
library(stargazer)
library(sandwich)
library(pastecs)
library(muhaz)
library(ggpubr)
library(survminer)
library(eha)
library(reshape2)
library(data.table)
library(magrittr)
library(tidyverse)
library(sjmisc)
library(sjPlot)
library(sjmisc)
library(sjlabelled)
library(weights)
library(GGally)
library(tigris)
library(RColorBrewer)
library(patchwork)
library(tidycensus)
library(censusapi)
library(spdep)
library(classInt)
library(sf)
library(spatialreg)

Data Extraction

options(tigris_class = "sf")

nj<-(get_acs(geography = "tract",
                state="nj",
                county = c("003", "017" , "013" , "039" , "023" , "025"),
                year = 2015,
                variables=c("DP02_0070PE","DP05_0032PE","DP05_0033PE", "DP05_0037PE", "DP05_0039PE", "DP05_0052PE", "DP02_0066E", "DP05_0002PE", "DP02_0092PE", "DP05_0001PE"),
                geometry = T,
                output = "wide"))

## Getting data from the 2011-2015 5-year ACS

## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.

## Using the ACS Data Profile

nj$county<-substr(nj$GEOID, 1, 6)

1. Form neighbors and weight matrix

nj.nbr<-(poly2nb(nj, queen=F))
nj.wtr<-nb2listw(nj.nbr, zero.policy=T)

nj.nbq<-poly2nb(nj, queen=T)
nj.wtq<-nb2listw(nj.nbr, style="W", zero.policy=T)

nj.nb6<-knearneigh(st_centroid(nj), k=6)

## Warning in st_centroid.sf(nj): st_centroid assumes attributes are constant over
## geometries of x

nj.nb6<-knn2nb(nj.nb6)
nj.wt6<-nb2listw(nj.nb6, style="W")

nj.nb5<-knearneigh(st_centroid(nj), k=5)

## Warning in st_centroid.sf(nj): st_centroid assumes attributes are constant over
## geometries of x

nj.nb5<-knn2nb(nj.nb5)
nj.wt5<-nb2listw(nj.nb5, style="W")

nj.nb4<-knearneigh(st_centroid(nj), k=4)

## Warning in st_centroid.sf(nj): st_centroid assumes attributes are constant over
## geometries of x

nj.nb4<-knn2nb(nj.nb4)
nj.wt4<-nb2listw(nj.nb4, style="W")

nj.nb3<-knearneigh(st_centroid(nj), k=3)

## Warning in st_centroid.sf(nj): st_centroid assumes attributes are constant over
## geometries of x

nj.nb3<-knn2nb(nj.nb3)
nj.wt3<-nb2listw(nj.nb3,style="W")

nj.nb2<-knearneigh(st_centroid(nj) , k=2)

## Warning in st_centroid.sf(nj): st_centroid assumes attributes are constant over
## geometries of x

nj.nb2<-knn2nb(nj.nb2)
nj.wt2<-nb2listw(nj.nb2,style="W")

2. Define outcome and predictors

# Outcome: 

         # DP02_0070PE   Percent with disability, total Population

# Predictors: 
      
         # 1 DP02_0092PE  Percent foreign-born, total Population
         # 2 DP05_0002PE  Percent male, total Population
         # 4 DP05_0032PE  Percent white, total Population, one race
         # 5 DP05_0033PE  percent black, total population, one race
         # 6 DP05_0037PE  Percent indigenous, total Population, one race
         # 7 DP05_0039PE  Percent Asian, total Population, one race
         # 8 DP05_0052PE  Percent other race, total Population, one race
         # * DP05_0001PE  total population

3. Fit OLS Model

#construct the weight for the model, based on the population size of the county
nj$popsize<-(nj$DP05_0001PE)/1

summary(nj$popsize)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0    3250    4347    4469    5561   12970

# OLS and weighted OLS regression models
fit.ols<-lm(DP02_0070PE~DP02_0092PE+DP05_0002PE+DP05_0032PE+DP05_0033PE+DP05_0037PE+DP05_0039PE+DP05_0052PE,data=nj)

fit.ols.wt<-lm(DP02_0070PE~DP02_0092PE+DP05_0002PE+DP05_0032PE+DP05_0033PE+DP05_0037PE+DP05_0039PE+DP05_0052PE,data=nj, weights=nj$popsize)

summary(fit.ols)

## 
## Call:
## lm(formula = DP02_0070PE ~ DP02_0092PE + DP05_0002PE + DP05_0032PE + 
##     DP05_0033PE + DP05_0037PE + DP05_0039PE + DP05_0052PE, data = nj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3763.0 -1129.8  -124.3  1035.0  8230.3 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1395.763   2419.619   0.577 0.564173    
## DP02_0092PE    6.652      5.353   1.243 0.214321    
## DP05_0002PE  -42.320     11.151  -3.795 0.000157 ***
## DP05_0032PE   52.854     23.608   2.239 0.025393 *  
## DP05_0033PE   35.312     23.671   1.492 0.136081    
## DP05_0037PE 2431.991   1977.670   1.230 0.219097    
## DP05_0039PE   61.574     24.124   2.552 0.010849 *  
## DP05_0052PE   52.831     25.403   2.080 0.037811 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1640 on 972 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.1015, Adjusted R-squared:  0.09507 
## F-statistic: 15.69 on 7 and 972 DF,  p-value: < 2.2e-16

summary(fit.ols.wt)

## 
## Call:
## lm(formula = DP02_0070PE ~ DP02_0092PE + DP05_0002PE + DP05_0032PE + 
##     DP05_0033PE + DP05_0037PE + DP05_0039PE + DP05_0052PE, data = nj, 
##     weights = nj$popsize)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -233991  -94389  -48295   35887  866587 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 2559.566   2833.622   0.903   0.3666  
## DP02_0092PE   -1.310      5.695  -0.230   0.8181  
## DP05_0002PE  -18.522     14.378  -1.288   0.1980  
## DP05_0032PE   35.095     28.617   1.226   0.2203  
## DP05_0033PE   19.493     28.639   0.681   0.4963  
## DP05_0037PE 4013.115   1731.359   2.318   0.0207 *
## DP05_0039PE   58.101     29.077   1.998   0.0460 *
## DP05_0052PE   38.307     30.701   1.248   0.2124  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 115900 on 972 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.08979,    Adjusted R-squared:  0.08324 
## F-statistic:  13.7 on 7 and 972 DF,  p-value: < 2.2e-16

3a and 3b Residuals and neighbor specification

resi<-c(lm.morantest(fit.ols, listw=nj.wt2)$estimate[1],
        lm.morantest(fit.ols, listw=nj.wt3)$estimate[1],
        lm.morantest(fit.ols, listw=nj.wt4)$estimate[1],
        lm.morantest(fit.ols, listw=nj.wt5)$estimate[1],
        lm.morantest(fit.ols, listw=nj.wt6)$estimate[1],
        lm.morantest(fit.ols, listw=nj.wtq,zero.policy=T)$estimate[1],
        lm.morantest(fit.ols, listw=nj.wtr,zero.policy=T)$estimate[1])

plot(resi, type="l")

4. SAR Model Examination

Spatial error

fit.err<-errorsarlm(DP02_0070PE~DP02_0092PE+DP05_0002PE+DP05_0032PE+DP05_0033PE+DP05_0037PE+DP05_0039PE+DP05_0052PE,data=nj,
                    listw=nj.wtq,
                    method = "MC")
lm.target.us1 <- lm(fit.err$tary ~ fit.err$tarX - 1)

lmtest::coeftest(lm.target.us1,
                 vcov.=vcovHC(lm.target.us1,
                              type="HC0"))

## 
## t test of coefficients:
## 
##                                            Estimate Std. Error t value
## fit.err$tarXI(x - lambda * WX)(Intercept) 2393.2361  2226.6468  1.0748
## fit.err$tarXI(x - lambda * WX)DP02_0092PE    5.8939     6.2266  0.9466
## fit.err$tarXI(x - lambda * WX)DP05_0002PE  -40.0285    11.2062 -3.5720
## fit.err$tarXI(x - lambda * WX)DP05_0032PE   39.7476    22.2424  1.7870
## fit.err$tarXI(x - lambda * WX)DP05_0033PE   24.9060    22.1665  1.1236
## fit.err$tarXI(x - lambda * WX)DP05_0037PE 2809.7288  3605.9071  0.7792
## fit.err$tarXI(x - lambda * WX)DP05_0039PE   53.7497    23.1182  2.3250
## fit.err$tarXI(x - lambda * WX)DP05_0052PE   43.7047    23.8613  1.8316
##                                            Pr(>|t|)    
## fit.err$tarXI(x - lambda * WX)(Intercept) 0.2827237    
## fit.err$tarXI(x - lambda * WX)DP02_0092PE 0.3441016    
## fit.err$tarXI(x - lambda * WX)DP05_0002PE 0.0003716 ***
## fit.err$tarXI(x - lambda * WX)DP05_0032PE 0.0742458 .  
## fit.err$tarXI(x - lambda * WX)DP05_0033PE 0.2614648    
## fit.err$tarXI(x - lambda * WX)DP05_0037PE 0.4360506    
## fit.err$tarXI(x - lambda * WX)DP05_0039PE 0.0202772 *  
## fit.err$tarXI(x - lambda * WX)DP05_0052PE 0.0673149 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Spatial lag

fit.lag<-lagsarlm(DP02_0070PE~DP02_0092PE+DP05_0002PE+DP05_0032PE+DP05_0033PE+DP05_0037PE+DP05_0039PE+DP05_0052PE,data=nj,
                  listw=nj.wtq,
                  type="lag",
                  method = "MC")
lm.target.us2 <- lm(fit.lag$tary ~ fit.lag$tarX - 1)

lmtest::coeftest(lm.target.us2, 
                 vcov.=vcovHC(lm.target.us2,
                              type="HC0"))

## 
## t test of coefficients:
## 
##                           Estimate Std. Error t value  Pr(>|t|)    
## fit.lag$tarXx(Intercept)  494.3997  2113.1456  0.2340 0.8150623    
## fit.lag$tarXxDP02_0092PE    5.8266     5.0982  1.1429 0.2533784    
## fit.lag$tarXxDP05_0002PE  -40.0791    10.8665 -3.6883 0.0002383 ***
## fit.lag$tarXxDP05_0032PE   46.7611    21.1525  2.2107 0.0272914 *  
## fit.lag$tarXxDP05_0033PE   34.2998    21.0178  1.6319 0.1030159    
## fit.lag$tarXxDP05_0037PE 2768.1121  3687.0058  0.7508 0.4529697    
## fit.lag$tarXxDP05_0039PE   55.9514    21.9337  2.5509 0.0108956 *  
## fit.lag$tarXxDP05_0052PE   49.6697    22.9589  2.1634 0.0307519 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Additional model specifications

Spatial autoregression model

fit.spauto.w<-spautolm(DP02_0070PE~DP02_0092PE+DP05_0002PE+DP05_0032PE+DP05_0033PE+DP05_0037PE+DP05_0039PE+DP05_0052PE,data=nj,
                  listw=nj.wtq, 
                       family="SAR",
                       weights=1/nj$popsize, 
                       method = "MC")

Spatial simultaneous autoregressive lag model

fit.lag.d<-lagsarlm(DP02_0070PE~DP02_0092PE+DP05_0002PE+DP05_0032PE+DP05_0033PE+DP05_0037PE+DP05_0039PE+DP05_0052PE,data=nj,
                  listw=nj.wtq,
                    type="mixed",
                    method = "MC")

Spatial simultaneous autoregressive error model

fit.err.d<-errorsarlm(DP02_0070PE~DP02_0092PE+DP05_0002PE+DP05_0032PE+DP05_0033PE+DP05_0037PE+DP05_0039PE+DP05_0052PE,data=nj,
                  listw=nj.wtq,
                      etype = "emixed", 
                      method = "MC")

AIC Comparison of model fit

AIC(fit.err)

## [1] 17260.41

AIC(fit.lag)

## [1] 17259.4

AIC(fit.spauto.w)

## [1] 17257.12

AIC(fit.lag.d)

## [1] 17253.59

AIC(fit.err.d)

## [1] 17256.68

# the spatial simultaneous auto-regressive lag model has the best model fit.

5. Comparison: OLS & spatial simultaneous auto-regressive lag model

summary(fit.ols.wt)

## 
## Call:
## lm(formula = DP02_0070PE ~ DP02_0092PE + DP05_0002PE + DP05_0032PE + 
##     DP05_0033PE + DP05_0037PE + DP05_0039PE + DP05_0052PE, data = nj, 
##     weights = nj$popsize)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -233991  -94389  -48295   35887  866587 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 2559.566   2833.622   0.903   0.3666  
## DP02_0092PE   -1.310      5.695  -0.230   0.8181  
## DP05_0002PE  -18.522     14.378  -1.288   0.1980  
## DP05_0032PE   35.095     28.617   1.226   0.2203  
## DP05_0033PE   19.493     28.639   0.681   0.4963  
## DP05_0037PE 4013.115   1731.359   2.318   0.0207 *
## DP05_0039PE   58.101     29.077   1.998   0.0460 *
## DP05_0052PE   38.307     30.701   1.248   0.2124  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 115900 on 972 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.08979,    Adjusted R-squared:  0.08324 
## F-statistic:  13.7 on 7 and 972 DF,  p-value: < 2.2e-16

AIC(fit.ols.wt)

## [1] 17493.71

summary(fit.lag.d)

## 
## Call:lagsarlm(formula = DP02_0070PE ~ DP02_0092PE + DP05_0002PE + 
##     DP05_0032PE + DP05_0033PE + DP05_0037PE + DP05_0039PE + DP05_0052PE, 
##     data = nj, listw = nj.wtq, type = "mixed", method = "MC")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -4068.29 -1088.84  -144.74   958.78  7817.60 
## 
## Type: mixed 
## Coefficients: (asymptotic standard errors) 
##                   Estimate Std. Error z value  Pr(>|z|)
## (Intercept)     -4123.3656  4713.6267 -0.8748 0.3816960
## DP02_0092PE         3.7512     8.2874  0.4526 0.6508089
## DP05_0002PE       -43.1045    11.1513 -3.8654 0.0001109
## DP05_0032PE        33.9698    23.3624  1.4540 0.1459361
## DP05_0033PE        33.5597    24.1105  1.3919 0.1639485
## DP05_0037PE      2819.7779  1903.0092  1.4817 0.1384077
## DP05_0039PE        64.2132    24.2953  2.6430 0.0082168
## DP05_0052PE        52.5742    25.4852  2.0629 0.0391194
## lag.DP02_0092PE     6.6622    10.7361  0.6205 0.5348992
## lag.DP05_0002PE    -1.2920    22.3016 -0.0579 0.9538008
## lag.DP05_0032PE    64.8571    47.2413  1.3729 0.1697867
## lag.DP05_0033PE    48.9123    47.5539  1.0286 0.3036849
## lag.DP05_0037PE -3118.8117  3672.1682 -0.8493 0.3957084
## lag.DP05_0039PE    30.4299    48.0004  0.6340 0.5261134
## lag.DP05_0052PE    40.1694    51.5539  0.7792 0.4358784
## 
## Rho: 0.26869, LR test value: 35.857, p-value: 2.1237e-09
## Asymptotic standard error: 0.045152
##     z-value: 5.9508, p-value: 2.6677e-09
## Wald statistic: 35.413, p-value: 2.6677e-09
## 
## Log likelihood: -8609.794 for mixed model
## ML residual variance (sigma squared): 2467800, (sigma: 1570.9)
## Number of observations: 980 
## Number of parameters estimated: 17 
## AIC: 17254, (AIC for lm: 17287)
## LM test for residual autocorrelation
## test value: 16.417, p-value: 5.082e-05

# Outcome: 

         # DP02_0070PE   Percent with disability, total Population

# Predictors: 
      
         # 1 DP02_0092PE  Percent foreign-born, total Population
         # 2 DP05_0002PE  Percent male, total Population
         # 4 DP05_0032PE  Percent white, total Population, one race
         # 5 DP05_0033PE  percent black, total population, one race
         # 6 DP05_0037PE  Percent indigenous, total Population, one race
         # 7 DP05_0039PE  Percent Asian, total Population, one race
         # 8 DP05_0052PE  Percent other race, total Population, one race

Remarks:

The OLS model shows that the New Jersey counties of Bergen, Hudson, Essex, Union, Middlesex, and Monmouth the percent of individuals who are American and Alaskan Indigenous (AIAN) and the percent of individuals who are Asian, are significant positive predictors of the percent of individuals disabled.

The spatial simultaneous auto-regressive lag model shows similar results however, in addition the percent AIAN and Asian, the percent of individuals who are male is also a significant negative predictor of the percent of individuals within these counties that are disabled.

The AIC for the OLS model is 17493.71 and the AIC for SAR lag model is 17253.

Spatial autoregressive (SAR) model

Jairemy Edwards

9/27/2021