library(haven)
library(foreign) 
library(readr)
library(dplyr)
library(ggplot2)
library(broom)
library(car)
library(MASS) 
library(lmtest)
library(zoo)
library(nortest)
library(plotrix)
library(scales)
library(tableone)
library(Weighted.Desc.Stat)
library(mitools)
library(survey)
library(VGAM)
library(stargazer)
library(sandwich)
library(pastecs)
library(muhaz)
library(ggpubr)
library(survminer)
library(eha)
library(reshape2)
library(data.table)
library(magrittr)
library(tidyverse)
library(sjmisc)
library(sjPlot)
library(sjmisc)
library(sjlabelled)
library(weights)
library(GGally)
library(tigris)
library(RColorBrewer)
library(patchwork)
library(tidycensus)
library(censusapi)
library(spdep)
library(classInt)
library(sf)
library(spatialreg)

Data Extraction

options(tigris_class = "sf")

nj<-(get_acs(geography = "county",
                year = 2015,
                variables=c("DP02_0071PE","DP05_0067PE","DP05_0069PE", "DP05_0068PE", "DP02_0092PE", "DP02_0066PE", "DP03_0119PE", "DP05_0032PE", "DP05_0033PE", "DP05_0003PE","DP05_0001PE"),
                geometry = T,
                output = "wide"))

## Getting data from the 2011-2015 5-year ACS

## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.

## Using the ACS Data Profile

nj$county<-substr(nj$GEOID, 1, 6)

# Outcome: 

         # DP02_0071PE   Percent with disability, total Population

# Predictors: 
         # DP05_0067PE  Percent Mexican
         # DP05_0069PE  Percent Cuban
         # DP05_0068PE  Percent Puerto Rican
         # DP02_0092PE  Percent foreign-born, total Population
         # DP02_0066PE  Percent hs or more
         # DP03_0119PE  income below poverty threshold
         # DP05_0032PE  Percent white, total Population, one race
         # DP05_0033PE  percent black, total population, one race
         # DP05_0003PE  Percent female, total Population
         

         # * DP05_0001PE  total population

Form neighbors and weight matrix

nj.nbr<-(poly2nb(nj, queen=F))
nj.wtr<-nb2listw(nj.nbr, zero.policy=T)

nj.nbq<-poly2nb(nj, queen=T)
nj.wtq<-nb2listw(nj.nbr, style="W", zero.policy=T)

nj.nb6<-knearneigh(st_centroid(nj), k=6)

## Warning in st_centroid.sf(nj): st_centroid assumes attributes are constant over
## geometries of x

nj.nb6<-knn2nb(nj.nb6)
nj.wt6<-nb2listw(nj.nb6, style="W")

nj.nb5<-knearneigh(st_centroid(nj), k=5)

## Warning in st_centroid.sf(nj): st_centroid assumes attributes are constant over
## geometries of x

nj.nb5<-knn2nb(nj.nb5)
nj.wt5<-nb2listw(nj.nb5, style="W")

nj.nb4<-knearneigh(st_centroid(nj), k=4)

## Warning in st_centroid.sf(nj): st_centroid assumes attributes are constant over
## geometries of x

nj.nb4<-knn2nb(nj.nb4)
nj.wt4<-nb2listw(nj.nb4, style="W")

nj.nb3<-knearneigh(st_centroid(nj), k=3)

## Warning in st_centroid.sf(nj): st_centroid assumes attributes are constant over
## geometries of x

nj.nb3<-knn2nb(nj.nb3)
nj.wt3<-nb2listw(nj.nb3,style="W")

nj.nb2<-knearneigh(st_centroid(nj) , k=2)

## Warning in st_centroid.sf(nj): st_centroid assumes attributes are constant over
## geometries of x

nj.nb2<-knn2nb(nj.nb2)
nj.wt2<-nb2listw(nj.nb2,style="W")

Moran’s I

moran.test(na.omit(nj$DP02_0071PE), 
           listw=nj.wt2)

## 
##  Moran I test under randomisation
## 
## data:  na.omit(nj$DP02_0071PE)  
## weights: nj.wt2 
## omitted: 291, 292, 293, 294, 295, 296, 701, 702, 703, 948, 987, 1028, 1443, 1444, 1450, 1451, 1459, 1460, 1470, 1471, 1483, 1484, 1503, 1504, 1516, 1517, 1524, 1525, 1536, 1537, 1619, 1620, 1627, 1628, 1629, 1632, 1633, 1634, 1643, 1644, 1645, 1656, 1750, 1751, 1758, 1759, 1766, 1767, 1773, 1774, 1781, 1782, 1794, 1795, 1922, 2246, 2247, 2260, 2261, 2278, 2532, 2634, 2635, 2636, 2869, 2870, 2875, 2876, 2888, 2889, 2894, 2895, 2940, 3063, 3064, 3079, 3080, 3151   
## 
## Moran I statistic standard deviate = 38.709, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.6407010231     -0.0003183699      0.0002742284

moran.test(nj$DP05_0067PE, 
           listw=nj.wt2)

## 
##  Moran I test under randomisation
## 
## data:  nj$DP05_0067PE  
## weights: nj.wt2    
## 
## Moran I statistic standard deviate = 52.006, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.8486206440     -0.0003106555      0.0002664672

moran.test(nj$DP05_0069PE, 
           listw=nj.wt2)

## 
##  Moran I test under randomisation
## 
## data:  nj$DP05_0069PE  
## weights: nj.wt2    
## 
## Moran I statistic standard deviate = 40.475, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.4749027844     -0.0003106555      0.0001378492

moran.test(nj$DP05_0068PE, 
           listw=nj.wt2)

## 
##  Moran I test under randomisation
## 
## data:  nj$DP05_0068PE  
## weights: nj.wt2    
## 
## Moran I statistic standard deviate = 61.252, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.9964615716     -0.0003106555      0.0002648241

moran.test(na.omit(nj$DP02_0092PE), 
           listw=nj.wt2)

## 
##  Moran I test under randomisation
## 
## data:  na.omit(nj$DP02_0092PE)  
## weights: nj.wt2 
## omitted: 291, 292, 293, 294, 295, 296, 701, 702, 703, 948, 987, 1028, 1443, 1444, 1450, 1451, 1459, 1460, 1470, 1471, 1483, 1484, 1503, 1504, 1516, 1517, 1524, 1525, 1536, 1537, 1619, 1620, 1627, 1628, 1629, 1632, 1633, 1634, 1643, 1644, 1645, 1656, 1750, 1751, 1758, 1759, 1766, 1767, 1773, 1774, 1781, 1782, 1794, 1795, 1922, 2246, 2247, 2260, 2261, 2278, 2532, 2634, 2635, 2636, 2869, 2870, 2875, 2876, 2888, 2889, 2894, 2895, 2940, 3063, 3064, 3079, 3080, 3151   
## 
## Moran I statistic standard deviate = 41.002, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.6775478004     -0.0003183699      0.0002733177

moran.test(na.omit(nj$DP02_0066PE), 
           listw=nj.wt2)

## 
##  Moran I test under randomisation
## 
## data:  na.omit(nj$DP02_0066PE)  
## weights: nj.wt2 
## omitted: 291, 292, 293, 294, 295, 296, 701, 702, 703, 948, 987, 1028, 1443, 1444, 1450, 1451, 1459, 1460, 1470, 1471, 1483, 1484, 1503, 1504, 1516, 1517, 1524, 1525, 1536, 1537, 1619, 1620, 1627, 1628, 1629, 1632, 1633, 1634, 1643, 1644, 1645, 1656, 1750, 1751, 1758, 1759, 1766, 1767, 1773, 1774, 1781, 1782, 1794, 1795, 1922, 2246, 2247, 2260, 2261, 2278, 2532, 2634, 2635, 2636, 2869, 2870, 2875, 2876, 2888, 2889, 2894, 2895, 2940, 3063, 3064, 3079, 3080, 3151   
## 
## Moran I statistic standard deviate = 39.99, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.6618508975     -0.0003183699      0.0002741825

moran.test(nj$DP03_0119PE, 
           listw=nj.wt2)

## 
##  Moran I test under randomisation
## 
## data:  nj$DP03_0119PE  
## weights: nj.wt2    
## 
## Moran I statistic standard deviate = 45.954, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.7507079467     -0.0003106555      0.0002670904

moran.test(nj$DP05_0032PE, 
           listw=nj.wt2)

## 
##  Moran I test under randomisation
## 
## data:  nj$DP05_0032PE  
## weights: nj.wt2    
## 
## Moran I statistic standard deviate = 43.924, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.7182195556     -0.0003106555      0.0002675964

moran.test(nj$DP05_0033PE, 
           listw=nj.wt2)

## 
##  Moran I test under randomisation
## 
## data:  nj$DP05_0033PE  
## weights: nj.wt2    
## 
## Moran I statistic standard deviate = 49.403, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.8075108570     -0.0003106555      0.0002673747

moran.test(nj$DP05_0003PE, 
           listw=nj.wt2)

## 
##  Moran I test under randomisation
## 
## data:  nj$DP05_0003PE  
## weights: nj.wt2    
## 
## Moran I statistic standard deviate = 9.712, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.1582314705     -0.0003106555      0.0002664830

Descriptive statistics

descstats<-cbind(nj$DP02_0071PE,nj$DP05_0067PE, nj$DP05_0069PE,nj$DP05_0068PE,nj$DP02_0092PE,nj$DP02_0066PE,nj$DP03_0119PE,nj$DP05_0032PE,nj$DP05_0033PE,nj$DP05_0003PE, nj$DP05_0001PE)
options(scipen=10)
options(digits=5)
stat.desc(descstats,basic=F)

##                     V1        V2       V3        V4       V5        V6       V7
## median       15.300000   2.20000 0.000000   0.20000  2.60000 86.900000 11.60000
## mean         15.669319   6.50000 0.139752   2.85720  4.62432 85.429726 13.02087
## SE.mean       0.078389   0.20995 0.013108   0.26376  0.10105  0.118473  0.13716
## CI.mean.0.95  0.153699   0.41164 0.025702   0.51715  0.19814  0.232293  0.26892
## var          19.307024 141.93021 0.553294 224.00554 32.08499 44.100829 60.57374
## std.dev       4.393976  11.91345 0.743837  14.96681  5.66436  6.640845  7.78291
## coef.var      0.280419   1.83284 5.322567   5.23827  1.22491  0.077735  0.59773
##                     V8        V9       V10              V11
## median        89.70000   2.30000 50.500000        26035.000
## mean          83.20109   8.96553 49.986708        99409.346
## SE.mean        0.29476   0.25260  0.042270         5627.019
## CI.mean.0.95   0.57794   0.49526  0.082879        11032.903
## var          279.77151 205.45024  5.753355 101955972782.255
## std.dev       16.72637  14.33354  2.398615       319305.454
## coef.var       0.20104   1.59874  0.047985            3.212

Fit OLS Model

#construct the weight for the model, based on the population size of the county
nj$popsize<-(nj$DP05_0001PE)/1

summary(nj$popsize)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##       85    11218    26035    99409    66430 10038388

# OLS and weighted OLS regression models
fit.ols<-lm(DP02_0071PE~DP05_0067PE+DP05_0069PE+DP05_0068PE+DP02_0092PE+DP02_0066PE+DP03_0119PE+DP05_0032PE+DP05_0033PE+DP05_0003PE,data=nj)

fit.ols.wt<-lm(DP02_0071PE~DP05_0067PE+DP05_0069PE+DP05_0068PE+DP02_0092PE+DP02_0066PE+DP03_0119PE+DP05_0032PE+DP05_0033PE+DP05_0003PE,data=nj, weights=nj$popsize)

summary(fit.ols)

## 
## Call:
## lm(formula = DP02_0071PE ~ DP05_0067PE + DP05_0069PE + DP05_0068PE + 
##     DP02_0092PE + DP02_0066PE + DP03_0119PE + DP05_0032PE + DP05_0033PE + 
##     DP05_0003PE, data = nj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.015  -1.922  -0.279   1.653  14.908 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 39.23826    1.79438   21.87  < 2e-16 ***
## DP05_0067PE -0.05639    0.00687   -8.21  3.1e-16 ***
## DP05_0069PE  0.14727    0.07841    1.88    0.060 .  
## DP05_0068PE  0.08084    0.04726    1.71    0.087 .  
## DP02_0092PE -0.26824    0.01526  -17.58  < 2e-16 ***
## DP02_0066PE -0.32707    0.01404  -23.30  < 2e-16 ***
## DP03_0119PE  0.22056    0.01613   13.68  < 2e-16 ***
## DP05_0032PE  0.05198    0.00652    7.98  2.1e-15 ***
## DP05_0033PE -0.01523    0.00713   -2.13    0.033 *  
## DP05_0003PE -0.01944    0.02359   -0.82    0.410    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.06 on 3132 degrees of freedom
##   (78 observations deleted due to missingness)
## Multiple R-squared:  0.517,  Adjusted R-squared:  0.516 
## F-statistic:  373 on 9 and 3132 DF,  p-value: <2e-16

summary(fit.ols.wt)

## 
## Call:
## lm(formula = DP02_0071PE ~ DP05_0067PE + DP05_0069PE + DP05_0068PE + 
##     DP02_0092PE + DP02_0066PE + DP03_0119PE + DP05_0032PE + DP05_0033PE + 
##     DP05_0003PE, data = nj, weights = nj$popsize)
## 
## Weighted Residuals:
##    Min     1Q Median     3Q    Max 
##  -3762   -257     40    344   4254 
## 
## Coefficients:
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) 37.04340    1.84293   20.10   < 2e-16 ***
## DP05_0067PE -0.09452    0.00390  -24.22   < 2e-16 ***
## DP05_0069PE  0.06981    0.01447    4.82 0.0000015 ***
## DP05_0068PE -0.04678    0.01482   -3.16    0.0016 ** 
## DP02_0092PE -0.19463    0.00716  -27.17   < 2e-16 ***
## DP02_0066PE -0.29297    0.01234  -23.74   < 2e-16 ***
## DP03_0119PE  0.26159    0.01448   18.07   < 2e-16 ***
## DP05_0032PE -0.00886    0.00531   -1.67    0.0953 .  
## DP05_0033PE -0.07459    0.00579  -12.87   < 2e-16 ***
## DP05_0003PE  0.05801    0.03160    1.84    0.0665 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 616 on 3132 degrees of freedom
##   (78 observations deleted due to missingness)
## Multiple R-squared:  0.681,  Adjusted R-squared:  0.68 
## F-statistic:  744 on 9 and 3132 DF,  p-value: <2e-16

Residuals and neighbor specification

resi<-c(lm.morantest(fit.ols, listw=nj.wt2)$estimate[1],
        lm.morantest(fit.ols, listw=nj.wt3)$estimate[1],
        lm.morantest(fit.ols, listw=nj.wt4)$estimate[1],
        lm.morantest(fit.ols, listw=nj.wt5)$estimate[1],
        lm.morantest(fit.ols, listw=nj.wt6)$estimate[1],
        lm.morantest(fit.ols, listw=nj.wtq,zero.policy=T)$estimate[1],
        lm.morantest(fit.ols, listw=nj.wtr,zero.policy=T)$estimate[1])

plot(resi, type="l")

Spatial simultaneous autoregressive error model

fit.err.d<-errorsarlm(DP02_0071PE~DP05_0067PE+DP05_0069PE+DP05_0068PE+DP02_0092PE+DP02_0066PE+DP03_0119PE+DP05_0032PE+DP05_0033PE+DP05_0003PE,data=nj,
                    listw=nj.wt2,
                      etype = "emixed", 
                      method = "MC")
summary(fit.err.d)

## 
## Call:errorsarlm(formula = DP02_0071PE ~ DP05_0067PE + DP05_0069PE + 
##     DP05_0068PE + DP02_0092PE + DP02_0066PE + DP03_0119PE + DP05_0032PE + 
##     DP05_0033PE + DP05_0003PE, data = nj, listw = nj.wt2, etype = "emixed", 
##     method = "MC")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -15.90255  -1.49551  -0.13365   1.35447  13.35846 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                   Estimate Std. Error  z value        Pr(>|z|)
## (Intercept)     40.1018622  3.2366482  12.3899       < 2.2e-16
## DP05_0067PE     -0.1119851  0.0097888 -11.4401       < 2.2e-16
## DP05_0069PE      0.0907437  0.0755334   1.2014         0.22961
## DP05_0068PE      0.0519269  0.0469215   1.1067         0.26843
## DP02_0092PE     -0.2657966  0.0155123 -17.1346       < 2.2e-16
## DP02_0066PE     -0.3338059  0.0139475 -23.9330       < 2.2e-16
## DP03_0119PE      0.1501763  0.0152915   9.8209       < 2.2e-16
## DP05_0032PE      0.0435295  0.0069890   6.2283 0.0000000004715
## DP05_0033PE      0.0068230  0.0083250   0.8196         0.41246
## DP05_0003PE     -0.0160530  0.0217084  -0.7395         0.45962
## lag.DP05_0067PE  0.0528142  0.0111833   4.7226 0.0000023284699
## lag.DP05_0069PE  0.0498461  0.0988579   0.5042         0.61411
## lag.DP05_0068PE  0.0241567  0.0602187   0.4011         0.68831
## lag.DP02_0092PE  0.0109996  0.0206996   0.5314         0.59515
## lag.DP02_0066PE  0.0076563  0.0185150   0.4135         0.67923
## lag.DP03_0119PE  0.0923555  0.0207887   4.4426 0.0000088887091
## lag.DP05_0032PE  0.0191528  0.0090640   2.1131         0.03459
## lag.DP05_0033PE -0.0186504  0.0105051  -1.7754         0.07584
## lag.DP05_0003PE -0.0468115  0.0291262  -1.6072         0.10801
## 
## Lambda: 0.50929, LR test value: 900.78, p-value: < 2.22e-16
## Approximate (numerical Hessian) standard error: 0.014495
##     z-value: 35.135, p-value: < 2.22e-16
## Wald statistic: 1234.5, p-value: < 2.22e-16
## 
## Log likelihood: -7443.9 for error model
## ML residual variance (sigma squared): 6.0243, (sigma: 2.4544)
## Number of observations: 3142 
## Number of parameters estimated: 21 
## AIC: 14930, (AIC for lm: 15829)

AIC Comparison of model fit

AIC(fit.err.d)

## [1] 14930

AIC(fit.ols.wt)

## [1] 17020

# the spatial simultaneous auto-regressive lag model has the best model fit.

Spatial autoregressive (SAR) model

Jairemy Edwards

9/27/2021

Data Extraction

Form neighbors and weight matrix

Moran’s I

Descriptive statistics

Residuals and neighbor specification

Spatial simultaneous autoregressive error model

AIC Comparison of model fit