library(haven)
library(foreign)
library(readr)
library(dplyr)
library(ggplot2)
library(broom)
library(car)
library(MASS)
library(lmtest)
library(zoo)
library(nortest)
library(plotrix)
library(scales)
library(tableone)
library(Weighted.Desc.Stat)
library(mitools)
library(survey)
library(VGAM)
library(stargazer)
library(sandwich)
library(pastecs)
library(muhaz)
library(ggpubr)
library(survminer)
library(eha)
library(reshape2)
library(data.table)
library(magrittr)
library(tidyverse)
library(sjmisc)
library(sjPlot)
library(sjmisc)
library(sjlabelled)
library(weights)
library(GGally)
library(tigris)
library(RColorBrewer)
library(patchwork)
library(tidycensus)
library(censusapi)
library(spdep)
library(classInt)
library(sf)
library(spatialreg)
options(tigris_class = "sf")
nj<-(get_acs(geography = "tract",
state="nj",
county = c("003", "017" , "013" , "039" , "023" , "025"),
year = 2015,
variables=c("DP02_0070PE","DP05_0032PE","DP05_0033PE", "DP05_0037PE", "DP05_0039PE", "DP05_0052PE", "DP02_0066E", "DP05_0002PE", "DP02_0092PE", "DP05_0001PE"),
geometry = T,
output = "wide"))
## Getting data from the 2011-2015 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
nj$county<-substr(nj$GEOID, 1, 6)
nj.nbr<-(poly2nb(nj, queen=F))
nj.wtr<-nb2listw(nj.nbr, zero.policy=T)
nj.nbq<-poly2nb(nj, queen=T)
nj.wtq<-nb2listw(nj.nbr, style="W", zero.policy=T)
nj.nb6<-knearneigh(st_centroid(nj), k=6)
## Warning in st_centroid.sf(nj): st_centroid assumes attributes are constant over
## geometries of x
nj.nb6<-knn2nb(nj.nb6)
nj.wt6<-nb2listw(nj.nb6, style="W")
nj.nb5<-knearneigh(st_centroid(nj), k=5)
## Warning in st_centroid.sf(nj): st_centroid assumes attributes are constant over
## geometries of x
nj.nb5<-knn2nb(nj.nb5)
nj.wt5<-nb2listw(nj.nb5, style="W")
nj.nb4<-knearneigh(st_centroid(nj), k=4)
## Warning in st_centroid.sf(nj): st_centroid assumes attributes are constant over
## geometries of x
nj.nb4<-knn2nb(nj.nb4)
nj.wt4<-nb2listw(nj.nb4, style="W")
nj.nb3<-knearneigh(st_centroid(nj), k=3)
## Warning in st_centroid.sf(nj): st_centroid assumes attributes are constant over
## geometries of x
nj.nb3<-knn2nb(nj.nb3)
nj.wt3<-nb2listw(nj.nb3,style="W")
nj.nb2<-knearneigh(st_centroid(nj) , k=2)
## Warning in st_centroid.sf(nj): st_centroid assumes attributes are constant over
## geometries of x
nj.nb2<-knn2nb(nj.nb2)
nj.wt2<-nb2listw(nj.nb2,style="W")
# Outcome:
# DP02_0070PE Percent with disability, total Population
# Predictors:
# 1 DP02_0092PE Percent foreign-born, total Population
# 2 DP05_0002PE Percent male, total Population
# 4 DP05_0032PE Percent white, total Population, one race
# 5 DP05_0033PE percent black, total population, one race
# 6 DP05_0037PE Percent indigenous, total Population, one race
# 7 DP05_0039PE Percent Asian, total Population, one race
# 8 DP05_0052PE Percent other race, total Population, one race
# * DP05_0001PE total population
#construct the weight for the model, based on the population size of the county
nj$popsize<-(nj$DP05_0001PE)/1
summary(nj$popsize)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 3250 4347 4469 5561 12970
# OLS and weighted OLS regression models
fit.ols<-lm(DP02_0070PE~DP02_0092PE+DP05_0002PE+DP05_0032PE+DP05_0033PE+DP05_0037PE+DP05_0039PE+DP05_0052PE,data=nj)
fit.ols.wt<-lm(DP02_0070PE~DP02_0092PE+DP05_0002PE+DP05_0032PE+DP05_0033PE+DP05_0037PE+DP05_0039PE+DP05_0052PE,data=nj, weights=nj$popsize)
summary(fit.ols)
##
## Call:
## lm(formula = DP02_0070PE ~ DP02_0092PE + DP05_0002PE + DP05_0032PE +
## DP05_0033PE + DP05_0037PE + DP05_0039PE + DP05_0052PE, data = nj)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3763.0 -1129.8 -124.3 1035.0 8230.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1395.763 2419.619 0.577 0.564173
## DP02_0092PE 6.652 5.353 1.243 0.214321
## DP05_0002PE -42.320 11.151 -3.795 0.000157 ***
## DP05_0032PE 52.854 23.608 2.239 0.025393 *
## DP05_0033PE 35.312 23.671 1.492 0.136081
## DP05_0037PE 2431.991 1977.670 1.230 0.219097
## DP05_0039PE 61.574 24.124 2.552 0.010849 *
## DP05_0052PE 52.831 25.403 2.080 0.037811 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1640 on 972 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.1015, Adjusted R-squared: 0.09507
## F-statistic: 15.69 on 7 and 972 DF, p-value: < 2.2e-16
summary(fit.ols.wt)
##
## Call:
## lm(formula = DP02_0070PE ~ DP02_0092PE + DP05_0002PE + DP05_0032PE +
## DP05_0033PE + DP05_0037PE + DP05_0039PE + DP05_0052PE, data = nj,
## weights = nj$popsize)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -233991 -94389 -48295 35887 866587
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2559.566 2833.622 0.903 0.3666
## DP02_0092PE -1.310 5.695 -0.230 0.8181
## DP05_0002PE -18.522 14.378 -1.288 0.1980
## DP05_0032PE 35.095 28.617 1.226 0.2203
## DP05_0033PE 19.493 28.639 0.681 0.4963
## DP05_0037PE 4013.115 1731.359 2.318 0.0207 *
## DP05_0039PE 58.101 29.077 1.998 0.0460 *
## DP05_0052PE 38.307 30.701 1.248 0.2124
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 115900 on 972 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.08979, Adjusted R-squared: 0.08324
## F-statistic: 13.7 on 7 and 972 DF, p-value: < 2.2e-16
resi<-c(lm.morantest(fit.ols, listw=nj.wt2)$estimate[1],
lm.morantest(fit.ols, listw=nj.wt3)$estimate[1],
lm.morantest(fit.ols, listw=nj.wt4)$estimate[1],
lm.morantest(fit.ols, listw=nj.wt5)$estimate[1],
lm.morantest(fit.ols, listw=nj.wt6)$estimate[1],
lm.morantest(fit.ols, listw=nj.wtq,zero.policy=T)$estimate[1],
lm.morantest(fit.ols, listw=nj.wtr,zero.policy=T)$estimate[1])
plot(resi, type="l")
fit.err<-errorsarlm(DP02_0070PE~DP02_0092PE+DP05_0002PE+DP05_0032PE+DP05_0033PE+DP05_0037PE+DP05_0039PE+DP05_0052PE,data=nj,
listw=nj.wtq,
method = "MC")
lm.target.us1 <- lm(fit.err$tary ~ fit.err$tarX - 1)
lmtest::coeftest(lm.target.us1,
vcov.=vcovHC(lm.target.us1,
type="HC0"))
##
## t test of coefficients:
##
## Estimate Std. Error t value
## fit.err$tarXI(x - lambda * WX)(Intercept) 2393.2361 2226.6468 1.0748
## fit.err$tarXI(x - lambda * WX)DP02_0092PE 5.8939 6.2266 0.9466
## fit.err$tarXI(x - lambda * WX)DP05_0002PE -40.0285 11.2062 -3.5720
## fit.err$tarXI(x - lambda * WX)DP05_0032PE 39.7476 22.2424 1.7870
## fit.err$tarXI(x - lambda * WX)DP05_0033PE 24.9060 22.1665 1.1236
## fit.err$tarXI(x - lambda * WX)DP05_0037PE 2809.7288 3605.9071 0.7792
## fit.err$tarXI(x - lambda * WX)DP05_0039PE 53.7497 23.1182 2.3250
## fit.err$tarXI(x - lambda * WX)DP05_0052PE 43.7047 23.8613 1.8316
## Pr(>|t|)
## fit.err$tarXI(x - lambda * WX)(Intercept) 0.2827237
## fit.err$tarXI(x - lambda * WX)DP02_0092PE 0.3441016
## fit.err$tarXI(x - lambda * WX)DP05_0002PE 0.0003716 ***
## fit.err$tarXI(x - lambda * WX)DP05_0032PE 0.0742458 .
## fit.err$tarXI(x - lambda * WX)DP05_0033PE 0.2614648
## fit.err$tarXI(x - lambda * WX)DP05_0037PE 0.4360506
## fit.err$tarXI(x - lambda * WX)DP05_0039PE 0.0202772 *
## fit.err$tarXI(x - lambda * WX)DP05_0052PE 0.0673149 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit.lag<-lagsarlm(DP02_0070PE~DP02_0092PE+DP05_0002PE+DP05_0032PE+DP05_0033PE+DP05_0037PE+DP05_0039PE+DP05_0052PE,data=nj,
listw=nj.wtq,
type="lag",
method = "MC")
lm.target.us2 <- lm(fit.lag$tary ~ fit.lag$tarX - 1)
lmtest::coeftest(lm.target.us2,
vcov.=vcovHC(lm.target.us2,
type="HC0"))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## fit.lag$tarXx(Intercept) 494.3997 2113.1456 0.2340 0.8150623
## fit.lag$tarXxDP02_0092PE 5.8266 5.0982 1.1429 0.2533784
## fit.lag$tarXxDP05_0002PE -40.0791 10.8665 -3.6883 0.0002383 ***
## fit.lag$tarXxDP05_0032PE 46.7611 21.1525 2.2107 0.0272914 *
## fit.lag$tarXxDP05_0033PE 34.2998 21.0178 1.6319 0.1030159
## fit.lag$tarXxDP05_0037PE 2768.1121 3687.0058 0.7508 0.4529697
## fit.lag$tarXxDP05_0039PE 55.9514 21.9337 2.5509 0.0108956 *
## fit.lag$tarXxDP05_0052PE 49.6697 22.9589 2.1634 0.0307519 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit.spauto.w<-spautolm(DP02_0070PE~DP02_0092PE+DP05_0002PE+DP05_0032PE+DP05_0033PE+DP05_0037PE+DP05_0039PE+DP05_0052PE,data=nj,
listw=nj.wtq,
family="SAR",
weights=1/nj$popsize,
method = "MC")
fit.lag.d<-lagsarlm(DP02_0070PE~DP02_0092PE+DP05_0002PE+DP05_0032PE+DP05_0033PE+DP05_0037PE+DP05_0039PE+DP05_0052PE,data=nj,
listw=nj.wtq,
type="mixed",
method = "MC")
fit.err.d<-errorsarlm(DP02_0070PE~DP02_0092PE+DP05_0002PE+DP05_0032PE+DP05_0033PE+DP05_0037PE+DP05_0039PE+DP05_0052PE,data=nj,
listw=nj.wtq,
etype = "emixed",
method = "MC")
AIC(fit.err)
## [1] 17260.41
AIC(fit.lag)
## [1] 17259.4
AIC(fit.spauto.w)
## [1] 17257.12
AIC(fit.lag.d)
## [1] 17253.59
AIC(fit.err.d)
## [1] 17256.68
# the spatial simultaneous auto-regressive lag model has the best model fit.
summary(fit.ols.wt)
##
## Call:
## lm(formula = DP02_0070PE ~ DP02_0092PE + DP05_0002PE + DP05_0032PE +
## DP05_0033PE + DP05_0037PE + DP05_0039PE + DP05_0052PE, data = nj,
## weights = nj$popsize)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -233991 -94389 -48295 35887 866587
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2559.566 2833.622 0.903 0.3666
## DP02_0092PE -1.310 5.695 -0.230 0.8181
## DP05_0002PE -18.522 14.378 -1.288 0.1980
## DP05_0032PE 35.095 28.617 1.226 0.2203
## DP05_0033PE 19.493 28.639 0.681 0.4963
## DP05_0037PE 4013.115 1731.359 2.318 0.0207 *
## DP05_0039PE 58.101 29.077 1.998 0.0460 *
## DP05_0052PE 38.307 30.701 1.248 0.2124
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 115900 on 972 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.08979, Adjusted R-squared: 0.08324
## F-statistic: 13.7 on 7 and 972 DF, p-value: < 2.2e-16
AIC(fit.ols.wt)
## [1] 17493.71
summary(fit.lag.d)
##
## Call:lagsarlm(formula = DP02_0070PE ~ DP02_0092PE + DP05_0002PE +
## DP05_0032PE + DP05_0033PE + DP05_0037PE + DP05_0039PE + DP05_0052PE,
## data = nj, listw = nj.wtq, type = "mixed", method = "MC")
##
## Residuals:
## Min 1Q Median 3Q Max
## -4068.29 -1088.84 -144.74 958.78 7817.60
##
## Type: mixed
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4123.3656 4713.6267 -0.8748 0.3816960
## DP02_0092PE 3.7512 8.2874 0.4526 0.6508089
## DP05_0002PE -43.1045 11.1513 -3.8654 0.0001109
## DP05_0032PE 33.9698 23.3624 1.4540 0.1459361
## DP05_0033PE 33.5597 24.1105 1.3919 0.1639485
## DP05_0037PE 2819.7779 1903.0092 1.4817 0.1384077
## DP05_0039PE 64.2132 24.2953 2.6430 0.0082168
## DP05_0052PE 52.5742 25.4852 2.0629 0.0391194
## lag.DP02_0092PE 6.6622 10.7361 0.6205 0.5348992
## lag.DP05_0002PE -1.2920 22.3016 -0.0579 0.9538008
## lag.DP05_0032PE 64.8571 47.2413 1.3729 0.1697867
## lag.DP05_0033PE 48.9123 47.5539 1.0286 0.3036849
## lag.DP05_0037PE -3118.8117 3672.1682 -0.8493 0.3957084
## lag.DP05_0039PE 30.4299 48.0004 0.6340 0.5261134
## lag.DP05_0052PE 40.1694 51.5539 0.7792 0.4358784
##
## Rho: 0.26869, LR test value: 35.857, p-value: 2.1237e-09
## Asymptotic standard error: 0.045152
## z-value: 5.9508, p-value: 2.6677e-09
## Wald statistic: 35.413, p-value: 2.6677e-09
##
## Log likelihood: -8609.794 for mixed model
## ML residual variance (sigma squared): 2467800, (sigma: 1570.9)
## Number of observations: 980
## Number of parameters estimated: 17
## AIC: 17254, (AIC for lm: 17287)
## LM test for residual autocorrelation
## test value: 16.417, p-value: 5.082e-05
# Outcome:
# DP02_0070PE Percent with disability, total Population
# Predictors:
# 1 DP02_0092PE Percent foreign-born, total Population
# 2 DP05_0002PE Percent male, total Population
# 4 DP05_0032PE Percent white, total Population, one race
# 5 DP05_0033PE percent black, total population, one race
# 6 DP05_0037PE Percent indigenous, total Population, one race
# 7 DP05_0039PE Percent Asian, total Population, one race
# 8 DP05_0052PE Percent other race, total Population, one race
The OLS model shows that the New Jersey counties of Bergen, Hudson, Essex, Union, Middlesex, and Monmouth the percent of individuals who are American and Alaskan Indigenous (AIAN) and the percent of individuals who are Asian, are significant positive predictors of the percent of individuals disabled.
The spatial simultaneous auto-regressive lag model shows similar results however, in addition the percent AIAN and Asian, the percent of individuals who are male is also a significant negative predictor of the percent of individuals within these counties that are disabled.
The AIC for the OLS model is 17493.71 and the AIC for SAR lag model is 17253.