Retreiving Area Deprevation Index (ADI) Data
adi_tx <- read_csv("C:/Users/maman/OneDrive/DEM Fall 2021/Spatial Demography/adi-download.zip")
## Multiple files in zip: reading 'TX_2019_ADI_Census Block Group_v3.1.txt'
## New names:
## * `` -> ...1
## Rows: 15811 Columns: 7
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (3): GISJOIN, ADI_NATRANK, ADI_STATERNK
## dbl (4): ...1, V1, FIPS, STATE
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
adi_tract<-adi_tx%>%
mutate(adi_staterank= as.numeric(ADI_STATERNK)) %>%
mutate(adi_nationrank= as.numeric(ADI_NATRANK)) %>%
mutate(trfips = substr(FIPS, 1,11))%>%
group_by(trfips)%>%
summarise(adi_st_avg=mean(adi_staterank, na.rm=T), adi_nat_avg=mean(adi_nationrank, na.rm=T))%>%
ungroup()
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
save(adi_tract, file="tx_adi_tract_avg.Rdata")
Obtaining Census Tract Level Health Data
places_2020 <- read_csv("C:/Users/maman/OneDrive/DEM Spring 2021/DEM 7093 GIS/GIS Data/PLACES__Census_Tract_Data__GIS_Friendly_Format___2020_release.csv")
## Rows: 72337 Columns: 63
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (34): StateAbbr, StateDesc, CountyName, CountyFIPS, TractFIPS, ACCESS2_C...
## dbl (29): TotalPopulation, ACCESS2_CrudePrev, ARTHRITIS_CrudePrev, BINGE_Cru...
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
places_2020$obesity <- places_2020$OBESITY_CrudePrev
places_2020$high_chol <- places_2020$HIGHCHOL_CrudePrev
places_2020$curr_smk <- places_2020$CSMOKING_CrudePrev
places_2020$high_bp <- places_2020$BPHIGH_CrudePrev
places_2020$uninsured <- places_2020$ACCESS2_CrudePrev
places_2020$diabetes <- places_2020$DIABETES_CrudePrev
places_2020$trfips <- places_2020$TractFIPS
places_2020$pop <- places_2020$TotalPopulation
plcs<-places_2020[, c("diabetes","uninsured","high_bp","curr_smk","high_chol","obesity", "trfips", "pop")]
Merge data
library(tidycensus)
library(sf)
## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
options(tigris_class = "sf")
tracts<-get_acs(geography = "tract",
year = 2019,
state = "TX",
county = c("029", "013","255","091","187","493","163","311","325","019","265","171","259"),
variables = c("DP05_0071PE", "DP05_0078PE"),
output ="wide",
geometry = TRUE)%>%
rename(hispanic =DP05_0071PE,
nh_black =DP05_0078PE) %>%
filter(complete.cases(nh_black, hispanic))
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|===================================== | 54%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
tracts$trfips<-substr(tracts$GEOID, 1, 5)
library(dplyr)
dog <- left_join(adi_tract, plcs, by = "trfips")
data_final <- left_join(tracts, dog, by = c("GEOID"="trfips")
)
Creating k nearest neighbor spatial weights
library(spdep)
## Loading required package: sp
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
knn<-knearneigh(x=st_centroid(data_final), k=4)
## Warning in st_centroid.sf(data_final): st_centroid assumes attributes are
## constant over geometries of x
knn4<-knn2nb(knn=knn)
knn4lw<-nb2listw(knn4)
k3<-knearneigh(x=st_centroid(data_final), k=3)
## Warning in st_centroid.sf(data_final): st_centroid assumes attributes are
## constant over geometries of x
knn3<-knn2nb(knn=knn)
knn3lw<-nb2listw(knn3)
2. Define your outcome variable and all predictors
# The predictors used are "uninsured" which is the portion of the population with no health insurance per census tract, "curr_smk" or the rate of current smokers per tract, and "adi_st_avg" that is the average state-wide rank of the area deprivation index corresponding to each census tract.
# The outcome variable is "obesity" which is the obesity rate per census tract.
3. Fit the OLS model for your outcome :
cow <- lm( obesity ~ uninsured + curr_smk + adi_st_avg,
data=data_final)
summary(cow)
##
## Call:
## lm(formula = obesity ~ uninsured + curr_smk + adi_st_avg, data = data_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.0155 -0.8804 0.0995 0.9916 5.7612
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.58630 0.40767 55.40 < 2e-16 ***
## uninsured 0.25024 0.01730 14.46 < 2e-16 ***
## curr_smk 0.38376 0.03733 10.28 < 2e-16 ***
## adi_st_avg 0.32792 0.07419 4.42 1.23e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.648 on 469 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.8909, Adjusted R-squared: 0.8902
## F-statistic: 1276 on 3 and 469 DF, p-value: < 2.2e-16
Test for autocorrelation in residuals with using each neighbor rule
# use lm.moran.test
library(spatialreg)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## as.spam.listw, as_dgRMatrix_listw, as_dsCMatrix_I,
## as_dsCMatrix_IrW, as_dsTMatrix_listw, can.be.simmed, cheb_setup,
## create_WX, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
## errorsarlm, get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, GMargminImage, GMerrorsar,
## griffith_sone, gstsls, Hausman.test, impacts, intImpacts,
## Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
## lextrS, lextrW, lmSLX, LU_prepermutate_setup, LU_setup,
## Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
## mom_calc_int2, moments_setup, powerWeights, sacsarlm,
## SE_classic_setup, SE_interp_setup, SE_whichMin_setup,
## set.ClusterOption, set.coresOption, set.mcOption,
## set.VerboseOption, set.ZeroPolicyOption, similar.listw, spam_setup,
## spam_update_setup, SpatialFiltering, spautolm, spBreg_err,
## spBreg_lag, spBreg_sac, stsls, subgraph_eigenw, trW
lm.morantest(model=cow,
listw = knn4lw)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = obesity ~ uninsured + curr_smk + adi_st_avg, data =
## data_final)
## weights: knn4lw
##
## Moran I statistic standard deviate = 17.147, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.5088104663 -0.0056522600 0.0009001833
lm.morantest(model=cow,
listw = knn3lw)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = obesity ~ uninsured + curr_smk + adi_st_avg, data =
## data_final)
## weights: knn3lw
##
## Moran I statistic standard deviate = 17.147, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.5088104663 -0.0056522600 0.0009001833
Plotting model residuals
#data_final$olsresid<-rstudent(cow)
#data_final<-st_as_sf(data_final)
#data_final%>%
# mutate(rquant=cut(olsresid,
# breaks = quantile(data_final$olsresid,
# p=seq(0,1,length.out = 8)),
# include.lowest = T))%>%
# ggplot(aes(color=rquant, fill=rquant))+
# geom_sf()+
# scale_fill_viridis_d()+
# scale_color_viridis_d()+
# geom_sf(data=metro, fill=NA, color="black")+
# ggtitle("OLS Model Residuals")
Fitting ALternative SAR Models
#Spatial Error model
fit.err<-errorsarlm(obesity ~uninsured+ curr_smk+adi_st_avg,
data=data_final, listw=knn4lw)
summary(fit.err)
##
## Call:errorsarlm(formula = obesity ~ uninsured + curr_smk + adi_st_avg,
## data = data_final, listw = knn4lw)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.260891 -0.595513 0.074728 0.770452 3.388005
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 21.897228 0.427901 51.173 < 2e-16
## uninsured 0.202927 0.020110 10.091 < 2e-16
## curr_smk 0.580234 0.038995 14.880 < 2e-16
## adi_st_avg 0.147725 0.060942 2.424 0.01535
##
## Lambda: 0.72191, LR test value: 230.42, p-value: < 2.22e-16
## Asymptotic standard error: 0.034456
## z-value: 20.951, p-value: < 2.22e-16
## Wald statistic: 438.96, p-value: < 2.22e-16
##
## Log likelihood: -790.1675 for error model
## ML residual variance (sigma squared): 1.4467, (sigma: 1.2028)
## Number of observations: 473
## Number of parameters estimated: 6
## AIC: 1592.3, (AIC for lm: 1820.8)
#Spatial Lag Model
fit.lag<-lagsarlm(obesity ~uninsured+ curr_smk+adi_st_avg,
data=data_final,
listw=knn4lw,
type="lag")
summary(fit.lag)
##
## Call:lagsarlm(formula = obesity ~ uninsured + curr_smk + adi_st_avg,
## data = data_final, listw = knn4lw, type = "lag")
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.08511 -0.76799 0.15634 0.95747 3.79728
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 13.801364 0.939891 14.6840 < 2e-16
## uninsured 0.180247 0.017416 10.3496 < 2e-16
## curr_smk 0.432622 0.033672 12.8481 < 2e-16
## adi_st_avg 0.132201 0.068471 1.9308 0.05351
##
## Rho: 0.29503, LR test value: 85.227, p-value: < 2.22e-16
## Asymptotic standard error: 0.031071
## z-value: 9.4956, p-value: < 2.22e-16
## Wald statistic: 90.167, p-value: < 2.22e-16
##
## Log likelihood: -862.7633 for lag model
## ML residual variance (sigma squared): 2.2093, (sigma: 1.4864)
## Number of observations: 473
## Number of parameters estimated: 6
## AIC: 1737.5, (AIC for lm: 1820.8)
## LM test for residual autocorrelation
## test value: 122.4, p-value: < 2.22e-16
Testing model fit using Lagrange Multiplier Test
library(spatialreg)
lm.LMtests(model = cow,
listw=knn4lw,
test = c("LMerr", "LMlag", "RLMerr", "RLMlag"))
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = obesity ~ uninsured + curr_smk + adi_st_avg, data =
## data_final)
## weights: knn4lw
##
## LMerr = 281.11, df = 1, p-value < 2.2e-16
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = obesity ~ uninsured + curr_smk + adi_st_avg, data =
## data_final)
## weights: knn4lw
##
## LMlag = 88.945, df = 1, p-value < 2.2e-16
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = obesity ~ uninsured + curr_smk + adi_st_avg, data =
## data_final)
## weights: knn4lw
##
## RLMerr = 193.06, df = 1, p-value < 2.2e-16
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = obesity ~ uninsured + curr_smk + adi_st_avg, data =
## data_final)
## weights: knn4lw
##
## RLMlag = 0.89168, df = 1, p-value = 0.345
5. Describe the Results of the best fit SAR Model
# The results from the Lagrange Multiplier Test as well as AIC point to the spatial error model being the "best" choice for these data. In the original OLS model each of the predictor variables were significantly associated with higher rates of obesity. Using the spatial error model, all predictors remained significant. The magnitude of "uninsured" was maintained for both models, however the coefficients for "curr_smk" increased in the spatial error model from 0.38 to 0.58, while "adi_st_avg" decreased from 0.32 to 0.14.