Getting aggregate data from the area resource file
arf<-"https://github.com/coreysparks/data/blob/master/arf2018.Rdata?raw=true"
load(url(arf))
library(spdep)
## Loading required package: sp
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: sf
## Warning: package 'sf' was built under R version 4.1.2
## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1
library(MASS)
library(spatialreg)
## Loading required package: Matrix
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## as.spam.listw, as_dgRMatrix_listw, as_dsCMatrix_I,
## as_dsCMatrix_IrW, as_dsTMatrix_listw, can.be.simmed, cheb_setup,
## create_WX, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
## errorsarlm, get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, GMargminImage, GMerrorsar,
## griffith_sone, gstsls, Hausman.test, impacts, intImpacts,
## Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
## lextrS, lextrW, lmSLX, LU_prepermutate_setup, LU_setup,
## Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
## mom_calc_int2, moments_setup, powerWeights, sacsarlm,
## SE_classic_setup, SE_interp_setup, SE_whichMin_setup,
## set.ClusterOption, set.coresOption, set.mcOption,
## set.VerboseOption, set.ZeroPolicyOption, similar.listw, spam_setup,
## spam_update_setup, SpatialFiltering, spautolm, spBreg_err,
## spBreg_lag, spBreg_sac, stsls, subgraph_eigenw, trW
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x tidyr::pack() masks Matrix::pack()
## x dplyr::select() masks MASS::select()
## x tidyr::unpack() masks Matrix::unpack()
library(ggplot2)
library(dplyr)
arf2018<-arf2018%>%
mutate(cofips=f00004,
coname=f00010,
state = f00011,
rucc= as.factor(f0002013),
isc_hd1416= f1193314,
othr_cvd1416= f1316514,
hpsa16= as.factor(f0978716),
hgh_pov14= f1533414,
prs_pov14= f1249014,
pop_loss15= f1397715,
pop_ovr60= (f0672410+f0672510+f0672610+f0672710+f1164010+f1164110+f1164210+f1164310),
hisp_pop10= f0453610,
hisp_prop= (f0453610/f0453010),
wa_pop16= f1547116,
wa_noins16= f1547216,
wa_unins_rate= (f1547216/f1547116))%>%
select(cofips,coname,state,rucc,isc_hd1416,othr_cvd1416,hpsa16,hgh_pov14,prs_pov14,pop_loss15,pop_ovr60,hisp_pop10,hisp_prop,wa_pop16,wa_noins16,wa_unins_rate) %>%
filter(complete.cases(.)) %>%
as.data.frame()
head(arf2018)
## cofips coname state rucc isc_hd1416 othr_cvd1416 hpsa16 hgh_pov14 prs_pov14
## 1 01001 Autauga 01 02 66 59 2 0 0
## 2 01003 Baldwin 01 03 294 232 2 0 0
## 3 01005 Barbour 01 06 18 68 2 1 1
## 4 01007 Bibb 01 01 15 49 1 0 0
## 5 01009 Blount 01 01 70 91 1 0 0
## 6 01013 Butler 01 06 33 54 2 1 1
## pop_loss15 pop_ovr60 hisp_pop10 hisp_prop wa_pop16 wa_noins16 wa_unins_rate
## 1 0 9323 1310 0.024005424 47077 43075 0.9149903
## 2 0 42580 7992 0.043848243 165740 148007 0.8930071
## 3 0 5708 1387 0.050515351 17997 15745 0.8748680
## 4 0 4240 406 0.017717652 17004 15358 0.9031992
## 5 0 12021 4626 0.080701999 47086 41382 0.8788600
## 6 1 4783 191 0.009118251 15965 14262 0.8933292
summary(arf2018)
## cofips coname state rucc
## Length:2531 Length:2531 Length:2531 06 :550
## Class :character Class :character Class :character 01 :417
## Mode :character Mode :character Mode :character 02 :362
## 07 :346
## 03 :323
## 04 :214
## (Other):319
## isc_hd1416 othr_cvd1416 hpsa16 hgh_pov14
## Min. : 10.0 Min. : 10.0 : 0 Length:2531
## 1st Qu.: 28.0 1st Qu.: 24.0 0: 356 Class :character
## Median : 52.0 Median : 45.0 1: 518 Mode :character
## Mean : 142.4 Mean : 124.9 2:1657
## 3rd Qu.: 112.0 3rd Qu.: 102.0
## Max. :11264.0 Max. :6598.0
##
## prs_pov14 pop_loss15 pop_ovr60 hisp_pop10
## Length:2531 Length:2531 Min. : 1309 Min. : 33
## Class :character Class :character 1st Qu.: 4138 1st Qu.: 402
## Mode :character Mode :character Median : 7700 Median : 1345
## Mean : 22248 Mean : 19796
## 3rd Qu.: 17431 3rd Qu.: 5954
## Max. :1517935 Max. :4687889
##
## hisp_prop wa_pop16 wa_noins16 wa_unins_rate
## Min. :0.003516 Min. : 3181 Min. : 2892 Min. :0.6912
## 1st Qu.:0.016701 1st Qu.: 14224 1st Qu.: 12558 1st Qu.:0.8656
## Median :0.033584 Median : 28675 Median : 25612 Median :0.9015
## Mean :0.078486 Mean : 104531 Mean : 94102 Mean :0.8939
## 3rd Qu.:0.080048 3rd Qu.: 75351 3rd Qu.: 67000 3rd Qu.:0.9315
## Max. :0.957448 Max. :8684851 Max. :7757894 Max. :0.9793
##
Adding shapefiles to data
library(tigris)
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
library(sf)
library(ggplot2)
options(tigris_class="sf")
usco<-counties(cb=T, year=2010)
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|== | 4%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|===================================== | 54%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|=============================================================== | 91%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================= | 94%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
usco$cofips<-substr(usco$GEO_ID, 10, 15)
sts<-states(cb = T, year=2010)
##
|
| | 0%
|
|== | 2%
|
|=== | 5%
|
|===== | 7%
|
|======= | 9%
|
|======== | 12%
|
|========== | 14%
|
|=========== | 16%
|
|============= | 19%
|
|=============== | 21%
|
|================ | 23%
|
|================= | 25%
|
|================== | 25%
|
|=================== | 28%
|
|===================== | 30%
|
|======================= | 32%
|
|======================== | 35%
|
|========================== | 37%
|
|=========================== | 39%
|
|============================= | 41%
|
|=============================== | 44%
|
|================================ | 45%
|
|================================ | 46%
|
|================================== | 48%
|
|=================================== | 51%
|
|===================================== | 53%
|
|====================================== | 54%
|
|======================================= | 55%
|
|======================================== | 58%
|
|========================================== | 60%
|
|=========================================== | 62%
|
|============================================= | 64%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|================================================ | 69%
|
|================================================== | 71%
|
|==================================================== | 74%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|======================================================= | 78%
|
|======================================================== | 81%
|
|========================================================== | 83%
|
|============================================================ | 85%
|
|============================================================= | 87%
|
|=============================================================== | 90%
|
|================================================================ | 92%
|
|================================================================== | 94%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|===================================================================== | 99%
|
|======================================================================| 100%
sts<-st_boundary(sts)%>%
filter(!STATE %in% c("02", "15", "60", "66", "69", "72", "78"))
arf2018_m<-geo_join(usco, arf2018, by_sp="cofips", by_df="cofips",how="left" )
## Warning: We recommend using the dplyr::*_join() family of functions instead.
## Warning: `group_by_()` was deprecated in dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
arf2018_m%>%
filter(!STATE %in% c("02", "15", "60", "66", "69", "72", "78"))%>%
mutate(isc_hd_rate= isc_hd1416/pop_ovr60) %>%
mutate(isc_hd_group = cut(isc_hd_rate, breaks=quantile(isc_hd_rate, p=seq(0,1,length.out = 6), na.rm=T ), include.lowest=T ))%>%
ggplot()+
geom_sf(aes(fill=isc_hd_group, color=NA))+
scale_color_brewer(palette = "Reds")+
scale_fill_brewer(palette = "Reds",na.value = "grey50")+
geom_sf(data=sts, color="black")+
coord_sf(crs = 2163)+
ggtitle(label = "Proportion Ischemic Heart Disease in Adults over 60, 2014-2016")
The predictors used for this model are hisp_prop( the proportion of hispanics in the counties population), pop_loss15 (whether or not the county experienced population loss from censuses 1990-2000 and 2000-2010), hpsa16 (an variable that tells if a county has a shortage of primary care doctors, 0= no short, 1= whole short, and 2= partial short), and hgh_pov (variable that shows if 20% or more of the countie’s residents are poor).
The offset term used for this model is the population of adults age 60+, this was chosen because the outcome variable, incidents of ichemic heart disease, typically manifests in older adults.
library(MASS)
arf2018sub<-filter(arf2018_m, is.na(isc_hd1416)==F)
fit_nb<- glm.nb(isc_hd1416 ~ offset(log(pop_ovr60))+ pop_loss15+ hisp_prop+ hpsa16+ hgh_pov14,
data=arf2018sub)
summary(fit_nb)
##
## Call:
## glm.nb(formula = isc_hd1416 ~ offset(log(pop_ovr60)) + pop_loss15 +
## hisp_prop + hpsa16 + hgh_pov14, data = arf2018sub, init.theta = 17.41658014,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.0610 -0.7273 -0.0856 0.5444 3.7730
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.06401 0.01537 -329.541 < 2e-16 ***
## pop_loss151 0.12716 0.01869 6.802 1.03e-11 ***
## hisp_prop -0.12658 0.04595 -2.754 0.00588 **
## hpsa161 0.05374 0.02049 2.622 0.00874 **
## hpsa162 0.02328 0.01657 1.405 0.15995
## hgh_pov141 0.12909 0.01365 9.459 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(17.4166) family taken to be 1)
##
## Null deviance: 2650.7 on 2494 degrees of freedom
## Residual deviance: 2475.4 on 2489 degrees of freedom
## AIC: 21418
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 17.417
## Std. Err.: 0.640
##
## 2 x log-likelihood: -21404.442
exp(coef(fit_nb))
## (Intercept) pop_loss151 hisp_prop hpsa161 hpsa162 hgh_pov141
## 0.006320145 1.135600560 0.881106805 1.055208875 1.023554516 1.137787417
The models suggests that nearly each indicator has a significant relationship with the outcome of interest. Apart from the hispanic proportion variable, each predictor elevated the risk of ischemic heart disease at varying degrees. Residents in counties that experienced population loss have 13.5% greater odds of ischemic heart disease. Counties with high poverty had 13.8% greater odds and those whose whole area has a shortage of doctors had 5.5%. The results suggest that for each percentage increase in the proportion of hispanics living in the county, the odds of ischemic heart disease decrease by 12%.
scale<-sqrt(fit_nb$deviance/fit_nb$df.residual)
scale
## [1] 0.9972552
1-pchisq(fit_nb$deviance, df = fit_nb$df.residual)
## [1] 0.5731081
By evaluating the ratio of the model’s deviance and residuals along with the goodness of fit statistic, the negative binomial shows no evidence of overdispersion
Testing for residual autocorrelation
library(spdep)
nbs<-knearneigh(coordinates(as(arf2018sub, "Spatial")), k = 4, longlat = T)
nbs<-knn2nb(nbs, sym = T)
us.wt4<-nb2listw(nbs, style = "W")
lm.morantest(fit_nb, listw = us.wt4)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = isc_hd1416 ~ offset(log(pop_ovr60)) +
## pop_loss15 + hisp_prop + hpsa16 + hgh_pov14, data = arf2018sub,
## init.theta = 17.41658014, link = log)
## weights: us.wt4
##
## Moran I statistic standard deviate = 29.15, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 3.875698e-01 -8.722841e-05 1.768521e-04
The value for Moran’s I suggests a notable amount of autocorrelation
arf2018sub<-arf2018sub%>%
filter(!STATEFP %in% c("02", "15", "60", "66", "69", "72", "78"))
nbs<-knearneigh(coordinates(as(arf2018sub, "Spatial")), k = 4, longlat = T)
nbs<-knn2nb(nbs, sym = T)
us.wt4<-nb2listw(nbs, style = "W")
fit_nb$theta
## [1] 17.41658
me.fit<-ME(isc_hd1416 ~ offset(log(pop_ovr60))+ pop_loss15+ hisp_prop+ hpsa16+ hgh_pov14,
data=arf2018sub,
family=negative.binomial(17.41),
listw = us.wt4,
verbose=T,alpha=.05 )
## eV[,2297], I: 0.2370661 ZI: NA, pr(ZI): 0.01
## eV[,25], I: 0.2041732 ZI: NA, pr(ZI): 0.01
## eV[,1], I: 0.1769362 ZI: NA, pr(ZI): 0.01
## eV[,1574], I: 0.1547611 ZI: NA, pr(ZI): 0.01
## eV[,246], I: 0.1348723 ZI: NA, pr(ZI): 0.01
## eV[,10], I: 0.1150892 ZI: NA, pr(ZI): 0.01
## eV[,192], I: 0.09783301 ZI: NA, pr(ZI): 0.01
## eV[,1237], I: 0.08224077 ZI: NA, pr(ZI): 0.01
## eV[,32], I: 0.06765768 ZI: NA, pr(ZI): 0.01
## eV[,2070], I: 0.05307622 ZI: NA, pr(ZI): 0.01
## eV[,65], I: 0.0402291 ZI: NA, pr(ZI): 0.02
## eV[,2061], I: 0.0292802 ZI: NA, pr(ZI): 0.01
## eV[,58], I: 0.02001209 ZI: NA, pr(ZI): 0.05
## eV[,1230], I: 0.01131662 ZI: NA, pr(ZI): 0.18
me.fit
## Eigenvector ZI pr(ZI)
## 0 NA NA 0.01
## 1 2297 NA 0.01
## 2 25 NA 0.01
## 3 1 NA 0.01
## 4 1574 NA 0.01
## 5 246 NA 0.01
## 6 10 NA 0.01
## 7 192 NA 0.01
## 8 1237 NA 0.01
## 9 32 NA 0.01
## 10 2070 NA 0.01
## 11 65 NA 0.02
## 12 2061 NA 0.01
## 13 58 NA 0.05
## 14 1230 NA 0.18
fits<-data.frame(fitted(me.fit))
arf2018sub$me1<-fits$vec1
arf2018sub$me10<-fits$vec10
arf2018sub%>%
ggplot()+
geom_sf(aes(fill=me1))+
scale_fill_viridis_c()+
coord_sf(crs = 2163)+
ggtitle("First Moran Eigenvector")
clean.nb<-glm.nb(isc_hd1416 ~ offset(log(pop_ovr60)) + pop_loss15 + hisp_prop+ hpsa16+ hgh_pov14+ fitted(me.fit), arf2018sub)
summary(clean.nb)
##
## Call:
## glm.nb(formula = isc_hd1416 ~ offset(log(pop_ovr60)) + pop_loss15 +
## hisp_prop + hpsa16 + hgh_pov14 + fitted(me.fit), data = arf2018sub,
## init.theta = 18.69308873, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.2870 -0.7069 -0.0929 0.5567 3.7357
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.07296 0.01528 -332.038 < 2e-16 ***
## pop_loss151 0.11685 0.01836 6.363 1.98e-10 ***
## hisp_prop -0.04735 0.04865 -0.973 0.33038
## hpsa161 0.06162 0.02014 3.059 0.00222 **
## hpsa162 0.02462 0.01629 1.512 0.13057
## hgh_pov141 0.12249 0.01350 9.073 < 2e-16 ***
## fitted(me.fit)vec2297 -0.33569 0.26708 -1.257 0.20880
## fitted(me.fit)vec25 0.79109 0.27165 2.912 0.00359 **
## fitted(me.fit)vec1 1.16425 0.28177 4.132 3.60e-05 ***
## fitted(me.fit)vec1574 -0.62604 0.26935 -2.324 0.02011 *
## fitted(me.fit)vec246 0.63261 0.26257 2.409 0.01598 *
## fitted(me.fit)vec10 -1.72459 0.26994 -6.389 1.67e-10 ***
## fitted(me.fit)vec192 0.66016 0.26876 2.456 0.01404 *
## fitted(me.fit)vec1237 0.34104 0.27112 1.258 0.20842
## fitted(me.fit)vec32 -1.22953 0.27236 -4.514 6.35e-06 ***
## fitted(me.fit)vec2070 -0.13768 0.25353 -0.543 0.58709
## fitted(me.fit)vec65 -0.71755 0.27068 -2.651 0.00803 **
## fitted(me.fit)vec2061 0.11227 0.25247 0.445 0.65656
## fitted(me.fit)vec58 -0.56542 0.27539 -2.053 0.04006 *
## fitted(me.fit)vec1230 -0.21252 0.26519 -0.801 0.42291
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(18.6931) family taken to be 1)
##
## Null deviance: 2785.2 on 2490 degrees of freedom
## Residual deviance: 2483.8 on 2471 degrees of freedom
## AIC: 21286
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 18.693
## Std. Err.: 0.703
##
## 2 x log-likelihood: -21244.333
lm.morantest(clean.nb, listw=us.wt4)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = isc_hd1416 ~ offset(log(pop_ovr60)) +
## pop_loss15 + hisp_prop + hpsa16 + hgh_pov14 + fitted(me.fit), data =
## arf2018sub, init.theta = 18.69308873, link = log)
## weights: us.wt4
##
## Moran I statistic standard deviate = 27.276, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.3645094270 -0.0002430609 0.0001788260
The Moran’s I test went from 0.387 in the original negative binomial to 0.365 with the spatial filtering included, indicating that spatial autocorrelation was partly reduced.