Data from area resource file
arf<-"https://github.com/coreysparks/data/blob/master/arf2018.Rdata?raw=true"
load(url(arf))
library(spdep)
## Loading required package: sp
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: sf
## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
library(MASS)
library(spatialreg)
## Loading required package: Matrix
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## as.spam.listw, as_dgRMatrix_listw, as_dsCMatrix_I,
## as_dsCMatrix_IrW, as_dsTMatrix_listw, can.be.simmed, cheb_setup,
## create_WX, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
## errorsarlm, get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, GMargminImage, GMerrorsar,
## griffith_sone, gstsls, Hausman.test, impacts, intImpacts,
## Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
## lextrS, lextrW, lmSLX, LU_prepermutate_setup, LU_setup,
## Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
## mom_calc_int2, moments_setup, powerWeights, sacsarlm,
## SE_classic_setup, SE_interp_setup, SE_whichMin_setup,
## set.ClusterOption, set.coresOption, set.mcOption,
## set.VerboseOption, set.ZeroPolicyOption, similar.listw, spam_setup,
## spam_update_setup, SpatialFiltering, spautolm, spBreg_err,
## spBreg_lag, spBreg_sac, stsls, subgraph_eigenw, trW
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x tidyr::pack() masks Matrix::pack()
## x dplyr::select() masks MASS::select()
## x tidyr::unpack() masks Matrix::unpack()
library(ggplot2)
Filtering and selecting variables
library(dplyr)
arf2018<-arf2018%>%
mutate(cofips=f00004,
coname=f00010,
state = f00011,
rucc= as.factor(f0002013),
isc_hd1416= f1193314,
othr_cvd1416= f1316514,
hpsa16= as.factor(f0978716),
hgh_pov14= f1533414,
prs_pov14= f1249014,
pop_loss15= f1397715,
pop_ovr60= (f0672410+f0672510+f0672610+f0672710+f1164010+f1164110+f1164210+f1164310),
hisp_pop10= f0453610,
hisp_prop= (f0453610/f0453010),
wa_pop16= f1547116,
wa_noins16= f1547216,
wa_unins_rate= (f1547216/f1547116))%>%
select(cofips,coname,state,rucc,isc_hd1416,othr_cvd1416,hpsa16,hgh_pov14,prs_pov14,pop_loss15,pop_ovr60,hisp_pop10,hisp_prop,wa_pop16,wa_noins16,wa_unins_rate) %>%
filter(complete.cases(.)) %>%
as.data.frame()
head(arf2018)
## cofips coname state rucc isc_hd1416 othr_cvd1416 hpsa16 hgh_pov14 prs_pov14
## 1 01001 Autauga 01 02 66 59 2 0 0
## 2 01003 Baldwin 01 03 294 232 2 0 0
## 3 01005 Barbour 01 06 18 68 2 1 1
## 4 01007 Bibb 01 01 15 49 1 0 0
## 5 01009 Blount 01 01 70 91 1 0 0
## 6 01013 Butler 01 06 33 54 2 1 1
## pop_loss15 pop_ovr60 hisp_pop10 hisp_prop wa_pop16 wa_noins16 wa_unins_rate
## 1 0 9323 1310 0.024005424 47077 43075 0.9149903
## 2 0 42580 7992 0.043848243 165740 148007 0.8930071
## 3 0 5708 1387 0.050515351 17997 15745 0.8748680
## 4 0 4240 406 0.017717652 17004 15358 0.9031992
## 5 0 12021 4626 0.080701999 47086 41382 0.8788600
## 6 1 4783 191 0.009118251 15965 14262 0.8933292
summary(arf2018)
## cofips coname state rucc
## Length:2531 Length:2531 Length:2531 06 :550
## Class :character Class :character Class :character 01 :417
## Mode :character Mode :character Mode :character 02 :362
## 07 :346
## 03 :323
## 04 :214
## (Other):319
## isc_hd1416 othr_cvd1416 hpsa16 hgh_pov14
## Min. : 10.0 Min. : 10.0 : 0 Length:2531
## 1st Qu.: 28.0 1st Qu.: 24.0 0: 356 Class :character
## Median : 52.0 Median : 45.0 1: 518 Mode :character
## Mean : 142.4 Mean : 124.9 2:1657
## 3rd Qu.: 112.0 3rd Qu.: 102.0
## Max. :11264.0 Max. :6598.0
##
## prs_pov14 pop_loss15 pop_ovr60 hisp_pop10
## Length:2531 Length:2531 Min. : 1309 Min. : 33
## Class :character Class :character 1st Qu.: 4138 1st Qu.: 402
## Mode :character Mode :character Median : 7700 Median : 1345
## Mean : 22248 Mean : 19796
## 3rd Qu.: 17431 3rd Qu.: 5954
## Max. :1517935 Max. :4687889
##
## hisp_prop wa_pop16 wa_noins16 wa_unins_rate
## Min. :0.003516 Min. : 3181 Min. : 2892 Min. :0.6912
## 1st Qu.:0.016701 1st Qu.: 14224 1st Qu.: 12558 1st Qu.:0.8656
## Median :0.033584 Median : 28675 Median : 25612 Median :0.9015
## Mean :0.078486 Mean : 104531 Mean : 94102 Mean :0.8939
## 3rd Qu.:0.080048 3rd Qu.: 75351 3rd Qu.: 67000 3rd Qu.:0.9315
## Max. :0.957448 Max. :8684851 Max. :7757894 Max. :0.9793
##
Adding shapefiles
library(tigris)
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
library(sf)
library(ggplot2)
options(tigris_class="sf")
usco<-counties(cb=T, year=2010)
usco$cofips<-substr(usco$GEO_ID, 10, 15)
sts<-states(cb = T, year=2010)
sts<-st_boundary(sts)%>%
filter(!STATE %in% c("02", "15", "60", "66", "69", "72", "78"))
arf2018_m<-geo_join(usco, arf2018, by_sp="cofips", by_df="cofips",how="left" )
## Warning: We recommend using the dplyr::*_join() family of functions instead.
## Warning: `group_by_()` was deprecated in dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
The predictors used for this model are hisp_prop( the proportion of hispanics in the counties population), pop_loss15 (whether or not the county experienced population loss from censuses 1990-2000 and 2000-2010), hpsa16 (an variable that tells if a county has a shortage of primary care doctors, 0= no short, 1= whole short, and 2= partial short), and hgh_pov (variable that shows if 20% or more of the countie’s residents are poor).
A negative binomial model will be used, the offset term used for this model is the population of adults age 60+, this was chosen because the outcome variable, incidents of ichemic heart disease, typically manifests in older adults.
library(MASS)
arf2018sub<-filter(arf2018_m, is.na(isc_hd1416)==F)
fit_nb<- glm.nb(isc_hd1416 ~ offset(log(pop_ovr60))+ pop_loss15+ hisp_prop+ hpsa16+ hgh_pov14,
data=arf2018sub)
summary(fit_nb)
##
## Call:
## glm.nb(formula = isc_hd1416 ~ offset(log(pop_ovr60)) + pop_loss15 +
## hisp_prop + hpsa16 + hgh_pov14, data = arf2018sub, init.theta = 17.41658014,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.0610 -0.7273 -0.0856 0.5444 3.7730
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.06401 0.01537 -329.541 < 2e-16 ***
## pop_loss151 0.12716 0.01869 6.802 1.03e-11 ***
## hisp_prop -0.12658 0.04595 -2.754 0.00588 **
## hpsa161 0.05374 0.02049 2.622 0.00874 **
## hpsa162 0.02328 0.01657 1.405 0.15995
## hgh_pov141 0.12909 0.01365 9.459 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(17.4166) family taken to be 1)
##
## Null deviance: 2650.7 on 2494 degrees of freedom
## Residual deviance: 2475.4 on 2489 degrees of freedom
## AIC: 21418
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 17.417
## Std. Err.: 0.640
##
## 2 x log-likelihood: -21404.442
exp(coef(fit_nb))
## (Intercept) pop_loss151 hisp_prop hpsa161 hpsa162 hgh_pov141
## 0.006320145 1.135600560 0.881106805 1.055208875 1.023554516 1.137787417
The models suggests that nearly each indicator has a significant relationship with the outcome of interest. Apart from the hispanic proportion variable, each predictor elevated the risk of ischemic heart disease at varying degrees. Residents in counties that experienced population loss have 13.5% greater odds of ischemic heart disease. Counties with high poverty had 13.8% greater odds and those whose whole area has a shortage of doctors had 5.5%. The results suggest that for each percentage increase in the proportion of hispanics living in the county, the odds of ischemic heart disease decrease by 12%.
Test for overdispersion
scale<-sqrt(fit_nb$deviance/fit_nb$df.residual)
scale
## [1] 0.9972552
1-pchisq(fit_nb$deviance, df = fit_nb$df.residual)
## [1] 0.5731081
No evidence of overdispersion based on the ratio of the model’s deviance and residuals as well as the goodness of fit statistic.
library(spdep)
library(INLA)
## Warning: package 'INLA' was built under R version 4.1.2
## Loading required package: foreach
##
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
##
## accumulate, when
## Loading required package: parallel
## This is INLA_21.11.29 built 2021-11-28 18:35:09 UTC.
## - See www.r-inla.org/contact-us for how to get help.
## - Save 80Mb of storage running 'inla.prune()'
library(tigris)
library(tidycensus)
library(tidyverse)
arf2018sub$struct<-1:dim(arf2018sub)[1]
nbs<-knearneigh(st_centroid(arf2018sub), k = 5, longlat = T) #k=5 nearest neighbors
## Warning in st_centroid.sf(arf2018sub): st_centroid assumes attributes are
## constant over geometries of x
## Warning in knearneigh(st_centroid(arf2018sub), k = 5, longlat = T): dnearneigh:
## longlat argument overrides object
nbs<-knn2nb(nbs, row.names = arf2018sub$struct, sym = T) #force symmetry!!
mat <- nb2mat(nbs, style="B",zero.policy=TRUE)
colnames(mat) <- rownames(mat)
mat <- as.matrix(mat[1:dim(mat)[1], 1:dim(mat)[1]])
nb2INLA("cl_graph",nbs)
am_adj <-paste(getwd(),"/cl_graph",sep="")
H<-inla.read.graph(filename="cl_graph")
BYM Model
loki<-isc_hd1416~ offset(log(pop_ovr60))+pop_loss15+hisp_prop+hpsa16+hgh_pov14+ f(struct, model = "bym",scale.model = T, constr = T, graph = H)
mod_bym<-inla(formula = loki,data = arf2018sub,
family = "nbinomial",
control.compute = list(waic=T,return.marginals.predictor=TRUE),
control.predictor = list(link=1),
num.threads = 3,
verbose = F)
summary(mod_bym)
##
## Call:
## c("inla(formula = loki, family = \"nbinomial\", data = arf2018sub, ", "
## verbose = F, control.compute = list(waic = T,
## return.marginals.predictor = TRUE), ", " control.predictor = list(link
## = 1), num.threads = 3)")
## Time used:
## Pre = 2.2, Running = 10.4, Post = 1.39, Total = 14
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) -5.052 18.257 -40.897 -5.053 30.763 -5.052 0
## pop_loss151 0.085 0.018 0.051 0.085 0.119 0.085 0
## hisp_prop -0.060 0.076 -0.209 -0.060 0.089 -0.060 0
## hpsa160 -0.021 18.257 -35.865 -0.021 35.794 -0.021 0
## hpsa161 0.021 18.257 -35.824 0.020 35.835 0.021 0
## hpsa162 -0.001 18.257 -35.846 -0.002 35.813 -0.001 0
## hgh_pov141 0.075 0.014 0.048 0.075 0.102 0.075 0
##
## Random effects:
## Name Model
## struct BYM model
##
## Model hyperparameters:
## mean sd 0.025quant
## size for the nbinomial observations (1/overdispersion) 182.28 52.851 137.93
## Precision for struct (iid component) 151.15 14.756 117.02
## Precision for struct (spatial component) 14.08 0.139 13.76
## 0.5quant 0.975quant
## size for the nbinomial observations (1/overdispersion) 176.21 7.12e+14
## Precision for struct (iid component) 146.56 5.53e+13
## Precision for struct (spatial component) 14.06 1.45e+01
## mode
## size for the nbinomial observations (1/overdispersion) 188.13
## Precision for struct (iid component) 155.48
## Precision for struct (spatial component) 14.08
##
## Watanabe-Akaike information criterion (WAIC) ...: 19545.70
## Effective number of parameters .................: 926.93
##
## Marginal log-Likelihood: -9534.49
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
Comparing model fits
fit_nb$aic
## [1] 21418.44
mod_bym$waic$waic
## [1] 19545.7
The BYM model which includes spatial correlation and two random effects appears to fit the data better than the original negative binomial model.