Spatial DEM HW 6 INLA

Data from area resource file

arf<-"https://github.com/coreysparks/data/blob/master/arf2018.Rdata?raw=true"
load(url(arf))

library(spdep)

## Loading required package: sp

## Loading required package: spData

## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`

## Loading required package: sf

## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1

library(MASS)
library(spatialreg)

## Loading required package: Matrix

## 
## Attaching package: 'spatialreg'

## The following objects are masked from 'package:spdep':
## 
##     as.spam.listw, as_dgRMatrix_listw, as_dsCMatrix_I,
##     as_dsCMatrix_IrW, as_dsTMatrix_listw, can.be.simmed, cheb_setup,
##     create_WX, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
##     errorsarlm, get.ClusterOption, get.coresOption, get.mcOption,
##     get.VerboseOption, get.ZeroPolicyOption, GMargminImage, GMerrorsar,
##     griffith_sone, gstsls, Hausman.test, impacts, intImpacts,
##     Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
##     lextrS, lextrW, lmSLX, LU_prepermutate_setup, LU_setup,
##     Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
##     mom_calc_int2, moments_setup, powerWeights, sacsarlm,
##     SE_classic_setup, SE_interp_setup, SE_whichMin_setup,
##     set.ClusterOption, set.coresOption, set.mcOption,
##     set.VerboseOption, set.ZeroPolicyOption, similar.listw, spam_setup,
##     spam_update_setup, SpatialFiltering, spautolm, spBreg_err,
##     spBreg_lag, spBreg_sac, stsls, subgraph_eigenw, trW

library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x tidyr::pack()   masks Matrix::pack()
## x dplyr::select() masks MASS::select()
## x tidyr::unpack() masks Matrix::unpack()

library(ggplot2)

Filtering and selecting variables

library(dplyr)
arf2018<-arf2018%>%
  mutate(cofips=f00004, 
         coname=f00010,
         state = f00011,
         rucc= as.factor(f0002013),
         isc_hd1416= f1193314,
         othr_cvd1416= f1316514,
         hpsa16= as.factor(f0978716),
         hgh_pov14= f1533414,
         prs_pov14= f1249014,
         pop_loss15= f1397715,
         pop_ovr60= (f0672410+f0672510+f0672610+f0672710+f1164010+f1164110+f1164210+f1164310),
         hisp_pop10= f0453610,
         hisp_prop= (f0453610/f0453010),
         wa_pop16= f1547116,
         wa_noins16= f1547216,
         wa_unins_rate= (f1547216/f1547116))%>% 
  select(cofips,coname,state,rucc,isc_hd1416,othr_cvd1416,hpsa16,hgh_pov14,prs_pov14,pop_loss15,pop_ovr60,hisp_pop10,hisp_prop,wa_pop16,wa_noins16,wa_unins_rate) %>% 
  filter(complete.cases(.)) %>% 
  as.data.frame()

head(arf2018)

##   cofips  coname state rucc isc_hd1416 othr_cvd1416 hpsa16 hgh_pov14 prs_pov14
## 1  01001 Autauga    01   02         66           59      2         0         0
## 2  01003 Baldwin    01   03        294          232      2         0         0
## 3  01005 Barbour    01   06         18           68      2         1         1
## 4  01007    Bibb    01   01         15           49      1         0         0
## 5  01009  Blount    01   01         70           91      1         0         0
## 6  01013  Butler    01   06         33           54      2         1         1
##   pop_loss15 pop_ovr60 hisp_pop10   hisp_prop wa_pop16 wa_noins16 wa_unins_rate
## 1          0      9323       1310 0.024005424    47077      43075     0.9149903
## 2          0     42580       7992 0.043848243   165740     148007     0.8930071
## 3          0      5708       1387 0.050515351    17997      15745     0.8748680
## 4          0      4240        406 0.017717652    17004      15358     0.9031992
## 5          0     12021       4626 0.080701999    47086      41382     0.8788600
## 6          1      4783        191 0.009118251    15965      14262     0.8933292

summary(arf2018)

##     cofips             coname             state                rucc    
##  Length:2531        Length:2531        Length:2531        06     :550  
##  Class :character   Class :character   Class :character   01     :417  
##  Mode  :character   Mode  :character   Mode  :character   02     :362  
##                                                           07     :346  
##                                                           03     :323  
##                                                           04     :214  
##                                                           (Other):319  
##    isc_hd1416       othr_cvd1416    hpsa16    hgh_pov14        
##  Min.   :   10.0   Min.   :  10.0    :   0   Length:2531       
##  1st Qu.:   28.0   1st Qu.:  24.0   0: 356   Class :character  
##  Median :   52.0   Median :  45.0   1: 518   Mode  :character  
##  Mean   :  142.4   Mean   : 124.9   2:1657                     
##  3rd Qu.:  112.0   3rd Qu.: 102.0                              
##  Max.   :11264.0   Max.   :6598.0                              
##                                                                
##   prs_pov14          pop_loss15          pop_ovr60         hisp_pop10     
##  Length:2531        Length:2531        Min.   :   1309   Min.   :     33  
##  Class :character   Class :character   1st Qu.:   4138   1st Qu.:    402  
##  Mode  :character   Mode  :character   Median :   7700   Median :   1345  
##                                        Mean   :  22248   Mean   :  19796  
##                                        3rd Qu.:  17431   3rd Qu.:   5954  
##                                        Max.   :1517935   Max.   :4687889  
##                                                                           
##    hisp_prop           wa_pop16         wa_noins16      wa_unins_rate   
##  Min.   :0.003516   Min.   :   3181   Min.   :   2892   Min.   :0.6912  
##  1st Qu.:0.016701   1st Qu.:  14224   1st Qu.:  12558   1st Qu.:0.8656  
##  Median :0.033584   Median :  28675   Median :  25612   Median :0.9015  
##  Mean   :0.078486   Mean   : 104531   Mean   :  94102   Mean   :0.8939  
##  3rd Qu.:0.080048   3rd Qu.:  75351   3rd Qu.:  67000   3rd Qu.:0.9315  
##  Max.   :0.957448   Max.   :8684851   Max.   :7757894   Max.   :0.9793  
##

Adding shapefiles

library(tigris)

## To enable 
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.

library(sf)
library(ggplot2)

options(tigris_class="sf")
usco<-counties(cb=T, year=2010)
usco$cofips<-substr(usco$GEO_ID, 10, 15)
sts<-states(cb = T, year=2010)
sts<-st_boundary(sts)%>%
  filter(!STATE %in% c("02", "15", "60", "66", "69", "72", "78"))

arf2018_m<-geo_join(usco, arf2018, by_sp="cofips", by_df="cofips",how="left" )

## Warning: We recommend using the dplyr::*_join() family of functions instead.

## Warning: `group_by_()` was deprecated in dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

The predictors used for this model are hisp_prop( the proportion of hispanics in the counties population), pop_loss15 (whether or not the county experienced population loss from censuses 1990-2000 and 2000-2010), hpsa16 (an variable that tells if a county has a shortage of primary care doctors, 0= no short, 1= whole short, and 2= partial short), and hgh_pov (variable that shows if 20% or more of the countie’s residents are poor).

A negative binomial model will be used, the offset term used for this model is the population of adults age 60+, this was chosen because the outcome variable, incidents of ichemic heart disease, typically manifests in older adults.

library(MASS)

arf2018sub<-filter(arf2018_m, is.na(isc_hd1416)==F)

fit_nb<- glm.nb(isc_hd1416 ~ offset(log(pop_ovr60))+ pop_loss15+ hisp_prop+ hpsa16+ hgh_pov14, 
               data=arf2018sub)
summary(fit_nb)

## 
## Call:
## glm.nb(formula = isc_hd1416 ~ offset(log(pop_ovr60)) + pop_loss15 + 
##     hisp_prop + hpsa16 + hgh_pov14, data = arf2018sub, init.theta = 17.41658014, 
##     link = log)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0610  -0.7273  -0.0856   0.5444   3.7730  
## 
## Coefficients:
##             Estimate Std. Error  z value Pr(>|z|)    
## (Intercept) -5.06401    0.01537 -329.541  < 2e-16 ***
## pop_loss151  0.12716    0.01869    6.802 1.03e-11 ***
## hisp_prop   -0.12658    0.04595   -2.754  0.00588 ** 
## hpsa161      0.05374    0.02049    2.622  0.00874 ** 
## hpsa162      0.02328    0.01657    1.405  0.15995    
## hgh_pov141   0.12909    0.01365    9.459  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(17.4166) family taken to be 1)
## 
##     Null deviance: 2650.7  on 2494  degrees of freedom
## Residual deviance: 2475.4  on 2489  degrees of freedom
## AIC: 21418
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  17.417 
##           Std. Err.:  0.640 
## 
##  2 x log-likelihood:  -21404.442

exp(coef(fit_nb))

## (Intercept) pop_loss151   hisp_prop     hpsa161     hpsa162  hgh_pov141 
## 0.006320145 1.135600560 0.881106805 1.055208875 1.023554516 1.137787417

The models suggests that nearly each indicator has a significant relationship with the outcome of interest. Apart from the hispanic proportion variable, each predictor elevated the risk of ischemic heart disease at varying degrees. Residents in counties that experienced population loss have 13.5% greater odds of ischemic heart disease. Counties with high poverty had 13.8% greater odds and those whose whole area has a shortage of doctors had 5.5%. The results suggest that for each percentage increase in the proportion of hispanics living in the county, the odds of ischemic heart disease decrease by 12%.

Test for overdispersion

scale<-sqrt(fit_nb$deviance/fit_nb$df.residual)
scale

## [1] 0.9972552

1-pchisq(fit_nb$deviance, df = fit_nb$df.residual)

## [1] 0.5731081

No evidence of overdispersion based on the ratio of the model’s deviance and residuals as well as the goodness of fit statistic.

library(spdep)
library(INLA)

## Warning: package 'INLA' was built under R version 4.1.2

## Loading required package: foreach

## 
## Attaching package: 'foreach'

## The following objects are masked from 'package:purrr':
## 
##     accumulate, when

## Loading required package: parallel

## This is INLA_21.11.29 built 2021-11-28 18:35:09 UTC.
##  - See www.r-inla.org/contact-us for how to get help.
##  - Save 80Mb of storage running 'inla.prune()'

library(tigris)
library(tidycensus)
library(tidyverse)

Constructing spatial relationships

arf2018sub$struct<-1:dim(arf2018sub)[1]  
nbs<-knearneigh(st_centroid(arf2018sub), k = 5, longlat = T) #k=5 nearest neighbors

## Warning in st_centroid.sf(arf2018sub): st_centroid assumes attributes are
## constant over geometries of x

## Warning in knearneigh(st_centroid(arf2018sub), k = 5, longlat = T): dnearneigh:
## longlat argument overrides object

nbs<-knn2nb(nbs, row.names = arf2018sub$struct, sym = T) #force symmetry!!
mat <- nb2mat(nbs, style="B",zero.policy=TRUE)
colnames(mat) <- rownames(mat) 
mat <- as.matrix(mat[1:dim(mat)[1], 1:dim(mat)[1]])


nb2INLA("cl_graph",nbs)
am_adj <-paste(getwd(),"/cl_graph",sep="")
H<-inla.read.graph(filename="cl_graph")

BYM Model

loki<-isc_hd1416~ offset(log(pop_ovr60))+pop_loss15+hisp_prop+hpsa16+hgh_pov14+ f(struct, model = "bym",scale.model = T, constr = T, graph = H)

mod_bym<-inla(formula = loki,data = arf2018sub,
           family = "nbinomial",
           control.compute = list(waic=T,return.marginals.predictor=TRUE), 
           control.predictor = list(link=1), 
           num.threads = 3, 
               verbose = F)

summary(mod_bym)

## 
## Call:
##    c("inla(formula = loki, family = \"nbinomial\", data = arf2018sub, ", " 
##    verbose = F, control.compute = list(waic = T, 
##    return.marginals.predictor = TRUE), ", " control.predictor = list(link 
##    = 1), num.threads = 3)") 
## Time used:
##     Pre = 2.2, Running = 10.4, Post = 1.39, Total = 14 
## Fixed effects:
##               mean     sd 0.025quant 0.5quant 0.975quant   mode kld
## (Intercept) -5.052 18.257    -40.897   -5.053     30.763 -5.052   0
## pop_loss151  0.085  0.018      0.051    0.085      0.119  0.085   0
## hisp_prop   -0.060  0.076     -0.209   -0.060      0.089 -0.060   0
## hpsa160     -0.021 18.257    -35.865   -0.021     35.794 -0.021   0
## hpsa161      0.021 18.257    -35.824    0.020     35.835  0.021   0
## hpsa162     -0.001 18.257    -35.846   -0.002     35.813 -0.001   0
## hgh_pov141   0.075  0.014      0.048    0.075      0.102  0.075   0
## 
## Random effects:
##   Name     Model
##     struct BYM model
## 
## Model hyperparameters:
##                                                          mean     sd 0.025quant
## size for the nbinomial observations (1/overdispersion) 182.28 52.851     137.93
## Precision for struct (iid component)                   151.15 14.756     117.02
## Precision for struct (spatial component)                14.08  0.139      13.76
##                                                        0.5quant 0.975quant
## size for the nbinomial observations (1/overdispersion)   176.21   7.12e+14
## Precision for struct (iid component)                     146.56   5.53e+13
## Precision for struct (spatial component)                  14.06   1.45e+01
##                                                          mode
## size for the nbinomial observations (1/overdispersion) 188.13
## Precision for struct (iid component)                   155.48
## Precision for struct (spatial component)                14.08
## 
## Watanabe-Akaike information criterion (WAIC) ...: 19545.70
## Effective number of parameters .................: 926.93
## 
## Marginal log-Likelihood:  -9534.49 
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')

Comparing model fits

fit_nb$aic

## [1] 21418.44

mod_bym$waic$waic

## [1] 19545.7

The BYM model which includes spatial correlation and two random effects appears to fit the data better than the original negative binomial model.

Spatial DEM HW 6 INLA

Daniel Mamani

11/28/2021

Constructing spatial relationships