library(haven)
library(foreign) 
library(readr)
library(dplyr)
library(ggplot2)
library(broom)
library(car)
library(MASS) 
library(lmtest)
library(zoo)
library(nortest)
library(plotrix)
library(scales)
library(tableone)
library(Weighted.Desc.Stat)
library(mitools)
library(survey)
library(VGAM)
library(stargazer)
library(sandwich)
library(pastecs)
library(muhaz)
library(ggpubr)
library(survminer)
library(eha)
library(reshape2)
library(data.table)
library(magrittr)
library(tidyverse)
library(sjmisc)
library(sjPlot)
library(sjmisc)
library(sjlabelled)
library(weights)
library(GGally)
library(tigris)
library(RColorBrewer)
library(patchwork)
library(tidycensus)
library(censusapi)
library(spdep)
library(classInt)
library(sf)
library(spatialreg)
library(tidyverse)
library(rgdal)
library(INLA)
library(naivebayes)
library(psych)
library(mlbench)
library(caret)
library(caretEnsemble)
library(Amelia)
library(mice)
library(rpart)
library(randomForest)
library(e1071)
library(klaR)

Data extraction, define outcome and predictors (AHRF 2018-2019)

arf<-read_sas("C://Users//Jaire//OneDrive//Desktop//Spatial Demography//Data//ahrf2019.sas7bdat")
# Outcome/offset - suicides  (2015-2017)
# Denominator - total deaths (2015-2017)

# Predictors:

# number of mental health facilities
# number of long-term psychiatric hospitals
# number of short-term psychiatric hospitals
# number of VA hospital admissions
set.seed(123)
drop_na(arf)
arf$cofips=arf$f00002
arf$coname=arf$f00010
arf$state = arf$f00008
arf$deaths1517=arf$f1194115
arf$deaths0810=arf$f1194108
arf$suicide1517=arf$f1316615
arf$suicide0810=arf$f1316608
arf$mentfac10=arf$f1322110
arf$psychlt=arf$f1067817
arf$psychst=arf$f1318517
arf$vaadm=arf$f0892017

scale(arf$mentfac10)
scale(arf$psychst)
scale(arf$psychlt)
scale(arf$vaadm)
imp_complete <- complete(imp_mod)
arf$suicide1517 <- imp_complete$suicide1517
arf$deaths1517 <- imp_complete$deaths1517
arf$mentfac10 <- imp_complete$mentfac10
arf$psychst <- imp_complete$psychst
arf$psychlt <- imp_complete$psychlt
arf$vaadm <- imp_complete$vaadm
missmap(arf)

arf$E_d<-arf$deaths1517*(sum(arf$suicide1517)/sum(arf$deaths1517))
table(arf$E_d)

2010 Census data

options(tigris_class="sf")
us_co<-counties(cb=T, year=2010)
us_co$cofips<-substr(us_co$GEO_ID, 10, 15)
sts<-states(cb = T, year=2010)
sts<-st_boundary(sts)

spatial relationships

us_co$struct<-1:dim(us_co)[1]  
nbs<-knearneigh(st_centroid(us_co), k = 5, longlat = T)
## Warning in st_centroid.sf(us_co): st_centroid assumes attributes are constant
## over geometries of x
## Warning in knearneigh(st_centroid(us_co), k = 5, longlat = T): dnearneigh:
## longlat argument overrides object
nbs<-knn2nb(nbs, row.names = us_co$struct, sym = T) #force symmetry!!
mat <- nb2mat(nbs, style="B",zero.policy=TRUE)
colnames(mat) <- rownames(mat) 
mat <- as.matrix(mat[1:dim(mat)[1], 1:dim(mat)[1]])


nb2INLA("cl_graph",nbs)
am_adj <-paste(getwd(),"/cl_graph",sep="")
H<-inla.read.graph(filename="cl_graph")
library(sf)
us_co<-st_as_sf(us_co)
us_co$cofips<-paste(us_co$STATEFP, us_co$COUNTYFP, sep="")
us_co%>%
  ggplot()+
  geom_sf()+
  coord_sf(crs = 2163)

arf<-merge( us_co,arf, by="cofips")
arf<-arf[order(arf$cofips),]

INLA Model specification and fit

ggplot(data = arf)+geom_histogram(aes(x =log(suicide1517/deaths1517) ,
                                            y=0.5*..density..))+

  ggtitle(label = "Distribution of Suicides by Year",
          subtitle = "US Counties, 2015-2017")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data = arf)+geom_histogram(aes(x =suicide1517/(deaths1517*(sum(suicide1517)/sum(deaths1517))) ,
                                            y=0.9*..density..))+
  ggtitle(label = "Distribution of Suicides Relative Risk by Year", 
          subtitle = "US Counties, 2015-2017")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

arf%>%
  
  mutate(qrr=cut(I(suicide1517/E_d), 
                 breaks = quantile(I(suicide1517/E_d),
                                   p=seq(0,1,length.out = 5)),
                 include.lowest = T))%>%
  ggplot()+
  geom_sf(aes(fill=qrr, color=NA))+
  geom_sf(data=sts, color="black")+
  scale_colour_brewer(palette = "RdBu" )+
  scale_fill_brewer(palette = "RdBu", na.value="grey")+
  guides(fill=guide_legend(title="Relative Risk Quartile"))+
  ggtitle(label="Relative Risk Quartile - IMR Raw data, 2015-2017")+
  coord_sf(crs = 2163)

f1<-suicide1517~mentfac10+psychst+psychlt+vaadm

mod1<-inla(formula = f1,data = arf, 
           family = "poisson", E = E_d,  
           control.compute = list(waic=T), 
           control.predictor = list(link=1), 
           num.threads = 1, 
               verbose = F)
summary(mod1)
## 
## Call:
##    c("inla(formula = f1, family = \"poisson\", data = arf, E = E_d, 
##    verbose = F, ", " control.compute = list(waic = T), control.predictor = 
##    list(link = 1), ", " num.threads = 1)") 
## Time used:
##     Pre = 2, Running = 2.26, Post = 0.454, Total = 4.72 
## Fixed effects:
##               mean    sd 0.025quant 0.5quant 0.975quant   mode kld
## (Intercept)  0.268 0.004      0.260    0.268      0.275  0.268   0
## mentfac10   -0.025 0.001     -0.027   -0.025     -0.023 -0.025   0
## psychst     -0.118 0.003     -0.125   -0.118     -0.112 -0.118   0
## psychlt     -0.558 0.011     -0.579   -0.558     -0.537 -0.558   0
## vaadm        0.000 0.000      0.000    0.000      0.000  0.000   0
## 
## Expected number of effective parameters(stdev): 5.58(0.00)
## Number of equivalent replicates : 576.52 
## 
## Watanabe-Akaike information criterion (WAIC) ...: 153671.21
## Effective number of parameters .................: 1033.01
## 
## Marginal log-Likelihood:  -76289.33 
## Posterior marginals for the linear predictor and
##  the fitted values are computed

The model shows that the number of mental hospitals, and long- and short-term psychiatric facilities is negatively associated with suicide rates in these areas. (WAIC-153671.21)

basic random intercept model (county)

f2<-suicide1517~mentfac10+psychst+psychlt+vaadm+
  f(struct, model = "iid") 
  
mod2<-inla(formula = f2,
           data = arf,
           family = "poisson",
           E = E_d, 
           control.compute = list(waic=T), 
           control.predictor = list(link=1),
           num.threads = 1, 
               verbose = F)
summary(mod2)
## 
## Call:
##    c("inla(formula = f2, family = \"poisson\", data = arf, E = E_d, 
##    verbose = F, ", " control.compute = list(waic = T), control.predictor = 
##    list(link = 1), ", " num.threads = 1)") 
## Time used:
##     Pre = 1.5, Running = 7.18, Post = 0.52, Total = 9.2 
## Fixed effects:
##               mean    sd 0.025quant 0.5quant 0.975quant   mode kld
## (Intercept)  0.793 0.022      0.749    0.793      0.836  0.793   0
## mentfac10   -0.051 0.014     -0.079   -0.051     -0.024 -0.051   0
## psychst     -0.385 0.043     -0.469   -0.385     -0.301 -0.385   0
## psychlt     -0.855 0.102     -1.055   -0.855     -0.654 -0.855   0
## vaadm        0.000 0.000      0.000    0.000      0.000  0.000   0
## 
## Random effects:
##   Name     Model
##     struct IID model
## 
## Model hyperparameters:
##                       mean    sd 0.025quant 0.5quant 0.975quant mode
## Precision for struct 0.711 0.018      0.675    0.711      0.747 0.71
## 
## Expected number of effective parameters(stdev): 3101.52(2.98)
## Number of equivalent replicates : 1.04 
## 
## Watanabe-Akaike information criterion (WAIC) ...: 21590.04
## Effective number of parameters .................: 1744.23
## 
## Marginal log-Likelihood:  -15169.44 
## Posterior marginals for the linear predictor and
##  the fitted values are computed
1/.832
## [1] 1.201923

Similarly, the model shows that the number of mental hospitals, and long- and short-term psychiatric facilities is negatively associated with suicide rates in these areas. This model has a much lower WAIC (WAIC= 21590.04) and higher number of effective parameters (1744.23). Over-dispersion is present within the model (variance/mean precision of struct = 1.201923)

posterior distribution of hyperparamters

m2<- inla.tmarginal(
        function(x) (1/x), 
        mod2$marginals.hyperpar$`Precision for struct`)

inla.hpdmarginal(.95, marginal=m2)
##                low     high
## level:0.95 1.33739 1.478979
plot(m2,
     type="l",
     main=c("Posterior distibution for between county variance", "- IID model -"), 
     xlim=c(1, 1.5))

arf$fitted_m2<-mod2$summary.fitted.values$mean

arf%>%
#  filter(year%in%c(2000))%>%
  mutate(qrr=cut(fitted_m2,
                 breaks = quantile(fitted_m2, p=seq(0,1,length.out = 6)),
                 include.lowest = T))%>%
  ggplot()+geom_sf(aes(fill=qrr))+
  scale_colour_brewer(palette = "RdBu" )+
  scale_fill_brewer(palette = "RdBu", na.value="grey")+
  guides(fill=guide_legend(title="Relative Risk Quartile"))+
  ggtitle(label="Relative Risk Quartile - IID Model, 2015-2017")+
  coord_sf(crs = 2163)

correct over-dispersion with Besag, York, and Mollie model

f3<-f2<-suicide1517~mentfac10+psychst+psychlt+vaadm+
  f(struct, model = "bym",scale.model = T, constr = T, graph = H)

mod3<-inla(formula = f3,data = arf,
           family = "poisson",
           E = E_d, 
           control.compute = list(waic=T), #return.marginals.predictor=TRUE removed - argument was void
           control.predictor = list(link=1), 
           num.threads = 3, 
               verbose = F)

summary(mod3)
## 
## Call:
##    c("inla(formula = f3, family = \"poisson\", data = arf, E = E_d, 
##    verbose = F, ", " control.compute = list(waic = T), control.predictor = 
##    list(link = 1), ", " num.threads = 3)") 
## Time used:
##     Pre = 1.72, Running = 34.4, Post = 2.62, Total = 38.8 
## Fixed effects:
##               mean    sd 0.025quant 0.5quant 0.975quant   mode kld
## (Intercept)  0.754 0.016      0.722    0.754      0.786  0.754   0
## mentfac10   -0.031 0.012     -0.055   -0.031     -0.007 -0.031   0
## psychst     -0.237 0.036     -0.308   -0.237     -0.166 -0.237   0
## psychlt     -0.511 0.085     -0.677   -0.511     -0.345 -0.511   0
## vaadm        0.000 0.000      0.000    0.000      0.000  0.000   0
## 
## Random effects:
##   Name     Model
##     struct BYM model
## 
## Model hyperparameters:
##                                          mean    sd 0.025quant 0.5quant
## Precision for struct (iid component)     1.44 0.067       1.30     1.44
## Precision for struct (spatial component) 1.46 0.169       1.16     1.45
##                                          0.975quant mode
## Precision for struct (iid component)           1.57 1.44
## Precision for struct (spatial component)       1.83 1.42
## 
## Expected number of effective parameters(stdev): 3048.13(4.24)
## Number of equivalent replicates : 1.06 
## 
## Watanabe-Akaike information criterion (WAIC) ...: 21551.60
## Effective number of parameters .................: 1729.01
## 
## Marginal log-Likelihood:  -13503.57 
## Posterior marginals for the linear predictor and
##  the fitted values are computed
1/1.44
## [1] 0.6944444
1/1.46
## [1] 0.6849315

The BYM model shows that the number of mental hospitals, and long- and short-term psychiatric facilities is negatively associated with suicide rates in these areas. This model has a much lower WAIC (WAIC=21551.60) and number of effective parameters (1729.00). Over-dispersion is present within the model (.694 and .685) however, the over-dispersion is much lower than in previous models.

compare model fits

mod1$waic$waic
## [1] 153671.2
mod2$waic$waic
## [1] 21590.04
mod3$waic$waic
## [1] 21551.6

regression effect posterior distributions

#compute posterior mean
b1 <- inla.emarginal(exp, mod2$marginals.fixed[[2]])
b1
## [1] 0.9499585
#CI for mean
b1ci <-inla.qmarginal(c(.025, .975), 
                      inla.tmarginal(exp, mod2$marginals.fixed[[2]]))
b1ci
## [1] 0.9240635 0.9763370

hyper parameter distributions

Green line is spatial variation, black line is nonspatial variation

m3a<- inla.tmarginal(
        function(x) (1/x),
        mod3$marginals.hyperpar$`Precision for struct (iid component)`)
m3b<- inla.tmarginal(
        function(x) (1/x),
        mod3$marginals.hyperpar$`Precision for struct (spatial component)`)

plot(m3a, type="l",
     main=c("Posterior distibution for between county variance", "- BYM model -"),
     xlim=c(.2, 1)) # you may need to change this
#lines(m3b, col="red")
lines(m3b, col="green")

variance interval for hypers

inla.hpdmarginal(.95,m3a)
##                  low      high
## level:0.95 0.6351627 0.7627479
inla.hpdmarginal(.95,m3b)
##                  low      high
## level:0.95 0.5414591 0.8498942

spatial mapping of fitted values

arf$fitted_m3<-mod3$summary.fitted.values$mean

arf%>%
 # filter(year%in%c(2000))%>%
  mutate(qrr=cut(fitted_m3,
                 breaks = quantile(fitted_m3, p=seq(0,1,length.out = 6)),
                 include.lowest = T))%>%
  ggplot()+
  geom_sf(aes(fill=qrr))+
  scale_colour_brewer(palette = "RdBu" )+
  scale_fill_brewer(palette = "RdBu", na.value="grey")+
  guides(fill=guide_legend(title="Relative Risk Quartile"))+
  ggtitle(label="Relative Risk Quartile - BYM Model, 2015-2017")+
  coord_sf(crs = 2163)

exceedence probabilities of suicide risk

thetastar<-1.5#theta*
inlaprob<- unlist(lapply(mod3$marginals.fitted.values, function(X){
   1-inla.pmarginal(thetastar, X)
}))
hist(inlaprob)

arf$exceedprob<-inlaprob

arf%>%
  mutate(qrr=cut(exceedprob,
                 breaks = c(0, .5, .9, .95, .99, 1),
                 include.lowest = T))%>%
  ggplot()+
  geom_sf(aes(fill=qrr, color=NA))+
  geom_sf(data=sts)+
  scale_colour_brewer(palette = "Blues" )+
  scale_fill_brewer(palette = "Blues", na.value="grey")+
  guides(fill=guide_legend(title=""))+
  ggtitle(label=expression(paste("Exceedence Probability Relative Risk ","Pr( ",theta," >1.5"," )  - 2000") ))+
  coord_sf(crs = 2163)