library(haven)
library(foreign)
library(readr)
library(dplyr)
library(ggplot2)
library(broom)
library(car)
library(MASS)
library(lmtest)
library(zoo)
library(nortest)
library(plotrix)
library(scales)
library(tableone)
library(Weighted.Desc.Stat)
library(mitools)
library(survey)
library(VGAM)
library(stargazer)
library(sandwich)
library(pastecs)
library(muhaz)
library(ggpubr)
library(survminer)
library(eha)
library(reshape2)
library(data.table)
library(magrittr)
library(tidyverse)
library(sjmisc)
library(sjPlot)
library(sjmisc)
library(sjlabelled)
library(weights)
library(GGally)
library(tigris)
library(RColorBrewer)
library(patchwork)
library(tidycensus)
library(censusapi)
library(spdep)
library(classInt)
library(sf)
library(spatialreg)
library(tidyverse)
library(rgdal)
library(INLA)
library(naivebayes)
library(psych)
library(mlbench)
library(caret)
library(caretEnsemble)
library(Amelia)
library(mice)
library(rpart)
library(randomForest)
library(e1071)
library(klaR)
arf<-read_sas("C://Users//Jaire//OneDrive//Desktop//Spatial Demography//Data//ahrf2019.sas7bdat")
# Outcome/offset - suicides (2015-2017)
# Denominator - total deaths (2015-2017)
# Predictors:
# number of mental health facilities
# number of long-term psychiatric hospitals
# number of short-term psychiatric hospitals
# number of VA hospital admissions
set.seed(123)
drop_na(arf)
arf$cofips=arf$f00002
arf$coname=arf$f00010
arf$state = arf$f00008
arf$deaths1517=arf$f1194115
arf$deaths0810=arf$f1194108
arf$suicide1517=arf$f1316615
arf$suicide0810=arf$f1316608
arf$mentfac10=arf$f1322110
arf$psychlt=arf$f1067817
arf$psychst=arf$f1318517
arf$vaadm=arf$f0892017
scale(arf$mentfac10)
scale(arf$psychst)
scale(arf$psychlt)
scale(arf$vaadm)
imp_complete <- complete(imp_mod)
arf$suicide1517 <- imp_complete$suicide1517
arf$deaths1517 <- imp_complete$deaths1517
arf$mentfac10 <- imp_complete$mentfac10
arf$psychst <- imp_complete$psychst
arf$psychlt <- imp_complete$psychlt
arf$vaadm <- imp_complete$vaadm
missmap(arf)
arf$E_d<-arf$deaths1517*(sum(arf$suicide1517)/sum(arf$deaths1517))
table(arf$E_d)
options(tigris_class="sf")
us_co<-counties(cb=T, year=2010)
us_co$cofips<-substr(us_co$GEO_ID, 10, 15)
sts<-states(cb = T, year=2010)
sts<-st_boundary(sts)
us_co$struct<-1:dim(us_co)[1]
nbs<-knearneigh(st_centroid(us_co), k = 5, longlat = T)
## Warning in st_centroid.sf(us_co): st_centroid assumes attributes are constant
## over geometries of x
## Warning in knearneigh(st_centroid(us_co), k = 5, longlat = T): dnearneigh:
## longlat argument overrides object
nbs<-knn2nb(nbs, row.names = us_co$struct, sym = T) #force symmetry!!
mat <- nb2mat(nbs, style="B",zero.policy=TRUE)
colnames(mat) <- rownames(mat)
mat <- as.matrix(mat[1:dim(mat)[1], 1:dim(mat)[1]])
nb2INLA("cl_graph",nbs)
am_adj <-paste(getwd(),"/cl_graph",sep="")
H<-inla.read.graph(filename="cl_graph")
library(sf)
us_co<-st_as_sf(us_co)
us_co$cofips<-paste(us_co$STATEFP, us_co$COUNTYFP, sep="")
us_co%>%
ggplot()+
geom_sf()+
coord_sf(crs = 2163)
arf<-merge( us_co,arf, by="cofips")
arf<-arf[order(arf$cofips),]
ggplot(data = arf)+geom_histogram(aes(x =log(suicide1517/deaths1517) ,
y=0.5*..density..))+
ggtitle(label = "Distribution of Suicides by Year",
subtitle = "US Counties, 2015-2017")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(data = arf)+geom_histogram(aes(x =suicide1517/(deaths1517*(sum(suicide1517)/sum(deaths1517))) ,
y=0.9*..density..))+
ggtitle(label = "Distribution of Suicides Relative Risk by Year",
subtitle = "US Counties, 2015-2017")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
arf%>%
mutate(qrr=cut(I(suicide1517/E_d),
breaks = quantile(I(suicide1517/E_d),
p=seq(0,1,length.out = 5)),
include.lowest = T))%>%
ggplot()+
geom_sf(aes(fill=qrr, color=NA))+
geom_sf(data=sts, color="black")+
scale_colour_brewer(palette = "RdBu" )+
scale_fill_brewer(palette = "RdBu", na.value="grey")+
guides(fill=guide_legend(title="Relative Risk Quartile"))+
ggtitle(label="Relative Risk Quartile - IMR Raw data, 2015-2017")+
coord_sf(crs = 2163)
f1<-suicide1517~mentfac10+psychst+psychlt+vaadm
mod1<-inla(formula = f1,data = arf,
family = "poisson", E = E_d,
control.compute = list(waic=T),
control.predictor = list(link=1),
num.threads = 1,
verbose = F)
summary(mod1)
##
## Call:
## c("inla(formula = f1, family = \"poisson\", data = arf, E = E_d,
## verbose = F, ", " control.compute = list(waic = T), control.predictor =
## list(link = 1), ", " num.threads = 1)")
## Time used:
## Pre = 2, Running = 2.26, Post = 0.454, Total = 4.72
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) 0.268 0.004 0.260 0.268 0.275 0.268 0
## mentfac10 -0.025 0.001 -0.027 -0.025 -0.023 -0.025 0
## psychst -0.118 0.003 -0.125 -0.118 -0.112 -0.118 0
## psychlt -0.558 0.011 -0.579 -0.558 -0.537 -0.558 0
## vaadm 0.000 0.000 0.000 0.000 0.000 0.000 0
##
## Expected number of effective parameters(stdev): 5.58(0.00)
## Number of equivalent replicates : 576.52
##
## Watanabe-Akaike information criterion (WAIC) ...: 153671.21
## Effective number of parameters .................: 1033.01
##
## Marginal log-Likelihood: -76289.33
## Posterior marginals for the linear predictor and
## the fitted values are computed
The model shows that the number of mental hospitals, and long- and short-term psychiatric facilities is negatively associated with suicide rates in these areas. (WAIC-153671.21)
f2<-suicide1517~mentfac10+psychst+psychlt+vaadm+
f(struct, model = "iid")
mod2<-inla(formula = f2,
data = arf,
family = "poisson",
E = E_d,
control.compute = list(waic=T),
control.predictor = list(link=1),
num.threads = 1,
verbose = F)
summary(mod2)
##
## Call:
## c("inla(formula = f2, family = \"poisson\", data = arf, E = E_d,
## verbose = F, ", " control.compute = list(waic = T), control.predictor =
## list(link = 1), ", " num.threads = 1)")
## Time used:
## Pre = 1.5, Running = 7.18, Post = 0.52, Total = 9.2
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) 0.793 0.022 0.749 0.793 0.836 0.793 0
## mentfac10 -0.051 0.014 -0.079 -0.051 -0.024 -0.051 0
## psychst -0.385 0.043 -0.469 -0.385 -0.301 -0.385 0
## psychlt -0.855 0.102 -1.055 -0.855 -0.654 -0.855 0
## vaadm 0.000 0.000 0.000 0.000 0.000 0.000 0
##
## Random effects:
## Name Model
## struct IID model
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant 0.975quant mode
## Precision for struct 0.711 0.018 0.675 0.711 0.747 0.71
##
## Expected number of effective parameters(stdev): 3101.52(2.98)
## Number of equivalent replicates : 1.04
##
## Watanabe-Akaike information criterion (WAIC) ...: 21590.04
## Effective number of parameters .................: 1744.23
##
## Marginal log-Likelihood: -15169.44
## Posterior marginals for the linear predictor and
## the fitted values are computed
1/.832
## [1] 1.201923
Similarly, the model shows that the number of mental hospitals, and long- and short-term psychiatric facilities is negatively associated with suicide rates in these areas. This model has a much lower WAIC (WAIC= 21590.04) and higher number of effective parameters (1744.23). Over-dispersion is present within the model (variance/mean precision of struct = 1.201923)
m2<- inla.tmarginal(
function(x) (1/x),
mod2$marginals.hyperpar$`Precision for struct`)
inla.hpdmarginal(.95, marginal=m2)
## low high
## level:0.95 1.33739 1.478979
plot(m2,
type="l",
main=c("Posterior distibution for between county variance", "- IID model -"),
xlim=c(1, 1.5))
arf$fitted_m2<-mod2$summary.fitted.values$mean
arf%>%
# filter(year%in%c(2000))%>%
mutate(qrr=cut(fitted_m2,
breaks = quantile(fitted_m2, p=seq(0,1,length.out = 6)),
include.lowest = T))%>%
ggplot()+geom_sf(aes(fill=qrr))+
scale_colour_brewer(palette = "RdBu" )+
scale_fill_brewer(palette = "RdBu", na.value="grey")+
guides(fill=guide_legend(title="Relative Risk Quartile"))+
ggtitle(label="Relative Risk Quartile - IID Model, 2015-2017")+
coord_sf(crs = 2163)
f3<-f2<-suicide1517~mentfac10+psychst+psychlt+vaadm+
f(struct, model = "bym",scale.model = T, constr = T, graph = H)
mod3<-inla(formula = f3,data = arf,
family = "poisson",
E = E_d,
control.compute = list(waic=T), #return.marginals.predictor=TRUE removed - argument was void
control.predictor = list(link=1),
num.threads = 3,
verbose = F)
summary(mod3)
##
## Call:
## c("inla(formula = f3, family = \"poisson\", data = arf, E = E_d,
## verbose = F, ", " control.compute = list(waic = T), control.predictor =
## list(link = 1), ", " num.threads = 3)")
## Time used:
## Pre = 1.72, Running = 34.4, Post = 2.62, Total = 38.8
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) 0.754 0.016 0.722 0.754 0.786 0.754 0
## mentfac10 -0.031 0.012 -0.055 -0.031 -0.007 -0.031 0
## psychst -0.237 0.036 -0.308 -0.237 -0.166 -0.237 0
## psychlt -0.511 0.085 -0.677 -0.511 -0.345 -0.511 0
## vaadm 0.000 0.000 0.000 0.000 0.000 0.000 0
##
## Random effects:
## Name Model
## struct BYM model
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant
## Precision for struct (iid component) 1.44 0.067 1.30 1.44
## Precision for struct (spatial component) 1.46 0.169 1.16 1.45
## 0.975quant mode
## Precision for struct (iid component) 1.57 1.44
## Precision for struct (spatial component) 1.83 1.42
##
## Expected number of effective parameters(stdev): 3048.13(4.24)
## Number of equivalent replicates : 1.06
##
## Watanabe-Akaike information criterion (WAIC) ...: 21551.60
## Effective number of parameters .................: 1729.01
##
## Marginal log-Likelihood: -13503.57
## Posterior marginals for the linear predictor and
## the fitted values are computed
1/1.44
## [1] 0.6944444
1/1.46
## [1] 0.6849315
The BYM model shows that the number of mental hospitals, and long- and short-term psychiatric facilities is negatively associated with suicide rates in these areas. This model has a much lower WAIC (WAIC=21551.60) and number of effective parameters (1729.00). Over-dispersion is present within the model (.694 and .685) however, the over-dispersion is much lower than in previous models.
mod1$waic$waic
## [1] 153671.2
mod2$waic$waic
## [1] 21590.04
mod3$waic$waic
## [1] 21551.6
#compute posterior mean
b1 <- inla.emarginal(exp, mod2$marginals.fixed[[2]])
b1
## [1] 0.9499585
#CI for mean
b1ci <-inla.qmarginal(c(.025, .975),
inla.tmarginal(exp, mod2$marginals.fixed[[2]]))
b1ci
## [1] 0.9240635 0.9763370
Green line is spatial variation, black line is nonspatial variation
m3a<- inla.tmarginal(
function(x) (1/x),
mod3$marginals.hyperpar$`Precision for struct (iid component)`)
m3b<- inla.tmarginal(
function(x) (1/x),
mod3$marginals.hyperpar$`Precision for struct (spatial component)`)
plot(m3a, type="l",
main=c("Posterior distibution for between county variance", "- BYM model -"),
xlim=c(.2, 1)) # you may need to change this
#lines(m3b, col="red")
lines(m3b, col="green")
inla.hpdmarginal(.95,m3a)
## low high
## level:0.95 0.6351627 0.7627479
inla.hpdmarginal(.95,m3b)
## low high
## level:0.95 0.5414591 0.8498942
arf$fitted_m3<-mod3$summary.fitted.values$mean
arf%>%
# filter(year%in%c(2000))%>%
mutate(qrr=cut(fitted_m3,
breaks = quantile(fitted_m3, p=seq(0,1,length.out = 6)),
include.lowest = T))%>%
ggplot()+
geom_sf(aes(fill=qrr))+
scale_colour_brewer(palette = "RdBu" )+
scale_fill_brewer(palette = "RdBu", na.value="grey")+
guides(fill=guide_legend(title="Relative Risk Quartile"))+
ggtitle(label="Relative Risk Quartile - BYM Model, 2015-2017")+
coord_sf(crs = 2163)
thetastar<-1.5#theta*
inlaprob<- unlist(lapply(mod3$marginals.fitted.values, function(X){
1-inla.pmarginal(thetastar, X)
}))
hist(inlaprob)
arf$exceedprob<-inlaprob
arf%>%
mutate(qrr=cut(exceedprob,
breaks = c(0, .5, .9, .95, .99, 1),
include.lowest = T))%>%
ggplot()+
geom_sf(aes(fill=qrr, color=NA))+
geom_sf(data=sts)+
scale_colour_brewer(palette = "Blues" )+
scale_fill_brewer(palette = "Blues", na.value="grey")+
guides(fill=guide_legend(title=""))+
ggtitle(label=expression(paste("Exceedence Probability Relative Risk ","Pr( ",theta," >1.5"," ) - 2000") ))+
coord_sf(crs = 2163)