BayesX Model

uses a normal prior for the $\beta$s and an Inverse Gamma, $IG(a,b)$ for precision parameter $\tau^{2}$. $a=b=0.001$ is assumed
The variance parameter $tau^{2}_{j}$ is equivalent to the inverse smoothing parameter in a frequentist approach and controls the trade off between flexibility and smoothness.
Markov random fields (Fahrmeir et al. 2004): Defines a Markov random field prior for a spatial covariate, where geographical information is provided by a map object in boundary or graph file format.
mrf prior is used for the spatial effects and an MCMC algorithm implemented for sampling on the parameters

BayesX model formula

\[logit(P(Y_i=1))=\beta_0+\beta_1x_{i1}+...+\beta_px_{ip}+f_{spat}(s_i)\]

\[f_{spat}(s_i) \sim MRF ~PRIOR\]

Syntax

slm<-bayesx(Y~X1+X2+sx(region,bs="mrf"),family="binomial",
                                        data=mydata,
                                        map="region.graph")

INLA model

\[logit(P(Y_i=1))=\beta_0+\beta_1x_{i1}+...+\beta_px_{ip}+u(s_i)\]

\[u(s_i) \sim ICAR ~or~ SPDE\]

Syntax

formula<-Y~X1+X2+f(region,model="besag",graph="region.graph")

result<-bayesx(formula,family="binomial",data=mydata)

Installing R packages

Part 1 - Installations (ignore this part and move straight to Part 2 and choose yes on updating all)

#renv::init(repos = "https://packagemanager.posit.co/cran/2023-10-13")
#install.packages("maptools")
pkg <- c("methods", 
         "kfigr",
         "bookdown", 
         "markdown", 
         "bamlss", 
         "ape", 
         "BayesX", 
         "R2BayesX",
         "bayespref", 
         "R2WinBUGS",
         "BayesXsrc", 
         "spikeSlabGAM", 
         "sp", "leaflet", "raster", "spdep", "rgdal","akima", "httpuv", "e1071", "XML", "foreign",
        "dplyr", "Hmisc", "readr", "haven", "data.table", "car", "tidyr", "aod", "ggplot2", "maptools", 
         "rlang", "tibble", "mcmc", "quantreg", "mvtnorm", "deldir", "expm", "purrr", "backports", "sf", 
         "tmaptools", "digest", "mime", "tmap", "RColorBrewer", "reshape2", "formatR", "spacetime", "plm","maptools")

pacman::p_load(pkg)

Note

Installing INLA - 2024 August seems to be a tough one unless one uses this binary type use of this website https://www.r-inla.org/download-install

Windows will not update a package already loaded, so then you have to remove [use remove.packages] the package and install it from scratch. There are two suggested packages, ‘graph’ and ‘Rgraphviz’, that are on Bioconductor, and you can install those with:

Part 2 - Installations

if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install(c("graph", "Rgraphviz"), dep=TRUE)

install.packages("INLA",repos=c(getOption("repos"),INLA="https://inla.r-inla-download.org/R/testing"), type="binary", dep=TRUE)
#Update all/some/none? [a/s/n]:

Calling in the libraries

# Loading key packages for this session
library(INLA)
library(spdep)
library(stringdist)
#library(rgeos)
library(sf)
library(SpatialEpi)
#library(rgdal)
library(raster)
library(tidyverse)
library(dplyr)

The Moran’s I statistic

Checking for spatial autocorrelation

Note

p$percent<-c(21.9, 24.6, 15.5, 15.5, 13.4, 16.2, 21.2, 25.5, 28.9) is bringing in provincial percentages


p <- shapefile("ZAF_adm/ZAF_adm1_1.shp")

p$percent<-c(21.9, 24.6, 15.5, 15.5, 13.4, 16.2, 21.2, 25.5, 28.9)
w <- poly2nb(p, row.names=p$PR_CODE)
class(w)
## [1] "nb"
data.frame(p)

ID_0	ISO	NAME_0	ID_1	NAME_1	TYPE_1	ENGTYPE_1	NL_NAME_1	VARNAME_1	percent
211	ZAF	South Africa	1	Eastern Cape	Provinsie	Province	NA	Oos-Kaap	21.9
211	ZAF	South Africa	2	Free State	Provinsie	Province	NA	Orange Free State\|Vrystaat	24.6
211	ZAF	South Africa	3	Gauteng	Provinsie	Province	NA	Pretoria/Witwatersrand/Vaal	15.5
211	ZAF	South Africa	4	KwaZulu-Natal	Provinsie	Province	NA	Natal and Zululand	15.5
211	ZAF	South Africa	5	Limpopo	Provinsie	Province	NA	Noordelike Provinsie\|Northern Transvaal\|Northern Province	13.4
211	ZAF	South Africa	6	Mpumalanga	Provinsie	Province	NA	Eastern Transvaal	16.2
211	ZAF	South Africa	7	North West	Provinsie	Province	NA	North-West\|Noordwes	21.2
211	ZAF	South Africa	8	Northern Cape	Provinsie	Province	NA	Noord-Kaap	25.5
211	ZAF	South Africa	9	Western Cape	Provinsie	Province	NA	Wes-Kaap	28.9

summary(w)
## Neighbour list object:
## Number of regions: 9 
## Number of nonzero links: 34 
## Percentage nonzero weights: 41.97531 
## Average number of links: 3.777778 
## Link number distribution:
## 
## 2 3 4 6 
## 1 2 5 1 
## 1 least connected region:
## 9 with 2 links
## 1 most connected region:
## 2 with 6 links

Plotting

#display.brewer.all()
p <- st_as_sf(p)
library(tmap)
tm_shape(p) + 
  tm_polygons()+
  tm_fill("percent", style = "quantile", n=5, palette = "Reds", title = "Hypertension prevalence") +
  tm_borders(alpha = .4)+ 
  tm_compass() + 
  tm_text("NAME_1") + 
  tm_layout(title = "South AFRICA, DHIS2016", 
            legend.text.size = 0.5, 
            legend.title.size =0.5 ,
            legend.position = c("right", "bottom"), 
            frame = FALSE)

Adjacency matrix

Note

determining which province is closer to which
depicting whether one is a neighbor or not

wm <- nb2mat(w, style='B')
wm
##   1 2 3 4 5 6 7 8 9
## 1 0 1 0 1 0 0 0 1 1
## 2 1 0 1 1 0 1 1 1 0
## 3 0 1 0 0 1 1 1 0 0
## 4 1 1 0 0 0 1 0 0 0
## 5 0 0 1 0 0 1 1 0 0
## 6 0 1 1 1 1 0 0 0 0
## 7 0 1 1 0 1 0 0 1 0
## 8 1 1 0 0 0 0 1 0 1
## 9 1 0 0 0 0 0 0 1 0
## attr(,"call")
## nb2mat(neighbours = w, style = "B")
n <- length(p)

ww <-  nb2listw(w, style='B')
ww
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 9 
## Number of nonzero links: 34 
## Percentage nonzero weights: 41.97531 
## Average number of links: 3.777778 
## 
## Weights style: B 
## Weights constants summary:
##   n nn S0 S1  S2
## B 9 81 34 68 552
moran(p$percent, ww, n=length(ww$neighbours), S0=Szero(ww))
## $I
## [1] 0.2909315
## 
## $K
## [1] 1.679342

Szero(ww)
## [1] 34

moran.test(p$percent, ww, randomisation=FALSE)
## 
##  Moran I test under normality
## 
## data:  p$percent  
## weights: ww    
## 
## Moran I statistic standard deviate = 2.4985, p-value = 0.006237
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##         0.2909315        -0.1250000         0.0277141
# Moran's I global test
moran.mc(p$percent, ww, nsim=99)
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  p$percent 
## weights: ww  
## number of simulations + 1: 100 
## 
## statistic = 0.29093, observed rank = 96, p-value = 0.04
## alternative hypothesis: greater

Note

Index - indicate random or presence of clustering

BayesX CAR model for hypertension in South Africa

library(sf)
SA_shp<-st_read("ZAF_adm/ZAF_adm1_1.shp")
## Reading layer `ZAF_adm1_1' from data source 
##   `C:\Users\r1953\Downloads\Spatial Tutorial\ZAF_adm\ZAF_adm1_1.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 9 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 16.45802 ymin: -34.83514 xmax: 32.89125 ymax: -22.12661
## Geodetic CRS:  WGS 84


plot(SA_shp)

Note

Getting the SA shape file using rgdal package

SA_shp<-readOGR(dsn=“ZAF_adm”,layer=“ZAF_adm1_1”)

library(R2BayesX)
library(BayesX)

#Getting the SA shape file using rgdal package - uncomment the commands below
#SA_shp<-readOGR(dsn="ZAF_adm",layer="ZAF_adm1_1")
#plot(SA_shp)
#setting library for bnd file using bayesX


## read shapefile into bnd object
shpname2 <- file.path("ZAF_adm" , "ZAF_adm1_1")
SA_bnd_prov <- BayesX::shp2bnd(shpname = shpname2, regionnames = "ID_1", check.is.in = F)
## Reading map ... finished
## Note: map consists originally of 53 polygons
## Note: map consists of 9 regions

BayesX::drawmap(map=SA_bnd_prov)

sagra<-BayesX::bnd2gra(SA_bnd_prov)
## Start neighbor search ...
## progress: 50%
## progress: 100%
## Neighbor search finished.

Note

shp2bnd - shapefile to a boundary file
bnd2gra - boundary to graph

Loading Hypertension dataset

library(haven)

hypertensiondata <- haven::read_dta("hypertensiondat.dta")
head(hypertensiondata, 3)

current_age	age_cat	region	type_residence	residence	education_level	wealth_level	caseid	province
45	7	7	1	1	2	2	1	3
60	10	7	1	1	2	2	2	3
30	4	7	1	1	2	3	3	3

recoding data to align with shapefile regions codes and names

hypertensiondata<-hypertensiondata |> 
   mutate(province =factor(province,
                       levels=c(1,2,3,4,5,6,7,8,9),
                       labels=c("Eastern cape",
                                "Freestate", 
                                "Gauteng","Kwazulu natal",
                                "Limpopo", "Mpumalanga", 
                                "North west", 
                                "Northern cape",
                                "Western cape")),
          residence = factor(residence, 
                             levels=c(1,2), 
                             labels=c("Urban", "Rural")),
          education_level=factor(education_level, 
                                 levels = c(0,1,2,3), 
                                 labels = c("no education", "primary", "secondary","higher")),
          wealth_level= factor(wealth_level, 
                               levels = c(1,2,3,4,5), 
                               labels = c("poorest","poor","middle","richer","richest")),
          sex=factor(sex, levels = c(0,1), labels = c("male","female")))

Logistic regression model using Statamarkdown

#install.packages("Statamarkdown")
library(Statamarkdown)

cd
use "C:\Users\a0006892\Downloads\CAR_models-master 2024\CAR_2024\hypertensiondat.dta", clear
describe
tab1 hypertension2
logistic hypertension2 i.sex i.residence current_age i.wealth_level
*histogram(diabetes)
## C:\Users\r1953\Downloads\Spatial Tutorial
## 
## file C:\Users\a0006892\Downloads\CAR_models-master
##     2024\CAR_2024\hypertensiondat.dta not found
## r(601);
## 
## r(601);

Running Bayesx model

library(colorspace)
library(R2BayesX)

imodel <- bayesx(hypertension2 ~ sex + residence + wealth_level + sx(current_age) + sx(province, bs="mrf", map=sagra) 
                 + sx(province, bs="re"), family = "binomial", method = "MCMC", iterations = 12000, burnin = 2000, 
                 step = 10, weights = NULL,subset = NULL, offset = NULL, na.action = NULL, seed = 12345, 
                 predict = TRUE, data = hypertensiondata)

summary(imodel)
## Call:
## bayesx(formula = hypertension2 ~ sex + residence + wealth_level + 
##     sx(current_age) + sx(province, bs = "mrf", map = sagra) + 
##     sx(province, bs = "re"), data = hypertensiondata, weights = NULL, 
##     subset = NULL, offset = NULL, na.action = NULL, family = "binomial", 
##     method = "MCMC", iterations = 12000, burnin = 2000, step = 10, 
##     seed = 12345, predict = TRUE)
##  
## Fixed effects estimation results:
## 
## Parametric coefficients:
##                        Mean      Sd    2.5%     50%   97.5%
## (Intercept)         -1.7710  0.1226 -2.0097 -1.7741 -1.5310
## sexfemale            0.6363  0.0605  0.5228  0.6347  0.7545
## residenceRural      -0.1413  0.0695 -0.2744 -0.1431 -0.0066
## wealth_levelpoor     0.4170  0.1050  0.2171  0.4170  0.6200
## wealth_levelmiddle   0.3824  0.0995  0.1836  0.3829  0.5747
## wealth_levelricher   0.4000  0.1026  0.1938  0.4042  0.5951
## wealth_levelrichest  0.4161  0.1009  0.2231  0.4150  0.6087
## 
## Smooth terms variances:
##                    Mean     Sd   2.5%    50%  97.5%    Min    Max
## sx(current_age)  0.0153 0.0155 0.0027 0.0102 0.0621 0.0016 0.1353
## sx(province):mrf 0.2329 0.3008 0.0015 0.1622 0.9720 0.0002 4.6444
##  
## Random effects variances:
##                   Mean     Sd   2.5%    50%  97.5%    Min    Max
## sx(province):re 0.0508 0.0727 0.0008 0.0240 0.2491 0.0003 0.8077
##  
## N = 10294  burnin = 2000  method = MCMC  family = binomial  
## iterations = 12000  step = 10

Getting Odds Ratios

exp(coef(imodel))

	Mean	Sd	2.5%	50%	97.5%
(Intercept)	0.1701610	1.130458	0.1340302	0.1696428	0.2163214
sexfemale	1.8895392	1.062383	1.6867760	1.8863675	2.1264544
residenceRural	0.8682583	1.071962	0.7600394	0.8666787	0.9933815
wealth_levelpoor	1.5174511	1.110705	1.2424249	1.5173661	1.8589355
wealth_levelmiddle	1.4657822	1.104662	1.2014895	1.4664742	1.7765531
wealth_levelricher	1.4918516	1.108018	1.2138778	1.4980571	1.8131760
wealth_levelrichest	1.5160678	1.106205	1.2498856	1.5143526	1.8380496

#wald.test(b = coef(imodel), Sigma = vcov(imodel), Terms=3:4)

## odds ratios and 95% CI
exp(cbind(OR = coef(imodel), confint(imodel)))

	OR.Mean	OR.Sd	OR.2.5%	OR.50%	OR.97.5%	2.5%	97.5%
(Intercept)	0.1701610	1.130458	0.1340302	0.1696428	0.2163214	0.1341410	0.2161817
sexfemale	1.8895392	1.062383	1.6867760	1.8863675	2.1264544	1.6868416	2.1254891
residenceRural	0.8682583	1.071962	0.7600394	0.8666787	0.9933815	0.7612302	0.9933567
wealth_levelpoor	1.5174511	1.110705	1.2424249	1.5173661	1.8589355	1.2425264	1.8575973
wealth_levelmiddle	1.4657822	1.104662	1.2014895	1.4664742	1.7765531	1.2021311	1.7761734
wealth_levelricher	1.4918516	1.108018	1.2138778	1.4980571	1.8131760	1.2147972	1.8119689
wealth_levelrichest	1.5160678	1.106205	1.2498856	1.5143526	1.8380496	1.2501492	1.8338462

Plotting some results

par(mar=c(1,1,1,1))

plot(imodel, term =  "sx(current_age)")

Visualization of posterior mean of structured and unstructured spatial effects

plot all spatial effects

plot(imodel, term = c("sx(province):mrf", "sx(province):re"))


par(mar=c(1,1,1,1))

Plotting autocorrelation for age variance samples

plot(imodel, term = "sx(current_age)", which = "var-samples", acf = TRUE)


par(mar=c(1,1,1,1))


#plot(imodel, which = "max-acf")

zs <- samples(imodel, term = "linear-samples")
par(mar=c(1,1,1,1))
plot(zs)

extract model residuals and plot histogram

#par(mar=c(1,1,1,1))
hist(residuals(imodel))

Plot structured spatial effects maps etc


plot(imodel, term = "sx(province):mrf", map = SA_bnd_prov, main = "Structured spatial effect")

Plot unstructured spatial effects

plot(imodel, term = "sx(province):re", map = SA_bnd_prov, main = "Unstructured spatial effect")

Plot both structured and unstructured effects

plot(imodel, term = "sx(province):total", map = SA_bnd_prov, main = "Total spatial effect", digits = 4)

Map the fitted spatial term only

SA_shp$spatial <- fitted(imodel, term = "sx(province):mrf")[, "Mean"]
plot(SA_shp, "spatial", main="fitted logit values")

Running same model with INLA

#install.packages("INLA", repos=c(getOption("repos"), INLA="https://inla.r-inla-download.org/R/stable"), dep=TRUE)

#SA_OGR <- rgdal::readOGR("ZAF_adm/ZAF_adm1_1.shp")
SA_OGR <- sf::st_read("ZAF_adm/ZAF_adm1_1.shp")
## Reading layer `ZAF_adm1_1' from data source 
##   `C:\Users\r1953\Downloads\Spatial Tutorial\ZAF_adm\ZAF_adm1_1.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 9 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 16.45802 ymin: -34.83514 xmax: 32.89125 ymax: -22.12661
## Geodetic CRS:  WGS 84
plot(SA_OGR, border = "blue", axes = TRUE, las = 1)


#SA_valid <- data.table::data.table(valid = gIsValid(SA_OGR, byid = TRUE)) 
SA_valid <- data.table::data.table(SA_OGR, byid = TRUE) 
sum(SA_valid$valid == F)
## [1] 0

#install.packages("https://cran.r-project.org/src/contrib/Archive/maptools/maptools_1.1-6.tar.gz", repos = NULL, type = "source")

## saving the Ssplus map in nam_shp folder 
#maptools::sp2WB(map = as(SA_shp, "SpatialPolygons"), filename = "ZAF_adm/sa_spus")
library(maptools)

SA_shp_s <- readShapePoly("SA_shp.shp",IDvar="OBJECTID",proj4string=CRS("+proj=longlat +ellps=clrk66"))
## Error in getinfo.shape(filen): Error opening SHP file

plot(SA_shp_s, border="blue", axes=TRUE, las=1)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'plot': object 'SA_shp_s' not found
#tf <- tempfile()
sp2WB(as(SA_shp_s, "SpatialPolygons"), filename="splus_911")
## Error: object 'SA_shp_s' not found

## create the nam_nb map using poly2nb function 
SA_nb <- spdep::poly2nb(SA_shp)


num <- sapply(SA_nb, length) 
adj <- unlist(SA_nb) 

sumNumNeigh <- length(unlist(SA_nb))

# Generate an adjacency
# Spasial model in INLA

nb2INLA("ZAF_adm/sa_inla.graph", SA_nb) 
sa_adj <- paste(getwd(), "/ZAF_adm/sa_inla.graph", sep = "") 


#generate adjacency
H <- inla.read.graph(filename = "ZAF_adm/sa_inla.graph") 
image(inla.graph2matrix(H), xlab = "", ylab = "")

add ID_1 to the data

hyp_id <- readr::read_csv("ZAF_adm/ZAF_adm1_1`.csv")
hypertensiondata <- hypertensiondata |> 
  mutate(NAME_1=province) |> 
  left_join(hyp_id |> 
              dplyr::select(NAME_0,ID_1,NAME_1) , by="NAME_1")

fitting CAR model

  #image(inla.graph2matrix(H), xlab = "", ylab = "")
formula_inla <- hypertension2 ~ 1 + as.factor(sex) + as.factor(residence) + current_age + as.factor(wealth_level) +
  ##our CAR specification 
f(ID_1, model="bym",graph=sa_adj, scale.model=TRUE, ## spefiying the priors for the unstri and str 
    hyper=list(prec.unstruct=list(prior="loggamma",param=c(1,0.001)), 
               prec.spatial=list(prior="loggamma",param=c(1,0.001)))) 

model_inla <- inla(formula_inla,family="binomial", data=hypertensiondata, control.compute=list(dic=TRUE))
summary(model_inla)
## Time used:
##     Pre = 0.859, Running = 2.08, Post = 0.266, Total = 3.21 
## Fixed effects:
##                                  mean    sd 0.025quant 0.5quant 0.975quant
## (Intercept)                    -4.971 0.124     -5.214   -4.971     -4.729
## as.factor(sex)female            0.629 0.062      0.508    0.629      0.750
## as.factor(residence)Rural      -0.260 0.063     -0.385   -0.260     -0.136
## current_age                     0.067 0.002      0.064    0.067      0.070
## as.factor(wealth_level)poor     0.407 0.101      0.209    0.407      0.605
## as.factor(wealth_level)middle   0.402 0.100      0.206    0.402      0.598
## as.factor(wealth_level)richer   0.435 0.099      0.241    0.435      0.630
## as.factor(wealth_level)richest  0.462 0.098      0.270    0.462      0.654
##                                  mode kld
## (Intercept)                    -4.971   0
## as.factor(sex)female            0.629   0
## as.factor(residence)Rural      -0.260   0
## current_age                     0.067   0
## as.factor(wealth_level)poor     0.407   0
## as.factor(wealth_level)middle   0.402   0
## as.factor(wealth_level)richer   0.435   0
## as.factor(wealth_level)richest  0.462   0
## 
## Random effects:
##   Name     Model
##     ID_1 BYM model
## 
## Model hyperparameters:
##                                           mean      sd 0.025quant 0.5quant
## Precision for ID_1 (iid component)     1097.78 1205.74      72.58   719.41
## Precision for ID_1 (spatial component)   10.32    7.50       2.00     8.39
##                                        0.975quant   mode
## Precision for ID_1 (iid component)        4299.03 197.59
## Precision for ID_1 (spatial component)      29.92   5.20
## 
## Deviance Information Criterion (DIC) ...............: 7813.98
## Deviance Information Criterion (DIC, saturated) ....: 7799.09
## Effective number of parameters .....................: 10.84
## 
## Marginal log-Likelihood:  -3947.62 
##  is computed 
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')

#summary of fixed effects
round(model_inla$summary.fixed, 3)

	mean	sd	0.025quant	0.5quant	0.975quant	mode
(Intercept)	-4.971	0.124	-5.214	-4.971	-4.729	-4.971
as.factor(sex)female	0.629	0.062	0.508	0.629	0.750	0.629
as.factor(residence)Rural	-0.260	0.063	-0.385	-0.260	-0.136	-0.260
current_age	0.067	0.002	0.064	0.067	0.070	0.067
as.factor(wealth_level)poor	0.407	0.101	0.209	0.407	0.605	0.407
as.factor(wealth_level)middle	0.402	0.100	0.206	0.402	0.598	0.402
as.factor(wealth_level)richer	0.435	0.099	0.241	0.435	0.630	0.435
as.factor(wealth_level)richest	0.462	0.098	0.270	0.462	0.654	0.462


#summary of random effects
round(head(model_inla$summary.random$ID_1), 3)

ID	mean	sd	0.025quant	0.5quant	0.975quant	mode	kld
1	0.328	0.328	-0.329	0.325	0.995	0.325	0.001
2	0.086	0.288	-0.496	0.085	0.670	0.085	0.001
3	-0.352	0.100	-0.549	-0.351	-0.156	-0.351	0.000
4	0.194	0.404	-0.614	0.191	1.016	0.192	0.001
5	-0.777	0.103	-0.980	-0.777	-0.575	-0.777	0.000
6	-0.144	0.093	-0.326	-0.144	0.037	-0.144	0.000

Plotting posterior graphs from INLA

Try this out in your own time….

Count Data at location level

For this we will use the COVID19 data
We use the Poisson log normal model for this analysis

Three steps for the lognormal model

Stage 1: The likelihood \[Y_{i}|\theta_{i}\sim Poisson(E_{i}\theta_{i})\] with $i=1,2,\dots, n$ with \[log(\theta_{i})=\beta_{0}+\beta_{1}X_{i} + v_{i}\] where $X_{i}$ is the covariate measured at county level (areal level)
Stage 2: The random effects (prior distribution) is \[v_{i}|\sigma^{2}_{i}\sim N(0,\sigma^{2}_{v})\]
stage 3: The hyperprior on the hyperparameters $\beta_{0}, ~\beta_{1},~ \sigma^{2}_{v}$ is given by \[p(\beta_{0},\beta_{1}, \sigma^{2}_{v})=p(\beta_{0})p(\beta_{1})p(\sigma^{2}_{v})\]
priors are assumed independent and for ease of Bayesian implementation, we work with the precision $\tau_{v}=\sigma^{-2}_{v}$ rather than $\sigma^{2}_{v}$.

Lognormal spatial model with covariates

We now add spatial (ICAR) random effects to the model. and for this we need a graph file as before
INLA specification of the model

\[log(\theta_{i})=\beta_{0}+\beta_{1}X_{i}+u_{i}+v_{i}\] with $u_{i}$ structured spatial effects with a CAR prior and $v_{i}$ the unstructured random effects with an iid normal prior.

Data exploration and computing standardized mortality rate

Modelling corona virus deaths in South Africa

Hypothesis: COVID deaths in South Africa have a significant spatial association

sadistricts <- shapefile("ZAF_adm/district2.shp")

#attach(sadistricts)

Describe patterns of deaths with hypertension prevalence (assess association visually)

summary(sadistricts)
## Object of class SpatialPolygonsDataFrame
## Coordinates:
##         min       max
## x  16.45189  32.94498
## y -34.83417 -22.12503
## Is projected: FALSE 
## proj4string : [+proj=longlat +datum=WGS84 +no_defs]
## Data attributes:
##    CATEGORY           DC_MDB_C              ID_3          DC_MN_C     
##  Length:52          Length:52          Min.   : 1.00   Min.   :101.0  
##  Class :character   Class :character   1st Qu.:13.75   1st Qu.:289.2  
##  Mode  :character   Mode  :character   Median :26.50   Median :522.5  
##                                        Mean   :26.50   Mean   :498.7  
##                                        3rd Qu.:39.25   3rd Qu.:665.5  
##                                        Max.   :52.00   Max.   :947.0  
##   DC_MN_C_st          DC_NAME           DC_NAME_C          MAP_TITLE        
##  Length:52          Length:52          Length:52          Length:52         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    PR_MDB_C            PR_CODE       PR_CODE_st          PR_NAME         
##  Length:52          Min.   :1.000   Length:52          Length:52         
##  Class :character   1st Qu.:2.000   Class :character   Class :character  
##  Mode  :character   Median :5.000   Mode  :character   Mode  :character  
##                     Mean   :4.615                                        
##                     3rd Qu.:6.250                                        
##                     Max.   :9.000                                        
##    ALBERS_ARE       SHAPE_Leng       SHAPE_Area          FID_1      
##  Min.   :  1645   Min.   : 2.806   Min.   : 0.1485   Min.   : 1.00  
##  1st Qu.:  7888   1st Qu.: 6.261   1st Qu.: 0.7269   1st Qu.:13.75  
##  Median : 15779   Median : 9.512   Median : 1.4313   Median :26.50  
##  Mean   : 23477   Mean   :10.011   Mean   : 2.1766   Mean   :26.50  
##  3rd Qu.: 28934   3rd Qu.:12.311   3rd Qu.: 2.6284   3rd Qu.:39.25  
##  Max.   :126836   Max.   :28.727   Max.   :11.9244   Max.   :52.00  
##    district           covidcases         deaths           expected      
##  Length:52          Min.   :   345   Min.   :   3.00   Min.   :   6.90  
##  Class :character   1st Qu.:  4702   1st Qu.:  40.75   1st Qu.:  94.05  
##  Mode  :character   Median :  8906   Median :  82.00   Median : 178.12  
##                     Mean   : 19209   Mean   : 248.00   Mean   : 384.19  
##                     3rd Qu.: 18274   3rd Qu.: 259.25   3rd Qu.: 365.47  
##                     Max.   :138219   Max.   :2764.00   Max.   :2764.38  
##    hypertensi   
##  Min.   :19.50  
##  1st Qu.:24.38  
##  Median :30.05  
##  Mean   :31.11  
##  3rd Qu.:37.58  
##  Max.   :56.00

sadistricts1 <- st_as_sf(sadistricts)
tm_shape(sadistricts1) + 
  tm_polygons(col = "hypertensi", title = "Hypertension") +
  tm_bubbles(size = "deaths", col = "deaths", palette = "Blues",
           style = "quantile", legend.size.show = FALSE, title.col = "Corona virus deaths",
border.col = "black", border.lwd = 0.1, border.alpha = 0.1) +
  tm_layout(legend.text.size = 0.8, legend.title.size = 1.1,
frame = FALSE)

Pull the data points (information) from the map object for use in simple models

Simple poisson model withn intercept and random error. The exponentiated intercept is the average rate (relative risk rate) fo deaths across the districts.

deaths.fit1<-inla(deaths ~ 1 + f(FID_1, model = "iid", param = c(0.5, 5e-04)), data=sadata, family = "poisson", E=expected)


summary(deaths.fit1)
## Time used:
##     Pre = 0.407, Running = 0.236, Post = 0.0469, Total = 0.69 
## Fixed effects:
##               mean    sd 0.025quant 0.5quant 0.975quant   mode kld
## (Intercept) -0.683 0.081     -0.844   -0.683     -0.525 -0.683   0
## 
## Random effects:
##   Name     Model
##     FID_1 IID model
## 
## Model hyperparameters:
##                     mean   sd 0.025quant 0.5quant 0.975quant mode
## Precision for FID_1 3.27 0.71       2.05     3.21       4.82 3.10
## 
## Marginal log-Likelihood:  -288.30 
##  is computed 
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
exp(deaths.fit1$summary.fixed[1,])

	mean	sd	0.025quant	0.5quant	0.975quant	mode	kld
(Intercept)	0.5050132	1.08442	0.4299473	0.5052501	0.5916091	0.505248	1

Fitting non-spatial lognormal model with covariate

deaths.fit2<-inla(deaths ~ 1 + hypertensi + f(FID_1, model = "iid", param=c(1, 0.014)), data=sadata, family = "poisson", E = expected, control.compute =list(dic = TRUE))

summary(deaths.fit2)
## Time used:
##     Pre = 0.281, Running = 0.198, Post = 0.0397, Total = 0.518 
## Fixed effects:
##               mean    sd 0.025quant 0.5quant 0.975quant   mode kld
## (Intercept) -1.273 0.306     -1.874   -1.274     -0.667 -1.274   0
## hypertensi   0.019 0.010      0.000    0.019      0.038  0.019   0
## 
## Random effects:
##   Name     Model
##     FID_1 IID model
## 
## Model hyperparameters:
##                     mean    sd 0.025quant 0.5quant 0.975quant mode
## Precision for FID_1 3.64 0.808       2.26     3.57       5.42 3.43
## 
## Deviance Information Criterion (DIC) ...............: 435.78
## Deviance Information Criterion (DIC, saturated) ....: 104.08
## Effective number of parameters .....................: 48.79
## 
## Marginal log-Likelihood:  -293.76 
##  is computed 
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')

Examine the posterior summaries for covariate, on the original (log scale- log RR) or exponential (RR)

deaths.fit2$summary.fixed

	mean	sd	0.025quant	0.5quant	0.975quant	mode	kld
(Intercept)	-1.2729664	0.3063442	-1.8738130	-1.2738241	-0.6672445	-1.2738137	0
hypertensi	0.0192011	0.0095942	0.0001816	0.0192459	0.0379660	0.0192455	0

deaths.fit2$summary.fixed[2,]

	mean	sd	0.025quant	0.5quant	0.975quant	mode	kld
hypertensi	0.0192011	0.0095942	0.0001816	0.0192459	0.037966	0.0192455	0

exp(deaths.fit2$summary.fixed[2,])

	mean	sd	0.025quant	0.5quant	0.975quant	mode	kld
hypertensi	1.019387	1.00964	1.000182	1.019432	1.038696	1.019432	1

Lognormal spatial model with covariates

#install.packages("SpatialEpi")
library(SpatialEpi)
sadata<-sadistricts@data
data(sadata)
Y<- sadistricts@data$deaths
X<- sadistricts@data$hypertensi
E<- sadistricts@data$expected
smr<-Y/E
sadata$smr<-smr

#plot the smr by district
par(mar = c(1, 1, 1, 1))
#mapvariable(sadata$smr, sadistrict.map)
#display.brewer.all()
sadistricts@data$smr<-smr

sadistricts1 <- st_as_sf(sadistricts)
tm_shape(sadistricts1) + tm_fill("smr", style = "quantile", n=5, palette = "Blues", title = "Covid smr") +tm_borders(alpha = .4)+ tm_compass()  + tm_layout(title = "South AFRICA, COVID2020", legend.text.size = 0.5, legend.title.size =0.5 ,legend.position = c("right", "bottom"), frame = FALSE)


#plot(sadistricts)

sadist_nb <- spdep::poly2nb(sadistricts)
nb2INLA("ZAF_adm/sadist_inla.graph", sadist_nb)
sadist_adj<-paste(getwd(), "/ZAF_adm/sadist_inla.graph", sep = "")

#Take a look at the data. Uncomment the following command!
#dplyr::glimpse(sadata)
#add variables region 1 and region 2 into the dataset for 
sadistricts@data$region1<-sadata$FID_1
sadistricts@data$region2<-sadata$FID_1
#The dataset is as below. Uncomment to see...
#sadistricts@data

Plotting the expected deaths

summary(sadistricts@data)
##    CATEGORY           DC_MDB_C              ID_3          DC_MN_C     
##  Length:52          Length:52          Min.   : 1.00   Min.   :101.0  
##  Class :character   Class :character   1st Qu.:13.75   1st Qu.:289.2  
##  Mode  :character   Mode  :character   Median :26.50   Median :522.5  
##                                        Mean   :26.50   Mean   :498.7  
##                                        3rd Qu.:39.25   3rd Qu.:665.5  
##                                        Max.   :52.00   Max.   :947.0  
##   DC_MN_C_st          DC_NAME           DC_NAME_C          MAP_TITLE        
##  Length:52          Length:52          Length:52          Length:52         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    PR_MDB_C            PR_CODE       PR_CODE_st          PR_NAME         
##  Length:52          Min.   :1.000   Length:52          Length:52         
##  Class :character   1st Qu.:2.000   Class :character   Class :character  
##  Mode  :character   Median :5.000   Mode  :character   Mode  :character  
##                     Mean   :4.615                                        
##                     3rd Qu.:6.250                                        
##                     Max.   :9.000                                        
##    ALBERS_ARE       SHAPE_Leng       SHAPE_Area          FID_1      
##  Min.   :  1645   Min.   : 2.806   Min.   : 0.1485   Min.   : 1.00  
##  1st Qu.:  7888   1st Qu.: 6.261   1st Qu.: 0.7269   1st Qu.:13.75  
##  Median : 15779   Median : 9.512   Median : 1.4313   Median :26.50  
##  Mean   : 23477   Mean   :10.011   Mean   : 2.1766   Mean   :26.50  
##  3rd Qu.: 28934   3rd Qu.:12.311   3rd Qu.: 2.6284   3rd Qu.:39.25  
##  Max.   :126836   Max.   :28.727   Max.   :11.9244   Max.   :52.00  
##    district           covidcases         deaths           expected      
##  Length:52          Min.   :   345   Min.   :   3.00   Min.   :   6.90  
##  Class :character   1st Qu.:  4702   1st Qu.:  40.75   1st Qu.:  94.05  
##  Mode  :character   Median :  8906   Median :  82.00   Median : 178.12  
##                     Mean   : 19209   Mean   : 248.00   Mean   : 384.19  
##                     3rd Qu.: 18274   3rd Qu.: 259.25   3rd Qu.: 365.47  
##                     Max.   :138219   Max.   :2764.00   Max.   :2764.38  
##    hypertensi         smr            region1         region2     
##  Min.   :19.50   Min.   :0.1106   Min.   : 1.00   Min.   : 1.00  
##  1st Qu.:24.38   1st Qu.:0.3683   1st Qu.:13.75   1st Qu.:13.75  
##  Median :30.05   Median :0.5642   Median :26.50   Median :26.50  
##  Mean   :31.11   Mean   :0.5755   Mean   :26.50   Mean   :26.50  
##  3rd Qu.:37.58   3rd Qu.:0.8409   3rd Qu.:39.25   3rd Qu.:39.25  
##  Max.   :56.00   Max.   :1.0331   Max.   :52.00   Max.   :52.00
mapvariable(sadistricts@data$expected, sadistricts)

Plotting the standardized death rates

mapvariable(sadistricts@data$smr, sadistricts)


#check class of adjacency object
class(sadist_adj)
## [1] "character"

## [1] "character"

formula<- deaths ~ 1 + hypertensi + f(region1, model = "iid", param = c(1, 0.014)) + 
  f(region2, model = "besag", graph = sadist_adj, param = c(1, 0.68))

deaths.fit3<-inla(formula, data = sadistricts@data, family = "poisson", E = expected, control.predictor = list(compute = TRUE))

summary(sadistricts@data)
##    CATEGORY           DC_MDB_C              ID_3          DC_MN_C     
##  Length:52          Length:52          Min.   : 1.00   Min.   :101.0  
##  Class :character   Class :character   1st Qu.:13.75   1st Qu.:289.2  
##  Mode  :character   Mode  :character   Median :26.50   Median :522.5  
##                                        Mean   :26.50   Mean   :498.7  
##                                        3rd Qu.:39.25   3rd Qu.:665.5  
##                                        Max.   :52.00   Max.   :947.0  
##                                                                       
##   DC_MN_C_st          DC_NAME           DC_NAME_C          MAP_TITLE        
##  Length:52          Length:52          Length:52          Length:52         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    PR_MDB_C            PR_CODE       PR_CODE_st          PR_NAME         
##  Length:52          Min.   :1.000   Length:52          Length:52         
##  Class :character   1st Qu.:2.000   Class :character   Class :character  
##  Mode  :character   Median :5.000   Mode  :character   Mode  :character  
##                     Mean   :4.615                                        
##                     3rd Qu.:6.250                                        
##                     Max.   :9.000                                        
##                                                                          
##    ALBERS_ARE       SHAPE_Leng       SHAPE_Area          FID_1      
##  Min.   :  1645   Min.   : 2.806   Min.   : 0.1485   Min.   : 1.00  
##  1st Qu.:  7888   1st Qu.: 6.261   1st Qu.: 0.7269   1st Qu.:13.75  
##  Median : 15779   Median : 9.512   Median : 1.4313   Median :26.50  
##  Mean   : 23477   Mean   :10.011   Mean   : 2.1766   Mean   :26.50  
##  3rd Qu.: 28934   3rd Qu.:12.311   3rd Qu.: 2.6284   3rd Qu.:39.25  
##  Max.   :126836   Max.   :28.727   Max.   :11.9244   Max.   :52.00  
##                                                                     
##    district           covidcases         deaths           expected      
##  Length:52          Min.   :   345   Min.   :   3.00   Min.   :   6.90  
##  Class :character   1st Qu.:  4702   1st Qu.:  40.75   1st Qu.:  94.05  
##  Mode  :character   Median :  8906   Median :  82.00   Median : 178.12  
##                     Mean   : 19209   Mean   : 248.00   Mean   : 384.19  
##                     3rd Qu.: 18274   3rd Qu.: 259.25   3rd Qu.: 365.47  
##                     Max.   :138219   Max.   :2764.00   Max.   :2764.38  
##                                                                         
##    hypertensi         smr            region1         region2           re_u   
##  Min.   :19.50   Min.   :0.1106   Min.   : 1.00   Min.   : 1.00   1      : 1  
##  1st Qu.:24.38   1st Qu.:0.3683   1st Qu.:13.75   1st Qu.:13.75   2      : 1  
##  Median :30.05   Median :0.5642   Median :26.50   Median :26.50   3      : 1  
##  Mean   :31.11   Mean   :0.5755   Mean   :26.50   Mean   :26.50   4      : 1  
##  3rd Qu.:37.58   3rd Qu.:0.8409   3rd Qu.:39.25   3rd Qu.:39.25   5      : 1  
##  Max.   :56.00   Max.   :1.0331   Max.   :52.00   Max.   :52.00   6      : 1  
##                                                                   (Other):46  
##       re_v   
##  1      : 1  
##  2      : 1  
##  3      : 1  
##  4      : 1  
##  5      : 1  
##  6      : 1  
##  (Other):46
summary(deaths.fit3)
## Time used:
##     Pre = 0.565, Running = 0.398, Post = 0.0777, Total = 1.04 
## Fixed effects:
##               mean    sd 0.025quant 0.5quant 0.975quant   mode kld
## (Intercept) -0.435 0.346     -1.114   -0.435      0.247 -0.435   0
## hypertensi  -0.008 0.011     -0.030   -0.008      0.014 -0.008   0
## 
## Random effects:
##   Name     Model
##     region1 IID model
##    region2 Besags ICAR model
## 
## Model hyperparameters:
##                        mean     sd 0.025quant 0.5quant 0.975quant  mode
## Precision for region1 85.32 98.079      6.038    54.90     344.34 16.52
## Precision for region2  1.46  0.389      0.844     1.41       2.37  1.32
## 
## Marginal log-Likelihood:  -318.40 
##  is computed 
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')

Plotting the maps

#scan the output data structure- uncomment to see
#deaths.fit3$summary.random$region1
#pull the 5th column and exponentiate
Residual_nonspatial <- exp(deaths.fit3$summary.random$region1[5])
Residual_spatial <- exp(deaths.fit3$summary.random$region2[5])
par(mfrow = c(1, 1), mar = c(0.1, 0.1, 0.1, 0.1))
mapvariable(Residual_nonspatial[, 1], sadistricts)

Spatial effects

par(mfrow = c(1, 1), mar = c(0.1, 0.1, 0.1, 0.1))
mapvariable(Residual_spatial[, 1], sadistricts)

About 90% of COVID deaths in the Western Cape is spatially related and this can be due to socio-economic dynamics in that region and more specifically those districts.