library(R2BayesX)
library(BayesXsrc)
library(BayesX)
library(INLA)
library(spdep)
library(stringdist)
library(sf)
library(SpatialEpi)
library(raster)
library(tidyverse)
library(dplyr)
Geo Spatial Statistics in R
An R script to spatial analysis
0.1 The Moran’s I statistic
- Checking for spatial autocorrelation
setwd("C:/Users/VUSI/Downloads/CAR_models-master/ZAF_adm")
<- st_read("ZAF_adm1_1.shp")) (p
Reading layer `ZAF_adm1_1' from data source
`C:\Users\VUSI\Downloads\CAR_models-master\ZAF_adm\ZAF_adm1_1.shp'
using driver `ESRI Shapefile'
Simple feature collection with 9 features and 9 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 16.45802 ymin: -34.83514 xmax: 32.89125 ymax: -22.12661
Geodetic CRS: WGS 84
Simple feature collection with 9 features and 9 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 16.45802 ymin: -34.83514 xmax: 32.89125 ymax: -22.12661
Geodetic CRS: WGS 84
ID_0 ISO NAME_0 ID_1 NAME_1 TYPE_1 ENGTYPE_1 NL_NAME_1
1 211 ZAF South Africa 1 Eastern Cape Provinsie Province <NA>
2 211 ZAF South Africa 2 Free State Provinsie Province <NA>
3 211 ZAF South Africa 3 Gauteng Provinsie Province <NA>
4 211 ZAF South Africa 4 KwaZulu-Natal Provinsie Province <NA>
5 211 ZAF South Africa 5 Limpopo Provinsie Province <NA>
6 211 ZAF South Africa 6 Mpumalanga Provinsie Province <NA>
7 211 ZAF South Africa 7 North West Provinsie Province <NA>
8 211 ZAF South Africa 8 Northern Cape Provinsie Province <NA>
9 211 ZAF South Africa 9 Western Cape Provinsie Province <NA>
VARNAME_1
1 Oos-Kaap
2 Orange Free State|Vrystaat
3 Pretoria/Witwatersrand/Vaal
4 Natal and Zululand
5 Noordelike Provinsie|Northern Transvaal|Northern Province
6 Eastern Transvaal
7 North-West|Noordwes
8 Noord-Kaap
9 Wes-Kaap
geometry
1 MULTIPOLYGON (((25.64375 -3...
2 MULTIPOLYGON (((27.98611 -2...
3 MULTIPOLYGON (((28.75139 -2...
4 MULTIPOLYGON (((31.63736 -2...
5 MULTIPOLYGON (((29.66671 -2...
6 MULTIPOLYGON (((31.87835 -2...
7 MULTIPOLYGON (((26.40328 -2...
8 MULTIPOLYGON (((16.99514 -2...
9 MULTIPOLYGON (((19.66764 -3...
library(RColorBrewer)
# ---- 1) Read shapefile as sf ----
<- st_read("C:/Users/VUSI/Downloads/CAR_models-master/ZAF_adm/ZAF_adm1_1.shp")) (p
Reading layer `ZAF_adm1_1' from data source
`C:\Users\VUSI\Downloads\CAR_models-master\ZAF_adm\ZAF_adm1_1.shp'
using driver `ESRI Shapefile'
Simple feature collection with 9 features and 9 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 16.45802 ymin: -34.83514 xmax: 32.89125 ymax: -22.12661
Geodetic CRS: WGS 84
Simple feature collection with 9 features and 9 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 16.45802 ymin: -34.83514 xmax: 32.89125 ymax: -22.12661
Geodetic CRS: WGS 84
ID_0 ISO NAME_0 ID_1 NAME_1 TYPE_1 ENGTYPE_1 NL_NAME_1
1 211 ZAF South Africa 1 Eastern Cape Provinsie Province <NA>
2 211 ZAF South Africa 2 Free State Provinsie Province <NA>
3 211 ZAF South Africa 3 Gauteng Provinsie Province <NA>
4 211 ZAF South Africa 4 KwaZulu-Natal Provinsie Province <NA>
5 211 ZAF South Africa 5 Limpopo Provinsie Province <NA>
6 211 ZAF South Africa 6 Mpumalanga Provinsie Province <NA>
7 211 ZAF South Africa 7 North West Provinsie Province <NA>
8 211 ZAF South Africa 8 Northern Cape Provinsie Province <NA>
9 211 ZAF South Africa 9 Western Cape Provinsie Province <NA>
VARNAME_1
1 Oos-Kaap
2 Orange Free State|Vrystaat
3 Pretoria/Witwatersrand/Vaal
4 Natal and Zululand
5 Noordelike Provinsie|Northern Transvaal|Northern Province
6 Eastern Transvaal
7 North-West|Noordwes
8 Noord-Kaap
9 Wes-Kaap
geometry
1 MULTIPOLYGON (((25.64375 -3...
2 MULTIPOLYGON (((27.98611 -2...
3 MULTIPOLYGON (((28.75139 -2...
4 MULTIPOLYGON (((31.63736 -2...
5 MULTIPOLYGON (((29.66671 -2...
6 MULTIPOLYGON (((31.87835 -2...
7 MULTIPOLYGON (((26.40328 -2...
8 MULTIPOLYGON (((16.99514 -2...
9 MULTIPOLYGON (((19.66764 -3...
# ---- 2) Add your variable (ensure numeric, length matches rows) ----
$percent <- c(21.9, 24.6, 15.5, 15.5, 13.4, 16.2, 21.2, 25.5, 28.9)
pstopifnot(nrow(p) == length(p$percent))
stopifnot(is.numeric(p$percent))
# ---- 3) Make neighbors (queen contiguity) ----
# Use a unique ID present in your data; NAME_1 exists and is unique
<- poly2nb(p, row.names = p$NAME_1)
w <- nb2listw(w, style = "B") # binary weights ww
# 4) Map - corrected tmap syntax for v4+
library(tmap)
tmap_mode("plot")
tm_shape(p) +
tm_polygons(
col = "percent", # Use 'col' instead of 'fill' in newer tmap versions
palette = "Reds",
style = "quantile",
n = 5,
title = "Hypertension prevalence",
border.alpha = 0.4 # Use dot notation instead of underscore
+
) tm_text("NAME_1") +
tm_compass() +
tm_title("South AFRICA, DHIS2016") +
tm_layout(
legend.text.size = 0.5,
legend.title.size = 0.5,
legend.position = c("right", "bottom"),
frame = FALSE
)
# ---- 5) Moran's I (global) ----
# Quick statistic
<- nrow(p)
n_areas <- moran(p$percent, ww, n = n_areas, S0 = Szero(ww))
moran_stat print(moran_stat)
$I
[1] 0.2909315
$K
[1] 1.679342
# Analytical test
print(moran.test(p$percent, ww, randomisation = FALSE))
Moran I test under normality
data: p$percent
weights: ww
Moran I statistic standard deviate = 2.4985, p-value = 0.006237
alternative hypothesis: greater
sample estimates:
Moran I statistic Expectation Variance
0.2909315 -0.1250000 0.0277141
# Permutation test
set.seed(123)
print(moran.mc(p$percent, ww, nsim = 99)) # permutation test
Monte-Carlo simulation of Moran I
data: p$percent
weights: ww
number of simulations + 1: 100
statistic = 0.29093, observed rank = 99, p-value = 0.01
alternative hypothesis: greater
1 BayesX CAR model for hypertension in South Africa
library(sf)
setwd("C:/Users/VUSI/Downloads/CAR_models-master/ZAF_adm")
<-st_read("ZAF_adm1_1.shp") SA_shp
Reading layer `ZAF_adm1_1' from data source
`C:\Users\VUSI\Downloads\CAR_models-master\ZAF_adm\ZAF_adm1_1.shp'
using driver `ESRI Shapefile'
Simple feature collection with 9 features and 9 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 16.45802 ymin: -34.83514 xmax: 32.89125 ymax: -22.12661
Geodetic CRS: WGS 84
plot(SA_shp)
## read shapefile into bnd object
setwd("C:/Users/VUSI/Downloads/CAR_models-master/ZAF_adm")
<- file.path("C:/Users/VUSI/Downloads/CAR_models-master/ZAF_adm/ZAF_adm1_1")
shpname2 <- BayesX::shp2bnd(shpname = shpname2, regionnames = "ID_1", check.is.in = F) SA_bnd_prov
Reading map ... finished
Note: map consists originally of 53 polygons
Note: map consists of 9 regions
::drawmap(map=SA_bnd_prov) BayesX
<-BayesX::bnd2gra(SA_bnd_prov) sagra
Start neighbor search ...
progress: 50%
progress: 100%
Neighbor search finished.
1.1 BayesX Model
-uses a normal prior for the \(\beta\)s and an Inverse Gamma, \(IG(a,b)\) for precision parameter \(\tau^{2}\). \(a=b=0.001\) is assumed
The variance parameter \(tau^{2}_{j}\) is equivalent to the inverse smoothing parameter in a frequentist approach and controls the trade off between flexibility and smoothness.
Markov random fields (Fahrmeir et al. 2004): Defines a Markov random field prior for a spatial covariate, where geographical information is provided by a map object in boundary or graph file format.
mrf prior is used for the spatial effects and an MCMC algorithm implemented for sampling on the parameters
library(haven)
<- haven::read_dta("hypertensiondat.dta")) (hypertensiondata
# A tibble: 10,294 × 13
current_age age_cat region type_residence residence education_level
<dbl> <dbl+lbl> <dbl+lbl> <dbl+lbl> <dbl+lbl> <dbl+lbl>
1 45 7 [7. 45-49] 7 [7. ga… 1 [1. urban] 1 [1. ur… 2 [2. secondar…
2 60 10 [10. 60-64] 7 [7. ga… 1 [1. urban] 1 [1. ur… 2 [2. secondar…
3 30 4 [4. 30-34] 7 [7. ga… 1 [1. urban] 1 [1. ur… 2 [2. secondar…
4 43 6 [6. 40-44] 7 [7. ga… 1 [1. urban] 1 [1. ur… 2 [2. secondar…
5 15 1 [1. 15-19] 5 [5. kw… 2 [2. rural] 2 [2. ru… 2 [2. secondar…
6 20 2 [2. 20-24] 5 [5. kw… 2 [2. rural] 2 [2. ru… 2 [2. secondar…
7 15 1 [1. 15-19] 5 [5. kw… 2 [2. rural] 2 [2. ru… 2 [2. secondar…
8 55 9 [9. 55-59] 5 [5. kw… 2 [2. rural] 2 [2. ru… 1 [1. primary]
9 17 1 [1. 15-19] 5 [5. kw… 2 [2. rural] 2 [2. ru… 2 [2. secondar…
10 23 2 [2. 20-24] 4 [4. fr… 1 [1. urban] 1 [1. ur… 1 [1. primary]
# ℹ 10,284 more rows
# ℹ 7 more variables: wealth_level <dbl+lbl>, hypertension <dbl+lbl>,
# sex <dbl+lbl>, hypertension2 <dbl+lbl>, caseid <dbl>, province <dbl+lbl>,
# diabetes <dbl>
#recoding data to align with shapefile regions codes and names
$province<-factor(hypertensiondata$province,
hypertensiondatalevels=c(1,2,3,4,5,6,7,8,9),
labels=c("Eastern cape","Freestate", "Gauteng","Kwazulu natal","Limpopo", "Mpumalanga", "North west", "Northern cape", "Western cape"))
#defining factor variables
<- within(hypertensiondata, {
hypertensiondata <- factor(residence, levels=c(1,2), labels=c("Urban", "Rural"))
residence <-factor(education_level, levels = c(0,1,2,3), labels = c("no education", "primary", "secondary","higher"))
education_level<- factor(wealth_level, levels = c(1,2,3,4,5), labels = c("poorest","poor","middle","richer","richest"))
wealth_level<-factor(sex, levels = c(0,1), labels = c("male","female"))
sex
})
1.2 Logistic regression model using stata and R
library(Statamarkdown)
"C:\Users\VUSI\Downloads\CAR_models-master"
cd
use "hypertensiondat.dta", clear
logit hypertension2 i.sex i.residence current_age i.wealth_level
C:\Users\VUSI\Downloads\CAR_models-master
Iteration 0: Log likelihood = -5075.6754
Iteration 1: Log likelihood = -4043.8348
Iteration 2: Log likelihood = -3937.7108
Iteration 3: Log likelihood = -3936.8902
Iteration 4: Log likelihood = -3936.89
Logistic regression Number of obs = 10,294
LR chi2(7) = 2277.57
Prob > chi2 = 0.0000
Log likelihood = -3936.89 Pseudo R2 = 0.2244
------------------------------------------------------------------------------
hypertensi~2 | Coefficient Std. err. z P>|z| [95% conf. interval]
-------------+----------------------------------------------------------------
sex |
1. Female | .6251579 .0614456 10.17 0.000 .5047268 .7455889
|
residence |
2. rural | -.3940172 .0585302 -6.73 0.000 -.5087343 -.2793002
current_age | .0662251 .0016896 39.20 0.000 .0629135 .0695367
|
wealth_level |
2. poorer | .3269566 .0993283 3.29 0.001 .1322768 .5216364
3. middle | .3187024 .0983854 3.24 0.001 .1258705 .5115343
4. richer | .3845553 .0984207 3.91 0.000 .1916543 .5774563
5. richest | .3996982 .0969397 4.12 0.000 .2096998 .5896966
|
_cons | -4.956168 .1198338 -41.36 0.000 -5.191038 -4.721298
------------------------------------------------------------------------------
names(hypertensiondata)
[1] "current_age" "age_cat" "region" "type_residence"
[5] "residence" "education_level" "wealth_level" "hypertension"
[9] "sex" "hypertension2" "caseid" "province"
[13] "diabetes"
library(haven); library(dplyr); library(survival); library(survminer); library(broom)
<- c(
cat_vars "age_cat","region","type_residence","residence","education_level",
"wealth_level","hypertension","sex","hypertension2","province"
)
<- hypertensiondata %>%
(hyp mutate(across(
all_of(cat_vars),
~ if (inherits(.x, "haven_labelled")) haven::as_factor(.x) else as.factor(.x)
)))
# A tibble: 10,294 × 13
current_age age_cat region type_residence residence education_level
<dbl> <fct> <fct> <fct> <fct> <fct>
1 45 7. 45-49 7. gauteng 1. urban Urban secondary
2 60 10. 60-64 7. gauteng 1. urban Urban secondary
3 30 4. 30-34 7. gauteng 1. urban Urban secondary
4 43 6. 40-44 7. gauteng 1. urban Urban secondary
5 15 1. 15-19 5. kwazulu-na… 2. rural Rural secondary
6 20 2. 20-24 5. kwazulu-na… 2. rural Rural secondary
7 15 1. 15-19 5. kwazulu-na… 2. rural Rural secondary
8 55 9. 55-59 5. kwazulu-na… 2. rural Rural primary
9 17 1. 15-19 5. kwazulu-na… 2. rural Rural secondary
10 23 2. 20-24 4. free state 1. urban Urban primary
# ℹ 10,284 more rows
# ℹ 7 more variables: wealth_level <fct>, hypertension <fct>, sex <fct>,
# hypertension2 <fct>, caseid <dbl>, province <fct>, diabetes <dbl>
<- glm(hypertension2 ~ sex + residence + current_age + wealth_level,
model data=hypertensiondata, family=binomial())
summary(model)
Call:
glm(formula = hypertension2 ~ sex + residence + current_age +
wealth_level, family = binomial(), data = hypertensiondata)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.95617 0.11983 -41.359 < 2e-16 ***
sexfemale 0.62516 0.06144 10.174 < 2e-16 ***
residenceRural -0.39402 0.05853 -6.732 1.67e-11 ***
current_age 0.06623 0.00169 39.195 < 2e-16 ***
wealth_levelpoor 0.32696 0.09933 3.292 0.000996 ***
wealth_levelmiddle 0.31870 0.09839 3.239 0.001198 **
wealth_levelricher 0.38455 0.09842 3.907 9.33e-05 ***
wealth_levelrichest 0.39970 0.09694 4.123 3.74e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 10151.4 on 10293 degrees of freedom
Residual deviance: 7873.8 on 10286 degrees of freedom
AIC: 7889.8
Number of Fisher Scoring iterations: 5
exp(coef(model)) # odds ratios
(Intercept) sexfemale residenceRural current_age
0.007039851 1.868541187 0.674342357 1.068467207
wealth_levelpoor wealth_levelmiddle wealth_levelricher wealth_levelrichest
1.386741395 1.375342052 1.468961041 1.491374635
library(colorspace)
<- bayesx(hypertension2 ~ sex + residence + wealth_level + sx(current_age) + sx(province, bs="mrf", map=sagra)
imodel + sx(province, bs="re"), family = "binomial", method = "MCMC", iterations = 12000, burnin = 2000,
step = 10, weights = NULL,subset = NULL, offset = NULL, na.action = NULL, seed = 12345,
predict = TRUE, data = hypertensiondata)
summary(imodel)
Call:
bayesx(formula = hypertension2 ~ sex + residence + wealth_level +
sx(current_age) + sx(province, bs = "mrf", map = sagra) +
sx(province, bs = "re"), data = hypertensiondata, weights = NULL,
subset = NULL, offset = NULL, na.action = NULL, family = "binomial",
method = "MCMC", iterations = 12000, burnin = 2000, step = 10,
seed = 12345, predict = TRUE)
Fixed effects estimation results:
Parametric coefficients:
Mean Sd 2.5% 50% 97.5%
(Intercept) -1.7710 0.1226 -2.0097 -1.7741 -1.5310
sexfemale 0.6363 0.0605 0.5228 0.6347 0.7545
residenceRural -0.1413 0.0695 -0.2744 -0.1431 -0.0066
wealth_levelpoor 0.4170 0.1050 0.2171 0.4170 0.6200
wealth_levelmiddle 0.3824 0.0995 0.1836 0.3829 0.5747
wealth_levelricher 0.4000 0.1026 0.1938 0.4042 0.5951
wealth_levelrichest 0.4161 0.1009 0.2231 0.4150 0.6087
Smooth terms variances:
Mean Sd 2.5% 50% 97.5% Min Max
sx(current_age) 0.0153 0.0155 0.0027 0.0102 0.0621 0.0016 0.1353
sx(province):mrf 0.2329 0.3008 0.0015 0.1622 0.9720 0.0002 4.6444
Random effects variances:
Mean Sd 2.5% 50% 97.5% Min Max
sx(province):re 0.0508 0.0727 0.0008 0.0240 0.2491 0.0003 0.8077
N = 10294 burnin = 2000 method = MCMC family = binomial
iterations = 12000 step = 10
#Getting Odds Ratios
exp(coef(imodel))
Mean Sd 2.5% 50% 97.5%
(Intercept) 0.1701610 1.130458 0.1340302 0.1696428 0.2163214
sexfemale 1.8895392 1.062382 1.6867760 1.8863675 2.1264544
residenceRural 0.8682583 1.071962 0.7600394 0.8666787 0.9933815
wealth_levelpoor 1.5174511 1.110705 1.2424249 1.5173661 1.8589355
wealth_levelmiddle 1.4657822 1.104662 1.2014895 1.4664742 1.7765531
wealth_levelricher 1.4918516 1.108018 1.2138778 1.4980571 1.8131760
wealth_levelrichest 1.5160678 1.106205 1.2498856 1.5143526 1.8380496
#wald.test(b = coef(imodel), Sigma = vcov(imodel), Terms=3:4)
## odds ratios and 95% CI
exp(cbind(OR = coef(imodel), confint(imodel)))
OR.Mean OR.Sd OR.2.5% OR.50% OR.97.5% 2.5%
(Intercept) 0.1701610 1.130458 0.1340302 0.1696428 0.2163214 0.1341410
sexfemale 1.8895392 1.062382 1.6867760 1.8863675 2.1264544 1.6868416
residenceRural 0.8682583 1.071962 0.7600394 0.8666787 0.9933815 0.7612302
wealth_levelpoor 1.5174511 1.110705 1.2424249 1.5173661 1.8589355 1.2425264
wealth_levelmiddle 1.4657822 1.104662 1.2014895 1.4664742 1.7765531 1.2021311
wealth_levelricher 1.4918516 1.108018 1.2138778 1.4980571 1.8131760 1.2147972
wealth_levelrichest 1.5160678 1.106205 1.2498856 1.5143526 1.8380496 1.2501492
97.5%
(Intercept) 0.2161817
sexfemale 2.1254891
residenceRural 0.9933567
wealth_levelpoor 1.8575973
wealth_levelmiddle 1.7761734
wealth_levelricher 1.8119689
wealth_levelrichest 1.8338462
1.3 Plotting some results
par(mar=c(1,1,1,1))
plot(imodel, term = "sx(current_age)")
#Visualization of posterior mean of structured and unstructured spatial effects
## plot all spatial effects
plot(imodel, term = c("sx(province):mrf", "sx(province):re"))
par(mar=c(1,1,1,1))
#Plotting autocorrelation for age variance samples
plot(imodel, term = "sx(current_age)", which = "var-samples", acf = TRUE)
par(mar=c(1,1,1,1))
#plot(imodel, which = "max-acf")
<- samples(imodel, term = "linear-samples")
zs par(mar=c(1,1,1,1))
plot(zs)
## extract model residuals and plot histogram
#par(mar=c(1,1,1,1))
hist(residuals(imodel))
1.4 Plot structured spatial effects maps etc
plot(imodel, term = "sx(province):mrf", map = SA_bnd_prov, main = "Structured spatial effect")
- Plot unstructured spatial effects
plot(imodel, term = "sx(province):re", map = SA_bnd_prov, main = "Unstructured spatial effect")
- Plot both structured and unstructured effects
plot(imodel, term = "sx(province):total", map = SA_bnd_prov, main = "Total spatial effect", digits = 4)
- Map the fitted spatial term only
library(ggplot2)
<- fitted(imodel, term = "sx(province):mrf")[, "Mean"]
fv $spatial <- fv) (SA_shp
[1] 0.0989143 0.1146970 -0.1237380 -0.1069340 -0.3885810 -0.0552362 0.0642649
[8] 0.1957780 0.2008350
SA_shp
Simple feature collection with 9 features and 10 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 16.45802 ymin: -34.83514 xmax: 32.89125 ymax: -22.12661
Geodetic CRS: WGS 84
ID_0 ISO NAME_0 ID_1 NAME_1 TYPE_1 ENGTYPE_1 NL_NAME_1
1 211 ZAF South Africa 1 Eastern Cape Provinsie Province <NA>
2 211 ZAF South Africa 2 Free State Provinsie Province <NA>
3 211 ZAF South Africa 3 Gauteng Provinsie Province <NA>
4 211 ZAF South Africa 4 KwaZulu-Natal Provinsie Province <NA>
5 211 ZAF South Africa 5 Limpopo Provinsie Province <NA>
6 211 ZAF South Africa 6 Mpumalanga Provinsie Province <NA>
7 211 ZAF South Africa 7 North West Provinsie Province <NA>
8 211 ZAF South Africa 8 Northern Cape Provinsie Province <NA>
9 211 ZAF South Africa 9 Western Cape Provinsie Province <NA>
VARNAME_1
1 Oos-Kaap
2 Orange Free State|Vrystaat
3 Pretoria/Witwatersrand/Vaal
4 Natal and Zululand
5 Noordelike Provinsie|Northern Transvaal|Northern Province
6 Eastern Transvaal
7 North-West|Noordwes
8 Noord-Kaap
9 Wes-Kaap
geometry spatial
1 MULTIPOLYGON (((25.64375 -3... 0.0989143
2 MULTIPOLYGON (((27.98611 -2... 0.1146970
3 MULTIPOLYGON (((28.75139 -2... -0.1237380
4 MULTIPOLYGON (((31.63736 -2... -0.1069340
5 MULTIPOLYGON (((29.66671 -2... -0.3885810
6 MULTIPOLYGON (((31.87835 -2... -0.0552362
7 MULTIPOLYGON (((26.40328 -2... 0.0642649
8 MULTIPOLYGON (((16.99514 -2... 0.1957780
9 MULTIPOLYGON (((19.66764 -3... 0.2008350
library(sf)
library(dplyr)
library(ggplot2)
# 0) Sanity checks
stopifnot(inherits(SA_shp, "sf")) # must be an sf
stopifnot(!is.null(sf::st_geometry(SA_shp))) # geometry must exist
stopifnot(length(SA_shp$spatial) == nrow(SA_shp)) # one value per region
# 1) Choropleth of the fitted spatial effect (log-odds)
ggplot(SA_shp) +
geom_sf(aes(fill = spatial)) +
scale_fill_distiller(palette = "RdBu", direction = -1, # blue=low, red=high
limits = range(SA_shp$spatial, na.rm = TRUE),
oob = scales::squish) +
labs(title = "Fitted spatial effect (logit scale)",
fill = "log-odds") +
theme_minimal()
1.5 Running same model with INLA
<- sf::st_read("C:/Users/VUSI/Downloads/CAR_models-master/ZAF_adm/ZAF_adm1_1.shp") SA_OGR
Reading layer `ZAF_adm1_1' from data source
`C:\Users\VUSI\Downloads\CAR_models-master\ZAF_adm\ZAF_adm1_1.shp'
using driver `ESRI Shapefile'
Simple feature collection with 9 features and 9 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 16.45802 ymin: -34.83514 xmax: 32.89125 ymax: -22.12661
Geodetic CRS: WGS 84
plot(SA_OGR, border = "blue", axes = TRUE, las = 1)
#SA_valid <- data.table::data.table(valid = gIsValid(SA_OGR, byid = TRUE))
<- data.table::data.table(SA_OGR, byid = TRUE)
SA_valid sum(SA_valid$valid == F)
[1] 0
# install.packages(c("sf","spdep","ggplot2"))
library(sf)
library(spdep)
library(ggplot2)
# --- 1) Ensure SA_shp is sf ---
# If you already have SA_shp in memory but it's not sf, convert or re-read.
if (!inherits(SA_shp, "sf")) {
if (inherits(SA_shp, "Spatial")) {
<- st_as_sf(SA_shp) # sp -> sf
SA_shp else if ("geometry" %in% names(SA_shp)) {
} <- st_as_sf(SA_shp) # list-column exists but class lost
SA_shp else {
} <- st_read("SA_shp") # read from disk (adjust path)
SA_shp
}
}
# --- 2) Set CRS only AFTER it's sf ---
if (is.na(st_crs(SA_shp))) st_crs(SA_shp) <- 4326 # WGS84; change if you know better
# --- 3) Unique id for neighbors (if missing) ---
if (!"OBJECTID" %in% names(SA_shp)) SA_shp$OBJECTID <- seq_len(nrow(SA_shp))
# --- 4) Plot with ggplot2 ---
ggplot(SA_shp) + geom_sf(color = "blue", fill = NA) + theme_minimal()
# --- 5) Neighbors + WinBUGS vectors (optional) ---
<- spdep::poly2nb(SA_shp, row.names = SA_shp$OBJECTID)
nb <- spdep::nb2WB(nb)
wb
# Write a simple 3-line text file (like sp2WB output)
writeLines(c(
paste(wb$num, collapse = " "),
paste(wb$adj, collapse = " "),
paste(wb$weights, collapse = " ")
"splus_911.txt") ),
<- spdep::poly2nb(SA_shp)
SA_nb
# 2. Generate adjacency matrix for INLA
nb2INLA("sa_inla.graph", SA_nb)
<- file.path(getwd(), "sa_inla.graph") # Use file.path for proper path handling
sa_adj
# 3. Read the graph
<- inla.read.graph(filename = "sa_inla.graph")
H image(inla.graph2matrix(H), xlab = "", ylab = "")
# 4. Add ID_1 to the data - FIXED PATH (use forward slashes or double backslashes)
<- read_csv("C:/Users/VUSI/Downloads/CAR_models-master/ZAF_adm/ZAF_adm.csv") # Corrected path
hyp_id
# 5. PROPER JOINING with diagnostics
cat("Unique provinces in hypertensiondata:\n")
Unique provinces in hypertensiondata:
print(unique(hypertensiondata$province))
[1] Gauteng Kwazulu natal Freestate Mpumalanga Eastern cape
[6] North west Western cape Northern cape Limpopo
9 Levels: Eastern cape Freestate Gauteng Kwazulu natal Limpopo ... Western cape
cat("\nUnique NAME_1 in hyp_id:\n")
Unique NAME_1 in hyp_id:
print(unique(hyp_id$NAME_1))
[1] "Eastern Cape" "Free State" "Gauteng" "KwaZulu-Natal"
[5] "Limpopo" "Mpumalanga" "North West" "Northern Cape"
[9] "Western Cape"
# Clean and standardize names for joining
<- hypertensiondata %>%
hypertensiondata mutate(
NAME_1 = tolower(trimws(province)), # Clean province names
NAME_1 = gsub("[^a-zA-Z0-9]", " ", NAME_1) # Remove special characters
)
<- hyp_id %>%
hyp_id mutate(
NAME_1_clean = tolower(trimws(NAME_1)),
NAME_1_clean = gsub("[^a-zA-Z0-9]", " ", NAME_1_clean)
)
# Perform the join
<- hypertensiondata %>%
hypertensiondata left_join(hyp_id %>% select(NAME_0, ID_1, NAME_1_clean),
by = c("NAME_1" = "NAME_1_clean"))
# 6. Check if join worked and handle missing IDs
cat("\nMissing ID_1 values after join:", sum(is.na(hypertensiondata$ID_1)), "\n")
Missing ID_1 values after join: 1029
# If there are missing IDs, check the first few mismatches
if(any(is.na(hypertensiondata$ID_1))) {
<- hypertensiondata %>%
mismatches distinct(province, NAME_1) %>%
head(5)
cat("Sample mismatches:\n")
print(mismatches)
}
Sample mismatches:
# A tibble: 5 × 2
province NAME_1
<fct> <chr>
1 Gauteng gauteng
2 Kwazulu natal kwazulu natal
3 Freestate freestate
4 Mpumalanga mpumalanga
5 Eastern cape eastern cape
# 7. Ensure ID_1 is numeric and properly ordered
<- hypertensiondata %>%
hypertensiondata_clean mutate(ID_1 = as.numeric(ID_1))
# 8. Verify spatial structure alignment
cat("Spatial regions:", length(SA_nb), "\n")
Spatial regions: 9
cat("Unique ID_1 in data:", length(unique(hypertensiondata_clean$ID_1)), "\n")
Unique ID_1 in data: 9
cat("ID_1 range in data:", range(hypertensiondata_clean$ID_1), "\n")
ID_1 range in data: NA NA
# 9. Fitting CAR model - CORRECTED (use H instead of sa_adj)
<- hypertension2 ~ 1 +
formula_inla as.factor(sex) +
as.factor(residence) +
+
current_age as.factor(wealth_level) +
f(ID_1, model = "bym", graph = H, scale.model = TRUE, # Use H, not sa_adj
hyper = list(
prec.unstruct = list(prior = "loggamma", param = c(1, 0.001)),
prec.spatial = list(prior = "loggamma", param = c(1, 0.001))
) )
# 10. Run INLA model
<- inla(
model_inla
formula_inla,family = "binomial",
data = hypertensiondata_clean,
control.compute = list(dic = TRUE),
control.predictor = list(compute = TRUE),
control.inla = list(strategy = "gaussian"), # More stable
verbose = TRUE
)
# 11. Check results
summary(model_inla)
Time used:
Pre = 1.85, Running = 7.39, Post = 0.421, Total = 9.66
Fixed effects:
mean sd 0.025quant 0.5quant 0.975quant
(Intercept) -4.952 0.141 -5.228 -4.953 -4.674
as.factor(sex)female 0.638 0.062 0.517 0.638 0.760
as.factor(residence)Rural -0.201 0.069 -0.335 -0.201 -0.065
current_age 0.067 0.002 0.063 0.067 0.070
as.factor(wealth_level)poor 0.404 0.102 0.205 0.404 0.603
as.factor(wealth_level)middle 0.379 0.101 0.181 0.379 0.577
as.factor(wealth_level)richer 0.398 0.101 0.200 0.398 0.595
as.factor(wealth_level)richest 0.442 0.100 0.246 0.442 0.638
mode kld
(Intercept) -4.953 0
as.factor(sex)female 0.638 0
as.factor(residence)Rural -0.201 0
current_age 0.067 0
as.factor(wealth_level)poor 0.404 0
as.factor(wealth_level)middle 0.379 0
as.factor(wealth_level)richer 0.398 0
as.factor(wealth_level)richest 0.442 0
Random effects:
Name Model
ID_1 BYM model
Model hyperparameters:
mean sd 0.025quant 0.5quant
Precision for ID_1 (iid component) 10.27 5.98 2.92 8.91
Precision for ID_1 (spatial component) 1044.23 1177.40 60.68 669.29
0.975quant mode
Precision for ID_1 (iid component) 25.58 6.61
Precision for ID_1 (spatial component) 4167.88 160.80
Deviance Information Criterion (DIC) ...............: 7793.05
Deviance Information Criterion (DIC, saturated) ....: 7778.16
Effective number of parameters .....................: 15.34
Marginal log-Likelihood: -3941.12
is computed
Posterior summaries for the linear predictor and the fitted values are computed
(Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
<- st_read("C:/Users/VUSI/Downloads/CAR_models-master/ZAF_adm/ZAF_adm.csv", quiet = TRUE)
SA_shp1 $ID_1 <- as.character(SA_shp1$ID_1) # use character IDs
SA_shp1$NAME_1 <- as.character(SA_shp1$NAME_1) SA_shp1
# --- 0) Read the SHAPEFILE (not the CSV) ---
<- st_read("C:/Users/VUSI/Downloads/CAR_models-master/ZAF_adm/ZAF_adm1_1.shp") SA_sf
Reading layer `ZAF_adm1_1' from data source
`C:\Users\VUSI\Downloads\CAR_models-master\ZAF_adm\ZAF_adm1_1.shp'
using driver `ESRI Shapefile'
Simple feature collection with 9 features and 9 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 16.45802 ymin: -34.83514 xmax: 32.89125 ymax: -22.12661
Geodetic CRS: WGS 84
<- SA_sf %>% mutate(ID_1 = as.character(ID_1), NAME_1 = as.character(NAME_1))
SA_sf
# --- 1) Build adjacency only from polygons ---
<- file.path(getwd(), "ZAF_adm", "sa_inla.graph")
sa_graph_path if (!file.exists(sa_graph_path)) {
<- poly2nb(SA_sf, row.names = SA_sf$ID_1) # use ID_1 as node name
SA_nb ::nb2INLA(sa_graph_path, SA_nb)
INLA
}<- INLA::inla.read.graph(sa_graph_path)
H <- if (!is.null(H$names)) H$names else as.character(seq_len(H$n))
graph_names
# --- 2) Make a robust lookup (NAME_1 -> ID_1) with normalization ---
<- function(x) x %>%
norm_key str_trim() %>%
str_to_lower() %>%
str_replace_all("[^[:alnum:]]", "") # drop spaces, hyphens, etc.
<- SA_sf %>%
id_map st_drop_geometry() %>%
transmute(NAME_1,
ID_1,key = norm_key(NAME_1))
# Since your province codes (1-9) correspond directly to ID_1 values (1-9)
<- hypertensiondata %>%
hyp mutate(
ID_1 = as.character(as.numeric(province)), # Convert to character to match shapefile
y = case_when(
%in% c(1, "1", TRUE, "TRUE", "Yes", "yes") ~ 1L,
hypertension2 %in% c(0, "0", FALSE, "FALSE", "No", "no") ~ 0L,
hypertension2 TRUE ~ NA_integer_
),sex = factor(sex),
residence = factor(residence),
wealth_level = factor(wealth_level),
region = factor(ID_1, levels = graph_names)
%>%
) ::drop_na(y, region, sex, residence, wealth_level, current_age) tidyr
# --- 4) Fit your BYM model, passing the graph object directly ---
<- y ~ 1 + sex + residence + current_age + wealth_level +
form_bym f(region,
model = "bym",
graph = H, # graph object (not a file path string)
values = graph_names, # map factor levels to graph nodes
scale.model = TRUE,
hyper = list(
prec.unstruct = list(prior = "loggamma", param = c(1, 0.001)),
prec.spatial = list(prior = "loggamma", param = c(1, 0.001))
) )
<- INLA::inla(
model_inla
form_bym,family = "binomial",
data = hyp,
control.compute = list(dic = TRUE)
)
summary(model_inla)
Time used:
Pre = 1.37, Running = 4.63, Post = 0.936, Total = 6.94
Fixed effects:
mean sd 0.025quant 0.5quant 0.975quant mode kld
(Intercept) -5.103 0.127 -5.352 -5.103 -4.855 -5.103 0
sexfemale 0.635 0.062 0.514 0.635 0.757 0.635 0
residenceRural -0.185 0.068 -0.318 -0.185 -0.051 -0.185 0
current_age 0.067 0.002 0.063 0.067 0.070 0.067 0
wealth_levelpoor 0.398 0.102 0.198 0.398 0.597 0.398 0
wealth_levelmiddle 0.375 0.101 0.177 0.375 0.573 0.375 0
wealth_levelricher 0.395 0.101 0.198 0.395 0.593 0.395 0
wealth_levelrichest 0.441 0.100 0.245 0.441 0.637 0.441 0
Random effects:
Name Model
region BYM model
Model hyperparameters:
mean sd 0.025quant 0.5quant
Precision for region (iid component) 1089.97 1201.01 70.80 712.30
Precision for region (spatial component) 17.90 9.60 5.57 15.85
0.975quant mode
Precision for region (iid component) 4278.48 192.11
Precision for region (spatial component) 42.19 12.27
Deviance Information Criterion (DIC) ...............: 7791.47
Deviance Information Criterion (DIC, saturated) ....: 7776.58
Effective number of parameters .....................: 15.15
Marginal log-Likelihood: -3938.87
is computed
Posterior summaries for the linear predictor and the fitted values are computed
(Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
round(model_inla$summary.fixed, 3)
mean sd 0.025quant 0.5quant 0.975quant mode kld
(Intercept) -5.103 0.127 -5.352 -5.103 -4.855 -5.103 0
sexfemale 0.635 0.062 0.514 0.635 0.757 0.635 0
residenceRural -0.185 0.068 -0.318 -0.185 -0.051 -0.185 0
current_age 0.067 0.002 0.063 0.067 0.070 0.067 0
wealth_levelpoor 0.398 0.102 0.198 0.398 0.597 0.398 0
wealth_levelmiddle 0.375 0.101 0.177 0.375 0.573 0.375 0
wealth_levelricher 0.395 0.101 0.198 0.395 0.593 0.395 0
wealth_levelrichest 0.441 0.100 0.245 0.441 0.637 0.441 0
head(model_inla$summary.random$region, 3)
ID mean sd 0.025quant 0.5quant 0.975quant mode
1 1 0.1361564 0.07520687 -0.01171638 0.1362717 0.28335261 0.1362484
2 2 0.2199034 0.08021864 0.06482728 0.2191213 0.37950370 0.2191920
3 3 -0.2026820 0.08840432 -0.37697060 -0.2024190 -0.02988096 -0.2024254
kld
1 4.472543e-07
2 2.550223e-07
3 1.936613e-08
1.6 Plotting posterior graphs from INLA
Try this out in your own time….
2 Count Data at location level
For this we will use the COVID19 data
We use the Poisson log normal model for this analysis
2.1 Three steps for the lognormal model
Stage 1: The likelihood \[Y_{i}|\theta_{i}\sim Poisson(E_{i}\theta_{i})\] with \(i=1,2,\dots, n\) with \[log(\theta_{i})=\beta_{0}+\beta_{1}X_{i} + v_{i}\] where \(X_{i}\) is the covariate measured at county level (areal level)
Stage 2: The random effects (prior distribution) is \[v_{i}|\sigma^{2}_{i}\sim N(0,\sigma^{2}_{v})\]
stage 3: The hyperprior on the hyperparameters \(\beta_{0}, ~\beta_{1},~ \sigma^{2}_{v}\) is given by \[p(\beta_{0},\beta_{1}, \sigma^{2}_{v})=p(\beta_{0})p(\beta_{1})p(\sigma^{2}_{v})\]
priors are assumed independent and for ease of Bayesian implementation, we work with the precision \(\tau_{v}=\sigma^{-2}_{v}\) rather than \(\sigma^{2}_{v}\).
2.2 Lognormal spatial model with covariates
We now add spatial (ICAR) random effects to the model. and for this we need a graph file as before
INLA specification of the model
\[log(\theta_{i})=\beta_{0}+\beta_{1}X_{i}+u_{i}+v_{i}\] with \(u_{i}\) structured spatial effects with a CAR prior and \(v_{i}\) the unstructured random effects with an iid normal prior.
- Data exploration and computing standardized mortality rate
3 Modelling corona virus deaths in South Africa
Hypothesis: COVID deaths in South Africa have a significant spatial association
setwd("C:/Users/VUSI/Downloads/CAR_models-master/ZAF_adm")
<- shapefile("district2.shp")
sadistricts sadistricts
class : SpatialPolygonsDataFrame
features : 52
extent : 16.45189, 32.94498, -34.83417, -22.12503 (xmin, xmax, ymin, ymax)
crs : +proj=longlat +datum=WGS84 +no_defs
variables : 21
names : CATEGORY, DC_MDB_C, ID_3, DC_MN_C, DC_MN_C_st, DC_NAME, DC_NAME_C, MAP_TITLE, PR_MDB_C, PR_CODE, PR_CODE_st, PR_NAME, ALBERS_ARE, SHAPE_Leng, SHAPE_Area, ...
min values : A, BUF, 1, 101, 101, Alfred Nzo, Alfred Nzo(DC44 ), Alfred Nzo District Municipality, EC, 1, 1, Eastern Cape, 1644.98312348, 2.8057080426, 0.14852545311, ...
max values : C, TSH, 52, 947, 947, Zululand, Zululand(DC26 ), Zululand District Municipality, WC, 9, 9, Western Cape, 126836.337661, 28.7266980377, 11.9243802163, ...
Describe patterns of deaths with hypertension prevalence (assess association visually)
library(tmap)
library(tmap)
library(sf)
# Ensure sadistricts is an sf object with proper geometry column
if(!"sf" %in% class(sadistricts)) {
<- st_as_sf(sadistricts)
sadistricts
}
# Check the geometry column name
<- attr(sadistricts, "sf_column")
geometry_col print(paste("Geometry column name:", geometry_col))
[1] "Geometry column name: geometry"
# If the geometry column isn't named "geometry", rename it
if(geometry_col != "geometry") {
<- sadistricts %>%
sadistricts rename(geometry = all_of(geometry_col)) %>%
st_set_geometry("geometry")
}
# Now create the map
tm_shape(sadistricts) +
tm_polygons(col = "hypertensi", title = "Hypertension") +
tm_bubbles(size = "deaths", col = "deaths", palette = "Blues",
style = "quantile", legend.size.show = FALSE,
title.col = "Corona virus deaths",
border.col = "black", border.lwd = 0.1, border.alpha = 0.1) +
tm_layout(legend.text.size = 0.8, legend.title.size = 1.1,
frame = FALSE)
Pull the data points (information) from the map object for use in simple models
Simple feature collection with 52 features and 21 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 16.45189 ymin: -34.83417 xmax: 32.94498 ymax: -22.12503
Geodetic CRS: +proj=longlat +datum=WGS84 +no_defs
First 10 features:
CATEGORY DC_MDB_C ID_3 DC_MN_C DC_MN_C_st DC_NAME
1 A MAN 1 499 499 Mangaung
2 A NMA 2 299 299 Nelson Mandela Bay
3 A TSH 3 799 799 City of Tshwane
4 A JHB 4 798 798 City of Johannesburg
5 A BUF 5 260 260 Buffalo City
6 A CPT 6 199 199 City of Cape Town
7 C DC1 7 101 101 West Coast
8 C DC10 8 210 210 Cacadu
9 C DC12 9 212 212 Amathole
10 C DC13 10 213 213 Chris Hani
DC_NAME_C MAP_TITLE
1 Mangaung(MAN ) Mangaung Metropolitan Municipality
2 Nelson Mandela Bay(NMA ) Nelson Mandela Bay Metropolitan Municipality
3 City of Tshwane(TSH ) City of Tshwane Metropolitan Municipality
4 City of Johannesburg(JHB ) City of Johannesburg Metropolitan Municipality
5 Buffalo City(BUF ) Buffalo City Metropolitan Municipality
6 City of Cape Town(CPT ) City of Cape Town Metropolitan Municipality
7 West Coast(DC1 ) West Coast District Municipality
8 Cacadu(DC10 ) Cacadu District Municipality
9 Amathole(DC12 ) Amathole District Municipality
10 Chris Hani(DC13 ) Chris Hani District Municipality
PR_MDB_C PR_CODE PR_CODE_st PR_NAME ALBERS_ARE SHAPE_Leng SHAPE_Area
1 FS 4 4 Free State 6283.988 6.196157 0.5827334
2 EC 2 2 Eastern Cape 1958.908 2.939828 0.1907285
3 GT 7 7 Gauteng 6297.833 6.356642 0.5661993
4 GT 7 7 Gauteng 1644.983 3.031305 0.1485255
5 EC 2 2 Eastern Cape 2535.932 4.458223 0.2445463
6 WC 1 1 Western Cape 2444.963 5.198175 0.2382967
7 WC 1 1 Western Cape 31118.684 14.651131 2.9732263
8 EC 2 2 Eastern Cape 58243.283 17.874817 5.6239269
9 EC 2 2 Eastern Cape 21594.916 16.058800 2.0734975
10 EC 2 2 Eastern Cape 36143.538 14.247915 3.4418078
FID_1 district covidcases deaths expected hypertensi
1 1 Mangaung 13236 225 264.72 29.4
2 2 Nelson Mandela Bay 42831 885 856.62 35.0
3 3 City of Tshwane 77990 749 1559.80 24.5
4 4 City of Johannesburg 88609 1277 1772.18 30.8
5 5 Buffalo city 39635 629 792.70 40.1
6 6 City of Cape Town 138219 2764 2764.38 31.4
7 7 West coast 6769 134 135.38 46.1
8 8 Cacadu 11865 199 237.30 37.5
9 9 Amathole 16835 272 336.70 39.7
10 10 Chris Hani 16292 278 325.84 30.8
geometry
1 MULTIPOLYGON (((25.90767 -2...
2 MULTIPOLYGON (((25.46655 -3...
3 MULTIPOLYGON (((28.75833 -2...
4 MULTIPOLYGON (((27.98484 -2...
5 MULTIPOLYGON (((27.35458 -3...
6 MULTIPOLYGON (((18.52309 -3...
7 MULTIPOLYGON (((18.07458 -3...
8 MULTIPOLYGON (((24.4407 -31...
9 MULTIPOLYGON (((28.26665 -3...
10 MULTIPOLYGON (((26.37107 -3...
Simple poisson model withn intercept and random error. The exponentiated intercept is the average rate (relative risk rate) fo deaths across the districts.
<-inla(deaths ~ 1 + f(FID_1, model = "iid", param = c(0.5, 5e-04)), data=sadata, family = "poisson", E=expected)
deaths.fit1
summary(deaths.fit1)
Time used:
Pre = 1.45, Running = 1.07, Post = 0.204, Total = 2.72
Fixed effects:
mean sd 0.025quant 0.5quant 0.975quant mode kld
(Intercept) -0.683 0.081 -0.844 -0.683 -0.525 -0.683 0
Random effects:
Name Model
FID_1 IID model
Model hyperparameters:
mean sd 0.025quant 0.5quant 0.975quant mode
Precision for FID_1 3.27 0.71 2.05 3.21 4.82 3.10
Marginal log-Likelihood: -288.30
is computed
Posterior summaries for the linear predictor and the fitted values are computed
(Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
exp(deaths.fit1$summary.fixed[1,])
mean sd 0.025quant 0.5quant 0.975quant mode kld
(Intercept) 0.5050245 1.084355 0.430014 0.5052593 0.5915584 0.5052577 1
3.1 Fitting non-spatial lognormal model with covariate
<-inla(deaths ~ 1 + hypertensi + f(FID_1, model = "iid", param=c(1, 0.014)), data=sadata, family = "poisson", E = expected, control.compute =list(dic = TRUE))
deaths.fit2
summary(deaths.fit2)
Time used:
Pre = 1.43, Running = 1.04, Post = 0.278, Total = 2.75
Fixed effects:
mean sd 0.025quant 0.5quant 0.975quant mode kld
(Intercept) -1.273 0.306 -1.874 -1.274 -0.667 -1.274 0
hypertensi 0.019 0.010 0.000 0.019 0.038 0.019 0
Random effects:
Name Model
FID_1 IID model
Model hyperparameters:
mean sd 0.025quant 0.5quant 0.975quant mode
Precision for FID_1 3.64 0.808 2.26 3.57 5.42 3.43
Deviance Information Criterion (DIC) ...............: 435.78
Deviance Information Criterion (DIC, saturated) ....: 104.08
Effective number of parameters .....................: 48.79
Marginal log-Likelihood: -293.76
is computed
Posterior summaries for the linear predictor and the fitted values are computed
(Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
- Examine the posterior summaries for covariate, on the original (log scale- log RR) or exponential (RR)
$summary.fixed deaths.fit2
mean sd 0.025quant 0.5quant 0.975quant
(Intercept) -1.27297495 0.306313064 -1.8737596297 -1.27383261 -0.66731452
hypertensi 0.01920151 0.009593265 0.0001839012 0.01924635 0.03796449
mode kld
(Intercept) -1.2738222 3.141363e-08
hypertensi 0.0192459 3.274789e-08
$summary.fixed[2,] deaths.fit2
mean sd 0.025quant 0.5quant 0.975quant mode
hypertensi 0.01920151 0.009593265 0.0001839012 0.01924635 0.03796449 0.0192459
kld
hypertensi 3.274789e-08
exp(deaths.fit2$summary.fixed[2,])
mean sd 0.025quant 0.5quant 0.975quant mode kld
hypertensi 1.019387 1.009639 1.000184 1.019433 1.038694 1.019432 1
3.2 Lognormal spatial model with covariates
tmap_mode("plot")
if (!"smr" %in% names(sadistricts)) {
stopifnot(all(c("deaths","expected") %in% names(sadistricts)))
$smr <- with(sadistricts, ifelse(expected > 0, deaths/expected, NA_real_))
sadistricts
}if (!inherits(sadistricts, "sf")) sadistricts <- st_as_sf(sadistricts)
if (!is.numeric(sadistricts$smr)) sadistricts$smr <- as.numeric(sadistricts$smr)
tm_shape(sadistricts) +
tm_polygons(
fill = "smr",
fill.scale = tm_scale_intervals(
style = "quantile",
n = 5,
values = RColorBrewer::brewer.pal(5, "Blues")
),fill.legend = tm_legend(title = "Covid SMR"),
col_alpha = 0.4
+
) tm_compass() +
tm_title("South AFRICA, COVID2020") +
tm_layout(
legend.text.size = 0.5,
legend.title.size = 0.5,
legend.position = c("right","bottom"),
frame = FALSE
)
# --- 0) Ensure `sadistricts` is sf and has an ID to use as node names ---
if (inherits(sadistricts, "Spatial")) sadistricts <- st_as_sf(sadistricts)
if (!"FID_1" %in% names(sadistricts)) sadistricts$FID_1 <- seq_len(nrow(sadistricts))
$FID_1 <- as.character(sadistricts$FID_1)
sadistricts
<- spdep::poly2nb(sadistricts, row.names = sadistricts$FID_1)
sadist_nb <- file.path(getwd(), "sadist_inla.graph")
graph_path ::nb2INLA(graph_path, sadist_nb)
spdep
<- INLA::inla.read.graph(graph_path)
H
<- sadistricts %>%
sadistricts mutate(
region1 = FID_1,
region2 = FID_1
)
Plotting the expected deaths
# summary of attributes (no geometry)
summary(sf::st_drop_geometry(sadistricts))
CATEGORY DC_MDB_C ID_3 DC_MN_C
Length:52 Length:52 Min. : 1.00 Min. :101.0
Class :character Class :character 1st Qu.:13.75 1st Qu.:289.2
Mode :character Mode :character Median :26.50 Median :522.5
Mean :26.50 Mean :498.7
3rd Qu.:39.25 3rd Qu.:665.5
Max. :52.00 Max. :947.0
DC_MN_C_st DC_NAME DC_NAME_C MAP_TITLE
Length:52 Length:52 Length:52 Length:52
Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character
PR_MDB_C PR_CODE PR_CODE_st PR_NAME
Length:52 Min. :1.000 Length:52 Length:52
Class :character 1st Qu.:2.000 Class :character Class :character
Mode :character Median :5.000 Mode :character Mode :character
Mean :4.615
3rd Qu.:6.250
Max. :9.000
ALBERS_ARE SHAPE_Leng SHAPE_Area FID_1
Min. : 1645 Min. : 2.806 Min. : 0.1485 Length:52
1st Qu.: 7888 1st Qu.: 6.261 1st Qu.: 0.7269 Class :character
Median : 15779 Median : 9.512 Median : 1.4313 Mode :character
Mean : 23477 Mean :10.011 Mean : 2.1766
3rd Qu.: 28934 3rd Qu.:12.311 3rd Qu.: 2.6284
Max. :126836 Max. :28.727 Max. :11.9244
district covidcases deaths expected
Length:52 Min. : 345 Min. : 3.00 Min. : 6.90
Class :character 1st Qu.: 4702 1st Qu.: 40.75 1st Qu.: 94.05
Mode :character Median : 8906 Median : 82.00 Median : 178.12
Mean : 19209 Mean : 248.00 Mean : 384.19
3rd Qu.: 18274 3rd Qu.: 259.25 3rd Qu.: 365.47
Max. :138219 Max. :2764.00 Max. :2764.38
hypertensi smr region1 region2
Min. :19.50 Min. :0.1106 Length:52 Length:52
1st Qu.:24.38 1st Qu.:0.3683 Class :character Class :character
Median :30.05 Median :0.5642 Mode :character Mode :character
Mean :31.11 Mean :0.5755
3rd Qu.:37.58 3rd Qu.:0.8409
Max. :56.00 Max. :1.0331
# map the 'expected' column with tmap v4
library(tmap)
library(RColorBrewer)
tmap_mode("plot")
tm_shape(sadistricts) +
tm_polygons(
fill = "expected",
fill.scale = tm_scale_intervals(
style = "quantile",
n = 5,
values = RColorBrewer::brewer.pal(5, "Blues")
),fill.legend = tm_legend(title = "Expected"),
col_alpha = 0.4
+
) tm_layout(legend.position = c("right","bottom"), frame = FALSE)
Plotting the standardized death rates
library(spdep)
library(INLA)
# neighbours -> INLA graph file
<- spdep::poly2nb(sadistricts, row.names = sadistricts$FID_1)
sadist_nb <- file.path(getwd(), "sadist_inla.graph")
graph_path ::nb2INLA(graph_path, sadist_nb)
spdep
# read the graph into an INLA graph object
<- INLA::inla.read.graph(graph_path)
H
class(sadist_nb) # "nb" (spdep neighbours)
[1] "nb"
class(graph_path) # "character" (file path)
[1] "character"
class(H) # "inla.graph" (use this in f(..., graph = H, values = H$names))
[1] "inla.graph"
<- y ~ ... +
form f(region, model = "bym2", graph = H, values = H$names, scale.model = TRUE)
# ---------- 1) Neighbours and proper INLA graph ----------
<- poly2nb(sadistricts, row.names = sadistricts$FID_1)
sadist_nb <- file.path(getwd(), "sadist_inla.graph")
graph_path nb2INLA(graph_path, sadist_nb)
<- INLA::inla.read.graph(graph_path) # <-- graph OBJECT
H <- if (!is.null(H$names)) H$names else as.character(seq_len(H$n))
g_names
# ---------- 2) Build modelling data (sf-safe; no @data) ----------
<- sadistricts |>
dat st_drop_geometry() |>
mutate(
region1 = FID_1, # IID part index (character)
region2 = FID_1 # Besag part index (character)
)
# Safety checks
stopifnot(all(unique(dat$region2) %in% g_names))
<- dat[match(g_names, dat$region2), , drop = FALSE] # align to graph order (optional but safe) dat
# ---------- 3) Fit Poisson BYM (IID + Besag) ----------
# Use hyper=list(prec=list(param=c(shape, rate))) for log-Gamma prior on precision
<- deaths ~ 1 +
form f(region1, model = "iid", hyper = list(prec = list(param = c(1, 0.014)))) +
f(region2, model = "besag", graph = H, values = g_names,
hyper = list(prec = list(param = c(1, 0.68))), scale.model = TRUE)
<- INLA::inla(
fit family = "poisson",
form, data = dat,
E = expected, # expected counts (offset via log(E))
control.predictor = list(compute = TRUE),
control.compute = list(dic = TRUE, waic = TRUE, cpo = TRUE)
)
# ---------- 4) Summaries ----------
print(summary(fit))
Time used:
Pre = 1.59, Running = 1.23, Post = 0.358, Total = 3.18
Fixed effects:
mean sd 0.025quant 0.5quant 0.975quant mode kld
(Intercept) -0.68 0.029 -0.738 -0.68 -0.623 -0.68 0
Random effects:
Name Model
region1 IID model
region2 Besags ICAR model
Model hyperparameters:
mean sd 0.025quant 0.5quant 0.975quant mode
Precision for region1 89.47 97.708 7.51 59.66 347.93 20.81
Precision for region2 2.96 0.756 1.75 2.87 4.71 2.70
Deviance Information Criterion (DIC) ...............: 433.99
Deviance Information Criterion (DIC, saturated) ....: 102.29
Effective number of parameters .....................: 47.07
Watanabe-Akaike information criterion (WAIC) ...: 425.72
Effective number of parameters .................: 27.96
Marginal log-Likelihood: -291.71
CPO, PIT is computed
Posterior summaries for the linear predictor and the fitted values are computed
(Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
print(round(fit$summary.fixed, 3))
mean sd 0.025quant 0.5quant 0.975quant mode kld
(Intercept) -0.68 0.029 -0.738 -0.68 -0.623 -0.68 0
# ---------- 5) Extract RR for mapping (use posterior means) ----------
# Random effects are on log scale; exponentiate to get multiplicative RR
<- exp(fit$summary.random$region1$mean)
rr_nonspatial <- exp(fit$summary.random$region2$mean)
rr_spatial
# Convert sf -> sp for SpatialEpi::mapvariable
<- sf::as_Spatial(sadistricts)
sadistricts_sp
library(tmap)
if (!inherits(sadistricts, "sf")) sadistricts <- sf::st_as_sf(sadistricts)
$rr_nonspatial <- rr_nonspatial
sadistricts
tmap_mode("plot")
tm_shape(sadistricts) +
tm_polygons(fill = "rr_nonspatial") +
tm_title("Spatial RR (Besag)")
library(tmap)
if (!inherits(sadistricts, "sf")) sadistricts <- sf::st_as_sf(sadistricts)
$rr_spatial <- rr_spatial
sadistricts
tmap_mode("plot")
tm_shape(sadistricts) +
tm_polygons(fill = "rr_spatial") +
tm_title("Spatial RR (Besag)")
About 90% of COVID deaths in the Western Cape is spatially related and this can be due to socio-economic dynamics in that region and more specifically those districts.