setwd("D:/ES3 PreTCV paper D22_june23/Reanalysis Jan2025/PreTCV_WW_INLA")
# Add the INLA repository and install the package
install.packages("INLA", repos = c(getOption("repos"), INLA = "https://inla.r-inla-download.org/R/stable"))
# Load libraries
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.10.1, PROJ 9.5.1; sf_use_s2() is TRUE
library(INLA)
## Loading required package: Matrix
## This is INLA_24.12.11 built 2024-12-11 19:58:26 UTC.
## - See www.r-inla.org/contact-us for how to get help.
## - List available models/likelihoods/etc with inla.list.models()
## - Use inla.doc(<NAME>) to access documentation
library(readxl)
library(ggplot2)
library(tmap)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(terra)
## terra 1.8.21
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:terra':
##
## intersect, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(sf)
library(sp)
library(raster)
##
## Attaching package: 'raster'
## The following object is masked from 'package:dplyr':
##
## select
library(tmap)
library(ggspatial)
# Load dataset
df <- read_excel("D:/ES3 PreTCV paper D22_june23/Reanalysis Jan2025/PreTCV_WW_INLA/ww.xlsx")
# Convert to data frame
df <- as.data.frame(df)
df$disease <- as.numeric(df$disease > 0) # Convert to binary
colSums(is.na(df)) # Identify missing values
## sample date site methods
## 0 0 0 0
## labmethod Temp_max Rain_mm Humidity
## 0 1 0 0
## latitude longitude TotalPop Typhoidcases_cum
## 0 0 0 0
## Overlap Cases_total Popden_CA hf_183
## 0 0 0 191
## disease
## 0
df <- na.omit(df)
df$site <- as.factor(df$site) # Convert site to a factor
df$date <- as.Date(df$date, format = "%d-%m-%Y") # Convert date format
df$time <- as.integer(difftime(df$date, min(df$date), units = "weeks")) %/% 2
df$time <- as.factor(df$time)
formula <- disease ~ methods + Temp_max + Rain_mm + TotalPop +
Typhoidcases_cum + hf_183 +
f(site, model = "iid") + # Random effect for site
f(time, model = "ar1") # Biweekly time effect
inla_model <- inla(formula,
family = "binomial",
data = df,
control.predictor = list(compute = TRUE),
control.compute = list(dic = TRUE, waic = TRUE, cpo = TRUE))
summary(inla_model)
## Time used:
## Pre = 0.371, Running = 0.551, Post = 0.536, Total = 1.46
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) -3.109 2.636 -8.397 -3.075 1.980 -3.076 0
## methodsNanoA 1.588 0.328 0.946 1.588 2.232 1.588 0
## Temp_max 0.112 0.037 0.040 0.112 0.185 0.112 0
## Rain_mm 0.041 0.015 0.011 0.041 0.071 0.041 0
## TotalPop 0.000 0.001 -0.001 0.000 0.001 0.000 0
## Typhoidcases_cum 0.118 0.135 -0.147 0.118 0.381 0.118 0
## hf_183 -0.207 0.057 -0.319 -0.207 -0.096 -0.207 0
##
## Random effects:
## Name Model
## site IID model
## time AR1 model
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant 0.975quant mode
## Precision for site 3.08e-01 1.27e-01 0.128 2.86e-01 6.19e-01 0.245
## Precision for time 2.20e+04 2.41e+04 1466.430 1.44e+04 8.61e+04 3997.754
## Rho for time 0.00e+00 6.98e-01 -0.987 0.00e+00 9.88e-01 0.997
##
## Deviance Information Criterion (DIC) ...............: 417.77
## Deviance Information Criterion (DIC, saturated) ....: 415.96
## Effective number of parameters .....................: 40.13
##
## Watanabe-Akaike information criterion (WAIC) ...: 405.97
## Effective number of parameters .................: 26.57
##
## Marginal log-Likelihood: -266.58
## CPO, PIT is computed
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
# Extract fixed effects summary
fixed_effects <- inla_model$summary.fixed
# Select relevant columns
fixed_effects <- fixed_effects[, c("mean", "sd", "0.025quant", "0.975quant")]
# Rename columns for clarity
colnames(fixed_effects) <- c("Estimate", "Std. Error", "Lower 95% CI", "Upper 95% CI")
# Print the results
print(fixed_effects)
## Estimate Std. Error Lower 95% CI Upper 95% CI
## (Intercept) -3.1092854351 2.6355257414 -8.396930166 1.9800216748
## methodsNanoA 1.5878612440 0.3278776383 0.945872914 2.2317914404
## Temp_max 0.1122760013 0.0370592416 0.039741286 0.1850966585
## Rain_mm 0.0405293719 0.0153091790 0.010518089 0.0705600824
## TotalPop -0.0002079891 0.0005716034 -0.001342152 0.0009247946
## Typhoidcases_cum 0.1177904632 0.1346357452 -0.147316074 0.3809923790
## hf_183 -0.2073495775 0.0569101614 -0.319103072 -0.0958837101
df$predicted_prob <- inla_model$summary.fitted.values$mean
#Formula
formula <- disease ~ methods + Temp_max + Rain_mm + TotalPop +
Typhoidcases_cum + hf_183 +
f(site, model = "iid") + # Random effect for site
f(time, model = "ar1") # Biweekly time effect
#Run INLA model
inla_model <- inla(formula,
family = "binomial",
data = df,
control.predictor = list(compute = TRUE),
control.compute = list(dic = TRUE, waic = TRUE, cpo = TRUE))
summary(inla_model)
## Time used:
## Pre = 0.23, Running = 0.54, Post = 0.436, Total = 1.21
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) -3.109 2.634 -8.390 -3.075 1.979 -3.076 0
## methodsNanoA 1.588 0.328 0.946 1.588 2.232 1.588 0
## Temp_max 0.112 0.037 0.040 0.112 0.185 0.112 0
## Rain_mm 0.041 0.015 0.011 0.041 0.071 0.041 0
## TotalPop 0.000 0.001 -0.001 0.000 0.001 0.000 0
## Typhoidcases_cum 0.118 0.135 -0.147 0.118 0.381 0.118 0
## hf_183 -0.207 0.057 -0.319 -0.207 -0.096 -0.207 0
##
## Random effects:
## Name Model
## site IID model
## time AR1 model
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant 0.975quant mode
## Precision for site 3.08e-01 1.27e-01 0.129 2.86e-01 6.19e-01 0.245
## Precision for time 2.20e+04 2.40e+04 1492.057 1.45e+04 8.58e+04 4079.973
## Rho for time 0.00e+00 6.77e-01 -0.982 0.00e+00 9.82e-01 0.993
##
## Deviance Information Criterion (DIC) ...............: 417.73
## Deviance Information Criterion (DIC, saturated) ....: 415.91
## Effective number of parameters .....................: 40.10
##
## Watanabe-Akaike information criterion (WAIC) ...: 405.95
## Effective number of parameters .................: 26.56
##
## Marginal log-Likelihood: -266.66
## CPO, PIT is computed
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
# Extract fixed effects summary
fixed_effects <- inla_model$summary.fixed
# Select relevant columns
fixed_effects <- fixed_effects[, c("mean", "sd", "0.025quant", "0.975quant")]
# Rename columns for clarity
colnames(fixed_effects) <- c("Estimate", "Std. Error", "Lower 95% CI", "Upper 95% CI")
# Print the results
print(fixed_effects)
## Estimate Std. Error Lower 95% CI Upper 95% CI
## (Intercept) -3.1087294578 2.6337111723 -8.389860459 1.9789650783
## methodsNanoA 1.5878766074 0.3278429292 0.945933942 2.2317155881
## Temp_max 0.1122761146 0.0370571120 0.039742361 0.1850890235
## Rain_mm 0.0405291191 0.0153093793 0.010517238 0.0705599580
## TotalPop -0.0002078337 0.0005711019 -0.001340637 0.0009237144
## Typhoidcases_cum 0.1177781567 0.1346353288 -0.147301133 0.3809988871
## hf_183 -0.2073459132 0.0569092589 -0.319093876 -0.0958787426
# Define file paths (update if necessary)
library(sf)
library(terra)
boundary_path <- "D:/ES3 PreTCV paper D22_june23/Reanalysis Jan2025/PreTCV_WW_INLA/boundary.shp"
sampling_locations_path <- "D:/ES3 PreTCV paper D22_june23/Reanalysis Jan2025/PreTCV_WW_INLA/WW_sampling_locations.shp"
typhoid_cases_path <- "D:/ES3 PreTCV paper D22_june23/Reanalysis Jan2025/PreTCV_WW_INLA/Typhoid_cases.shp"
typhoid_hotspots_path <- "D:/ES3 PreTCV paper D22_june23/Reanalysis Jan2025/PreTCV_WW_INLA/Typhoid_hotspots.shp"
# Load the shapefiles
boundary <- st_read(boundary_path)
## Reading layer `boundary' from data source
## `D:\ES3 PreTCV paper D22_june23\Reanalysis Jan2025\PreTCV_WW_INLA\boundary.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 12 fields
## Geometry type: POLYGON
## Dimension: XYZ
## Bounding box: xmin: 79.0805 ymin: 12.87212 xmax: 79.15776 ymax: 12.93729
## z_range: zmin: 0 zmax: 0
## Geodetic CRS: WGS 84
sampling_locations <- st_read(sampling_locations_path)
## Reading layer `WW_sampling_locations' from data source
## `D:\ES3 PreTCV paper D22_june23\Reanalysis Jan2025\PreTCV_WW_INLA\WW_sampling_locations.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 50 features and 14 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 79.11484 ymin: 12.88458 xmax: 79.14474 ymax: 12.93527
## Geodetic CRS: WGS 84
typhoid_cases <- st_read(typhoid_cases_path)
## Reading layer `Typhoid_cases' from data source
## `D:\ES3 PreTCV paper D22_june23\Reanalysis Jan2025\PreTCV_WW_INLA\Typhoid_cases.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 207 features and 34 fields
## Geometry type: POINT
## Dimension: XYZ
## Bounding box: xmin: 79.10487 ymin: 12.89173 xmax: 79.14354 ymax: 12.92995
## z_range: zmin: 183 zmax: 591
## Geodetic CRS: WGS 84
typhoid_hotspots <- st_read(typhoid_hotspots_path)
## Reading layer `Typhoid_hotspots' from data source
## `D:\ES3 PreTCV paper D22_june23\Reanalysis Jan2025\PreTCV_WW_INLA\Typhoid_hotspots.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 34 features and 7 fields
## Geometry type: POINT
## Dimension: XYZ
## Bounding box: xmin: 79.13675 ymin: 12.91845 xmax: 79.14227 ymax: 12.9232
## z_range: zmin: 227 zmax: 587
## Geodetic CRS: WGS 84
library(terra)
# Load the INLA prediction raster
raster_path <- "D:/PreTCV_WW_INLA/INLA_WW/INLA_predictions.tif"
r_pred <- rast(raster_path)
# Convert raster to a data frame for ggplot
r_pred_df <- as.data.frame(r_pred, xy = TRUE)
colnames(r_pred_df)[3] <- "predicted_prob"
p <- ggplot() +
# Add the INLA raster layer
geom_raster(data = r_pred_df, aes(x = x, y = y, fill = predicted_prob)) +
scale_fill_gradientn(name = "Predicted Probability of S. Typhi detection in WW samples",
colors = c("#fff7bc", "#99d8c9", "#2ca25f")) + # Custom colors+
# Add the boundary shapefile
geom_sf(data = boundary, fill = NA, color = "black", size = 3) +
# Add sampling locations (dark blue)
geom_sf(data = sampling_locations, aes(color = "Sampling Locations"), size = 6, shape = 18) +
# Add Typhoid cases (black)
geom_sf(data = typhoid_cases, aes(color = "Typhoid_cases"), size = 3.5, shape = 16) +
# Add Typhoid hotspots (orange)
geom_sf(data = typhoid_hotspots, aes(color = "Typhoid Hotspots"), size = 4, shape = 16) +
# Define custom colors for legend
scale_color_manual(name = "Legend", values = c("Sampling Locations" = "purple",
"Typhoid_cases" = "black",
"Typhoid Hotspots" = "orange")) +
# Apply minimal theme
theme_minimal() +
# Customize legend: move inside the map window
theme(legend.text = element_text(size = 14),
legend.title = element_text(size = 14),
legend.position = c(0.2, 0.2), # Moves legend inside the map (adjust as needed)
legend.background = element_rect(fill = "white", color = "black")) + # Add background
labs(title = "INLA Model Predictions for S. Typhi detection in wastewater and hotspots of Typhoid cases", size = 16) +
# Add a scale bar (requires ggspatial)
annotation_scale(location = "br", width_hint = 0.5) +
# Add a north arrow (requires ggspatial)
annotation_north_arrow(location = "tl", which_north = "true",
style = north_arrow_fancy_orienteering())
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Print the map
print(p)

ggsave("D:/ES3 PreTCV paper D22_june23/Reanalysis Jan2025/PreTCV_WW_INLA/INLA_predictions_final2.jpeg",
plot = p, width = 16, height = 14, dpi = 500, device = "jpeg")