Predicting WW positivity pre TCV

setwd("D:/ES3 PreTCV paper D22_june23/Reanalysis Jan2025/PreTCV_WW_INLA")

# Add the INLA repository and install the package
install.packages("INLA", repos = c(getOption("repos"), INLA = "https://inla.r-inla-download.org/R/stable"))

# Load libraries
library(sf)

## Linking to GEOS 3.13.0, GDAL 3.10.1, PROJ 9.5.1; sf_use_s2() is TRUE

library(INLA)

## Loading required package: Matrix

## This is INLA_24.12.11 built 2024-12-11 19:58:26 UTC.
##  - See www.r-inla.org/contact-us for how to get help.
##  - List available models/likelihoods/etc with inla.list.models()
##  - Use inla.doc(<NAME>) to access documentation

library(readxl)
library(ggplot2)
library(tmap)
library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(terra)

## terra 1.8.21

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:terra':
## 
##     intersect, union

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(sf)
library(sp)
library(raster)

## 
## Attaching package: 'raster'

## The following object is masked from 'package:dplyr':
## 
##     select

library(tmap)
library(ggspatial)

# Load dataset
df <- read_excel("D:/ES3 PreTCV paper D22_june23/Reanalysis Jan2025/PreTCV_WW_INLA/ww.xlsx")

# Convert to data frame
df <- as.data.frame(df)

df$disease <- as.numeric(df$disease > 0)  # Convert to binary
colSums(is.na(df))  # Identify missing values

##           sample             date             site          methods 
##                0                0                0                0 
##        labmethod         Temp_max          Rain_mm         Humidity 
##                0                1                0                0 
##         latitude        longitude         TotalPop Typhoidcases_cum 
##                0                0                0                0 
##          Overlap      Cases_total        Popden_CA           hf_183 
##                0                0                0              191 
##          disease 
##                0

df <- na.omit(df)
df$site <- as.factor(df$site)  # Convert site to a factor

df$date <- as.Date(df$date, format = "%d-%m-%Y")  # Convert date format
df$time <- as.integer(difftime(df$date, min(df$date), units = "weeks")) %/% 2
df$time <- as.factor(df$time)

formula <- disease ~ methods + Temp_max + Rain_mm + TotalPop + 
  Typhoidcases_cum + hf_183 + 
  f(site, model = "iid") +  # Random effect for site
  f(time, model = "ar1")    # Biweekly time effect

inla_model <- inla(formula, 
                   family = "binomial",  
                   data = df,
                   control.predictor = list(compute = TRUE),
                   control.compute = list(dic = TRUE, waic = TRUE, cpo = TRUE))

summary(inla_model)

## Time used:
##     Pre = 0.371, Running = 0.551, Post = 0.536, Total = 1.46 
## Fixed effects:
##                    mean    sd 0.025quant 0.5quant 0.975quant   mode kld
## (Intercept)      -3.109 2.636     -8.397   -3.075      1.980 -3.076   0
## methodsNanoA      1.588 0.328      0.946    1.588      2.232  1.588   0
## Temp_max          0.112 0.037      0.040    0.112      0.185  0.112   0
## Rain_mm           0.041 0.015      0.011    0.041      0.071  0.041   0
## TotalPop          0.000 0.001     -0.001    0.000      0.001  0.000   0
## Typhoidcases_cum  0.118 0.135     -0.147    0.118      0.381  0.118   0
## hf_183           -0.207 0.057     -0.319   -0.207     -0.096 -0.207   0
## 
## Random effects:
##   Name     Model
##     site IID model
##    time AR1 model
## 
## Model hyperparameters:
##                        mean       sd 0.025quant 0.5quant 0.975quant     mode
## Precision for site 3.08e-01 1.27e-01      0.128 2.86e-01   6.19e-01    0.245
## Precision for time 2.20e+04 2.41e+04   1466.430 1.44e+04   8.61e+04 3997.754
## Rho for time       0.00e+00 6.98e-01     -0.987 0.00e+00   9.88e-01    0.997
## 
## Deviance Information Criterion (DIC) ...............: 417.77
## Deviance Information Criterion (DIC, saturated) ....: 415.96
## Effective number of parameters .....................: 40.13
## 
## Watanabe-Akaike information criterion (WAIC) ...: 405.97
## Effective number of parameters .................: 26.57
## 
## Marginal log-Likelihood:  -266.58 
## CPO, PIT is computed 
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')

# Extract fixed effects summary
fixed_effects <- inla_model$summary.fixed

# Select relevant columns
fixed_effects <- fixed_effects[, c("mean", "sd", "0.025quant", "0.975quant")]

# Rename columns for clarity
colnames(fixed_effects) <- c("Estimate", "Std. Error", "Lower 95% CI", "Upper 95% CI")

# Print the results
print(fixed_effects)

##                       Estimate   Std. Error Lower 95% CI  Upper 95% CI
## (Intercept)      -3.1092854351 2.6355257414 -8.396930166  1.9800216748
## methodsNanoA      1.5878612440 0.3278776383  0.945872914  2.2317914404
## Temp_max          0.1122760013 0.0370592416  0.039741286  0.1850966585
## Rain_mm           0.0405293719 0.0153091790  0.010518089  0.0705600824
## TotalPop         -0.0002079891 0.0005716034 -0.001342152  0.0009247946
## Typhoidcases_cum  0.1177904632 0.1346357452 -0.147316074  0.3809923790
## hf_183           -0.2073495775 0.0569101614 -0.319103072 -0.0958837101

df$predicted_prob <- inla_model$summary.fitted.values$mean

#Formula
formula <- disease ~ methods + Temp_max + Rain_mm + TotalPop + 
  Typhoidcases_cum + hf_183 + 
  f(site, model = "iid") +  # Random effect for site
  f(time, model = "ar1")    # Biweekly time effect

#Run INLA model
inla_model <- inla(formula, 
                   family = "binomial",  
                   data = df,
                   control.predictor = list(compute = TRUE),
                   control.compute = list(dic = TRUE, waic = TRUE, cpo = TRUE))

summary(inla_model)

## Time used:
##     Pre = 0.23, Running = 0.54, Post = 0.436, Total = 1.21 
## Fixed effects:
##                    mean    sd 0.025quant 0.5quant 0.975quant   mode kld
## (Intercept)      -3.109 2.634     -8.390   -3.075      1.979 -3.076   0
## methodsNanoA      1.588 0.328      0.946    1.588      2.232  1.588   0
## Temp_max          0.112 0.037      0.040    0.112      0.185  0.112   0
## Rain_mm           0.041 0.015      0.011    0.041      0.071  0.041   0
## TotalPop          0.000 0.001     -0.001    0.000      0.001  0.000   0
## Typhoidcases_cum  0.118 0.135     -0.147    0.118      0.381  0.118   0
## hf_183           -0.207 0.057     -0.319   -0.207     -0.096 -0.207   0
## 
## Random effects:
##   Name     Model
##     site IID model
##    time AR1 model
## 
## Model hyperparameters:
##                        mean       sd 0.025quant 0.5quant 0.975quant     mode
## Precision for site 3.08e-01 1.27e-01      0.129 2.86e-01   6.19e-01    0.245
## Precision for time 2.20e+04 2.40e+04   1492.057 1.45e+04   8.58e+04 4079.973
## Rho for time       0.00e+00 6.77e-01     -0.982 0.00e+00   9.82e-01    0.993
## 
## Deviance Information Criterion (DIC) ...............: 417.73
## Deviance Information Criterion (DIC, saturated) ....: 415.91
## Effective number of parameters .....................: 40.10
## 
## Watanabe-Akaike information criterion (WAIC) ...: 405.95
## Effective number of parameters .................: 26.56
## 
## Marginal log-Likelihood:  -266.66 
## CPO, PIT is computed 
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')

# Extract fixed effects summary

fixed_effects <- inla_model$summary.fixed

# Select relevant columns
fixed_effects <- fixed_effects[, c("mean", "sd", "0.025quant", "0.975quant")]

# Rename columns for clarity
colnames(fixed_effects) <- c("Estimate", "Std. Error", "Lower 95% CI", "Upper 95% CI")

# Print the results
print(fixed_effects)

##                       Estimate   Std. Error Lower 95% CI  Upper 95% CI
## (Intercept)      -3.1087294578 2.6337111723 -8.389860459  1.9789650783
## methodsNanoA      1.5878766074 0.3278429292  0.945933942  2.2317155881
## Temp_max          0.1122761146 0.0370571120  0.039742361  0.1850890235
## Rain_mm           0.0405291191 0.0153093793  0.010517238  0.0705599580
## TotalPop         -0.0002078337 0.0005711019 -0.001340637  0.0009237144
## Typhoidcases_cum  0.1177781567 0.1346353288 -0.147301133  0.3809988871
## hf_183           -0.2073459132 0.0569092589 -0.319093876 -0.0958787426

 # Define file paths (update if necessary)
library(sf)
library(terra)
  boundary_path <- "D:/ES3 PreTCV paper D22_june23/Reanalysis Jan2025/PreTCV_WW_INLA/boundary.shp"
  
  sampling_locations_path <- "D:/ES3 PreTCV paper D22_june23/Reanalysis Jan2025/PreTCV_WW_INLA/WW_sampling_locations.shp"
  
  typhoid_cases_path <- "D:/ES3 PreTCV paper D22_june23/Reanalysis Jan2025/PreTCV_WW_INLA/Typhoid_cases.shp"
  
  typhoid_hotspots_path <- "D:/ES3 PreTCV paper D22_june23/Reanalysis Jan2025/PreTCV_WW_INLA/Typhoid_hotspots.shp"

 # Load the shapefiles
  boundary <- st_read(boundary_path)

## Reading layer `boundary' from data source 
##   `D:\ES3 PreTCV paper D22_june23\Reanalysis Jan2025\PreTCV_WW_INLA\boundary.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 12 fields
## Geometry type: POLYGON
## Dimension:     XYZ
## Bounding box:  xmin: 79.0805 ymin: 12.87212 xmax: 79.15776 ymax: 12.93729
## z_range:       zmin: 0 zmax: 0
## Geodetic CRS:  WGS 84

  sampling_locations <- st_read(sampling_locations_path)

## Reading layer `WW_sampling_locations' from data source 
##   `D:\ES3 PreTCV paper D22_june23\Reanalysis Jan2025\PreTCV_WW_INLA\WW_sampling_locations.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 50 features and 14 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 79.11484 ymin: 12.88458 xmax: 79.14474 ymax: 12.93527
## Geodetic CRS:  WGS 84

  typhoid_cases <- st_read(typhoid_cases_path)

## Reading layer `Typhoid_cases' from data source 
##   `D:\ES3 PreTCV paper D22_june23\Reanalysis Jan2025\PreTCV_WW_INLA\Typhoid_cases.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 207 features and 34 fields
## Geometry type: POINT
## Dimension:     XYZ
## Bounding box:  xmin: 79.10487 ymin: 12.89173 xmax: 79.14354 ymax: 12.92995
## z_range:       zmin: 183 zmax: 591
## Geodetic CRS:  WGS 84

  typhoid_hotspots <- st_read(typhoid_hotspots_path)

## Reading layer `Typhoid_hotspots' from data source 
##   `D:\ES3 PreTCV paper D22_june23\Reanalysis Jan2025\PreTCV_WW_INLA\Typhoid_hotspots.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 34 features and 7 fields
## Geometry type: POINT
## Dimension:     XYZ
## Bounding box:  xmin: 79.13675 ymin: 12.91845 xmax: 79.14227 ymax: 12.9232
## z_range:       zmin: 227 zmax: 587
## Geodetic CRS:  WGS 84

library(terra)
  
  # Load the INLA prediction raster
  raster_path <- "D:/PreTCV_WW_INLA/INLA_WW/INLA_predictions.tif"
  r_pred <- rast(raster_path)
  
  # Convert raster to a data frame for ggplot
  r_pred_df <- as.data.frame(r_pred, xy = TRUE)
  colnames(r_pred_df)[3] <- "predicted_prob"

p <- ggplot() +
    # Add the INLA raster layer
    geom_raster(data = r_pred_df, aes(x = x, y = y, fill = predicted_prob)) +
    scale_fill_gradientn(name = "Predicted Probability of S. Typhi detection in WW samples", 
                         colors = c("#fff7bc", "#99d8c9", "#2ca25f")) +  # Custom colors+
    
    # Add the boundary shapefile
    geom_sf(data = boundary, fill = NA, color = "black", size = 3) +
    
       # Add sampling locations (dark blue)
    geom_sf(data = sampling_locations, aes(color = "Sampling Locations"), size = 6, shape = 18) +
    
    # Add Typhoid cases (black)
    geom_sf(data = typhoid_cases, aes(color = "Typhoid_cases"), size = 3.5, shape = 16) +
    
    # Add Typhoid hotspots (orange)
    geom_sf(data = typhoid_hotspots, aes(color = "Typhoid Hotspots"), size = 4, shape = 16) +
    
    # Define custom colors for legend
    scale_color_manual(name = "Legend", values = c("Sampling Locations" = "purple",
                                                   "Typhoid_cases" = "black",
                                                   "Typhoid Hotspots" = "orange")) +
    
    # Apply minimal theme
    theme_minimal() +
    

    # Customize legend: move inside the map window
    theme(legend.text = element_text(size = 14),  
          legend.title = element_text(size = 14),  
          legend.position = c(0.2, 0.2),  # Moves legend inside the map (adjust as needed)
          legend.background = element_rect(fill = "white", color = "black")) +  # Add background
    
    labs(title = "INLA Model Predictions for S. Typhi detection in wastewater and hotspots of Typhoid cases", size = 16) +
    
    # Add a scale bar (requires ggspatial)
    annotation_scale(location = "br", width_hint = 0.5) +
    
    # Add a north arrow (requires ggspatial)
    annotation_north_arrow(location = "tl", which_north = "true", 
                           style = north_arrow_fancy_orienteering())

## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

  # Print the map
  print(p)

 ggsave("D:/ES3 PreTCV paper D22_june23/Reanalysis Jan2025/PreTCV_WW_INLA/INLA_predictions_final2.jpeg", 
         plot = p, width = 16, height = 14, dpi = 500, device = "jpeg")

Predicting WW positivity pre TCV

Venkat

2025-02-28