Cholera Disease Project - Environmental Raster Preparation
Introduction: Environmental Data Preparation for Cholera Ecological Niche Modeling
Objectives of Data Preparation
Before running the ecological niche model (ENM), we
must:
✅ Load and preprocess environmental raster data
(NetCDF format).
✅ Compute average climate conditions across multiple
time periods.
✅ Remove problematic variables (NA values,
zero-inflated layers).
✅ Standardize and scale climate predictors for
modeling consistency.
✅ Mask climate data to the study region for spatial
relevance.
Workflow Overview
1️⃣ Load Environmental Raster Data
Import climate variables stored in NetCDF format for different time periods (2000s & 2010s).
Use an automated function to streamline raster loading.
2️⃣ Process and Aggregate Climate Variables
Compute the mean environmental conditions across time periods.
Create a single environmental dataset representing present-day conditions.
3️⃣ Mask Climate Data to the Marine Region
Ensure that environmental predictors align with Cholera’s habitat (marine regions).
Exclude land-based environmental layers to maintain model accuracy.
4️⃣ Standardize and Scale Raster Data
Remove correlated or problematic variables to avoid model bias.
Normalize climate variables to a common scale before analysis.
# Load necessary libraries for spatial and data manipulation
library(terra) # Handling raster data
library(tidyverse) # Data manipulation
library(ncdf4) # NetCDF file handling
library(maps) # Mapping supportLoad Environmental Data
Define a Function to Load Rasters
To streamline the process, we define a function that reads NetCDF files for different variables.
# Function to load raster data
load_raster <- function(var, decade) {
path <- sprintf("Cholera/Climate/%ss/%s_%s.nc",
decade,
var,
decade)
rast(path)
}Load Rasters Efficiently
Instead of manually loading each file, we use a list approach.
# Define environmental variables with full names as comments
variables <- c("so", # Salinity (-)
"thetao", # Ocean Temperature (°C)
"chl", # Chlorophyll (mg.m⁻³)
"ph", # pH (-)
"clt", # Cloud Cover (%)
"dfe", # Dissolved Molecular Iron (mmol.m⁻³)
"kdpar", # Diffuse Attenuation (m⁻¹)
"mlotst", # Mixed Layer Depth (m)
"no3", # Nitrate (mmol.m⁻³)
"o2", # Dissolved Molecular Oxygen (mmol.m⁻³)
"par", # Photosynthetically Available Radiation (E.m⁻².day⁻¹)
"phyc", # Primary Productivity; Total Phytoplankton (mmol.m⁻³)
"po4", # Phosphate (mmol.m⁻³)
"si", # Silicate (mmol.m⁻³)
"siconc", # Sea Ice Cover (Fraction)
"sithick", # Sea Ice Thickness (m)
"swd", # Sea Water Direction (degree)
"sws", # Sea Water Speed (m.s⁻¹)
"tas", # Air Temperature (°C)
"terrain") # Bathymetry (m)
# Define time periods
time_periods <- c("2000s",
"2010s")
# Function to load raster data for each variable and period
load_raster <- function(var, period, stat) {
path <- sprintf("Cholera/Climate/%s/%s_%s_%s.nc",
period,
var,
stat,
period)
rast(path)
}
# Load rasters for each variable, period, and statistic
rasters <- list()
for (var in variables) {
for (period in time_periods) {
rasters[[paste0(var, "_mean_", period)]] <- load_raster(var, period, "mean")
}
}Compute Mean Climate Variables for the Present Period
mean_rasters <- list()
for (var in variables) {
mean_rasters[[paste0(var, "_present")]] <- mean(
rasters[[paste0(var, "_mean_2000s")]], rasters[[paste0(var, "_mean_2010s")]]
)
}
# Combine all present-day climate variables into a single stack
current_clim <- rast(mean_rasters)
names(current_clim) <- paste0(variables)
# Plot current climate layers
plot(current_clim, main = names(current_clim))# Save climate data
writeCDF(current_clim, "Cholera/Climate/Current/current_clim.nc", split = TRUE, overwrite = TRUE)Load and Process Study Region
# Load marine region shapefile
m_region <- vect("Cholera/M_region/EEZ_IHO_v3_coastal.shp")
# Dissolve into a single polygon
m_region <- aggregate(m_region,
dissolve = TRUE)
# Save cleaned shapefile
writeVector(m_region, "Cholera/M_region/mask_m/m_coastal.shp", overwrite = TRUE)## Warning in x@pntr$write(filename, layer, filetype, insert[1], overwrite[1], :
## GDAL Message 6: dataset Cholera/M_region/mask_m/m_coastal.shp does not
## support layer creation option OVERWRITE
# Reload for consistency
m_region <- vect("Cholera/M_region/mask_m/m_coastal.shp")
# Assign coordinate reference system (CRS)
crs(m_region) <- "epsg:4326"
crs(current_clim) <- "epsg:4326"
# Plot marine region
plot(m_region)
map("world", add = TRUE)Load and Process Cholera Occurrence Data
# Load cholera occurrence dataset
cholera <- read_csv("Cholera/data/Occurrence_original.csv")## Rows: 1907 Columns: 6
## ── Column specification ──────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Country, Citation, References
## dbl (3): x, y, Year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Convert occurrence data to spatial vector
cholera_vec <- vect(cholera,
geom = c("x", "y"),
crs = "+proj=longlat +datum=WGS84 +no_defs")
# Crop cholera points to study region
cholera_vec_crop <- terra::crop(cholera_vec,
m_region)
# Convert cropped vector to dataframe
cholera_dat_crop <- as.data.frame(cholera_vec_crop,
geom = "XY")
# Save cropped occurrence data
write.csv(cholera_dat_crop, "Cholera/data/Occurrence_crop.csv")
# Plot occurrence data
plot(cholera_vec, col = "green")
plot(cholera_vec_crop, add = TRUE)
map("world", add = TRUE)Mask Climate Data to Study Region
# Crop and mask climate data to the marine region
clim_present_masked <- terra::crop(current_clim,
m_region,
mask = TRUE)
# Plot masked climate data
plot(clim_present_masked, main = names(clim_present_masked))# Save masked climate data
writeCDF(clim_present_masked, "Cholera/Climate/Mask/current_clim_m.nc", split = TRUE, overwrite = TRUE)Define Climate Scenarios and Variables
# Define future climate scenarios
scenarios <- c("ssp119", "ssp585")
# Define the environmental variables to be used
variables <- c("so",
"thetao",
"chl",
"ph",
"clt",
"dfe",
"mlotst",
"no3",
"o2",
"phyc",
"po4",
"si",
"siconc",
"sithick",
"swd",
"sws",
"tas")Load Future Climate Rasters
# Function to load raster files for a given scenario
load_future_rasters <- function(scenario, variables) {
raster_list <- list()
for (var in variables) {
file_path <- sprintf("Cholera/Climate/Future_2050_2060/%s/%s_%s.nc", scenario, var, scenario)
# Check if file exists before loading
if (file.exists(file_path)) {
raster_list[[var]] <- rast(file_path)
} else {
message(sprintf("Warning: File not found - %s", file_path))
}
}
return(raster_list)
}
# Load future climate variables for both SSP scenarios
future_clim_ssp119 <- load_future_rasters("ssp119", variables)
future_clim_ssp585 <- load_future_rasters("ssp585", variables)Convert Raster Lists to SpatRaster Objects
# Convert lists to SpatRaster objects
future_clim_ssp119_2050s <- rast(future_clim_ssp119)
future_clim_ssp585_2050s <- rast(future_clim_ssp585)Visualize Future Climate Rasters
# Plot SSP119 future climate raster
plot(future_clim_ssp119_2050s, main = names(future_clim_ssp119_2050s))# Plot SSP585 future climate raster
plot(future_clim_ssp585_2050s, main = names(future_clim_ssp585_2050s))Save Processed Future Climate Data
# Save processed future climate rasters to NetCDF format
writeCDF(future_clim_ssp119_2050s,
"Cholera/Climate/Future_2050_2060/future_clim_ssp119_2050s.nc",
split = TRUE,
overwrite = TRUE)
writeCDF(future_clim_ssp585_2050s,
"Cholera/Climate/Future_2050_2060/future_clim_ssp585_2050s.nc",
split = TRUE,
overwrite = TRUE)