if (!require("pacman")) install.packages("pacman")
pacman::p_load(
raster, terra, dplyr, tidyr, purrr, sf, dismo, sdm, ggplot2, usdm, httr, archive, readr, leaflet, htmltools
)
species_occurrence <- read_csv(
"https://raw.githubusercontent.com/quyforest/Pterygoplichthys_pardalis/main/svg_vn.csv",
show_col_types = FALSE
)
points_vect <- terra::vect(
species_occurrence,
geom = c("lon", "lat"),
crs = 'epsg:4326'
)
url <- "https://github.com/quyforest/Pterygoplichthys_pardalis/raw/main/current_vn.rar"
destfile <- "current_vn.rar"
httr::GET(url, httr::write_disk(destfile, overwrite = TRUE))
## Response [https://raw.githubusercontent.com/quyforest/Pterygoplichthys_pardalis/main/current_vn.rar]
## Date: 2025-11-11 01:40
## Status: 200
## Content-Type: application/octet-stream
## Size: 13.6 MB
## <ON DISK> C:\Users\Dell 3530\OneDrive\Máy tính\ENMTML-master\ENMTML-master\vignettes\current_vn.rar
archive_files <- archive::archive(destfile)
asc_files <- archive_files$path[grepl("\\.asc$", archive_files$path)]
if (!dir.exists("./asc_files")) dir.create("./asc_files")
archive::archive_extract(destfile, files = asc_files, dir = "./asc_files")
asc_paths <- list.files("./asc_files", pattern = "\\.asc$", full.names = TRUE, recursive = TRUE)
currentEnv <- terra::rast(asc_paths)
names(currentEnv) <- paste0("bio", 1:19)
set.seed(123)
sample_df <- terra::spatSample(currentEnv, size = 3000, na.rm = TRUE, as.df = TRUE)
v <- usdm::vifstep(sample_df, th = 5)
selected_variables <- v@results$Variables
currentEnv_selected <- terra::subset(currentEnv, selected_variables)
d <- sdmData(species ~ ., points_vect,
predictors = currentEnv_selected,
bg = list(method = "gRandom", n = 500))
d
## class : sdmdata
## ===========================================================
## number of species : 1
## species names : species
## number of features : 7
## feature names : bio2, bio8, bio12, ...
## type : Presence-Background
## has independent test data? : FALSE
## number of records : 516
## has Coordinates? : TRUE
m <- sdm(species ~ ., d, methods = c("maxent"),
replication = c("sub", "boot"), test.p = 30, n = 3,
parallelSetting = list(cores = 10, method = 'parallel'))
m
## class : sdmModels
## ========================================================
## number of species : 1
## number of modelling methods : 1
## names of modelling methods : maxent
## replicate.methods (data partitioning) : subsampling,bootstrap
## number of replicates (each method) : 3
## toral number of replicates per model : 6 (per species)
## test percentage (in subsampling) : 30
## ------------------------------------------
## model run success percentage (per species) :
## ------------------------------------------
## method species
## ----------------------
## maxent : 100 %
##
## ###################################################################
## model Mean performance (per species), using test dataset (generated using partitioning):
## -------------------------------------------------------------------------------
##
## ## species : species
## =========================
##
## methods : AUC | COR | TSS | Deviance
## -------------------------------------------------------------------------
## maxent : 0.92 | 0.38 | 0.77 | 0.44
en <- ensemble(m, currentEnv, filename = 'en.img',
setting = list(method = "weighted",
stat = "tss", opt = 2), overwrite = TRUE)
en
## class : SpatRaster
## dimensions : 1779, 878, 1 (nrow, ncol, nlyr)
## resolution : 0.008333333, 0.008333333 (x, y)
## extent : 102.1447, 109.4614, 8.566548, 23.39155 (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (CRS84) (OGC:CRS84)
## source : en.img
## name : ensemble_weighted
## min value : 0.003084289
## max value : 0.999972701
classified_raster <- terra::classify(
en,
rcl = matrix(c(-Inf, 0.2, 1,
0.2, 0.4, 2,
0.4, 0.6, 3,
0.6, Inf, 4),
ncol = 3, byrow = TRUE)
)
cell_area <- terra::cellSize(classified_raster, unit = "km")
area_values <- terra::zonal(cell_area, classified_raster, fun = "sum", na.rm = TRUE)
colnames(area_values) <- c("Value", "Area_km2")
area_df <- data.frame(
Value = 1:4,
Class = c("Unsuitable", "Suitable", "Highly suitable", "Extremely suitable")
) %>%
left_join(area_values, by = "Value") %>%
mutate(
Area_km2 = ifelse(is.na(Area_km2), 0, Area_km2)
)
raster_land_area <- sum(area_df$Area_km2)
area_df <- area_df %>%
mutate(
Percentage = Area_km2 / raster_land_area * 100
)
legend_labels <- paste0(
area_df$Class, "\n(",
format(round(area_df$Percentage, 2), nsmall = 1, decimal.mark = ","), "%)"
)
total_suitable_percent <- sum(area_df$Percentage[2:4])
r_leaf <- terra::project(classified_raster, "EPSG:4326")
pal <- colorFactor(
palette = c(
"1" = "#440154", # Unsuitable
"2" = "#21918c", # Suitable
"3" = "#fde725", # Highly suitable
"4" = "#e41a1c" # Extremely suitable
),
domain = c(1,2,3,4)
)
leaflet() %>%
addProviderTiles("CartoDB.Positron") %>%
addRasterImage(
r_leaf,
colors = pal,
opacity = 0.7,
project = FALSE
) %>%
addLegend(
pal = pal,
values = c(1,2,3,4),
title = "Legend",
labFormat = function(type, cuts, p) { legend_labels },
position = "bottomright"
) %>%
addControl(
html = htmltools::tags$div(
HTML(
paste0(
"<b>Potential distribution zones of Pterygoplichthys pardalis in Vietnam</b><br>",
"<span style='color:gray;'>Model: MaxEnt</span><br>",
"The total area of suitable zones (including suitable, highly suitable and extremely suitable) accounts for <b>",
format(round(total_suitable_percent, 2), nsmall = 1, decimal.mark = ","),
"%</b> of Vietnam's land area.<br>",
"<span style='color:red; font-style:italic;'>Note: Paracel and Spratly Islands are under Vietnam's sovereignty.</span>"
)
)
),
position = "topright"
)
# Calculate ROC curve data
.calculate_roc_data <- function(observed, predicted) {
th <- quantile(predicted, probs = seq(0, 1, length.out = 101), na.rm = TRUE)
roc_data <- purrr::map_dfr(th, function(t) {
predicted_class <- as.integer(predicted >= t)
cm <- table(factor(predicted_class, levels = 0:1),
factor(observed, levels = 0:1))
sensitivity <- cm[2,2] / sum(cm[,2])
specificity <- cm[1,1] / sum(cm[,1])
data.frame(threshold = t, sensitivity = sensitivity, specificity = specificity)
})
# Start at coordinate (0,0)
roc_data <- dplyr::bind_rows(
data.frame(threshold = NA, sensitivity = 0, specificity = 1),
roc_data
)
roc_data
}
# Smooth ROC curve
.smooth_roc <- function(roc_data) {
tryCatch({
smoothed <- supsmu(1 - roc_data$specificity, roc_data$sensitivity, bass = 0)
if (smoothed$y[1] != 0) {
smoothed$x <- c(0, smoothed$x)
smoothed$y <- c(0, smoothed$y)
}
data.frame(fpr = smoothed$x, sensitivity = smoothed$y)
}, error = function(e) {
data.frame(fpr = 1 - roc_data$specificity, sensitivity = roc_data$sensitivity)
})
}
# Main ROC plotting function using ggplot2
ggroc <- function(x, p = NULL, species = NULL, method = NULL,
replication = NULL, run = NULL, wtest = NULL,
smooth = FALSE, legend = TRUE, ...) {
dot_args <- list(...)
if (inherits(x, "sdmModels")) {
mi <- sdm::getModelInfo(x, species = species, method = method,
replication = replication, run = run)
if (is.null(wtest)) {
wtest <- names(x@models[[1]][[1]][[1]]@evaluation)
} else {
wtest <- match.arg(wtest, c("training", "test.dep", "test.indep"), several.ok = TRUE)
if (length(wtest) > 2) wtest <- wtest[1:2]
}
roc_data <- purrr::map(wtest, function(wt) {
purrr::map(mi$modelID, function(id) {
eval_data <- x@models[[mi$species[1]]][[mi$method[1]]][[as.character(id)]]@evaluation[[wt]]
if (inherits(eval_data, "sdmEvaluate")) {
.calculate_roc_data(eval_data@observed, eval_data@predicted)
} else NULL
}) %>% purrr::compact()
}) %>% rlang::set_names(wtest)
auc_values <- purrr::map_dbl(wtest, function(wt) {
mean(purrr::map_dbl(mi$modelID, function(id) {
eval_data <- x@models[[mi$species[1]]][[mi$method[1]]][[as.character(id)]]@evaluation[[wt]]
if (inherits(eval_data, "sdmEvaluate")) {
eval_data@statistics$AUC
} else NA
}), na.rm = TRUE)
})
plot_data <- purrr::map2_dfr(roc_data, wtest, function(rd, wt) {
purrr::map_dfr(rd, function(r) {
if (smooth) {
smoothed <- .smooth_roc(r)
data.frame(
fpr = smoothed$fpr,
sensitivity = smoothed$sensitivity,
test = wt,
type = "smoothed"
)
} else {
data.frame(
fpr = 1 - r$specificity,
sensitivity = r$sensitivity,
test = wt,
type = "raw"
)
}
}, .id = "model")
})
roc_palette <- c("training" = "#21918c", "test.dep" = "#fde725", "test.indep" = "#e41a1c")
p <- ggplot(plot_data, aes(x = fpr, y = sensitivity, color = test)) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "#440154") +
geom_line(aes(group = interaction(model, test)), alpha = 0.35) +
stat_summary(aes(group = test), fun = mean, geom = "line", size = 1.7) +
coord_equal() +
scale_x_continuous(limits = c(0, 1), expand = c(0, 0)) +
scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
labs(
x = "1 - specificity (False Positive Rate)",
y = "Sensitivity (True Positive Rate)",
color = "Test Data"
) +
scale_color_manual(values = roc_palette) +
theme_minimal(base_size = 14) +
theme(
legend.position = if (legend) "bottom" else "none",
panel.grid.minor = element_blank()
)
if (legend && !is.null(auc_values)) {
auc_labels <- paste0(wtest, " (AUC = ", round(auc_values, 3), ")")
p <- p + scale_color_manual(labels = auc_labels, values = roc_palette)
}
if (!is.null(dot_args$main)) {
p <- p + ggtitle(dot_args$main)
}
return(p)
} else if (is.numeric(x) && is.numeric(p)) {
roc_data <- .calculate_roc_data(x, p)
auc_value <- as.numeric(pROC::auc(x, p))
if (smooth) {
smoothed <- .smooth_roc(roc_data)
plot_data <- data.frame(
fpr = smoothed$fpr,
sensitivity = smoothed$sensitivity
)
} else {
plot_data <- data.frame(
fpr = 1 - roc_data$specificity,
sensitivity = roc_data$sensitivity
)
}
p <- ggplot(plot_data, aes(x = fpr, y = sensitivity)) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "#440154") +
geom_line(color = "#21918c", size = 1.3) +
coord_equal() +
scale_x_continuous(limits = c(0, 1), expand = c(0, 0)) +
scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
labs(
x = "1 - specificity (False Positive Rate)",
y = "Sensitivity (True Positive Rate)"
) +
theme_minimal(base_size = 14) +
theme(panel.grid.minor = element_blank())
if (legend) {
p <- p + annotate("text", x = 0.7, y = 0.2,
label = paste("AUC =", round(auc_value, 3)), size = 5, color = "#e41a1c")
}
if (!is.null(dot_args$main)) {
p <- p + ggtitle(dot_args$main)
}
return(p)
} else {
stop("Invalid input. `x` must be sdmModels or a vector of observed values; `p` must be either a predicted vector or model ID.")
}
}
ggroc(m, smooth = TRUE) # ROC for all models (training & test)
library(ggplot2)
library(dplyr)
library(tidyr)
var_names <- c("bio2", "bio8", "bio12", "bio14", "bio15", "bio18", "bio19")
cor_values <- c(20.4, 6.3, 0.1, 9.4, 0.6, 76.3, 1.4)
auc_values <- c(13.2, 3.7, 0.1, 5, 1.3, 58.3, 0.3)
vi_data <- data.frame(
variable = var_names,
Correlation = cor_values,
AUC = auc_values
)
vi_long <- vi_data %>%
pivot_longer(
cols = c("Correlation", "AUC"),
names_to = "metric",
values_to = "importance"
)
ggplot(vi_long %>% filter(metric == "AUC"),
aes(x = reorder(variable, importance), y = importance)) +
geom_bar(stat = "identity", fill = "#21918c", width = 0.7) +
labs(title = "Environmental Variable Importance (AUC)",
x = "Variable",
y = "Contribution (%)") +
coord_flip() +
theme_minimal(base_size = 14)
ggplot(vi_long %>% filter(metric == "Correlation"),
aes(x = reorder(variable, importance), y = importance)) +
geom_bar(stat = "identity", fill = "#e41a1c", width = 0.7) +
labs(title = "Environmental Variable Importance (Correlation)",
x = "Variable",
y = "Contribution (%)") +
coord_flip() +
theme_minimal(base_size = 14)
rcurve(m, n = c("bio14", "bio15", "bio18"), smooth = TRUE)
## The id argument is not specified; The modelIDs of 6 successfully fitted models are assigned to id...!
rcurve(m, n = c("bio19", "bio2", "bio8"), smooth = TRUE)
## The id argument is not specified; The modelIDs of 6 successfully fitted models are assigned to id...!