## vector with package names
x <- c("pbapply", "ggbeeswarm", "ggplot2", "viridis","cluster", "dplyr", "magrittr", "reshape", "plyr", "ade4", "svMisc", "geometry", "ape")
aa <- lapply(x, function(y) {
# check if installed, if not then install
if (!y %in% installed.packages()[,"Package"])
install.packages(y)
# load package
try(require(y, character.only = T), silent = T)
})
para <- parallel::detectCores() - 2
theme_set(theme_classic(base_size = 20))
knitr::opts_chunk$set(dpi = 100, fig.width = 12, warning = FALSE, root.dir = "..")
knitr::opts_knit$set(root.dir = "..")
source("./scripts/assemblage_random_subsets.R")
# species_abundances <- readRDS("./data/species_abundances.rds")
abundance_and_func_traits <- readRDS("~/Dropbox/Projects/eastern_tropical_pacific_herbivory/data_intermediate/fish/regional/herbfishes_regional_ecoregion_traits_biomass.rds")
# transect_areas1 <- readRDS("./data/transect_areas.RDS")
transect_areas <- abundance_and_func_traits[!duplicated(paste(abundance_and_func_traits$ID_transect,abundance_and_func_traits$site_id)), c("ID_transect", "site_id", "area_uvc", "region", "date")]
transect_areas$ID_transect_num <- 1:nrow(transect_areas)
# Max number of sites a transect is found in (should be 1!)
max(table(transect_areas$ID_transect, transect_areas$site_id))
## [1] 1
predictors <- readRDS("~/Dropbox/Projects/eastern_tropical_pacific_herbivory/data_intermediate/predictors.RDS")
# these ones don't have coordinates (should be 4 sites from mexico)
setdiff(transect_areas$site_id, predictors$site_id)
## character(0)
transect_areas <- merge(transect_areas, predictors[!duplicated(predictors$site_id), ], by = "site_id")
transect_areas$years_protected[is.na(transect_areas$years_protected)] <- 0
transect_areas$ID_transect_num <- 1:nrow(transect_areas)
subtransect_areas <- transect_areas[transect_areas$Ecoregion == "Revillagigedos" & !is.na(transect_areas$Ecoregion), ]
set.seed(1001)
FD_test <- assemblage_random_subsets(transc_metadata = subtransect_areas, sp_abund = abundance_and_func_traits, reps = 100, buffer = 0.1, parallel = 1, target.area = 600, overrep.cutoff = 0.9, site.specific.biomass = TRUE)
head(FD_test[FD_test$taxa_n > 3, ])
#
# sapply(FD_test, ncol)
FD_results <- assemblage_random_subsets(X = transect_areas, reps = 1000, buffer = 0.1, parallel = para, target.area = 500)
#
# saveRDS(FD_results, "preliminary_results_random_sample_of_transects.RDS")
#
FDres <- readRDS("preliminary_results_random_sample_of_transects.RDS")
#
nrow(FDres)
#
anyDuplicated(FDres$ID_transect_num) / nrow(FDres)
#
# FDres <- FDres[!duplicated(FDres$ID_transect_num), ]
#
# FDres <- FDres[!is.na(FDres$Nb_sp), ]
#
# nrow(FDres)
#
# agg_sites <- aggregate(ID_transect_num ~ sites + locality, data = transect_areas,
# FUN = function(x) length(unique(x)))
# nrow(agg_sites)
#
# agg_random_samples <- aggregate(ID_transect_num ~ taxa_n + sites + locality, data = FDres,
# FUN = function(x) length(unique(x)))
#
#
# length(unique(agg_sites$locality))
# length(unique(agg_random_samples$locality))
#
# nrow(agg_random_samples)
#
#
# agg <- aggregate(area_uvc ~ sites + locality, data = transect_areas,
# FUN = sum)
#
#
# hist(agg$area_uvc, breaks = 20)
#
# agg_sites <- aggregate(ID_transect_num ~ sites + locality, data = transect_areas,
# FUN = function(x) length(unique(x)))
#
param_grid <- expand.grid(buffer = seq(0, 0.3, 0.05), target.area = seq(200, 800, 50))
pboptions(type = "none")
# loop to test different parameters
for(i in 1:nrow(param_grid)){
progress(value = i, max.value = nrow(param_grid), init = (i == 1), progress.bar = TRUE, char = "+")
if (!file.exists(file.path("./output/fd_by_target_area_and_buffer_1000_reps",paste0("random_sample_of_transects_buffer_", param_grid$buffer[i], "_target_area_", param_grid$target.area[i], ".RDS"))))
{
fdres <- try(assemblage_random_subsets(transc_metadata = transect_areas, sp_abund = abundance_and_func_traits, reps = 1000, buffer = param_grid$buffer[i], parallel = para, target.area = param_grid$target.area[i], site.specific.biomass = TRUE, pb = FALSE), silent = TRUE)
try(saveRDS(fdres, file.path("./output/fd_by_target_area_and_buffer_1000_reps",paste0("random_sample_of_transects_buffer_", param_grid$buffer[i], "_target_area_", param_grid$target.area[i], ".RDS"))), silent = TRUE)
rm(fdres)
}
}
FD_results <- assemblage_random_subsets(X = transect_areas, Y = species_abundances, Z = functional_traits, reps = 10000, buffer = 0.1, parallel = para, target.area = 500, site.specific.biomass = TRUE)
saveRDS(FD_results, "../output/preliminary_results_random_sample_of_transects.RDS")
rdss <- list.files(path = "./output/fd_by_target_area_and_buffer_1000_reps/", full.names = TRUE)
transect_areas$ID_transect_num <- 1:nrow(transect_areas)
orignal_ecoregion_n <- length(unique(transect_areas$Ecoregion))
orignal_locality_n <- length(unique(transect_areas$locality))
orignal_site_n <- length(unique(transect_areas$site_id))
orignal_transect_n <- nrow(transect_areas)
original_total_area <- sum(transect_areas$area_uvc)
out <- pbapply::pblapply(rdss, cl = 4, function(x){
rds <- readRDS(x)
rds <- as.data.frame(rds)
rds$Nb_sp <- apply(rds, 1, function(x) sum(x > 0))
rds$sites <- sapply(strsplit(rownames(rds), "/", fixed = TRUE), "[", 1)
rds$ID_transect_num <- sapply(strsplit(rownames(rds), "/", fixed = TRUE), "[", 2)
# rds <- rds[!duplicated(rds$ID_transect_num), ]
# rds <- rds[!is.na(rds$Nb_sp), ]
rds$ecoregion <- sapply(1:nrow(rds), function(y) transect_areas$Ecoregion[transect_areas$site_id == rds$sites[y]][1])
rds$locality <- sapply(1:nrow(rds), function(y) transect_areas$locality[transect_areas$site_id == rds$sites[y]][1])
buffer <- as.numeric(sapply(strsplit(basename(x), "_", fixed = TRUE), "[", 6))
target_area <- as.numeric(gsub(".RDS", "", sapply(strsplit(basename(x), "_", fixed = TRUE), "[", 9)))
ecoregion_n <- round(length(unique(rds$ecoregion)) / orignal_ecoregion_n, 2)
locality_n <- round(length(unique(rds$locality)) / orignal_locality_n, 2)
sites_n <- round(length(unique(rds$sites)) / orignal_site_n, 2)
ID_transect_num <- unlist(strsplit(rds$ID_transect_num, "-", fixed = TRUE))
transect_n <- round(length(unique(ID_transect_num)) / orignal_transect_n, 2)
total_area <- sum(transect_areas$area_uvc[transect_areas$ID_transect_num %in% ID_transect_num]) / original_total_area
mean_site_random_samples <- mean(table(rds$sites))
# transect over-representation index
tb <- table(unlist(sapply(rds$ID_transect_num, function(x) strsplit(x, "-", fixed = TRUE))))
transect_overrepresentation <- sd(tb / mean(tb))
df <- data.frame(call = basename(x), buffer, target_area, ecoregion_n, locality_n, sites_n, transect_n, total_area, transect_overrepresentation, mean_site_random_samples)
return(df)
})
model_parameters <- do.call(rbind, out)
write.csv(model_parameters, "./output/model_parameters.csv", row.names = FALSE)
model_parameters <- read.csv("./output/model_parameters.csv")
model_parameters$buffer <- factor(model_parameters$buffer, levels = sort(unique(model_parameters$buffer)))
model_parameters$target_area <- factor(model_parameters$target_area, levels = sort(unique(model_parameters$target_area)))
params_grid <- expand.grid(settings = c("buffer", "target_area"), parameters = c("total_area", "transect_n", "transect_overrepresentation", "locality_n", "sites_n", "ecoregion_n", "mean_site_random_samples"), stringsAsFactors = FALSE)
params_grid <- params_grid[order(params_grid$settings, decreasing = TRUE), ]
ggs <- lapply(1:nrow(params_grid), function(x){
agg <- aggregate(as.formula(paste(params_grid$parameters[x], "~", params_grid$settings[x])), model_parameters, mean, na.rm = TRUE)
gg <- ggplot(model_parameters, aes(x = get(params_grid$settings[x]), y = get(params_grid$parameters[x]))) +
geom_violin(fill = viridis(10, alpha = 0.5)[which(unique(params_grid$parameters) %in% params_grid$parameters[x])]) +
geom_point(data = agg, aes(x = get(params_grid$settings[x]), y = get(params_grid$parameters[x])), size = 3) +
geom_beeswarm(alpha = 0.4) +
labs(x = params_grid$settings[x], y = params_grid$parameters[x]) +
theme(legend.position = "none")
if (params_grid$parameters[x] == "transect_overrepresentation")
gg <- gg + ylim(c(0, max(model_parameters$transect_overrepresentation, na.rm = TRUE))) else
if (params_grid$parameters[x] == "mean_site_random_samples")
gg <- gg + ylim(c(0, max(model_parameters$mean_site_random_samples))) else
if (params_grid$parameters[x] %in% c("locality_n", "sites_n", "ecoregion_n")) gg <- gg + ylim(c(0, 1)) else
return(gg)
})
names(ggs) <- apply(params_grid, 1, paste, collapse = " vs ")
ggs
## $`target_area vs total_area`
##
## $`target_area vs transect_n`
##
## $`target_area vs transect_overrepresentation`
##
## $`target_area vs locality_n`
##
## $`target_area vs sites_n`
##
## $`target_area vs ecoregion_n`
##
## $`target_area vs mean_site_random_samples`
##
## $`buffer vs total_area`
##
## $`buffer vs transect_n`
##
## $`buffer vs transect_overrepresentation`
##
## $`buffer vs locality_n`
##
## $`buffer vs sites_n`
##
## $`buffer vs ecoregion_n`
##
## $`buffer vs mean_site_random_samples`
mod <- model_parameters$call[which.max(model_parameters$locality_n)]
rds <- readRDS(file.path("./output/fd_by_target_area_and_buffer_1000_reps/", mod))
rds <- as.data.frame(rds)
rds$Nb_sp <- apply(rds, 1, function(x) sum(x > 0))
rds$sites <- sapply(strsplit(rownames(rds), "/", fixed = TRUE), "[", 1)
rds$ID_transect_num <- sapply(strsplit(rownames(rds), "/", fixed = TRUE), "[", 2)
rds$ecoregion <- sapply(1:nrow(rds), function(y) transect_areas$Ecoregion[transect_areas$site_id == rds$sites[y]][1])
rds$locality <- sapply(1:nrow(rds), function(y) transect_areas$locality[transect_areas$site_id == rds$sites[y]][1])
# ecoregions lost
# setdiff(unique(transect_areas$ecoregion), unique(rds$ecoregion))
lost_localities <- setdiff(unique(transect_areas$locality), unique(rds$locality))
The maximum percentage of the total area from any model that can be included is 97%
These are the localities that are lost no matter which model parameters are used:
el_faro
R session information
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
##
## locale:
## [1] LC_CTYPE=pt_BR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=es_CR.UTF-8 LC_COLLATE=pt_BR.UTF-8
## [5] LC_MONETARY=es_CR.UTF-8 LC_MESSAGES=pt_BR.UTF-8
## [7] LC_PAPER=es_CR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ape_5.5 geometry_0.4.5 svMisc_1.1.4 ade4_1.7-17
## [5] plyr_1.8.6 reshape_0.8.8 magrittr_2.0.3 dplyr_1.0.7
## [9] cluster_2.1.2 viridis_0.6.2 viridisLite_0.4.0 ggbeeswarm_0.6.0
## [13] ggplot2_3.3.5 pbapply_1.5-0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.8.3 lattice_0.20-44 assertthat_0.2.1 digest_0.6.29
## [5] utf8_1.2.2 R6_2.5.1 magic_1.5-9 evaluate_0.15
## [9] highr_0.9 pillar_1.6.4 rlang_1.0.1 rstudioapi_0.13
## [13] jquerylib_0.1.4 rmarkdown_2.13 labeling_0.4.2 stringr_1.4.0
## [17] munsell_0.5.0 compiler_4.1.0 vipor_0.4.5 xfun_0.30
## [21] pkgconfig_2.0.3 htmltools_0.5.2 tidyselect_1.1.1 tibble_3.1.6
## [25] gridExtra_2.3 fansi_1.0.0 crayon_1.5.0 withr_2.4.3
## [29] MASS_7.3-54 grid_4.1.0 nlme_3.1-152 jsonlite_1.8.0
## [33] gtable_0.3.0 lifecycle_1.0.1 DBI_1.1.1 scales_1.1.1
## [37] cli_3.1.0 stringi_1.7.6 farver_2.1.0 bslib_0.2.5.1
## [41] ellipsis_0.3.2 generics_0.1.0 vctrs_0.3.8 tools_4.1.0
## [45] glue_1.6.2 beeswarm_0.4.0 purrr_0.3.4 abind_1.4-5
## [49] parallel_4.1.0 fastmap_1.1.0 yaml_2.3.5 colorspace_2.0-2
## [53] knitr_1.38 sass_0.4.0