## vector with package names
x <- c("pbapply", "ggbeeswarm", "ggplot2", "viridis","cluster", "dplyr", "magrittr", "reshape", "plyr", "ade4", "svMisc", "geometry", "ape")

aa <- lapply(x, function(y) {
  
  # check if installed, if not then install 
  if (!y %in% installed.packages()[,"Package"]) 
    install.packages(y) 

  # load package
  try(require(y, character.only = T), silent = T)
})
para <- parallel::detectCores() - 2

theme_set(theme_classic(base_size = 20))

knitr::opts_chunk$set(dpi = 100, fig.width = 12, warning = FALSE, root.dir = "..")
knitr::opts_knit$set(root.dir = "..")
source("./scripts/assemblage_random_subsets.R")
# species_abundances <- readRDS("./data/species_abundances.rds")
abundance_and_func_traits <- readRDS("~/Dropbox/Projects/eastern_tropical_pacific_herbivory/data_intermediate/fish/regional/herbfishes_regional_ecoregion_traits_biomass.rds")

# transect_areas1 <- readRDS("./data/transect_areas.RDS")
transect_areas <- abundance_and_func_traits[!duplicated(paste(abundance_and_func_traits$ID_transect,abundance_and_func_traits$site_id)), c("ID_transect", "site_id", "area_uvc", "region", "date")]

transect_areas$ID_transect_num <- 1:nrow(transect_areas)

# Max number of sites a transect is found in (should be 1!)
max(table(transect_areas$ID_transect, transect_areas$site_id))
## [1] 1
predictors <- readRDS("~/Dropbox/Projects/eastern_tropical_pacific_herbivory/data_intermediate/predictors.RDS")

# these ones don't have coordinates (should be 4 sites from mexico)
setdiff(transect_areas$site_id, predictors$site_id)
## character(0)
transect_areas <- merge(transect_areas, predictors[!duplicated(predictors$site_id), ], by = "site_id")

transect_areas$years_protected[is.na(transect_areas$years_protected)] <- 0

transect_areas$ID_transect_num <- 1:nrow(transect_areas)
subtransect_areas <- transect_areas[transect_areas$Ecoregion == "Revillagigedos" & !is.na(transect_areas$Ecoregion), ]

set.seed(1001)
FD_test <- assemblage_random_subsets(transc_metadata = subtransect_areas, sp_abund = abundance_and_func_traits, reps = 100, buffer = 0.1, parallel = 1, target.area = 600, overrep.cutoff = 0.9, site.specific.biomass = TRUE)
head(FD_test[FD_test$taxa_n > 3, ])
# 
# sapply(FD_test, ncol)

FD_results <- assemblage_random_subsets(X = transect_areas, reps = 1000, buffer = 0.1, parallel = para, target.area = 500)
# 
# saveRDS(FD_results, "preliminary_results_random_sample_of_transects.RDS")
# 
FDres <- readRDS("preliminary_results_random_sample_of_transects.RDS")
# 
nrow(FDres)
# 
anyDuplicated(FDres$ID_transect_num) / nrow(FDres)
# 
# FDres <- FDres[!duplicated(FDres$ID_transect_num), ]
# 
# FDres <- FDres[!is.na(FDres$Nb_sp), ]
# 
# nrow(FDres)
# 
# agg_sites <- aggregate(ID_transect_num ~ sites + locality, data = transect_areas,
#                                 FUN = function(x) length(unique(x)))
# nrow(agg_sites)
# 
# agg_random_samples <- aggregate(ID_transect_num ~ taxa_n + sites + locality, data = FDres,
#                                 FUN = function(x) length(unique(x)))
# 
# 
# length(unique(agg_sites$locality))
# length(unique(agg_random_samples$locality))
# 
# nrow(agg_random_samples)
# 
# 
# agg <- aggregate(area_uvc  ~ sites + locality, data = transect_areas,
#           FUN = sum)
# 
# 
# hist(agg$area_uvc, breaks = 20)
# 
# agg_sites <- aggregate(ID_transect_num ~ sites + locality, data = transect_areas,
#                        FUN = function(x) length(unique(x)))
#
param_grid <- expand.grid(buffer = seq(0, 0.3, 0.05), target.area = seq(200, 800, 50))

pboptions(type = "none")


# loop to test different parameters
for(i in 1:nrow(param_grid)){

  progress(value = i, max.value = nrow(param_grid), init = (i == 1), progress.bar = TRUE, char = "+")    

  if (!file.exists(file.path("./output/fd_by_target_area_and_buffer_1000_reps",paste0("random_sample_of_transects_buffer_", param_grid$buffer[i], "_target_area_", param_grid$target.area[i], ".RDS"))))
    {
  fdres <- try(assemblage_random_subsets(transc_metadata = transect_areas, sp_abund = abundance_and_func_traits, reps = 1000, buffer = param_grid$buffer[i], parallel = para, target.area = param_grid$target.area[i], site.specific.biomass = TRUE, pb = FALSE), silent = TRUE)

  try(saveRDS(fdres, file.path("./output/fd_by_target_area_and_buffer_1000_reps",paste0("random_sample_of_transects_buffer_", param_grid$buffer[i], "_target_area_", param_grid$target.area[i], ".RDS"))), silent = TRUE)

    rm(fdres)
    }
}
FD_results <- assemblage_random_subsets(X = transect_areas, Y = species_abundances, Z = functional_traits, reps = 10000, buffer = 0.1, parallel = para, target.area = 500, site.specific.biomass = TRUE)
 
saveRDS(FD_results, "../output/preliminary_results_random_sample_of_transects.RDS")
rdss <- list.files(path = "./output/fd_by_target_area_and_buffer_1000_reps/", full.names = TRUE)

transect_areas$ID_transect_num <- 1:nrow(transect_areas)

orignal_ecoregion_n <- length(unique(transect_areas$Ecoregion))
orignal_locality_n <- length(unique(transect_areas$locality))
orignal_site_n <- length(unique(transect_areas$site_id))
orignal_transect_n <- nrow(transect_areas)
original_total_area <- sum(transect_areas$area_uvc)

out <- pbapply::pblapply(rdss, cl = 4, function(x){
  
  rds <- readRDS(x) 
  rds <- as.data.frame(rds)
  rds$Nb_sp <- apply(rds, 1, function(x) sum(x > 0))
  rds$sites <- sapply(strsplit(rownames(rds), "/", fixed = TRUE), "[", 1)
  rds$ID_transect_num <- sapply(strsplit(rownames(rds), "/", fixed = TRUE), "[", 2)
  # rds <- rds[!duplicated(rds$ID_transect_num), ]
  
  # rds <- rds[!is.na(rds$Nb_sp), ]
  
  rds$ecoregion <- sapply(1:nrow(rds), function(y) transect_areas$Ecoregion[transect_areas$site_id == rds$sites[y]][1])
   rds$locality <- sapply(1:nrow(rds), function(y) transect_areas$locality[transect_areas$site_id == rds$sites[y]][1])
  
  buffer <- as.numeric(sapply(strsplit(basename(x), "_", fixed = TRUE), "[", 6))
  target_area <- as.numeric(gsub(".RDS", "", sapply(strsplit(basename(x), "_", fixed = TRUE), "[", 9)))
  ecoregion_n <- round(length(unique(rds$ecoregion)) / orignal_ecoregion_n, 2)
  locality_n <- round(length(unique(rds$locality)) / orignal_locality_n, 2)
  sites_n <- round(length(unique(rds$sites)) / orignal_site_n, 2)
  ID_transect_num <- unlist(strsplit(rds$ID_transect_num, "-", fixed = TRUE))
  transect_n <- round(length(unique(ID_transect_num)) / orignal_transect_n, 2)
  total_area <- sum(transect_areas$area_uvc[transect_areas$ID_transect_num %in% ID_transect_num]) / original_total_area
  mean_site_random_samples <- mean(table(rds$sites)) 
  
    # transect over-representation index
    tb <- table(unlist(sapply(rds$ID_transect_num, function(x) strsplit(x, "-", fixed = TRUE))))
    
    transect_overrepresentation <- sd(tb / mean(tb))
  

  
  df <- data.frame(call = basename(x), buffer, target_area, ecoregion_n, locality_n, sites_n, transect_n, total_area, transect_overrepresentation, mean_site_random_samples)

  return(df)
})


model_parameters <- do.call(rbind, out)

write.csv(model_parameters, "./output/model_parameters.csv", row.names = FALSE)
model_parameters <- read.csv("./output/model_parameters.csv")

model_parameters$buffer <- factor(model_parameters$buffer, levels = sort(unique(model_parameters$buffer)))
model_parameters$target_area <- factor(model_parameters$target_area, levels = sort(unique(model_parameters$target_area)))


params_grid <- expand.grid(settings = c("buffer", "target_area"), parameters = c("total_area", "transect_n",   "transect_overrepresentation",  "locality_n", "sites_n", "ecoregion_n", "mean_site_random_samples"), stringsAsFactors = FALSE)

params_grid <- params_grid[order(params_grid$settings, decreasing = TRUE), ]

ggs <- lapply(1:nrow(params_grid), function(x){

  agg <- aggregate(as.formula(paste(params_grid$parameters[x], "~", params_grid$settings[x])), model_parameters, mean, na.rm = TRUE)
  
  gg <- ggplot(model_parameters, aes(x = get(params_grid$settings[x]), y = get(params_grid$parameters[x]))) +
    geom_violin(fill = viridis(10, alpha = 0.5)[which(unique(params_grid$parameters) %in% params_grid$parameters[x])]) +
    geom_point(data = agg,  aes(x = get(params_grid$settings[x]), y = get(params_grid$parameters[x])), size = 3) +
    geom_beeswarm(alpha = 0.4) +
    labs(x = params_grid$settings[x], y = params_grid$parameters[x]) +
    theme(legend.position = "none")
 
  if (params_grid$parameters[x] == "transect_overrepresentation")
      gg <- gg + ylim(c(0, max(model_parameters$transect_overrepresentation, na.rm =  TRUE)))  else
         if (params_grid$parameters[x] == "mean_site_random_samples")
      gg <- gg + ylim(c(0, max(model_parameters$mean_site_random_samples))) else
        if (params_grid$parameters[x] %in% c("locality_n", "sites_n", "ecoregion_n"))  gg <- gg + ylim(c(0, 1)) else

 return(gg)
  })

names(ggs) <- apply(params_grid, 1, paste, collapse = " vs ")

ggs
## $`target_area vs total_area`

## 
## $`target_area vs transect_n`

## 
## $`target_area vs transect_overrepresentation`

## 
## $`target_area vs locality_n`

## 
## $`target_area vs sites_n`

## 
## $`target_area vs ecoregion_n`

## 
## $`target_area vs mean_site_random_samples`

## 
## $`buffer vs total_area`

## 
## $`buffer vs transect_n`

## 
## $`buffer vs transect_overrepresentation`

## 
## $`buffer vs locality_n`

## 
## $`buffer vs sites_n`

## 
## $`buffer vs ecoregion_n`

## 
## $`buffer vs mean_site_random_samples`

mod <- model_parameters$call[which.max(model_parameters$locality_n)]

rds <- readRDS(file.path("./output/fd_by_target_area_and_buffer_1000_reps/", mod))
rds <- as.data.frame(rds)
rds$Nb_sp <- apply(rds, 1, function(x) sum(x > 0))
rds$sites <- sapply(strsplit(rownames(rds), "/", fixed = TRUE), "[", 1)
rds$ID_transect_num <- sapply(strsplit(rownames(rds), "/", fixed = TRUE), "[", 2)


 rds$ecoregion <- sapply(1:nrow(rds), function(y) transect_areas$Ecoregion[transect_areas$site_id == rds$sites[y]][1])
   rds$locality <- sapply(1:nrow(rds), function(y) transect_areas$locality[transect_areas$site_id == rds$sites[y]][1])



 # ecoregions lost
# setdiff(unique(transect_areas$ecoregion), unique(rds$ecoregion))

lost_localities <- setdiff(unique(transect_areas$locality), unique(rds$locality))

el_faro


R session information

## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
## 
## locale:
##  [1] LC_CTYPE=pt_BR.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=pt_BR.UTF-8    
##  [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=pt_BR.UTF-8   
##  [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ape_5.5           geometry_0.4.5    svMisc_1.1.4      ade4_1.7-17      
##  [5] plyr_1.8.6        reshape_0.8.8     magrittr_2.0.3    dplyr_1.0.7      
##  [9] cluster_2.1.2     viridis_0.6.2     viridisLite_0.4.0 ggbeeswarm_0.6.0 
## [13] ggplot2_3.3.5     pbapply_1.5-0    
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.8.3     lattice_0.20-44  assertthat_0.2.1 digest_0.6.29   
##  [5] utf8_1.2.2       R6_2.5.1         magic_1.5-9      evaluate_0.15   
##  [9] highr_0.9        pillar_1.6.4     rlang_1.0.1      rstudioapi_0.13 
## [13] jquerylib_0.1.4  rmarkdown_2.13   labeling_0.4.2   stringr_1.4.0   
## [17] munsell_0.5.0    compiler_4.1.0   vipor_0.4.5      xfun_0.30       
## [21] pkgconfig_2.0.3  htmltools_0.5.2  tidyselect_1.1.1 tibble_3.1.6    
## [25] gridExtra_2.3    fansi_1.0.0      crayon_1.5.0     withr_2.4.3     
## [29] MASS_7.3-54      grid_4.1.0       nlme_3.1-152     jsonlite_1.8.0  
## [33] gtable_0.3.0     lifecycle_1.0.1  DBI_1.1.1        scales_1.1.1    
## [37] cli_3.1.0        stringi_1.7.6    farver_2.1.0     bslib_0.2.5.1   
## [41] ellipsis_0.3.2   generics_0.1.0   vctrs_0.3.8      tools_4.1.0     
## [45] glue_1.6.2       beeswarm_0.4.0   purrr_0.3.4      abind_1.4-5     
## [49] parallel_4.1.0   fastmap_1.1.0    yaml_2.3.5       colorspace_2.0-2
## [53] knitr_1.38       sass_0.4.0