## vector with package names
x <- c("pbapply", "ggbeeswarm", "ggplot2", "viridis","cluster", "dplyr", "magrittr", "reshape", "plyr", "ade4", "svMisc", "geometry", "ape", "zoo", "MuMIn", "phangorn", "brms", "lme4", "vegan", "gamlss", "gamlss.dist", "gamlss.add", "tidybayes", "ggridges", "HDInterval", "kableExtra", "ggcorrplot", "cmdstanr", "cowplot", "bayestestR", "warbleR", "cluster")

aa <- lapply(x, function(y) {
  
  # check if installed, if not then install 
  if (!y %in% installed.packages()[,"Package"]) 
    install.packages(y) 

  # load package
  try(require(y, character.only = T), silent = T)
})
theme_set(theme_classic(base_size = 20))

knitr::opts_knit$set(dpi = 100, fig.width = 16, fig.height = 8, warning = FALSE, root.dir = "..", message = FALSE)

cols <- viridis(10, alpha = 0.7)
source("~/Dropbox/Projects/reef_functional_diversity/scripts/assemblage_random_subsets.R")

source("./scripts/multidimFD.R")

# source("~/Dropbox/R_package_testing/warbleR/warbleR/R/pblapply_wrblr_int.R")

pblapply_wrblr_int <- warbleR:::pblapply_wrblr_int
# species_abundances <- readRDS("./data/species_abundances.rds")
abundance_and_func_traits <- readRDS("~/Dropbox/Projects/eastern_tropical_pacific_herbivory/data_intermediate/fish/regional/herbfishes_regional_ecoregion_traits_biomass.rds")

abundance_and_func_traits$site_id[abundance_and_func_traits$site_id == "nicaragua-toro_miscal/toro_marsella"] <- "nicaragua-toro_miscal-toro_marsella"

# transect_areas1 <- readRDS("./data/transect_areas.RDS")
transect_areas <- abundance_and_func_traits[!duplicated(paste(abundance_and_func_traits$ID_transect,abundance_and_func_traits$site_id)), c("ID_transect", "site_id", "area_uvc", "region", "date_y_m")]

transect_areas$ID_transect_num <- 1:nrow(transect_areas)

# Max number of sites a transect is found in (should be 1!)
max(table(transect_areas$ID_transect, transect_areas$site_id))
## [1] 1
uvc_sst_df <-readRDS("~/Dropbox/Projects/eastern_tropical_pacific_herbivory/results/biophy/uvc_sst_fishes_sites_data_frame.RDS")
uvc_chl_df <-readRDS("~/Dropbox/Projects/eastern_tropical_pacific_herbivory/results/biophy/uvc_chl_fishes_sites_data_frame.RDS")
predictors <- readRDS("~/Dropbox/Projects/eastern_tropical_pacific_herbivory/data_intermediate/predictors.RDS")

uvc_sst_df$site_id[uvc_sst_df$site_id == "nicaragua-toro_miscal/toro_marsella"] <-
  uvc_chl_df$site_id[uvc_chl_df$site_id == "nicaragua-toro_miscal/toro_marsella"] <-
   predictors$site_id[predictors$site_id == "nicaragua-toro_miscal/toro_marsella"] <- "nicaragua-toro_miscal-toro_marsella"



# these ones don't have coordinates (should be 4 sites from mexico)
setdiff(transect_areas$site_id, predictors$site_id)
## character(0)
transect_areas <- merge(transect_areas, predictors[!duplicated(predictors$site_id), ], by = "site_id")

transect_areas$years_protected[is.na(transect_areas$years_protected)] <- 0

transect_areas$ID_transect_num <- 1:nrow(transect_areas)

# load fish taxa to have "taxonomy"
load("~/Dropbox/Projects/eastern_tropical_pacific_herbivory/data_intermediate/taxa/fish_taxa.rda")

# sampling coverage
sc <- readRDS("~/Dropbox/Projects/eastern_tropical_pacific_herbivory/results/mixed/sc.RDS")

rownames(sc)[rownames(sc) == "nicaragua-toro_miscal/toro_marsella"] <- "nicaragua-toro_miscal-toro_marsella"


# remove those with low coverage
transect_areas <- transect_areas[transect_areas$site_id %in% rownames(sc)[sc$sc >= 0.95], ]

Create 100 random subsets

random_FD_list <- warbleR:::pblapply_wrblr_int(X = 1:110, FUN = function(x) {
  # print(x)
random_assemblages <- assemblage_random_subsets(transc_metadata = transect_areas, sp_abund = abundance_and_func_traits, reps = 100, buffer = 0.1, parallel = 1, target.area = 400, pb = FALSE, seed = x, site.specific.biomass = TRUE)
  
  fds <- FD_random_assemblages(random_assemblages = random_assemblages, sp_cutoff = 3, prop_cutoff = 0.5, abundance_and_func_traits = abundance_and_func_traits, taxonomy = fish_taxa,
                               transect_areas = transect_areas)
  
  fds_predictors <- try(add_predictors(X = fds, transect_areas = transect_areas, uvc_chl_df = uvc_chl_df, uvc_sst_df = uvc_sst_df, parallel = 1, pb = FALSE), silent = TRUE)
  
return(fds_predictors)
  }, cl = 1)

# remove errors
random_FD_list <- random_FD_list[sapply(random_FD_list, class) == "data.frame"]

# keep 100
random_FD_list <- random_FD_list[1:100]

saveRDS(random_FD_list, "./data/100_replicated_random_assemblages_data_set_400m2_target_area.RDS")

Check collinearity

Colinearity on the first 15 random transect data sets

Predictors

random_FD_list <- readRDS("./data/100_replicated_random_assemblages_data_set_400m2_target_area.RDS")

# dev.off()
# par(mfrow = c(4, 4))

# for(i in 1:16){
  # print(head(random_FD_list[[i]]))

ggcorrplots <- lapply(1:15, function(i){
cormat <- cor(random_FD_list[[i]][, c("sst_range", "chl_range", "gravity", "years_protected")])

ggcorrplot(cormat, type = "lower", outline.col = "black",
 lab=TRUE, 
 ggtheme = ggplot2::theme_classic,
 colors = c("#6D9EC1", "white", "#E46726"))

# 
# print(cp <- corrplot::corrplot.mixed(cormat, upper = "ellipse", lower.col = "black"))
# }
})
cowplot::plot_grid(plotlist = ggcorrplots, ncol = 3)

Responses

random_FD_list <- readRDS("./data/100_replicated_random_assemblages_data_set_400m2_target_area.RDS")

# dev.off()
# par(mfrow = c(4, 4))

# for(i in 1:16){
  # print(head(random_FD_list[[i]]))

ggcorrplots <- lapply(1:15, function(i){
cormat <- cor(random_FD_list[[i]][, c("FRic", "FDiv", "FEve", "FDis", "tax_distinctnes", "corrected_biomass", "density")])

ggcorrplot(cormat, type = "lower", outline.col = "black",
 lab=TRUE, 
 ggtheme = ggplot2::theme_classic,
 colors = c("#6D9EC1", "white", "#E46726"))

# 
# print(cp <- corrplot::corrplot.mixed(cormat, upper = "ellipse", lower.col = "black"))
# }
})
cowplot::plot_grid(plotlist = ggcorrplots, ncol = 3)

Models

# modelos
## FDiv
mod_all_noiucn_FDiv <- "FDiv ~ sst_range + chl_range + gravity + years_protected + geomorphology + (1|Ecoregion)"  

mod_all_noyears_FDiv <- "FDiv ~ sst_range + chl_range + gravity + iucn_cat + geomorphology + (1|Ecoregion)" 

mod_all_intr_FDiv <- "FDiv ~ sst_range + chl_range + gravity + years_protected : iucn_cat + geomorphology + (1|Ecoregion)"  

mod_envir_FDiv <- "FDiv ~ sst_range + chl_range + geomorphology + (1|Ecoregion)"
  
mod_antrop_noiucn_FDiv <- "FDiv ~ gravity + years_protected + (1|Ecoregion)" 

mod_antrop_noyears_FDiv <- "FDiv ~ gravity + iucn_cat + (1|Ecoregion)" 
  
mod_antrop_intr_FDiv <- "FDiv ~ gravity + years_protected : iucn_cat + (1|Ecoregion)" 

### FEve

mod_all_noiucn_FEve <- "FEve ~ sst_range + chl_range + gravity + years_protected + geomorphology + (1|Ecoregion)"  

mod_all_noyears_FEve <- "FEve ~ sst_range + chl_range + gravity + iucn_cat + geomorphology + (1|Ecoregion)" 

mod_all_intr_FEve <- "FEve ~ sst_range + chl_range + gravity + years_protected : iucn_cat + geomorphology + (1|Ecoregion)"  

mod_envir_FEve <- "FEve ~ sst_range + chl_range + geomorphology + (1|Ecoregion)"
  
mod_antrop_noiucn_FEve <- "FEve ~ gravity + years_protected + (1|Ecoregion)" 

mod_antrop_noyears_FEve <- "FEve ~ gravity + iucn_cat + (1|Ecoregion)" 
  
mod_antrop_intr_FEve <- "FEve ~ gravity + years_protected : iucn_cat + (1|Ecoregion)" 

### FDis

mod_all_noiucn_FDis <- "FDis ~ sst_range + chl_range + gravity + years_protected + geomorphology + (1|Ecoregion)"  

mod_all_noyears_FDis <- "FDis ~ sst_range + chl_range + gravity + iucn_cat + geomorphology + (1|Ecoregion)" 

mod_all_intr_FDis <- "FDis ~ sst_range + chl_range + gravity + years_protected : iucn_cat + geomorphology + (1|Ecoregion)"  

mod_envir_FDis <- "FDis ~ sst_range + chl_range + geomorphology + (1|Ecoregion)"
  
mod_antrop_noiucn_FDis <- "FDis ~ gravity + years_protected + (1|Ecoregion)" 

mod_antrop_noyears_FDis <- "FDis ~ gravity + iucn_cat + (1|Ecoregion)" 
  
mod_antrop_intr_FDis <- "FDis ~ gravity + years_protected : iucn_cat + (1|Ecoregion)" 

### tax dis

mod_all_noiucn_taxdis <- "tax_distinctnes ~ sst_range + chl_range + gravity + years_protected + geomorphology + (1|Ecoregion)"  

mod_all_noyears_taxdis <- "tax_distinctnes ~ sst_range + chl_range + gravity + iucn_cat + geomorphology + (1|Ecoregion)" 

mod_all_intr_taxdis <- "tax_distinctnes ~ sst_range + chl_range + gravity + years_protected : iucn_cat + geomorphology + (1|Ecoregion)"

mod_envir_taxdis <- "tax_distinctnes ~ sst_range + chl_range + geomorphology + (1|Ecoregion)"
  
mod_antrop_noiucn_taxdis <- "tax_distinctnes ~ gravity + years_protected + (1|Ecoregion)" 

mod_antrop_noyears_taxdis <- "tax_distinctnes ~ gravity + iucn_cat + (1|Ecoregion)" 
  
mod_antrop_intr_taxdis <- "tax_distinctnes ~ gravity + years_protected : iucn_cat + (1|Ecoregion)" 

models <- list(
  mod_all_noiucn_FDiv     = mod_all_noiucn_FDiv,
  mod_all_noyears_FDiv    = mod_all_noyears_FDiv,
  mod_all_intr_FDiv       = mod_all_intr_FDiv,
  mod_envir_FDiv          = mod_envir_FDiv,
  mod_antrop_noiucn_FDiv  = mod_antrop_noiucn_FDiv, 
  mod_antrop_noyears_FDiv = mod_antrop_noyears_FDiv, 
  mod_antrop_intr_FDiv    = mod_antrop_intr_FDiv,
  
  mod_all_noiucn_FEve     = mod_all_noiucn_FEve,
  mod_all_noyears_FEve    = mod_all_noyears_FEve, 
  mod_all_intr_FEve       = mod_all_intr_FEve, 
  mod_envir_FEve          = mod_envir_FEve,
  mod_antrop_noiucn_FEve  = mod_antrop_noiucn_FEve,
  mod_antrop_noyears_FEve = mod_antrop_noyears_FEve,
  mod_antrop_intr_FEve    = mod_antrop_intr_FEve,
  
  mod_all_noiucn_FDis     = mod_all_noiucn_FDis,
  mod_all_noyears_FDis    = mod_all_noyears_FDis, 
  mod_all_intr_FDis       = mod_all_intr_FDis, 
  mod_envir_FDis          = mod_envir_FDis,
  mod_antrop_noiucn_FDis  = mod_antrop_noiucn_FDis,
  mod_antrop_noyears_FDis = mod_antrop_noyears_FDis,
  mod_antrop_intr_FDis    = mod_antrop_intr_FDis,
  
  mod_all_noiucn_taxdis     = mod_all_noiucn_taxdis,
  mod_all_noyears_taxdis    = mod_all_noyears_taxdis, 
  mod_all_intr_taxdis       = mod_all_intr_taxdis, 
  mod_envir_taxdis          = mod_envir_taxdis,
  mod_antrop_noiucn_taxdis  = mod_antrop_noiucn_taxdis,
  mod_antrop_noyears_taxdis = mod_antrop_noyears_taxdis,
  mod_antrop_intr_taxdis    = mod_antrop_intr_taxdis
  )

# temporarily remove those including "iucn_cat"
models <- models[grep(pattern = "iucn", x = models, invert = TRUE)]

print(models)
## $mod_all_noiucn_FDiv
## [1] "FDiv ~ sst_range + chl_range + gravity + years_protected + geomorphology + (1|Ecoregion)"
## 
## $mod_envir_FDiv
## [1] "FDiv ~ sst_range + chl_range + geomorphology + (1|Ecoregion)"
## 
## $mod_antrop_noiucn_FDiv
## [1] "FDiv ~ gravity + years_protected + (1|Ecoregion)"
## 
## $mod_all_noiucn_FEve
## [1] "FEve ~ sst_range + chl_range + gravity + years_protected + geomorphology + (1|Ecoregion)"
## 
## $mod_envir_FEve
## [1] "FEve ~ sst_range + chl_range + geomorphology + (1|Ecoregion)"
## 
## $mod_antrop_noiucn_FEve
## [1] "FEve ~ gravity + years_protected + (1|Ecoregion)"
## 
## $mod_all_noiucn_FDis
## [1] "FDis ~ sst_range + chl_range + gravity + years_protected + geomorphology + (1|Ecoregion)"
## 
## $mod_envir_FDis
## [1] "FDis ~ sst_range + chl_range + geomorphology + (1|Ecoregion)"
## 
## $mod_antrop_noiucn_FDis
## [1] "FDis ~ gravity + years_protected + (1|Ecoregion)"
## 
## $mod_all_noiucn_taxdis
## [1] "tax_distinctnes ~ sst_range + chl_range + gravity + years_protected + geomorphology + (1|Ecoregion)"
## 
## $mod_envir_taxdis
## [1] "tax_distinctnes ~ sst_range + chl_range + geomorphology + (1|Ecoregion)"
## 
## $mod_antrop_noiucn_taxdis
## [1] "tax_distinctnes ~ gravity + years_protected + (1|Ecoregion)"

Bayesian mixed models

  • 12 models for each data subset
random_FD_list <- readRDS("./data/100_replicated_random_assemblages_data_set_400m2_target_area.RDS")

# check sites on all data sets
sites <- unlist(sapply(random_FD_list, function(x) unique(x$site_id)))

sites_count <- table(sites)

shared_sites <- names(sites_count)[sites_count == length(random_FD_list)]

length(shared_sites)

# predictors to scale
vars <- c("sst_range", "chl_range", "gravity", "years_protected")

iters <- 30000

# z-transform predictors and keep only shared sites
random_FD_list <- lapply(random_FD_list, function(W){
    # scale parameters
  for(i in vars)
    W[, i] <- scale(W[, i])  
  
  # remove non-shared sites
  W <- W[W$site_id %in% shared_sites, ]  
  
  #make taxonomic distinctness a proportion
  W$tax_distinctnes <- W$tax_distinctnes / (max(W$tax_distinctnes) * 1.0001)

  # set base level for iucn categories
  W$iucn_cat <- as.character(W$iucn_cat)
  
  return(W)
})



priors <- c(set_prior("normal(0, 1.5)", class = "Intercept"), set_prior("normal(0, 2)", class = "b"))


# loop to run bayesian mcmc glmmm in brms (using stan) controlling for spatial autocorrelation

# remove iucn
models <- grep("iucn", models, value = TRUE, invert = TRUE)

shuffled_models <- sample(1:length(models))

brms_model_out <- warbleR:::pblapply_wrblr_int(shuffled_models, function(u){
  
  x <- models[[u]]
  print(x)
  print(names(models)[u])
  rds_name <- file.path("./data/statistical_models_2024", paste0(names(models)[u], "-10-data-sets_brm_multiple.RDS"))
   
  # check Rhats
#   if (file.exists(rds_name)){
#     mod <- readRDS(rds_name) 
#     rhats <- rhat(mod[[1]][[1]])
#     rhats <- rhats[-length(rhats)]
#     iters <- attr(mod[[1]][[1]]$fit, "stan_args")[[1]]$iter + 10000
#     
#     go <- if (any(rhats > 1.05)) TRUE else
#       FALSE
# } else go <- TRUE
#   
  # if (go) {
if (TRUE) {

  # get priors for model
  sub_priors <- priors[sapply(priors$coef, function(x) grepl(x,  as.character(x)[3])), ]
  
    # mod <- try(brm(formula = x, iter = 0000, thin = 1, warmup = 5000,
    # data = random_FD_list[[1]],
    # family = Beta(),
    # control = list(adapt_delta = 0.999),
    #   silent = 2,
    # chains = 1,
    # prior = sub_priors
    # ), silent = TRUE)

    mod <- try(brm_multiple(formula = x, iter = iters, thin = 1,
    data = random_FD_list[1:10],
    family = Beta(),
    cores = 10,
    control = list(adapt_delta = 0.999),
      silent = 2,
    chains = 1,
    combine = FALSE,
    prior = sub_priors), silent = TRUE)
  
    # add loo
    if (!is(mod, "try-error"))
      for (i in 1:length(mod)) 
        mod[[i]] <- add_criterion(mod[[i]], c("loo"))
 
     
    
    
    if (!is(mod, "try-error"))
      saveRDS(mod, file = rds_name)
  
  rm(mod)
  }
  return(NULL)
  })
random_FD_list <-
  readRDS("./data/100_replicated_random_assemblages_data_set_300m2_target_area.RDS")

random_FD_df <- do.call(rbind, random_FD_list)

# predictors to scale
vars <- c("sst_range", "chl_range", "gravity", "years_protected")

iters <- 50000

for (i in vars)
  random_FD_df[, i] <- scale(random_FD_df[, i])


#make taxonomic distinctness a proportion
random_FD_df$tax_distinctnes <-
  random_FD_df$tax_distinctnes / (max(random_FD_df$tax_distinctnes) * 1.0001)

priors <-
  c(
    set_prior("normal(0, 1.5)", class = "Intercept"),
    set_prior("normal(0, 2)", class = "b")
  )


# loop to run bayesian mcmc glmmm in brms (using stan) controlling for spatial autocorrelation

# remove iucn
models <- grep("iucn", models, value = TRUE, invert = TRUE)

shuffled_models <- sample(1:length(models))

brms_model_out <-
  warbleR:::pblapply_wrblr_int(shuffled_models, function(u) {
    x <- models[[u]]
    print(x)
    print(names(models)[u])
    rds_name <-
      file.path("./data/statistical_models",
                paste0(names(models)[u], "-single_combined_100_data_sets.RDS"))
    
    # check Rhats
      if (!file.exists(rds_name)){
    #     mod <- readRDS(rds_name)
    #     rhats <- rhat(mod[[1]][[1]])
    #     rhats <- rhats[-length(rhats)]
    #     iters <- attr(mod[[1]][[1]]$fit, "stan_args")[[1]]$iter + 10000
    #
    #     go <- if (any(rhats > 1.05)) TRUE else
    #       FALSE
    # } else go <- TRUE
    #
    # if (go) {
    # if (TRUE) {
      # get priors for model
      sub_priors <-
        priors[sapply(priors$coef, function(x)
          grepl(x,  as.character(x)[3])),]
      
      mod <-
        try(brm(
          formula = paste(x, "+ (1|site_id)"),
          iter = iters,
          thin = 1,
          data = random_FD_df,
          family = Beta(),
          control = list(adapt_delta = 0.999),
          silent = 2,
          chains = 2,
          cores = 2,
          prior = sub_priors,
          save_pars = save_pars(all = TRUE)
        ),
        silent = TRUE)
      
      if (!is(mod, "try-error"))
   mod2 <- try(add_criterion(mod, c("loo")), silent = TRUE)
      
      if (!is(mod2, "try-error"))
      mod <- mod2
      
      if (!is(mod, "try-error"))
        saveRDS(mod, file = rds_name)
      
      rm(mod)
    }
    return(NULL)
  })
brms_model_rds <- list.files("./data/statistical_models", pattern = "combined_100_data_sets.RDS", full.names = TRUE)

brms_model_rds <- brms_model_rds[brms_model_rds != "./data/statistical_models/models_combined_100_data_sets.RDS" ]

# temporarily remove iucn 
# brms_model_rds <- grep(pattern = "iucn", x = brms_model_rds, invert = TRUE, value = TRUE)

combined_mod_results <- pblapply(brms_model_rds, cl = 1, function(e) {

  mod <- readRDS(e)
  # comb_mod <- combine_models(mlist = mods, check_data = FALSE)
  
  # mods[[1]]$criteria$loo$estimates
  
  # print(x$formula)
  summ <- summary(mod)$fixed
  fit <- mod$fit  
  betas <- grep("^b_", names(fit@sim$samples[[1]]), value = TRUE)  
  hdis <- t(sapply(betas, function(y)   hdi(fit@sim$samples[[1]][[y]]))
)
  summ_table <- data.frame(summ, hdis, iterations = attr(fit, "stan_args")[[1]]$iter, chains = length(attr(fit, "stan_args")))
  summ_table <- summ_table[rownames(summ_table) != "Intercept", c("Estimate", "Rhat", "Bulk_ESS", "CI_low", "CI_high", "iterations", "chains")]
  
  summ_table <- as.data.frame(summ_table)  
  summ_table$Rhat <- round(summ_table$Rhat, digits = 3)  
  summ_table$CI_low <- round(unlist(summ_table$CI_low), digits = 3)  
  summ_table$CI_high <- round(unlist(summ_table$CI_high), digits = 3)  
  
  
    out <- lapply(betas, function(y)  data.frame(variable = y, value = sort(fit@sim$samples[[1]][[y]], decreasing = FALSE)))
  
  posteriors <- do.call(rbind, out)
  posteriors <- posteriors[posteriors$variable != "b_Intercept", ]
  
   gg <- ggplot(data = posteriors, aes(y = variable, x = value, fill = stat(quantile))) + 
  geom_density_ridges_gradient(quantile_lines = TRUE, quantile_fun = HDInterval::hdi,  vline_linetype = 2) + 
  theme_classic() + 
     xlim(c(min(summ_table[ , "CI_low"]), max(summ_table[ , "CI_high"]))) +
  geom_vline(xintercept = 0, col = "red") + 
  scale_fill_manual(values = c("transparent", "lightblue", "transparent"), guide = "none") + ggtitle(mod$formula)

    # print(gg)

  
  summ_table$Rhat <- ifelse(summ_table$Rhat > 1.05, cell_spec(summ_table$Rhat, "html", color ="white", background = "red", bold = TRUE,  font_size = 12),  cell_spec(summ_table$Rhat, "html"))
  
  signif <- summ_table[,"CI_low"] * summ_table[,"CI_high"] > 0
  
  # summ_table$CI_low <- ifelse(signif, cell_spec(summ_table$CI_low, "html", color ="black", background = adjustcolor(cols[9], alpha.f = 0.3), bold = TRUE,  font_size = 12),  cell_spec(summ_table$CI_low, "html"))
  
  # summ_table$CI_high <- ifelse(signif, cell_spec(summ_table$CI_high, "html", color ="black", background = adjustcolor(cols[9], alpha.f = 0.3), bold = TRUE,  font_size = 12),  cell_spec(summ_table$CI_high, "html"))
  
  df1 <- kbl(summ_table, row.names = TRUE, escape = FALSE, format = "html", digits = 3)
    
  df1 <- row_spec(kable_input = df1,row =  which(summ_table$CI_low * summ_table$CI_high > 0), background = adjustcolor(cols[9], alpha.f = 0.3))

  df1 <- kable_styling(df1, bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE, font_size = 12)

  # print(df1)

  
results <- list(table = df1, gg = gg)    
  
return(results)
  
  return(cm)
  }
)

names(combined_mod_results) <- gsub("-single_combined_100_data_sets.RDS", "", basename(brms_model_rds))

saveRDS(combined_mod_results, "./data/statistical_models/models_combined_100_data_sets.RDS")

Results for a single combined data set per model (100 subsets, site as random factor)

models_combined_100_data_set <- readRDS("./data/statistical_models/models_combined_100_data_sets.RDS")

for(i in 1:length(models_combined_100_data_set)){
  print(names(models_combined_100_data_set)[i])
  print(models_combined_100_data_set[[i]]$table)
  print(models_combined_100_data_set[[i]]$gg)
}

Model comparison

brms_model_rds <- list.files("./data/statistical_models", pattern = "-single_combined_100_data_sets.RDS", full.names = TRUE)

looic_list <- pblapply(brms_model_rds, cl = 1, function(e) {
  
  mod <- readRDS(e)

  loos <- mod$criteria$loo$estimates
  
  out_df <- data.frame(
    models = gsub("-single_combined_100_data_sets.RDS", "", basename(e)),
    looic = loos[3, 1], 
    SE = loos[3, 2]
    )
  
  return(out_df)
  })

looic_df <- do.call(rbind, looic_list)

saveRDS(looic_df, "./data/looic_by_model_site_id_as_random_effect.RDS")
looic_df <- readRDS("./data/looic_by_model_site_id_as_random_effect.RDS")

brms_model_rds <- list.files("./data/statistical_models", pattern = "-single_combined_100_data_sets.RDS", full.names = TRUE)

mod_groups <- c("FDis", "FDiv", "FEve", "taxdis")

loo_comparisons <- lapply(mod_groups, function(i){
  
  sub_looic <- looic_df[grep(i, looic_df$models), ]
  sub_looic$delta.looic <- sub_looic$looic - min(sub_looic$looic)
  
  mods <- lapply(file.path("./data/statistical_models", paste0(sub_looic$models, "-single_combined_100_data_sets.RDS")), FUN = readRDS)
    
  weights <- brms::loo_model_weights(mods[[1]], mods[[2]], mods[[3]], model_names = sub_looic$models, ndraws = 5000)
  
  rm(mods)
  
  sub_looic$looic.weights <- sapply(sub_looic$models, function(x) as.numeric(weights[names(weights) == x]))
  
  sub_looic <- sub_looic[order(sub_looic$delta.looic, decreasing = TRUE), ]
  sub_looic$cum.weight <- cumsum(sub_looic$looic.weights)
  
  sub_looic <- sub_looic[order(sub_looic$delta.looic, decreasing = FALSE), ]
  
  df1 <- knitr::kable(sub_looic, row.names = TRUE, escape = FALSE, format = "html", digits = 3, caption = i)
    
    df1 <- row_spec(kable_input = df1,row = 1:which(sub_looic$cum.weight >= 0.95)[1], background = adjustcolor(cols[9], alpha.f = 0.3))
  
  df1 <- kable_styling(df1, bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE, font_size = 12)

  return(df1)  
})

names(loo_comparisons) <- mod_groups

saveRDS(loo_comparisons, "./data/statistical_models/models_comparison_results_100_data_sets.RDS")
loo_comparisons <- readRDS("./data/statistical_models/models_comparison_results_100_data_sets.RDS")

for(i in 1:length(loo_comparisons)){
  print(names(loo_comparisons)[i])
  print(loo_comparisons[[i]])
}
[1] “FDis”
FDis
models looic SE delta.looic looic.weights cum.weight
1 mod_all_noiucn_FDis -55785.91 658.976 0.000 0.576 1.000
9 mod_envir_FDis -55785.08 658.672 0.828 0.000 0.424
5 mod_antrop_noiucn_FDis -55785.04 659.038 0.877 0.424 0.424
[1] “FDiv”
FDiv
models looic SE delta.looic looic.weights cum.weight
10 mod_envir_FDiv -75101.71 646.847 0.000 0.724 1.000
2 mod_all_noiucn_FDiv -75099.81 647.075 1.901 0.006 0.276
6 mod_antrop_noiucn_FDiv -75060.02 645.061 41.698 0.270 0.270
[1] “FEve”
FEve
models looic SE delta.looic looic.weights cum.weight
11 mod_envir_FEve -53230.47 690.732 0.000 0.802 1.000
3 mod_all_noiucn_FEve -53229.28 691.194 1.188 0.001 0.198
7 mod_antrop_noiucn_FEve -52950.80 681.434 279.668 0.197 0.197
[1] “taxdis”
taxdis
models looic SE delta.looic looic.weights cum.weight
12 mod_envir_taxdis -78817.34 749.970 0.000 0.745 1.000
4 mod_all_noiucn_taxdis -78808.54 750.022 8.800 0.000 0.255
8 mod_antrop_noiucn_taxdis -78763.13 752.589 54.214 0.255 0.255

Model averaging

brms_model_rds <- list.files("./data/statistical_models", pattern = "-single_combined_100_data_sets.RDS", full.names = TRUE)

average_model_list <- pblapply(c("FDis", "FDiv", "FEve", "taxdis"), function(x){
  
  mods <- lapply(grep(x, brms_model_rds, value = TRUE), readRDS)
  
  params <- lapply(mods, function(x) rownames(summary(x)$fixed)[-1])  
  
  combs <- combn(1:length(mods), 2)
  
  post_avrg_list <- lapply(1:ncol(combs), function(y){

   prm <- paste0("b_", intersect(params[[combs[1, y]]], params[[combs[2, y]]]))
  
        if (!prm[1] == "b_")
    # calculate average posteriors and model
    post_avrg <- posterior_average(mods[[combs[1, y]]], mods[[combs[1, y]]], weights = "loo", pars = prm) else
      post_avrg <- NULL
    
    return(post_avrg)
  })
  
  post_avrg_list <- post_avrg_list[sapply(post_avrg_list, is.data.frame)]
  
post_avrgs <- do.call(cbind, post_avrg_list)

average_model <- describe_posterior(as.data.frame(post_avrgs), ci_method = "HDI")

average_model <- as.data.frame(average_model)

average_model_raw <- average_model
  average_model$CI_low <- round(average_model$CI_low, digits = 3)  
  average_model$CI_high <- round(average_model$CI_high, digits = 3)  
  

  signif <- average_model[, "CI_low"] * average_model[, "CI_high"] > 0
  
  # average_model$CI_low <- ifelse(signif, cell_spec(average_model$CI_low, "html", color ="black", background = adjustcolor(cols[9], alpha.f = 0.3), bold = TRUE,  font_size = 12),  cell_spec(average_model$CI_low, "html"))
  # 
  # average_model$CI_high <- ifelse(signif, cell_spec(average_model$CI_high, "html", color ="black", background = adjustcolor(cols[9], alpha.f = 0.3), bold = TRUE,  font_size = 12),  cell_spec(average_model$CI_high, "html"))
  # 
      
  
  
  df1 <- kbl(average_model, row.names = TRUE, escape = FALSE, format = "html", digits = 3)

  df1 <- row_spec(kable_input = df1,row =  which(signif), background = adjustcolor(cols[9], alpha.f = 0.3))
  
  df1 <- kable_styling(df1, bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE, font_size = 12)

  average_model_raw$response <- x
  
  out <- list(average_model = average_model_raw, table = df1)
  
  return(out)
})

names(average_model_list) <- c("FDis", "FDiv", "FEve", "taxdis")

averaged_model_table <- do.call(rbind, lapply(average_model_list, "[[", 'average_model'))

saveRDS(averaged_model_table, "./data/statistical_models/averaged_model_table.RDS")
                                
saveRDS(average_model_list, "./data/statistical_models/average_models.RDS")
average_model_list <- readRDS("./data/statistical_models/average_models.RDS")

for(i in 1:length(average_model_list)){
  print(names(average_model_list)[i])
  print(average_model_list[[i]]$table)
}
[1] “FDis”
Parameter Median CI CI_low CI_high pd ROPE_CI ROPE_low ROPE_high ROPE_Percentage
4 b_gravity 0.083 0.95 -0.073 0.213 0.867 0.95 -0.1 0.1 0.595
6 b_years_protected 0.098 0.95 -0.112 0.302 0.822 0.95 -0.1 0.1 0.497
5 b_sst_range -0.034 0.95 -0.091 0.023 0.873 0.95 -0.1 0.1 1.000
1 b_chl_range -0.010 0.95 -0.041 0.022 0.732 0.95 -0.1 0.1 1.000
2 b_geomorphologycoastal_island 0.138 0.95 -0.182 0.426 0.791 0.95 -0.1 0.1 0.348
3 b_geomorphologyoceanic_island 0.104 0.95 -0.505 0.699 0.640 0.95 -0.1 0.1 0.269
[1] “FDiv”
Parameter Median CI CI_low CI_high pd ROPE_CI ROPE_low ROPE_high ROPE_Percentage
4 b_gravity 0.246 0.95 0.042 0.516 0.990 0.95 -0.1 0.1 0.053
6 b_years_protected 0.187 0.95 -0.051 0.445 0.940 0.95 -0.1 0.1 0.211
5 b_sst_range -0.229 0.95 -0.297 -0.160 1.000 0.95 -0.1 0.1 0.000
1 b_chl_range 0.135 0.95 0.088 0.180 1.000 0.95 -0.1 0.1 0.056
2 b_geomorphologycoastal_island 0.062 0.95 -0.378 0.495 0.641 0.95 -0.1 0.1 0.404
3 b_geomorphologyoceanic_island -0.367 0.95 -1.595 0.665 0.761 0.95 -0.1 0.1 0.137
[1] “FEve”
Parameter Median CI CI_low CI_high pd ROPE_CI ROPE_low ROPE_high ROPE_Percentage
4 b_gravity -0.293 0.95 -0.444 -0.148 1.000 0.95 -0.1 0.1 0.000
6 b_years_protected -0.075 0.95 -0.240 0.097 0.798 0.95 -0.1 0.1 0.620
5 b_sst_range 0.007 0.95 -0.059 0.074 0.577 0.95 -0.1 0.1 1.000
1 b_chl_range 0.201 0.95 0.167 0.235 1.000 0.95 -0.1 0.1 0.000
2 b_geomorphologycoastal_island 0.042 0.95 -0.254 0.319 0.615 0.95 -0.1 0.1 0.521
3 b_geomorphologyoceanic_island 0.096 0.95 -0.728 0.749 0.603 0.95 -0.1 0.1 0.213
[1] “taxdis”
Parameter Median CI CI_low CI_high pd ROPE_CI ROPE_low ROPE_high ROPE_Percentage
4 b_gravity -0.080 0.95 -0.204 0.010 0.955 0.95 -0.1 0.1 0.65
6 b_years_protected 0.081 0.95 -0.064 0.198 0.887 0.95 -0.1 0.1 0.65
5 b_sst_range -0.067 0.95 -0.094 -0.042 1.000 0.95 -0.1 0.1 1.00
1 b_chl_range 0.050 0.95 0.037 0.064 1.000 0.95 -0.1 0.1 1.00
2 b_geomorphologycoastal_island 0.127 0.95 -0.057 0.332 0.907 0.95 -0.1 0.1 0.38
3 b_geomorphologyoceanic_island 0.698 0.95 0.241 1.175 0.996 0.95 -0.1 0.1 0.00

Results for a single data set per model

models_1_data_set <- readRDS("./data/statistical_models/combined_models_results_1_data_set.RDS")

for(i in 1:length(models_1_data_set)){
  print(names(models_1_data_set)[i])
  print(models_1_data_set[[i]]$table)
  print(models_1_data_set[[i]]$gg)
}

Results for 10 data set models (propagating FD uncertainty)

models_10_data_set <- readRDS("./data/statistical_models/combined_models_results_10_data_set_no_iucn.RDS")

for(i in 1:length(models_10_data_set)){
  print(names(models_10_data_set)[i])
  print(models_10_data_set[[i]]$table)
  print(models_10_data_set[[i]]$gg)
}

Model comparison

brms_model_rds <- list.files("./data/statistical_models", pattern = "10-data", full.names = TRUE)

mean_looic <- pblapply(brms_model_rds, cl = 1, function(e) {
  
  mods <- readRDS(e)

  loos <- sapply(mods, function(x) x$criteria$loo$estimates)[c(3, 6), ]
  
  out_df <- data.frame(
    models = gsub("-10-data-sets_brm_multiple.RDS", "", basename(e)),
    looic = mean(loos[1 , ]), 
    min.looic = min(loos[1 , ]), 
    max.looic = max(loos[1 , ]), 
    SE = mean(loos[2 , ])
    )
  
  return(out_df)
  })

mean_looic <- do.call(rbind, mean_looic)

saveRDS(mean_looic, "mean_looic_by_model.RDS")
mean_looic <- readRDS("mean_looic_by_model.RDS")

for(i in c("FDis", "FDiv", "FEve", "taxdis")){
  
  sub_looic <- mean_looic[grep(i, mean_looic$models), ]
  sub_looic <- sub_looic[order(sub_looic$looic, decreasing = FALSE), ]  
  sub_looic$delta.looic <- sub_looic$looic - min(sub_looic$looic)
  
  df1 <- knitr::kable(sub_looic, row.names = TRUE, escape = FALSE, format = "html", digits = 3, caption = i)
    
  df1 <- kable_styling(df1, bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE, font_size = 12)
  
  print(df1)
}

Results for 100 data set models (propagating FD uncertainty)

models_100_data_set <- readRDS("./data/statistical_models/combined_models_results_10_data_sets.RDS")

for(i in 1:length(models_100_data_set)){
  print(names(models_100_data_set)[i])
  print(models_100_data_set[[i]]$table)
  print(models_100_data_set[[i]]$gg)
}