Distribución y abundancia de mugílidos en la cuenca media del Río Sarapiquí

Análisis estatísticos y visualización de datos

Author
Published

September 11, 2025

Code
# print link to github repo if any
if (file.exists("./.git/config")){
  config <- readLines("./.git/config")
  url <- grep("url",  config, value = TRUE)
  url <- gsub("\\turl = |.git$", "", url)
  if (length(url) > 1){
  cat("\nSource code and data found at [", url, "](", url, ")", sep = "")
      }
  }
Code
# options to customize chunk outputs
knitr::opts_chunk$set(
  tidy.opts = list(width.cutoff = 65), 
  tidy = TRUE,
  message = FALSE
 )

Propósito

  • Graficar y analizar la diversidad de peces dulceacuícolas en ríos de Costa Rica.

 

Cargar paquetes

Code
# knitr is require for creating html/pdf/word reports formatR is
# used for soft-wrapping code

# install/ load packages
sketchy::load_packages(packages = c("knitr", "formatR", "readxl",
    "DT", "ggplot2", "brms", "viridis", "vegan", "caret"))

theme_classic <- function(...) ggplot2::theme_classic(base_size = 20,
    ...)
Code
options(DT.options = list(pageLength = 200, scrollX = TRUE, scrollY = "800px",
    dom = "Bfrtip", buttons = c("copy", "csv", "excel")))

datatable2 <- function(x, ...) DT::datatable(data = x, extensions = "Buttons",
    ...)

1 Leer y explorar datos

Code
dat <- read_xlsx("./data/raw/FECOP 2025 peces ICE.xlsx")
# remove things after '('

dat$Especie <- gsub(" \\(.*", "", dat$Especie)

dat$Especie[dat$Especie == "Theraps urderwoodi"] <- "Tomocichla tuba"
dat$Sitio[grepl(dat$Sitio, pattern = "La virgen")] <- "La Virgen"

dat$Sitio[grepl(dat$Sitio, pattern = "San Miguel")] <- "San Miguel"


# convertir fecha en mes
dat$mes <- format(as.Date(dat$fecha, format = "%d/%m/%Y"), "%m")

# convertir a espanol
month.abb.esp <- c("Ene", "Feb", "Mar", "Abr", "May", "Jun", "Jul",
    "Ago", "Sep", "Oct", "Nov", "Dic")

dat$mes_chr <- month.abb.esp[as.numeric(dat$mes)]

# hacer numerica
dat$`Long total cm` <- as.numeric(dat$`Long total cm`)

dat$`Peso g` <- as.numeric(dat$`Peso g`)

datatable2(dat)

Muestras por sitio

Code
as.data.frame(table(dat$Sitio))
Var1 Freq
Cariblanco 99
Chilamate 296
El Roble 111
La Guaria 166
La Virgen 125
San Miguel 60
Tirimbina 185

Muestras por especie

Code
tab_sp <- as.data.frame(table(dat$Especie))

tab_sp <- tab_sp[order(-tab_sp$Freq), ]

tab_sp
Var1 Freq
7 Dajaus monticola 298
14 Rhamdia laticauda 271
16 Tomocichla tuba 148
2 Astyanax orstedii 132
1 Arcos nudus 82
13 Poecilia mexicana 36
11 Joturus pichardi 20
12 Neetroplus nematopus 18
5 Brycon costaricensis 16
9 Gobiomorus dormitor 6
6 Cribroheros alfari 5
15 Rhamdia nicaraguensis 4
4 Awaus banana 3
3 Aterinella 1
8 Eretmobrycon scleroparius 1
10 Hypostomus aspidolepis 1

Solo especies de interés comercial

Code
all_spp_dat <- dat

dat <- dat[dat$Especie %in% c("Brycon costaricensis", "Gobiomorus dormitor",
    "Awaus banana", "Rhamdia laticauda", "Rhamdia nicaraguensis",
    "Dajaus monticola", "Joturus pichardi"), ]

tab_sp <- as.data.frame(table(dat$Especie))

tab_sp <- tab_sp[order(-tab_sp$Freq), ]

tab_sp
Var1 Freq
3 Dajaus monticola 298
6 Rhamdia laticauda 271
5 Joturus pichardi 20
2 Brycon costaricensis 16
4 Gobiomorus dormitor 6
7 Rhamdia nicaraguensis 4
1 Awaus banana 3

2 Graficar datos

2.1 Abundancia

Individuos por especie y sitio

Code
dat$indiv <- 1

# Barras con proporcion de indiv por especie x sitio
agg_dat2 <- aggregate(indiv ~ Sitio + Especie, data = dat, FUN = sum)


agg_dat2$Sitio <- factor(agg_dat2$Sitio, levels = c("La Guaria", "Chilamate",
    "El Roble", "Tirimbina", "La Virgen", "San Miguel", "Cariblanco"))

agg_dat2$Especie <- factor(agg_dat2$Especie, levels = tab_sp$Var1)

gg1 <- ggplot(agg_dat2, aes(x = Sitio, y = indiv, fill = Especie)) +
    geom_bar(stat = "identity", position = "fill") + labs(y = "Porcentaje\nde individuos",
    x = "Sitio") + theme_classic() + scale_y_continuous(labels = scales::percent,
    n.breaks = 4) + scale_fill_viridis_d(alpha = 1, option = "G",
    direction = -1) + theme(axis.text.x = element_text(angle = 90,
    hjust = 1))

# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {

    print(gg1)

    ggsave(gg1, path = "./scripts/", device = grDevices::png, filename = "especies_por_sitio.png",
        width = 10, height = 6, dpi = 300)
}

Code
# Barras con proporcion de indiv por especie x sitio y por mes
agg_dat3 <- aggregate(indiv ~ mes + Especie, data = dat, FUN = sum)

# convertir a espanol
agg_dat3$mes_chr <- month.abb.esp[as.numeric(agg_dat3$mes)]

agg_dat3$mes_chr <- factor(agg_dat3$mes_chr, levels = month.abb.esp)

agg_dat3$Especie <- factor(agg_dat3$Especie, levels = tab_sp$Var1)

# agregar meses faltantes
sub_agg <- agg_dat3[1:2, ]
sub_agg$indiv <- 0
sub_agg$mes_chr <- c("Ene", "Feb")

agg_dat3 <- rbind(agg_dat3, sub_agg)

gg2 <- ggplot(agg_dat3, aes(x = mes_chr, y = indiv, fill = Especie)) +
    geom_bar(stat = "identity", position = "fill") + labs(y = "Porcentaje\nde individuos",
    x = "Mes") + theme_classic() + scale_y_continuous(labels = scales::percent,
    n.breaks = 4) + scale_fill_viridis_d(alpha = 1, option = "G",
    direction = -1) + theme(axis.text.x = element_text(angle = 90,
    hjust = 1))

# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {

    print(gg2)

    ggsave(gg2, path = "./scripts/", device = grDevices::png, filename = "especies_por_mes.png",
        width = 10, height = 6, dpi = 300)

}

Code
# Barras con proporcion de indiv por especie x sitio y por mes
agg_dat3 <- aggregate(indiv ~ mes + Especie + Sitio, data = dat, FUN = sum)

# convertir a espanol
agg_dat3$mes_chr <- month.abb.esp[as.numeric(agg_dat3$mes)]

agg_dat3$mes_chr <- factor(agg_dat3$mes_chr, levels = month.abb.esp)

agg_dat3$Especie <- factor(agg_dat3$Especie, levels = tab_sp$Var1)

# agregar meses faltantes
sub_agg <- agg_dat3[1:2, ]
sub_agg$indiv <- 0
sub_agg$mes_chr <- c("Ene", "Feb")

agg_dat3$Sitio <- factor(agg_dat3$Sitio, levels = c("La Guaria", "Chilamate",
    "El Roble", "Tirimbina", "La Virgen", "San Miguel", "Cariblanco"))

agg_dat3 <- rbind(agg_dat3, sub_agg)


gg3 <- ggplot(agg_dat3, aes(x = mes_chr, y = indiv, fill = Especie)) +
    geom_bar(stat = "identity", position = "fill") + labs(y = "Porcentaje de individuos",
    x = "Mes") + theme_classic() + scale_y_continuous(labels = scales::percent,
    n.breaks = 4) + scale_fill_viridis_d(alpha = 1, option = "G",
    direction = -1) + theme(axis.text.x = element_text(angle = 90,
    hjust = 1)) + facet_wrap(~Sitio, scales = "free_y", ncol = 2)

# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {

    print(gg3)

    ggsave(gg3, path = "./scripts/", device = grDevices::png, filename = "especies_por_mes_sitio.png",
        width = 10, height = 6, dpi = 300)

}

2.2 Tamaño

Tamaño por especie y mes

Code
dat2 <- all_spp_dat[all_spp_dat$Especie %in% c("Rhamdia laticauda",
    "Dajaus monticola", "Joturus pichardi", "Tomocichla tuba"), ]

agg_dat4 <- aggregate(`Long total cm` ~ mes + Especie, data = dat2,
    FUN = mean, na.rm = TRUE)

se <- function(x) {
    x <- na.omit(x)
    sd(x)/sqrt(length(x))
}

agg_dat4$se <- aggregate(`Long total cm` ~ mes + Especie, data = dat2,
    FUN = se)$`Long total cm`


agg_dat4$mes_chr <- month.abb.esp[as.numeric(agg_dat4$mes)]

agg_dat4$mes_chr <- factor(agg_dat4$mes_chr, levels = month.abb.esp)

agg_dat4$Especie <- factor(agg_dat4$Especie, levels = unique(agg_dat4$Especie))

gg4 <- ggplot(agg_dat4, aes(x = as.numeric(mes), y = `Long total cm`,
    color = Especie)) + geom_line(size = 1, linetype = "dotdash") +
    geom_errorbar(aes(ymin = `Long total cm` - se, ymax = `Long total cm` +
        se), width = 0.2) + geom_point(size = 3) + labs(y = "Longitud total (cm)",
    x = "Mes") + theme_classic() + scale_color_viridis_d(alpha = 1,
    option = "G", direction = 1) + scale_x_continuous(breaks = 1:12,
    labels = month.abb.esp) + theme(axis.text.x = element_text(angle = 90,
    hjust = 1))

# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {

    print(gg4)

    ggsave(gg4, path = "./scripts/", device = grDevices::png, filename = "talla_por_especie_mes.png",
        width = 10, height = 4, dpi = 300)

}

Code
datatable2(agg_dat4)
Code
dat2 <- all_spp_dat[all_spp_dat$Especie %in% c("Brycon costaricensis",
    "Gobiomorus dormitor", "Joturus pichardi", "Tomocichla tuba"),
    ]

agg_dat4 <- aggregate(`Long total cm` ~ Sitio + Especie, data = dat2,
    FUN = mean, na.rm = TRUE)

se <- function(x) {
    x <- na.omit(x)
    sd(x)/sqrt(length(x))
}

agg_dat4$se <- aggregate(`Long total cm` ~ Sitio + Especie, data = dat2,
    FUN = se)$`Long total cm`

# add missing rows for 'Cariblanco' with 0s
car_agg <- agg_dat4[rep(1, 3), ]
car_agg$`Long total cm` <- 0
car_agg$se <- NA
car_agg$Especie <- unique(agg_dat4$Especie)[-1]

agg_dat4 <- rbind(agg_dat4, car_agg)

agg_dat4$Sitio <- factor(agg_dat4$Sitio, levels = c("La Guaria", "Chilamate",
    "El Roble", "Tirimbina", "La Virgen", "San Miguel", "Cariblanco"))

agg_dat4$Especie <- factor(agg_dat4$Especie, levels = unique(agg_dat4$Especie))

gg4 <- ggplot(agg_dat4, aes(x = Sitio, y = `Long total cm`, fill = Especie,
    group = Especie)) + geom_bar(stat = "identity", position = "dodge",
    width = 0.9) + geom_errorbar(aes(ymin = `Long total cm` - se,
    ymax = `Long total cm` + se), width = 0.1, position = position_dodge(0.9)) +
    labs(y = "Longitud total (cm)", x = "Sitio") + theme_classic() +
    scale_color_viridis_d(alpha = 1, option = "G", direction = 1) +
    scale_fill_viridis_d(alpha = 1, option = "G", direction = 1) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {

    print(gg4)

    ggsave(gg4, path = "./scripts/", device = grDevices::png, filename = "longitud_por_especies_por_sitio.png",
        width = 10, height = 4, dpi = 300)

}

Code
datatable2(agg_dat4)
Code
dat2 <- all_spp_dat[all_spp_dat$Especie %in% c("Brycon costaricensis",
    "Gobiomorus dormitor", "Joturus pichardi", "Tomocichla tuba"),
    ]

agg_dat4 <- aggregate(`Peso g` ~ Sitio + Especie, data = dat2, FUN = mean,
    na.rm = TRUE)

agg_dat4$se <- aggregate(`Peso g` ~ Sitio + Especie, data = dat2,
    FUN = se)$`Peso g`

# add missing rows for 'Cariblanco' with 0s
car_agg <- agg_dat4[rep(1, 3), ]
car_agg$`Peso g` <- 0
car_agg$se <- NA
car_agg$Especie <- unique(agg_dat4$Especie)[-1]

agg_dat4 <- rbind(agg_dat4, car_agg)

agg_dat4$Sitio <- factor(agg_dat4$Sitio, levels = c("La Guaria", "Chilamate",
    "El Roble", "Tirimbina", "La Virgen", "San Miguel", "Cariblanco"))

agg_dat4$Especie <- factor(agg_dat4$Especie, levels = unique(agg_dat4$Especie))

gg4 <- ggplot(agg_dat4, aes(x = Sitio, y = `Peso g`, fill = Especie,
    group = Especie)) + geom_bar(stat = "identity", position = "dodge",
    width = 0.9) + geom_errorbar(aes(ymin = `Peso g` - se, ymax = `Peso g` +
    se), width = 0.1, position = position_dodge(0.9)) + labs(y = "Peso (g)",
    x = "Sitio") + theme_classic() + scale_color_viridis_d(alpha = 1,
    option = "G", direction = 1) + scale_fill_viridis_d(alpha = 1,
    option = "G", direction = 1) + theme(axis.text.x = element_text(angle = 45,
    hjust = 1))

# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {

    print(gg4)

    ggsave(gg4, path = "./scripts/", device = grDevices::png, filename = "peso_por_especies_por_sitio.png",
        width = 10, height = 4, dpi = 300)

}

Code
datatable2(agg_dat4)
Code
agg_dat0 <- aggregate(`Long total cm` ~ Especie, data = all_spp_dat,
    FUN = mean, na.rm = TRUE)

agg_dat0$se <- aggregate(`Long total cm` ~ Especie, data = all_spp_dat,
    FUN = se)$`Long total cm`

agg_dat0 <- agg_dat0[order(agg_dat0$`Long total cm`, decreasing = FALSE),
    ]

agg_dat0$Especie <- factor(agg_dat0$Especie, levels = agg_dat0$Especie)

gg0 <- ggplot(agg_dat0, aes(x = Especie, y = `Long total cm`)) + geom_bar(stat = "identity",
    position = "dodge", fill = "gray") + geom_errorbar(aes(ymin = `Long total cm` -
    se, ymax = `Long total cm` + se), width = 0.2) + labs(y = "Longitud total (cm)",
    x = "Especie") + theme_classic() + theme(axis.text.x = element_text(angle = 45,
    hjust = 1))

# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {

    print(gg0)

    ggsave(gg0, path = "./scripts/", device = grDevices::png, filename = "talla_por_especie.png",
        width = 10, height = 8, dpi = 300)

}

Code
datatable2(agg_dat0)

Peso por especie y mes

Code
agg_dat5 <- aggregate(`Peso g` ~ mes + Especie, data = dat2, FUN = mean,
    na.rm = TRUE)

agg_dat5$se <- aggregate(`Peso g` ~ mes + Especie, data = dat2, FUN = se)$`Peso g`


agg_dat5$mes_chr <- month.abb.esp[as.numeric(agg_dat5$mes)]

agg_dat5$mes_chr <- factor(agg_dat5$mes_chr, levels = month.abb.esp)

agg_dat5$Especie <- factor(agg_dat5$Especie, levels = c("Brycon costaricensis",
    "Gobiomorus dormitor", "Joturus pichardi", "Tomocichla tuba"))

gg5 <- ggplot(agg_dat5, aes(x = as.numeric(mes), y = `Peso g`, color = Especie)) +
    geom_line(size = 1, linetype = "dotdash") + geom_point(size = 3) +
    geom_errorbar(aes(ymin = `Peso g` - se, ymax = `Peso g` + se),
        width = 0.2) + labs(y = "Peso (g)", x = "Mes") + theme_classic() +
    scale_color_viridis_d(alpha = 1, option = "G", direction = 1) +
    scale_x_continuous(breaks = 1:12, labels = month.abb.esp) + theme(axis.text.x = element_text(angle = 90,
    hjust = 1))

# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {

    print(gg5)

    ggsave(gg5, path = "./scripts/", device = grDevices::png, filename = "peso_por_especie_mes.png",
        width = 10, height = 4, dpi = 300)

}

Code
datatable2(agg_dat5)
Code
agg_dat0 <- aggregate(`Peso g` ~ Especie, data = all_spp_dat, FUN = mean,
    na.rm = TRUE)

agg_dat0$se <- aggregate(`Peso g` ~ Especie, data = all_spp_dat, FUN = se)$`Peso g`

agg_dat0 <- agg_dat0[order(agg_dat0$`Peso g`, decreasing = FALSE),
    ]

agg_dat0$Especie <- factor(agg_dat0$Especie, levels = agg_dat0$Especie)

gg0 <- ggplot(agg_dat0, aes(x = Especie, y = `Peso g`)) + geom_bar(stat = "identity",
    position = "dodge", fill = "gray") + geom_errorbar(aes(ymin = `Peso g` -
    se, ymax = `Peso g` + se), width = 0.2) + labs(y = "Peso (g)",
    x = "Especie") + theme_classic() + theme(axis.text.x = element_text(angle = 45,
    hjust = 1))

# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {

    print(gg0)

    ggsave(gg0, path = "./scripts/", device = grDevices::png, filename = "peso_por_especie.png",
        width = 10, height = 8, dpi = 300)

}

Code
datatable2(agg_dat0)

2.3 Peso

Peso por especie y mes para las que tienen mas datos

Code
agg_dat6 <- aggregate(`Long total cm` ~ mes + Especie, data = dat[!dat$Especie %in%
    c("Awaus banana", "Rhamdia nicaraguensis"), ], FUN = mean, na.rm = TRUE)

agg_dat6$mes_chr <- month.abb.esp[as.numeric(agg_dat6$mes)]

agg_dat6$mes_chr <- factor(agg_dat6$mes_chr, levels = month.abb.esp)

agg_dat6$Especie <- factor(agg_dat6$Especie, levels = tab_sp$Var1)

gg6 <- ggplot(agg_dat6, aes(x = as.numeric(mes), y = `Long total cm`,
    color = Especie, shape = Especie)) + geom_line(size = 1) + geom_point(size = 2) +
    labs(y = "Peso (g)", x = "Mes") + theme_classic() + scale_color_viridis_d(alpha = 1,
    option = "G", direction = 1) + scale_x_continuous(breaks = 1:12,
    labels = month.abb.esp) + theme(axis.text.x = element_text(angle = 90,
    hjust = 1))


# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {

    print(gg6)

    ggsave(gg6, path = "./scripts/", device = grDevices::png, filename = "peso_por_especie_mes_5sp.png",
        width = 10, height = 4, dpi = 300)

}

3 Dia y noche

Code
dat_met <- read_xlsx("./data/raw/FECOP 2025 peces ICE.xlsx", sheet = "metodo")

dat_met$Especie[dat_met$Especie == "RhamDía laticauda (rogersi)"] <- "Rhamdia laticauda"

dat_met <- dat_met[dat_met$Especie %in% c("Dajaus monticola", "Astyanax orstedii",
    "Rhamdia laticauda", "Tomocichla tuba"), ]

table(dat_met$Especie)

Astyanax orstedii  Dajaus monticola Rhamdia laticauda   Tomocichla tuba 
                4                 4                 4                 4 
Code
# plot abundance by method and period using bars
gg7 <- ggplot(dat_met, aes(x = Especie, fill = periodo, y = indivs,
    group = metodo)) + geom_bar(stat = "identity", color = "black",
    position = "fill") + labs(y = "Abundancia", x = "Especie", fill = "Periodo") +
    theme_classic() + scale_y_continuous(labels = scales::percent,
    n.breaks = 4) + scale_fill_manual(values = c("black", "white")) +
    facet_wrap(~metodo, ncol = 1) + theme(axis.text.x = element_text(angle = 25,
    hjust = 1))

# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {

    print(gg7)

    ggsave(gg7, path = "./scripts/", device = grDevices::png, filename = "abundancia_por_especie_metodo_y_periodo.png",
        width = 10, height = 8, dpi = 300)

}

Code
datatable2(dat_met)
Code
dat_peso <- read_xlsx("./data/raw/FECOP 2025 peces ICE.xlsx", sheet = "peso")
dat_peso$SE <- dat_peso$s/sqrt(dat_peso$N)
# dat_met$Especie[dat_met$Especie == 'RhamDía laticauda
# (rogersi)'] <- 'Rhamdia laticauda'

dat_peso <- dat_peso[dat_peso$Especie %in% c("Dajaus monticola", "Astyanax orstedii",
    "Poecilia mexicana", "Tomocichla tuba"), ]

dat_peso$Especie <- factor(dat_peso$Especie, levels = c("Dajaus monticola",
    "Tomocichla tuba", "Astyanax orstedii", "Poecilia mexicana"))

table(dat_peso$Especie)

 Dajaus monticola   Tomocichla tuba Astyanax orstedii Poecilia mexicana 
                2                 2                 2                 2 
Code
# plot abundance by method and period using bars
gg8 <- ggplot(dat_peso, aes(x = Especie, fill = Periodo, y = Media,
    group = Periodo)) + geom_bar(stat = "identity", color = "black",
    position = "dodge") + geom_errorbar(aes(ymin = Media - SE, ymax = Media +
    SE), width = 0.2, position = position_dodge(0.9)) + labs(y = "Peso (g)",
    x = "Especie", fill = "Periodo") + theme_classic() + scale_fill_manual(values = c("black",
    "white")) + theme(axis.text.x = element_text(angle = 25, hjust = 1))

# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {

    print(gg8)

    ggsave(gg8, path = "./scripts/", device = grDevices::png, filename = "peso_por_especie_periodo.png",
        width = 10, height = 4, dpi = 300)

}

Code
datatable2(dat_peso)

4 Análisis estadísticos

Code
dat$año_mes <- paste0(format(as.Date(dat$fecha, format = "%d/%m/%Y"),
    "%Y"), "-", dat$mes)

sort(unique(dat$año_mes))

div_agg <- aggregate(Especie ~ Sitio + año_mes, data = dat, FUN = function(x) length(unique(x)))

4.1 Bioquimica

4.1.1 Regresion de random forest prediciendo diversidad a partir de variables fisico-químicas

Code
bioq <- read_xls("./data/raw/muestreos ice FINAL para fecop ALL.xls")

bioq$Sitio[bioq$Sitio == "Guaria"] <- "La Guaria"
bioq$Sitio[bioq$Sitio == "Virgen"] <- "La Virgen"
bioq$Sitio[bioq$Sitio == "Roble"] <- "El Roble"
bioq$Sitio[bioq$Sitio == "SM, bajo la chancha"] <- "San Miguel"
bioq$Sitio[bioq$Sitio == "Chancha"] <- "San Miguel"

spp_sitio <- aggregate(Especie ~ Sitio, data = all_spp_dat, FUN = function(x) length(unique(x)))

# unique(bioq$Sitio) unique(dat$Sitio)
# setdiff(unique(bioq$Sitio), unique(dat$Sitio))

bioq <- bioq[bioq$Sitio %in% unique(dat$Sitio), ]

bioq <- merge(bioq, spp_sitio, by = "Sitio")

nNA <- sapply(bioq, function(x) sum(is.na(x)))

# names(nNA)[nNA <= 5]

sub_bioq <- bioq[, c("ox mg/l", "Temp °C", "pH", "Conductividad",
    "TDS", "Especie")]

sub_bioq <- sub_bioq[complete.cases(sub_bioq), ]
Code
# random forest with caret predicting diversity based on
# bioquimica
set.seed(123)
rf_fit <- train(Especie ~ ., data = sub_bioq, method = "rf", importance = TRUE,
    trControl = trainControl(method = "repeatedcv", number = 100,
        repeats = 30))

saveRDS(rf_fit, file = "./data/processed/rf_fit_diversity_bioq.rds")
Code
rf_fit <- readRDS(file = "./data/processed/rf_fit_diversity_bioq.rds")

# importance
varImp(rf_fit)
rf variable importance

              Overall
`Temp °C`      100.00
TDS             74.03
Conductividad   61.82
pH              32.33
`ox mg/l`        0.00
Code
## importance with ggplot2
imp <- as.data.frame(varImp(rf_fit)$importance)
imp$Variable <- rownames(imp)
imp <- imp[order(imp$Overall), ]
imp$Variable <- factor(imp$Variable, levels = imp$Variable)

gg_imp <- ggplot(imp, aes(x = Variable, y = Overall)) + geom_bar(stat = "identity",
    fill = "gray") + coord_flip() + theme_classic() + labs(y = "Importancia (Random Forest)",
    x = "Variable fisico-química") + scale_y_continuous(breaks = scales::pretty_breaks(n = 5))

# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {

    print(gg_imp)

    ggsave(gg_imp, path = "./scripts/", device = grDevices::png, filename = "importancia.png",
        width = 10, height = 4, dpi = 300)

}

4.1.2 Análisis de correspondencia canónica (CCA)

Code
all_spp_dat <- all_spp_dat[all_spp_dat$Especie != "Aterinella", ]

# convert species name from 'Genus species' to 'G. species'
all_spp_dat$Especie <- gsub("^(\\w)\\w+ ", "\\1. ", all_spp_dat$Especie)

all_spp_dat$Especie[all_spp_dat$Especie == "D. monticola"] <- "D. monticola"


out <- lapply(unique(all_spp_dat$Sitio), function(x) {

    sub <- all_spp_dat[all_spp_dat$Sitio == x, ]

    tab <- as.data.frame(table(sub$Especie))

    # add missing species wtih 0s
    missing_spp <- setdiff(unique(all_spp_dat$Especie), tab$Var1)

    if (length(missing_spp) > 0) {

        add_tab <- data.frame(Var1 = missing_spp, Freq = 0)

        tab <- rbind(tab, add_tab)

    }

    tab <- tab[order(tab$Var1), ]

    return(tab$Freq)
})

fish_spec <- do.call(rbind, out)

colnames(fish_spec) <- sort(unique(all_spp_dat$Especie))

sub_bioq2 <- bioq[, c("ox mg/l", "Temp °C", "pH", "Conductividad",
    "TDS", "Sitio")]
fish_chem <- aggregate(. ~ Sitio, data = sub_bioq2, FUN = mean, na.rm = TRUE)

# order rows as in unique(all_spp_dat$Sitio)
fish_chem <- fish_chem[match(unique(all_spp_dat$Sitio), fish_chem$Sitio),
    ]
rownames(fish_spec) <- rownames(fish_chem) <- fish_chem$Sitio

fish_chem <- fish_chem[, -1]

fish_cca <- cca(fish_spec ~ `ox mg/l` + `Temp °C` + pH + Conductividad +
    TDS, data = fish_chem)

summary(fish_cca)

Call:
cca(formula = fish_spec ~ `ox mg/l` + `Temp °C` + pH + Conductividad +      TDS, data = fish_chem) 

Partitioning of scaled Chi-square:
              Inertia Proportion
Total          1.7195     1.0000
Constrained    1.5468     0.8996
Unconstrained  0.1727     0.1004

Eigenvalues, and their contribution to the scaled Chi-square 

Importance of components:
                        CCA1   CCA2   CCA3    CCA4    CCA5    CA1
Eigenvalue            0.6717 0.4629 0.1922 0.13054 0.08941 0.1727
Proportion Explained  0.3907 0.2692 0.1118 0.07592 0.05200 0.1004
Cumulative Proportion 0.3907 0.6599 0.7716 0.84756 0.89955 1.0000

Accumulated constrained eigenvalues
Importance of components:
                        CCA1   CCA2   CCA3    CCA4    CCA5
Eigenvalue            0.6717 0.4629 0.1922 0.13054 0.08941
Proportion Explained  0.4343 0.2993 0.1242 0.08439 0.05780
Cumulative Proportion 0.4343 0.7336 0.8578 0.94220 1.00000
Code
cols <- mako(3, alpha = 0.9, begin = 0.2, end = 0.8)

fish_cca_df <- ggvegan::fortify(fish_cca)

fish_cca_df$score <- as.character(fish_cca_df$score)

fish_cca_df$score[fish_cca_df$score == "sites"] <- "Sitio"
fish_cca_df$score[fish_cca_df$score == "species"] <- "Especie"

site_cca_df <- fish_cca_df[fish_cca_df$score == "Sitio", ]
sp_cca_df <- fish_cca_df[fish_cca_df$score == "Especie", ]

site_cca_df$vjust <- c(-2.5, 2, 0.2, -1.5, 1.2, 1, 0.5)
site_cca_df$hjust <- c(0.2, 0.5, 1.4, 1, -0.5, 1, -0.1)

sp_cca_df$vjust <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3)
sp_cca_df$hjust <- c(1, 1, 1, 1, 1, 1, -0.1, 1, 1, 1, 1, 1, 1)

autoplot(fish_cca) + theme_classic() + xlim(c(-1.9, 2.7)) + ylim(c(-1.5,
    2.7)) + labs(x = "CCA1 (43.4%)", y = "CCA2 (30.0%)", color = "",
    shape = "") + scale_color_manual(values = cols[c(1, 3)], labels = c("Sitio",
    "Especie")) + scale_shape_manual(values = c(20, 1), labels = c("Sitio",
    "Especie")) + geom_text(data = site_cca_df, aes(label = label,
    x = CCA1, y = CCA2, vjust = vjust, hjust = hjust), size = 3, color = cols[1]) +
    geom_text(data = sp_cca_df, aes(label = label, x = CCA1, y = CCA2,
        vjust = vjust, hjust = hjust), size = 3, color = cols[3]) +
    theme(axis.text = element_text(size = 12), axis.title = element_text(size = 14)) +
    theme(legend.position = c(0.85, 0.9), legend.background = element_rect(fill = alpha("white",
        0.5)))

Code
anova.cca(fish_cca)
Df ChiSquare F Pr(>F)
Model 5 1.5467755 1.791102 0.217
Residual 1 0.1727177 NA NA

Info de la sesión

Click to see
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.5.0 (2025-04-11)
 os       Ubuntu 22.04.5 LTS
 system   x86_64, linux-gnu
 ui       X11
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       America/Costa_Rica
 date     2025-09-11
 pandoc   3.2 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/tools/x86_64/ (via rmarkdown)
 quarto   1.7.31 @ /usr/local/bin/quarto

─ Packages ───────────────────────────────────────────────────────────────────
 package        * version    date (UTC) lib source
 abind            1.4-8      2024-09-12 [1] CRAN (R 4.5.0)
 backports        1.5.0      2024-05-23 [1] CRAN (R 4.5.0)
 bayesplot        1.12.0     2025-04-10 [1] CRAN (R 4.5.0)
 bridgesampling   1.1-2      2021-04-16 [1] CRAN (R 4.5.0)
 brms           * 2.22.0     2024-09-23 [1] CRAN (R 4.5.0)
 Brobdingnag      1.2-9      2022-10-19 [1] CRAN (R 4.5.0)
 bslib            0.9.0      2025-01-30 [1] CRAN (R 4.5.0)
 cachem           1.1.0      2024-05-16 [1] CRAN (R 4.5.0)
 caret          * 7.0-1      2024-12-10 [1] CRAN (R 4.5.0)
 cellranger       1.1.0      2016-07-27 [3] CRAN (R 4.0.1)
 checkmate        2.3.3      2025-08-18 [1] CRAN (R 4.5.0)
 class            7.3-23     2025-01-01 [4] CRAN (R 4.4.2)
 cli              3.6.5      2025-04-23 [1] CRAN (R 4.5.0)
 cluster          2.1.8.1    2025-03-12 [4] CRAN (R 4.4.3)
 coda             0.19-4.1   2024-01-31 [1] CRAN (R 4.5.0)
 codetools        0.2-20     2024-03-31 [4] CRAN (R 4.5.0)
 crayon           1.5.3      2024-06-20 [1] CRAN (R 4.5.0)
 crosstalk        1.2.1      2023-11-23 [3] CRAN (R 4.5.0)
 data.table       1.17.0     2025-02-22 [3] CRAN (R 4.5.0)
 devtools         2.4.5      2022-10-11 [1] CRAN (R 4.5.0)
 digest           0.6.37     2024-08-19 [1] CRAN (R 4.5.0)
 distributional   0.5.0      2024-09-17 [1] CRAN (R 4.5.0)
 dplyr            1.1.4      2023-11-17 [1] CRAN (R 4.5.0)
 DT             * 0.33       2024-04-04 [3] CRAN (R 4.5.0)
 ellipsis         0.3.2      2021-04-29 [3] CRAN (R 4.1.1)
 emmeans          1.11.1     2025-05-04 [3] CRAN (R 4.5.0)
 estimability     1.5.1      2024-05-12 [3] CRAN (R 4.5.0)
 evaluate         1.0.4      2025-06-18 [1] CRAN (R 4.5.0)
 farver           2.1.2      2024-05-13 [1] CRAN (R 4.5.0)
 fastmap          1.2.0      2024-05-15 [1] CRAN (R 4.5.0)
 foreach          1.5.2      2022-02-02 [3] CRAN (R 4.1.2)
 formatR        * 1.14       2023-01-17 [1] CRAN (R 4.5.0)
 fs               1.6.6      2025-04-12 [1] CRAN (R 4.5.0)
 future           1.49.0     2025-05-09 [1] CRAN (R 4.5.0)
 future.apply     1.11.3     2024-10-27 [1] CRAN (R 4.5.0)
 generics         0.1.4      2025-05-09 [1] CRAN (R 4.5.0)
 ggplot2        * 3.5.2      2025-04-09 [1] CRAN (R 4.5.0)
 ggrepel          0.9.6      2024-09-07 [1] CRAN (R 4.5.0)
 ggvegan          0.1.999    2025-09-05 [1] Github (gavinsimpson/ggvegan@3056a43)
 globals          0.18.0     2025-05-08 [1] CRAN (R 4.5.0)
 glue             1.8.0      2024-09-30 [1] CRAN (R 4.5.0)
 gower            1.0.2      2024-12-17 [3] CRAN (R 4.5.0)
 gridExtra        2.3        2017-09-09 [1] CRAN (R 4.5.0)
 gtable           0.3.6      2024-10-25 [1] CRAN (R 4.5.0)
 hardhat          1.4.1      2025-01-31 [1] CRAN (R 4.5.0)
 htmltools        0.5.8.1    2024-04-04 [1] CRAN (R 4.5.0)
 htmlwidgets      1.6.4      2023-12-06 [1] RSPM (R 4.5.0)
 httpuv           1.6.16     2025-04-16 [1] RSPM (R 4.5.0)
 ipred            0.9-15     2024-07-18 [3] CRAN (R 4.5.0)
 iterators        1.0.14     2022-02-05 [3] CRAN (R 4.1.2)
 jquerylib        0.1.4      2021-04-26 [1] CRAN (R 4.5.0)
 jsonlite         2.0.0      2025-03-27 [1] CRAN (R 4.5.0)
 knitr          * 1.50       2025-03-16 [1] CRAN (R 4.5.0)
 labeling         0.4.3      2023-08-29 [1] CRAN (R 4.5.0)
 later            1.4.2      2025-04-08 [1] RSPM (R 4.5.0)
 lattice        * 0.22-7     2025-04-02 [4] CRAN (R 4.5.0)
 lava             1.8.1      2025-01-12 [3] CRAN (R 4.5.0)
 lifecycle        1.0.4      2023-11-07 [1] CRAN (R 4.5.0)
 listenv          0.9.1      2024-01-29 [1] CRAN (R 4.5.0)
 loo              2.8.0      2024-07-03 [1] CRAN (R 4.5.0)
 lubridate        1.9.4      2024-12-08 [3] CRAN (R 4.5.0)
 magrittr         2.0.3      2022-03-30 [1] CRAN (R 4.5.0)
 MASS             7.3-65     2025-02-28 [4] CRAN (R 4.4.3)
 Matrix           1.7-3      2025-03-11 [4] CRAN (R 4.4.3)
 matrixStats      1.5.0      2025-01-07 [1] CRAN (R 4.5.0)
 memoise          2.0.1      2021-11-26 [3] CRAN (R 4.1.2)
 mgcv             1.9-3      2025-04-04 [4] CRAN (R 4.5.0)
 mime             0.13       2025-03-17 [1] CRAN (R 4.5.0)
 miniUI           0.1.2      2025-04-17 [3] CRAN (R 4.5.0)
 ModelMetrics     1.2.2.2    2020-03-17 [3] CRAN (R 4.0.1)
 multcomp         1.4-28     2025-01-29 [3] CRAN (R 4.5.0)
 mvtnorm          1.3-3      2025-01-10 [1] CRAN (R 4.5.0)
 nlme             3.1-168    2025-03-31 [4] CRAN (R 4.4.3)
 nnet             7.3-20     2025-01-01 [4] CRAN (R 4.4.2)
 packrat          0.9.3      2025-06-16 [1] CRAN (R 4.5.0)
 parallelly       1.44.0     2025-05-07 [1] CRAN (R 4.5.0)
 permute        * 0.9-8      2025-06-25 [1] CRAN (R 4.5.0)
 pillar           1.11.0     2025-07-04 [1] CRAN (R 4.5.0)
 pkgbuild         1.4.8      2025-05-26 [1] CRAN (R 4.5.0)
 pkgconfig        2.0.3      2019-09-22 [1] CRAN (R 4.5.0)
 pkgload          1.4.0      2024-06-28 [1] CRAN (R 4.5.0)
 plyr             1.8.9      2023-10-02 [1] CRAN (R 4.5.0)
 posterior        1.6.1      2025-02-27 [1] CRAN (R 4.5.0)
 pROC             1.18.0     2021-09-03 [3] CRAN (R 4.1.1)
 prodlim          2025.04.28 2025-04-28 [3] CRAN (R 4.5.0)
 profvis          0.4.0      2024-09-20 [1] CRAN (R 4.5.0)
 promises         1.3.3      2025-05-29 [1] RSPM (R 4.5.0)
 purrr            1.1.0      2025-07-10 [1] CRAN (R 4.5.0)
 R6               2.6.1      2025-02-15 [1] CRAN (R 4.5.0)
 randomForest     4.7-1.2    2024-09-22 [3] CRAN (R 4.5.0)
 RColorBrewer     1.1-3      2022-04-03 [1] CRAN (R 4.5.0)
 Rcpp           * 1.1.0      2025-07-02 [1] CRAN (R 4.5.0)
 RcppParallel     5.1.10     2025-01-24 [1] CRAN (R 4.5.0)
 readxl         * 1.4.5      2025-03-07 [3] CRAN (R 4.5.0)
 recipes          1.3.0      2025-04-17 [1] CRAN (R 4.5.0)
 remotes          2.5.0      2024-03-17 [1] CRAN (R 4.5.0)
 reshape2         1.4.4      2020-04-09 [1] CRAN (R 4.5.0)
 rlang            1.1.6      2025-04-11 [1] CRAN (R 4.5.0)
 rmarkdown        2.29       2024-11-04 [1] CRAN (R 4.5.0)
 rpart            4.1.24     2025-01-07 [4] CRAN (R 4.4.2)
 rstantools       2.4.0      2024-01-31 [1] CRAN (R 4.5.0)
 rstudioapi       0.17.1     2024-10-22 [1] CRAN (R 4.5.0)
 sandwich         3.1-1      2024-09-15 [3] CRAN (R 4.5.0)
 sass             0.4.10     2025-04-11 [1] CRAN (R 4.5.0)
 scales           1.4.0      2025-04-24 [1] CRAN (R 4.5.0)
 sessioninfo      1.2.3      2025-02-05 [3] CRAN (R 4.5.0)
 shiny            1.10.0     2024-12-14 [1] CRAN (R 4.5.0)
 sketchy          1.0.5      2025-08-20 [1] CRANs (R 4.5.0)
 stringi          1.8.7      2025-03-27 [1] CRAN (R 4.5.0)
 stringr          1.5.1      2023-11-14 [1] CRAN (R 4.5.0)
 survival         3.8-3      2024-12-17 [4] CRAN (R 4.4.2)
 tensorA          0.36.2.1   2023-12-13 [1] CRAN (R 4.5.0)
 TH.data          1.1-3      2025-01-17 [3] CRAN (R 4.5.0)
 tibble           3.3.0      2025-06-08 [1] RSPM (R 4.5.0)
 tidyr            1.3.1      2024-01-24 [1] CRAN (R 4.5.0)
 tidyselect       1.2.1      2024-03-11 [1] CRAN (R 4.5.0)
 timechange       0.3.0      2024-01-18 [3] CRAN (R 4.5.0)
 timeDate         4041.110   2024-09-22 [3] CRAN (R 4.5.0)
 urlchecker       1.0.1      2021-11-30 [1] CRAN (R 4.5.0)
 usethis          3.1.0      2024-11-26 [3] CRAN (R 4.5.0)
 vctrs            0.6.5      2023-12-01 [1] CRAN (R 4.5.0)
 vegan          * 2.7-1      2025-06-05 [1] CRAN (R 4.5.0)
 viridis        * 0.6.5      2024-01-29 [1] CRAN (R 4.5.0)
 viridisLite    * 0.4.2      2023-05-02 [1] CRAN (R 4.5.0)
 withr            3.0.2      2024-10-28 [1] CRAN (R 4.5.0)
 xaringanExtra    0.8.0      2024-05-19 [1] CRAN (R 4.5.0)
 xfun             0.53       2025-08-19 [1] CRAN (R 4.5.0)
 xtable           1.8-4      2019-04-21 [3] CRAN (R 4.0.1)
 yaml             2.3.10     2024-07-26 [1] CRAN (R 4.5.0)
 zoo              1.8-14     2025-04-10 [3] CRAN (R 4.5.0)

 [1] /home/m/R/x86_64-pc-linux-gnu-library/4.5
 [2] /usr/local/lib/R/site-library
 [3] /usr/lib/R/site-library
 [4] /usr/lib/R/library
 * ── Packages attached to the search path.

──────────────────────────────────────────────────────────────────────────────