Code
# options to customize chunk outputs
knitr::opts_chunk$set(
tidy.opts = list(width.cutoff = 65),
tidy = TRUE,
message = FALSE
)Análisis estatísticos y visualización de datos
# print link to github repo if any
if (file.exists("./.git/config")){
config <- readLines("./.git/config")
url <- grep("url", config, value = TRUE)
url <- gsub("\\turl = |.git$", "", url)
if (length(url) > 1){
cat("\nSource code and data found at [", url, "](", url, ")", sep = "")
}
}# options to customize chunk outputs
knitr::opts_chunk$set(
tidy.opts = list(width.cutoff = 65),
tidy = TRUE,
message = FALSE
)
# knitr is require for creating html/pdf/word reports formatR is
# used for soft-wrapping code
# install/ load packages
sketchy::load_packages(packages = c("knitr", "formatR", "readxl",
"DT", "ggplot2", "brms", "viridis", "vegan", "caret"))
theme_classic <- function(...) ggplot2::theme_classic(base_size = 20,
...)options(DT.options = list(pageLength = 200, scrollX = TRUE, scrollY = "800px",
dom = "Bfrtip", buttons = c("copy", "csv", "excel")))
datatable2 <- function(x, ...) DT::datatable(data = x, extensions = "Buttons",
...)dat <- read_xlsx("./data/raw/FECOP 2025 peces ICE.xlsx")
# remove things after '('
dat$Especie <- gsub(" \\(.*", "", dat$Especie)
dat$Especie[dat$Especie == "Theraps urderwoodi"] <- "Tomocichla tuba"
dat$Sitio[grepl(dat$Sitio, pattern = "La virgen")] <- "La Virgen"
dat$Sitio[grepl(dat$Sitio, pattern = "San Miguel")] <- "San Miguel"
# convertir fecha en mes
dat$mes <- format(as.Date(dat$fecha, format = "%d/%m/%Y"), "%m")
# convertir a espanol
month.abb.esp <- c("Ene", "Feb", "Mar", "Abr", "May", "Jun", "Jul",
"Ago", "Sep", "Oct", "Nov", "Dic")
dat$mes_chr <- month.abb.esp[as.numeric(dat$mes)]
# hacer numerica
dat$`Long total cm` <- as.numeric(dat$`Long total cm`)
dat$`Peso g` <- as.numeric(dat$`Peso g`)
datatable2(dat)Muestras por sitio
as.data.frame(table(dat$Sitio))| Var1 | Freq |
|---|---|
| Cariblanco | 99 |
| Chilamate | 296 |
| El Roble | 111 |
| La Guaria | 166 |
| La Virgen | 125 |
| San Miguel | 60 |
| Tirimbina | 185 |
Muestras por especie
tab_sp <- as.data.frame(table(dat$Especie))
tab_sp <- tab_sp[order(-tab_sp$Freq), ]
tab_sp| Var1 | Freq | |
|---|---|---|
| 7 | Dajaus monticola | 298 |
| 14 | Rhamdia laticauda | 271 |
| 16 | Tomocichla tuba | 148 |
| 2 | Astyanax orstedii | 132 |
| 1 | Arcos nudus | 82 |
| 13 | Poecilia mexicana | 36 |
| 11 | Joturus pichardi | 20 |
| 12 | Neetroplus nematopus | 18 |
| 5 | Brycon costaricensis | 16 |
| 9 | Gobiomorus dormitor | 6 |
| 6 | Cribroheros alfari | 5 |
| 15 | Rhamdia nicaraguensis | 4 |
| 4 | Awaus banana | 3 |
| 3 | Aterinella | 1 |
| 8 | Eretmobrycon scleroparius | 1 |
| 10 | Hypostomus aspidolepis | 1 |
Solo especies de interés comercial
all_spp_dat <- dat
dat <- dat[dat$Especie %in% c("Brycon costaricensis", "Gobiomorus dormitor",
"Awaus banana", "Rhamdia laticauda", "Rhamdia nicaraguensis",
"Dajaus monticola", "Joturus pichardi"), ]
tab_sp <- as.data.frame(table(dat$Especie))
tab_sp <- tab_sp[order(-tab_sp$Freq), ]
tab_sp| Var1 | Freq | |
|---|---|---|
| 3 | Dajaus monticola | 298 |
| 6 | Rhamdia laticauda | 271 |
| 5 | Joturus pichardi | 20 |
| 2 | Brycon costaricensis | 16 |
| 4 | Gobiomorus dormitor | 6 |
| 7 | Rhamdia nicaraguensis | 4 |
| 1 | Awaus banana | 3 |
Individuos por especie y sitio
dat$indiv <- 1
# Barras con proporcion de indiv por especie x sitio
agg_dat2 <- aggregate(indiv ~ Sitio + Especie, data = dat, FUN = sum)
agg_dat2$Sitio <- factor(agg_dat2$Sitio, levels = c("La Guaria", "Chilamate",
"El Roble", "Tirimbina", "La Virgen", "San Miguel", "Cariblanco"))
agg_dat2$Especie <- factor(agg_dat2$Especie, levels = tab_sp$Var1)
gg1 <- ggplot(agg_dat2, aes(x = Sitio, y = indiv, fill = Especie)) +
geom_bar(stat = "identity", position = "fill") + labs(y = "Porcentaje\nde individuos",
x = "Sitio") + theme_classic() + scale_y_continuous(labels = scales::percent,
n.breaks = 4) + scale_fill_viridis_d(alpha = 1, option = "G",
direction = -1) + theme(axis.text.x = element_text(angle = 90,
hjust = 1))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg1)
ggsave(gg1, path = "./scripts/", device = grDevices::png, filename = "especies_por_sitio.png",
width = 10, height = 6, dpi = 300)
}# Barras con proporcion de indiv por especie x sitio y por mes
agg_dat3 <- aggregate(indiv ~ mes + Especie, data = dat, FUN = sum)
# convertir a espanol
agg_dat3$mes_chr <- month.abb.esp[as.numeric(agg_dat3$mes)]
agg_dat3$mes_chr <- factor(agg_dat3$mes_chr, levels = month.abb.esp)
agg_dat3$Especie <- factor(agg_dat3$Especie, levels = tab_sp$Var1)
# agregar meses faltantes
sub_agg <- agg_dat3[1:2, ]
sub_agg$indiv <- 0
sub_agg$mes_chr <- c("Ene", "Feb")
agg_dat3 <- rbind(agg_dat3, sub_agg)
gg2 <- ggplot(agg_dat3, aes(x = mes_chr, y = indiv, fill = Especie)) +
geom_bar(stat = "identity", position = "fill") + labs(y = "Porcentaje\nde individuos",
x = "Mes") + theme_classic() + scale_y_continuous(labels = scales::percent,
n.breaks = 4) + scale_fill_viridis_d(alpha = 1, option = "G",
direction = -1) + theme(axis.text.x = element_text(angle = 90,
hjust = 1))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg2)
ggsave(gg2, path = "./scripts/", device = grDevices::png, filename = "especies_por_mes.png",
width = 10, height = 6, dpi = 300)
}# Barras con proporcion de indiv por especie x sitio y por mes
agg_dat3 <- aggregate(indiv ~ mes + Especie + Sitio, data = dat, FUN = sum)
# convertir a espanol
agg_dat3$mes_chr <- month.abb.esp[as.numeric(agg_dat3$mes)]
agg_dat3$mes_chr <- factor(agg_dat3$mes_chr, levels = month.abb.esp)
agg_dat3$Especie <- factor(agg_dat3$Especie, levels = tab_sp$Var1)
# agregar meses faltantes
sub_agg <- agg_dat3[1:2, ]
sub_agg$indiv <- 0
sub_agg$mes_chr <- c("Ene", "Feb")
agg_dat3$Sitio <- factor(agg_dat3$Sitio, levels = c("La Guaria", "Chilamate",
"El Roble", "Tirimbina", "La Virgen", "San Miguel", "Cariblanco"))
agg_dat3 <- rbind(agg_dat3, sub_agg)
gg3 <- ggplot(agg_dat3, aes(x = mes_chr, y = indiv, fill = Especie)) +
geom_bar(stat = "identity", position = "fill") + labs(y = "Porcentaje de individuos",
x = "Mes") + theme_classic() + scale_y_continuous(labels = scales::percent,
n.breaks = 4) + scale_fill_viridis_d(alpha = 1, option = "G",
direction = -1) + theme(axis.text.x = element_text(angle = 90,
hjust = 1)) + facet_wrap(~Sitio, scales = "free_y", ncol = 2)
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg3)
ggsave(gg3, path = "./scripts/", device = grDevices::png, filename = "especies_por_mes_sitio.png",
width = 10, height = 6, dpi = 300)
}Tamaño por especie y mes
dat2 <- all_spp_dat[all_spp_dat$Especie %in% c("Rhamdia laticauda",
"Dajaus monticola", "Joturus pichardi", "Tomocichla tuba"), ]
agg_dat4 <- aggregate(`Long total cm` ~ mes + Especie, data = dat2,
FUN = mean, na.rm = TRUE)
se <- function(x) {
x <- na.omit(x)
sd(x)/sqrt(length(x))
}
agg_dat4$se <- aggregate(`Long total cm` ~ mes + Especie, data = dat2,
FUN = se)$`Long total cm`
agg_dat4$mes_chr <- month.abb.esp[as.numeric(agg_dat4$mes)]
agg_dat4$mes_chr <- factor(agg_dat4$mes_chr, levels = month.abb.esp)
agg_dat4$Especie <- factor(agg_dat4$Especie, levels = unique(agg_dat4$Especie))
gg4 <- ggplot(agg_dat4, aes(x = as.numeric(mes), y = `Long total cm`,
color = Especie)) + geom_line(size = 1, linetype = "dotdash") +
geom_errorbar(aes(ymin = `Long total cm` - se, ymax = `Long total cm` +
se), width = 0.2) + geom_point(size = 3) + labs(y = "Longitud total (cm)",
x = "Mes") + theme_classic() + scale_color_viridis_d(alpha = 1,
option = "G", direction = 1) + scale_x_continuous(breaks = 1:12,
labels = month.abb.esp) + theme(axis.text.x = element_text(angle = 90,
hjust = 1))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg4)
ggsave(gg4, path = "./scripts/", device = grDevices::png, filename = "talla_por_especie_mes.png",
width = 10, height = 4, dpi = 300)
}datatable2(agg_dat4)dat2 <- all_spp_dat[all_spp_dat$Especie %in% c("Brycon costaricensis",
"Gobiomorus dormitor", "Joturus pichardi", "Tomocichla tuba"),
]
agg_dat4 <- aggregate(`Long total cm` ~ Sitio + Especie, data = dat2,
FUN = mean, na.rm = TRUE)
se <- function(x) {
x <- na.omit(x)
sd(x)/sqrt(length(x))
}
agg_dat4$se <- aggregate(`Long total cm` ~ Sitio + Especie, data = dat2,
FUN = se)$`Long total cm`
# add missing rows for 'Cariblanco' with 0s
car_agg <- agg_dat4[rep(1, 3), ]
car_agg$`Long total cm` <- 0
car_agg$se <- NA
car_agg$Especie <- unique(agg_dat4$Especie)[-1]
agg_dat4 <- rbind(agg_dat4, car_agg)
agg_dat4$Sitio <- factor(agg_dat4$Sitio, levels = c("La Guaria", "Chilamate",
"El Roble", "Tirimbina", "La Virgen", "San Miguel", "Cariblanco"))
agg_dat4$Especie <- factor(agg_dat4$Especie, levels = unique(agg_dat4$Especie))
gg4 <- ggplot(agg_dat4, aes(x = Sitio, y = `Long total cm`, fill = Especie,
group = Especie)) + geom_bar(stat = "identity", position = "dodge",
width = 0.9) + geom_errorbar(aes(ymin = `Long total cm` - se,
ymax = `Long total cm` + se), width = 0.1, position = position_dodge(0.9)) +
labs(y = "Longitud total (cm)", x = "Sitio") + theme_classic() +
scale_color_viridis_d(alpha = 1, option = "G", direction = 1) +
scale_fill_viridis_d(alpha = 1, option = "G", direction = 1) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg4)
ggsave(gg4, path = "./scripts/", device = grDevices::png, filename = "longitud_por_especies_por_sitio.png",
width = 10, height = 4, dpi = 300)
}datatable2(agg_dat4)dat2 <- all_spp_dat[all_spp_dat$Especie %in% c("Brycon costaricensis",
"Gobiomorus dormitor", "Joturus pichardi", "Tomocichla tuba"),
]
agg_dat4 <- aggregate(`Peso g` ~ Sitio + Especie, data = dat2, FUN = mean,
na.rm = TRUE)
agg_dat4$se <- aggregate(`Peso g` ~ Sitio + Especie, data = dat2,
FUN = se)$`Peso g`
# add missing rows for 'Cariblanco' with 0s
car_agg <- agg_dat4[rep(1, 3), ]
car_agg$`Peso g` <- 0
car_agg$se <- NA
car_agg$Especie <- unique(agg_dat4$Especie)[-1]
agg_dat4 <- rbind(agg_dat4, car_agg)
agg_dat4$Sitio <- factor(agg_dat4$Sitio, levels = c("La Guaria", "Chilamate",
"El Roble", "Tirimbina", "La Virgen", "San Miguel", "Cariblanco"))
agg_dat4$Especie <- factor(agg_dat4$Especie, levels = unique(agg_dat4$Especie))
gg4 <- ggplot(agg_dat4, aes(x = Sitio, y = `Peso g`, fill = Especie,
group = Especie)) + geom_bar(stat = "identity", position = "dodge",
width = 0.9) + geom_errorbar(aes(ymin = `Peso g` - se, ymax = `Peso g` +
se), width = 0.1, position = position_dodge(0.9)) + labs(y = "Peso (g)",
x = "Sitio") + theme_classic() + scale_color_viridis_d(alpha = 1,
option = "G", direction = 1) + scale_fill_viridis_d(alpha = 1,
option = "G", direction = 1) + theme(axis.text.x = element_text(angle = 45,
hjust = 1))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg4)
ggsave(gg4, path = "./scripts/", device = grDevices::png, filename = "peso_por_especies_por_sitio.png",
width = 10, height = 4, dpi = 300)
}datatable2(agg_dat4)agg_dat0 <- aggregate(`Long total cm` ~ Especie, data = all_spp_dat,
FUN = mean, na.rm = TRUE)
agg_dat0$se <- aggregate(`Long total cm` ~ Especie, data = all_spp_dat,
FUN = se)$`Long total cm`
agg_dat0 <- agg_dat0[order(agg_dat0$`Long total cm`, decreasing = FALSE),
]
agg_dat0$Especie <- factor(agg_dat0$Especie, levels = agg_dat0$Especie)
gg0 <- ggplot(agg_dat0, aes(x = Especie, y = `Long total cm`)) + geom_bar(stat = "identity",
position = "dodge", fill = "gray") + geom_errorbar(aes(ymin = `Long total cm` -
se, ymax = `Long total cm` + se), width = 0.2) + labs(y = "Longitud total (cm)",
x = "Especie") + theme_classic() + theme(axis.text.x = element_text(angle = 45,
hjust = 1))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg0)
ggsave(gg0, path = "./scripts/", device = grDevices::png, filename = "talla_por_especie.png",
width = 10, height = 8, dpi = 300)
}datatable2(agg_dat0)Peso por especie y mes
agg_dat5 <- aggregate(`Peso g` ~ mes + Especie, data = dat2, FUN = mean,
na.rm = TRUE)
agg_dat5$se <- aggregate(`Peso g` ~ mes + Especie, data = dat2, FUN = se)$`Peso g`
agg_dat5$mes_chr <- month.abb.esp[as.numeric(agg_dat5$mes)]
agg_dat5$mes_chr <- factor(agg_dat5$mes_chr, levels = month.abb.esp)
agg_dat5$Especie <- factor(agg_dat5$Especie, levels = c("Brycon costaricensis",
"Gobiomorus dormitor", "Joturus pichardi", "Tomocichla tuba"))
gg5 <- ggplot(agg_dat5, aes(x = as.numeric(mes), y = `Peso g`, color = Especie)) +
geom_line(size = 1, linetype = "dotdash") + geom_point(size = 3) +
geom_errorbar(aes(ymin = `Peso g` - se, ymax = `Peso g` + se),
width = 0.2) + labs(y = "Peso (g)", x = "Mes") + theme_classic() +
scale_color_viridis_d(alpha = 1, option = "G", direction = 1) +
scale_x_continuous(breaks = 1:12, labels = month.abb.esp) + theme(axis.text.x = element_text(angle = 90,
hjust = 1))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg5)
ggsave(gg5, path = "./scripts/", device = grDevices::png, filename = "peso_por_especie_mes.png",
width = 10, height = 4, dpi = 300)
}datatable2(agg_dat5)agg_dat0 <- aggregate(`Peso g` ~ Especie, data = all_spp_dat, FUN = mean,
na.rm = TRUE)
agg_dat0$se <- aggregate(`Peso g` ~ Especie, data = all_spp_dat, FUN = se)$`Peso g`
agg_dat0 <- agg_dat0[order(agg_dat0$`Peso g`, decreasing = FALSE),
]
agg_dat0$Especie <- factor(agg_dat0$Especie, levels = agg_dat0$Especie)
gg0 <- ggplot(agg_dat0, aes(x = Especie, y = `Peso g`)) + geom_bar(stat = "identity",
position = "dodge", fill = "gray") + geom_errorbar(aes(ymin = `Peso g` -
se, ymax = `Peso g` + se), width = 0.2) + labs(y = "Peso (g)",
x = "Especie") + theme_classic() + theme(axis.text.x = element_text(angle = 45,
hjust = 1))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg0)
ggsave(gg0, path = "./scripts/", device = grDevices::png, filename = "peso_por_especie.png",
width = 10, height = 8, dpi = 300)
}datatable2(agg_dat0)Peso por especie y mes para las que tienen mas datos
agg_dat6 <- aggregate(`Long total cm` ~ mes + Especie, data = dat[!dat$Especie %in%
c("Awaus banana", "Rhamdia nicaraguensis"), ], FUN = mean, na.rm = TRUE)
agg_dat6$mes_chr <- month.abb.esp[as.numeric(agg_dat6$mes)]
agg_dat6$mes_chr <- factor(agg_dat6$mes_chr, levels = month.abb.esp)
agg_dat6$Especie <- factor(agg_dat6$Especie, levels = tab_sp$Var1)
gg6 <- ggplot(agg_dat6, aes(x = as.numeric(mes), y = `Long total cm`,
color = Especie, shape = Especie)) + geom_line(size = 1) + geom_point(size = 2) +
labs(y = "Peso (g)", x = "Mes") + theme_classic() + scale_color_viridis_d(alpha = 1,
option = "G", direction = 1) + scale_x_continuous(breaks = 1:12,
labels = month.abb.esp) + theme(axis.text.x = element_text(angle = 90,
hjust = 1))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg6)
ggsave(gg6, path = "./scripts/", device = grDevices::png, filename = "peso_por_especie_mes_5sp.png",
width = 10, height = 4, dpi = 300)
}dat_met <- read_xlsx("./data/raw/FECOP 2025 peces ICE.xlsx", sheet = "metodo")
dat_met$Especie[dat_met$Especie == "RhamDía laticauda (rogersi)"] <- "Rhamdia laticauda"
dat_met <- dat_met[dat_met$Especie %in% c("Dajaus monticola", "Astyanax orstedii",
"Rhamdia laticauda", "Tomocichla tuba"), ]
table(dat_met$Especie)
Astyanax orstedii Dajaus monticola Rhamdia laticauda Tomocichla tuba
4 4 4 4
# plot abundance by method and period using bars
gg7 <- ggplot(dat_met, aes(x = Especie, fill = periodo, y = indivs,
group = metodo)) + geom_bar(stat = "identity", color = "black",
position = "fill") + labs(y = "Abundancia", x = "Especie", fill = "Periodo") +
theme_classic() + scale_y_continuous(labels = scales::percent,
n.breaks = 4) + scale_fill_manual(values = c("black", "white")) +
facet_wrap(~metodo, ncol = 1) + theme(axis.text.x = element_text(angle = 25,
hjust = 1))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg7)
ggsave(gg7, path = "./scripts/", device = grDevices::png, filename = "abundancia_por_especie_metodo_y_periodo.png",
width = 10, height = 8, dpi = 300)
}datatable2(dat_met)dat_peso <- read_xlsx("./data/raw/FECOP 2025 peces ICE.xlsx", sheet = "peso")
dat_peso$SE <- dat_peso$s/sqrt(dat_peso$N)
# dat_met$Especie[dat_met$Especie == 'RhamDía laticauda
# (rogersi)'] <- 'Rhamdia laticauda'
dat_peso <- dat_peso[dat_peso$Especie %in% c("Dajaus monticola", "Astyanax orstedii",
"Poecilia mexicana", "Tomocichla tuba"), ]
dat_peso$Especie <- factor(dat_peso$Especie, levels = c("Dajaus monticola",
"Tomocichla tuba", "Astyanax orstedii", "Poecilia mexicana"))
table(dat_peso$Especie)
Dajaus monticola Tomocichla tuba Astyanax orstedii Poecilia mexicana
2 2 2 2
# plot abundance by method and period using bars
gg8 <- ggplot(dat_peso, aes(x = Especie, fill = Periodo, y = Media,
group = Periodo)) + geom_bar(stat = "identity", color = "black",
position = "dodge") + geom_errorbar(aes(ymin = Media - SE, ymax = Media +
SE), width = 0.2, position = position_dodge(0.9)) + labs(y = "Peso (g)",
x = "Especie", fill = "Periodo") + theme_classic() + scale_fill_manual(values = c("black",
"white")) + theme(axis.text.x = element_text(angle = 25, hjust = 1))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg8)
ggsave(gg8, path = "./scripts/", device = grDevices::png, filename = "peso_por_especie_periodo.png",
width = 10, height = 4, dpi = 300)
}datatable2(dat_peso)dat$año_mes <- paste0(format(as.Date(dat$fecha, format = "%d/%m/%Y"),
"%Y"), "-", dat$mes)
sort(unique(dat$año_mes))
div_agg <- aggregate(Especie ~ Sitio + año_mes, data = dat, FUN = function(x) length(unique(x)))bioq <- read_xls("./data/raw/muestreos ice FINAL para fecop ALL.xls")
bioq$Sitio[bioq$Sitio == "Guaria"] <- "La Guaria"
bioq$Sitio[bioq$Sitio == "Virgen"] <- "La Virgen"
bioq$Sitio[bioq$Sitio == "Roble"] <- "El Roble"
bioq$Sitio[bioq$Sitio == "SM, bajo la chancha"] <- "San Miguel"
bioq$Sitio[bioq$Sitio == "Chancha"] <- "San Miguel"
spp_sitio <- aggregate(Especie ~ Sitio, data = all_spp_dat, FUN = function(x) length(unique(x)))
# unique(bioq$Sitio) unique(dat$Sitio)
# setdiff(unique(bioq$Sitio), unique(dat$Sitio))
bioq <- bioq[bioq$Sitio %in% unique(dat$Sitio), ]
bioq <- merge(bioq, spp_sitio, by = "Sitio")
nNA <- sapply(bioq, function(x) sum(is.na(x)))
# names(nNA)[nNA <= 5]
sub_bioq <- bioq[, c("ox mg/l", "Temp °C", "pH", "Conductividad",
"TDS", "Especie")]
sub_bioq <- sub_bioq[complete.cases(sub_bioq), ]# random forest with caret predicting diversity based on
# bioquimica
set.seed(123)
rf_fit <- train(Especie ~ ., data = sub_bioq, method = "rf", importance = TRUE,
trControl = trainControl(method = "repeatedcv", number = 100,
repeats = 30))
saveRDS(rf_fit, file = "./data/processed/rf_fit_diversity_bioq.rds")rf_fit <- readRDS(file = "./data/processed/rf_fit_diversity_bioq.rds")
# importance
varImp(rf_fit)rf variable importance
Overall
`Temp °C` 100.00
TDS 74.03
Conductividad 61.82
pH 32.33
`ox mg/l` 0.00
## importance with ggplot2
imp <- as.data.frame(varImp(rf_fit)$importance)
imp$Variable <- rownames(imp)
imp <- imp[order(imp$Overall), ]
imp$Variable <- factor(imp$Variable, levels = imp$Variable)
gg_imp <- ggplot(imp, aes(x = Variable, y = Overall)) + geom_bar(stat = "identity",
fill = "gray") + coord_flip() + theme_classic() + labs(y = "Importancia (Random Forest)",
x = "Variable fisico-química") + scale_y_continuous(breaks = scales::pretty_breaks(n = 5))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg_imp)
ggsave(gg_imp, path = "./scripts/", device = grDevices::png, filename = "importancia.png",
width = 10, height = 4, dpi = 300)
}all_spp_dat <- all_spp_dat[all_spp_dat$Especie != "Aterinella", ]
# convert species name from 'Genus species' to 'G. species'
all_spp_dat$Especie <- gsub("^(\\w)\\w+ ", "\\1. ", all_spp_dat$Especie)
all_spp_dat$Especie[all_spp_dat$Especie == "D. monticola"] <- "D. monticola"
out <- lapply(unique(all_spp_dat$Sitio), function(x) {
sub <- all_spp_dat[all_spp_dat$Sitio == x, ]
tab <- as.data.frame(table(sub$Especie))
# add missing species wtih 0s
missing_spp <- setdiff(unique(all_spp_dat$Especie), tab$Var1)
if (length(missing_spp) > 0) {
add_tab <- data.frame(Var1 = missing_spp, Freq = 0)
tab <- rbind(tab, add_tab)
}
tab <- tab[order(tab$Var1), ]
return(tab$Freq)
})
fish_spec <- do.call(rbind, out)
colnames(fish_spec) <- sort(unique(all_spp_dat$Especie))
sub_bioq2 <- bioq[, c("ox mg/l", "Temp °C", "pH", "Conductividad",
"TDS", "Sitio")]
fish_chem <- aggregate(. ~ Sitio, data = sub_bioq2, FUN = mean, na.rm = TRUE)
# order rows as in unique(all_spp_dat$Sitio)
fish_chem <- fish_chem[match(unique(all_spp_dat$Sitio), fish_chem$Sitio),
]
rownames(fish_spec) <- rownames(fish_chem) <- fish_chem$Sitio
fish_chem <- fish_chem[, -1]
fish_cca <- cca(fish_spec ~ `ox mg/l` + `Temp °C` + pH + Conductividad +
TDS, data = fish_chem)
summary(fish_cca)
Call:
cca(formula = fish_spec ~ `ox mg/l` + `Temp °C` + pH + Conductividad + TDS, data = fish_chem)
Partitioning of scaled Chi-square:
Inertia Proportion
Total 1.7195 1.0000
Constrained 1.5468 0.8996
Unconstrained 0.1727 0.1004
Eigenvalues, and their contribution to the scaled Chi-square
Importance of components:
CCA1 CCA2 CCA3 CCA4 CCA5 CA1
Eigenvalue 0.6717 0.4629 0.1922 0.13054 0.08941 0.1727
Proportion Explained 0.3907 0.2692 0.1118 0.07592 0.05200 0.1004
Cumulative Proportion 0.3907 0.6599 0.7716 0.84756 0.89955 1.0000
Accumulated constrained eigenvalues
Importance of components:
CCA1 CCA2 CCA3 CCA4 CCA5
Eigenvalue 0.6717 0.4629 0.1922 0.13054 0.08941
Proportion Explained 0.4343 0.2993 0.1242 0.08439 0.05780
Cumulative Proportion 0.4343 0.7336 0.8578 0.94220 1.00000
cols <- mako(3, alpha = 0.9, begin = 0.2, end = 0.8)
fish_cca_df <- ggvegan::fortify(fish_cca)
fish_cca_df$score <- as.character(fish_cca_df$score)
fish_cca_df$score[fish_cca_df$score == "sites"] <- "Sitio"
fish_cca_df$score[fish_cca_df$score == "species"] <- "Especie"
site_cca_df <- fish_cca_df[fish_cca_df$score == "Sitio", ]
sp_cca_df <- fish_cca_df[fish_cca_df$score == "Especie", ]
site_cca_df$vjust <- c(-2.5, 2, 0.2, -1.5, 1.2, 1, 0.5)
site_cca_df$hjust <- c(0.2, 0.5, 1.4, 1, -0.5, 1, -0.1)
sp_cca_df$vjust <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3)
sp_cca_df$hjust <- c(1, 1, 1, 1, 1, 1, -0.1, 1, 1, 1, 1, 1, 1)
autoplot(fish_cca) + theme_classic() + xlim(c(-1.9, 2.7)) + ylim(c(-1.5,
2.7)) + labs(x = "CCA1 (43.4%)", y = "CCA2 (30.0%)", color = "",
shape = "") + scale_color_manual(values = cols[c(1, 3)], labels = c("Sitio",
"Especie")) + scale_shape_manual(values = c(20, 1), labels = c("Sitio",
"Especie")) + geom_text(data = site_cca_df, aes(label = label,
x = CCA1, y = CCA2, vjust = vjust, hjust = hjust), size = 3, color = cols[1]) +
geom_text(data = sp_cca_df, aes(label = label, x = CCA1, y = CCA2,
vjust = vjust, hjust = hjust), size = 3, color = cols[3]) +
theme(axis.text = element_text(size = 12), axis.title = element_text(size = 14)) +
theme(legend.position = c(0.85, 0.9), legend.background = element_rect(fill = alpha("white",
0.5)))anova.cca(fish_cca)| Df | ChiSquare | F | Pr(>F) | |
|---|---|---|---|---|
| Model | 5 | 1.5467755 | 1.791102 | 0.217 |
| Residual | 1 | 0.1727177 | NA | NA |
─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.5.0 (2025-04-11)
os Ubuntu 22.04.5 LTS
system x86_64, linux-gnu
ui X11
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz America/Costa_Rica
date 2025-09-11
pandoc 3.2 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/tools/x86_64/ (via rmarkdown)
quarto 1.7.31 @ /usr/local/bin/quarto
─ Packages ───────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
abind 1.4-8 2024-09-12 [1] CRAN (R 4.5.0)
backports 1.5.0 2024-05-23 [1] CRAN (R 4.5.0)
bayesplot 1.12.0 2025-04-10 [1] CRAN (R 4.5.0)
bridgesampling 1.1-2 2021-04-16 [1] CRAN (R 4.5.0)
brms * 2.22.0 2024-09-23 [1] CRAN (R 4.5.0)
Brobdingnag 1.2-9 2022-10-19 [1] CRAN (R 4.5.0)
bslib 0.9.0 2025-01-30 [1] CRAN (R 4.5.0)
cachem 1.1.0 2024-05-16 [1] CRAN (R 4.5.0)
caret * 7.0-1 2024-12-10 [1] CRAN (R 4.5.0)
cellranger 1.1.0 2016-07-27 [3] CRAN (R 4.0.1)
checkmate 2.3.3 2025-08-18 [1] CRAN (R 4.5.0)
class 7.3-23 2025-01-01 [4] CRAN (R 4.4.2)
cli 3.6.5 2025-04-23 [1] CRAN (R 4.5.0)
cluster 2.1.8.1 2025-03-12 [4] CRAN (R 4.4.3)
coda 0.19-4.1 2024-01-31 [1] CRAN (R 4.5.0)
codetools 0.2-20 2024-03-31 [4] CRAN (R 4.5.0)
crayon 1.5.3 2024-06-20 [1] CRAN (R 4.5.0)
crosstalk 1.2.1 2023-11-23 [3] CRAN (R 4.5.0)
data.table 1.17.0 2025-02-22 [3] CRAN (R 4.5.0)
devtools 2.4.5 2022-10-11 [1] CRAN (R 4.5.0)
digest 0.6.37 2024-08-19 [1] CRAN (R 4.5.0)
distributional 0.5.0 2024-09-17 [1] CRAN (R 4.5.0)
dplyr 1.1.4 2023-11-17 [1] CRAN (R 4.5.0)
DT * 0.33 2024-04-04 [3] CRAN (R 4.5.0)
ellipsis 0.3.2 2021-04-29 [3] CRAN (R 4.1.1)
emmeans 1.11.1 2025-05-04 [3] CRAN (R 4.5.0)
estimability 1.5.1 2024-05-12 [3] CRAN (R 4.5.0)
evaluate 1.0.4 2025-06-18 [1] CRAN (R 4.5.0)
farver 2.1.2 2024-05-13 [1] CRAN (R 4.5.0)
fastmap 1.2.0 2024-05-15 [1] CRAN (R 4.5.0)
foreach 1.5.2 2022-02-02 [3] CRAN (R 4.1.2)
formatR * 1.14 2023-01-17 [1] CRAN (R 4.5.0)
fs 1.6.6 2025-04-12 [1] CRAN (R 4.5.0)
future 1.49.0 2025-05-09 [1] CRAN (R 4.5.0)
future.apply 1.11.3 2024-10-27 [1] CRAN (R 4.5.0)
generics 0.1.4 2025-05-09 [1] CRAN (R 4.5.0)
ggplot2 * 3.5.2 2025-04-09 [1] CRAN (R 4.5.0)
ggrepel 0.9.6 2024-09-07 [1] CRAN (R 4.5.0)
ggvegan 0.1.999 2025-09-05 [1] Github (gavinsimpson/ggvegan@3056a43)
globals 0.18.0 2025-05-08 [1] CRAN (R 4.5.0)
glue 1.8.0 2024-09-30 [1] CRAN (R 4.5.0)
gower 1.0.2 2024-12-17 [3] CRAN (R 4.5.0)
gridExtra 2.3 2017-09-09 [1] CRAN (R 4.5.0)
gtable 0.3.6 2024-10-25 [1] CRAN (R 4.5.0)
hardhat 1.4.1 2025-01-31 [1] CRAN (R 4.5.0)
htmltools 0.5.8.1 2024-04-04 [1] CRAN (R 4.5.0)
htmlwidgets 1.6.4 2023-12-06 [1] RSPM (R 4.5.0)
httpuv 1.6.16 2025-04-16 [1] RSPM (R 4.5.0)
ipred 0.9-15 2024-07-18 [3] CRAN (R 4.5.0)
iterators 1.0.14 2022-02-05 [3] CRAN (R 4.1.2)
jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.5.0)
jsonlite 2.0.0 2025-03-27 [1] CRAN (R 4.5.0)
knitr * 1.50 2025-03-16 [1] CRAN (R 4.5.0)
labeling 0.4.3 2023-08-29 [1] CRAN (R 4.5.0)
later 1.4.2 2025-04-08 [1] RSPM (R 4.5.0)
lattice * 0.22-7 2025-04-02 [4] CRAN (R 4.5.0)
lava 1.8.1 2025-01-12 [3] CRAN (R 4.5.0)
lifecycle 1.0.4 2023-11-07 [1] CRAN (R 4.5.0)
listenv 0.9.1 2024-01-29 [1] CRAN (R 4.5.0)
loo 2.8.0 2024-07-03 [1] CRAN (R 4.5.0)
lubridate 1.9.4 2024-12-08 [3] CRAN (R 4.5.0)
magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.5.0)
MASS 7.3-65 2025-02-28 [4] CRAN (R 4.4.3)
Matrix 1.7-3 2025-03-11 [4] CRAN (R 4.4.3)
matrixStats 1.5.0 2025-01-07 [1] CRAN (R 4.5.0)
memoise 2.0.1 2021-11-26 [3] CRAN (R 4.1.2)
mgcv 1.9-3 2025-04-04 [4] CRAN (R 4.5.0)
mime 0.13 2025-03-17 [1] CRAN (R 4.5.0)
miniUI 0.1.2 2025-04-17 [3] CRAN (R 4.5.0)
ModelMetrics 1.2.2.2 2020-03-17 [3] CRAN (R 4.0.1)
multcomp 1.4-28 2025-01-29 [3] CRAN (R 4.5.0)
mvtnorm 1.3-3 2025-01-10 [1] CRAN (R 4.5.0)
nlme 3.1-168 2025-03-31 [4] CRAN (R 4.4.3)
nnet 7.3-20 2025-01-01 [4] CRAN (R 4.4.2)
packrat 0.9.3 2025-06-16 [1] CRAN (R 4.5.0)
parallelly 1.44.0 2025-05-07 [1] CRAN (R 4.5.0)
permute * 0.9-8 2025-06-25 [1] CRAN (R 4.5.0)
pillar 1.11.0 2025-07-04 [1] CRAN (R 4.5.0)
pkgbuild 1.4.8 2025-05-26 [1] CRAN (R 4.5.0)
pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.5.0)
pkgload 1.4.0 2024-06-28 [1] CRAN (R 4.5.0)
plyr 1.8.9 2023-10-02 [1] CRAN (R 4.5.0)
posterior 1.6.1 2025-02-27 [1] CRAN (R 4.5.0)
pROC 1.18.0 2021-09-03 [3] CRAN (R 4.1.1)
prodlim 2025.04.28 2025-04-28 [3] CRAN (R 4.5.0)
profvis 0.4.0 2024-09-20 [1] CRAN (R 4.5.0)
promises 1.3.3 2025-05-29 [1] RSPM (R 4.5.0)
purrr 1.1.0 2025-07-10 [1] CRAN (R 4.5.0)
R6 2.6.1 2025-02-15 [1] CRAN (R 4.5.0)
randomForest 4.7-1.2 2024-09-22 [3] CRAN (R 4.5.0)
RColorBrewer 1.1-3 2022-04-03 [1] CRAN (R 4.5.0)
Rcpp * 1.1.0 2025-07-02 [1] CRAN (R 4.5.0)
RcppParallel 5.1.10 2025-01-24 [1] CRAN (R 4.5.0)
readxl * 1.4.5 2025-03-07 [3] CRAN (R 4.5.0)
recipes 1.3.0 2025-04-17 [1] CRAN (R 4.5.0)
remotes 2.5.0 2024-03-17 [1] CRAN (R 4.5.0)
reshape2 1.4.4 2020-04-09 [1] CRAN (R 4.5.0)
rlang 1.1.6 2025-04-11 [1] CRAN (R 4.5.0)
rmarkdown 2.29 2024-11-04 [1] CRAN (R 4.5.0)
rpart 4.1.24 2025-01-07 [4] CRAN (R 4.4.2)
rstantools 2.4.0 2024-01-31 [1] CRAN (R 4.5.0)
rstudioapi 0.17.1 2024-10-22 [1] CRAN (R 4.5.0)
sandwich 3.1-1 2024-09-15 [3] CRAN (R 4.5.0)
sass 0.4.10 2025-04-11 [1] CRAN (R 4.5.0)
scales 1.4.0 2025-04-24 [1] CRAN (R 4.5.0)
sessioninfo 1.2.3 2025-02-05 [3] CRAN (R 4.5.0)
shiny 1.10.0 2024-12-14 [1] CRAN (R 4.5.0)
sketchy 1.0.5 2025-08-20 [1] CRANs (R 4.5.0)
stringi 1.8.7 2025-03-27 [1] CRAN (R 4.5.0)
stringr 1.5.1 2023-11-14 [1] CRAN (R 4.5.0)
survival 3.8-3 2024-12-17 [4] CRAN (R 4.4.2)
tensorA 0.36.2.1 2023-12-13 [1] CRAN (R 4.5.0)
TH.data 1.1-3 2025-01-17 [3] CRAN (R 4.5.0)
tibble 3.3.0 2025-06-08 [1] RSPM (R 4.5.0)
tidyr 1.3.1 2024-01-24 [1] CRAN (R 4.5.0)
tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.5.0)
timechange 0.3.0 2024-01-18 [3] CRAN (R 4.5.0)
timeDate 4041.110 2024-09-22 [3] CRAN (R 4.5.0)
urlchecker 1.0.1 2021-11-30 [1] CRAN (R 4.5.0)
usethis 3.1.0 2024-11-26 [3] CRAN (R 4.5.0)
vctrs 0.6.5 2023-12-01 [1] CRAN (R 4.5.0)
vegan * 2.7-1 2025-06-05 [1] CRAN (R 4.5.0)
viridis * 0.6.5 2024-01-29 [1] CRAN (R 4.5.0)
viridisLite * 0.4.2 2023-05-02 [1] CRAN (R 4.5.0)
withr 3.0.2 2024-10-28 [1] CRAN (R 4.5.0)
xaringanExtra 0.8.0 2024-05-19 [1] CRAN (R 4.5.0)
xfun 0.53 2025-08-19 [1] CRAN (R 4.5.0)
xtable 1.8-4 2019-04-21 [3] CRAN (R 4.0.1)
yaml 2.3.10 2024-07-26 [1] CRAN (R 4.5.0)
zoo 1.8-14 2025-04-10 [3] CRAN (R 4.5.0)
[1] /home/m/R/x86_64-pc-linux-gnu-library/4.5
[2] /usr/local/lib/R/site-library
[3] /usr/lib/R/site-library
[4] /usr/lib/R/library
* ── Packages attached to the search path.
──────────────────────────────────────────────────────────────────────────────