Code
# options to customize chunk outputs
::opts_chunk$set(
knitrtidy.opts = list(width.cutoff = 65),
tidy = TRUE,
message = FALSE
)
Análisis estatísticos y visualización de datos
# print link to github repo if any
if (file.exists("./.git/config")){
<- readLines("./.git/config")
config <- grep("url", config, value = TRUE)
url <- gsub("\\turl = |.git$", "", url)
url if (length(url) > 1){
cat("\nSource code and data found at [", url, "](", url, ")", sep = "")
} }
# options to customize chunk outputs
::opts_chunk$set(
knitrtidy.opts = list(width.cutoff = 65),
tidy = TRUE,
message = FALSE
)
# knitr is require for creating html/pdf/word reports formatR is
# used for soft-wrapping code
# install/ load packages
::load_packages(packages = c("knitr", "formatR", "readxl",
sketchy"DT", "ggplot2", "brms", "viridis", "vegan", "caret"))
<- function(...) ggplot2::theme_classic(base_size = 20,
theme_classic ...)
options(DT.options = list(pageLength = 200, scrollX = TRUE, scrollY = "800px",
dom = "Bfrtip", buttons = c("copy", "csv", "excel")))
<- function(x, ...) DT::datatable(data = x, extensions = "Buttons",
datatable2 ...)
<- read_xlsx("./data/raw/FECOP 2025 peces ICE.xlsx")
dat # remove things after '('
$Especie <- gsub(" \\(.*", "", dat$Especie)
dat
$Especie[dat$Especie == "Theraps urderwoodi"] <- "Tomocichla tuba"
dat$Sitio[grepl(dat$Sitio, pattern = "La virgen")] <- "La Virgen"
dat
$Sitio[grepl(dat$Sitio, pattern = "San Miguel")] <- "San Miguel"
dat
# convertir fecha en mes
$mes <- format(as.Date(dat$fecha, format = "%d/%m/%Y"), "%m")
dat
# convertir a espanol
<- c("Ene", "Feb", "Mar", "Abr", "May", "Jun", "Jul",
month.abb.esp "Ago", "Sep", "Oct", "Nov", "Dic")
$mes_chr <- month.abb.esp[as.numeric(dat$mes)]
dat
# hacer numerica
$`Long total cm` <- as.numeric(dat$`Long total cm`)
dat
$`Peso g` <- as.numeric(dat$`Peso g`)
dat
datatable2(dat)
Muestras por sitio
as.data.frame(table(dat$Sitio))
Var1 | Freq |
---|---|
Cariblanco | 99 |
Chilamate | 296 |
El Roble | 111 |
La Guaria | 166 |
La Virgen | 125 |
San Miguel | 60 |
Tirimbina | 185 |
Muestras por especie
<- as.data.frame(table(dat$Especie))
tab_sp
<- tab_sp[order(-tab_sp$Freq), ]
tab_sp
tab_sp
Var1 | Freq | |
---|---|---|
7 | Dajaus monticola | 298 |
14 | Rhamdia laticauda | 271 |
16 | Tomocichla tuba | 148 |
2 | Astyanax orstedii | 132 |
1 | Arcos nudus | 82 |
13 | Poecilia mexicana | 36 |
11 | Joturus pichardi | 20 |
12 | Neetroplus nematopus | 18 |
5 | Brycon costaricensis | 16 |
9 | Gobiomorus dormitor | 6 |
6 | Cribroheros alfari | 5 |
15 | Rhamdia nicaraguensis | 4 |
4 | Awaus banana | 3 |
3 | Aterinella | 1 |
8 | Eretmobrycon scleroparius | 1 |
10 | Hypostomus aspidolepis | 1 |
Solo especies de interés comercial
<- dat
all_spp_dat
<- dat[dat$Especie %in% c("Brycon costaricensis", "Gobiomorus dormitor",
dat "Awaus banana", "Rhamdia laticauda", "Rhamdia nicaraguensis",
"Dajaus monticola", "Joturus pichardi"), ]
<- as.data.frame(table(dat$Especie))
tab_sp
<- tab_sp[order(-tab_sp$Freq), ]
tab_sp
tab_sp
Var1 | Freq | |
---|---|---|
3 | Dajaus monticola | 298 |
6 | Rhamdia laticauda | 271 |
5 | Joturus pichardi | 20 |
2 | Brycon costaricensis | 16 |
4 | Gobiomorus dormitor | 6 |
7 | Rhamdia nicaraguensis | 4 |
1 | Awaus banana | 3 |
Individuos por especie y sitio
$indiv <- 1
dat
# Barras con proporcion de indiv por especie x sitio
<- aggregate(indiv ~ Sitio + Especie, data = dat, FUN = sum)
agg_dat2
$Sitio <- factor(agg_dat2$Sitio, levels = c("La Guaria", "Chilamate",
agg_dat2"El Roble", "Tirimbina", "La Virgen", "San Miguel", "Cariblanco"))
$Especie <- factor(agg_dat2$Especie, levels = tab_sp$Var1)
agg_dat2
<- ggplot(agg_dat2, aes(x = Sitio, y = indiv, fill = Especie)) +
gg1 geom_bar(stat = "identity", position = "fill") + labs(y = "Porcentaje\nde individuos",
x = "Sitio") + theme_classic() + scale_y_continuous(labels = scales::percent,
n.breaks = 4) + scale_fill_viridis_d(alpha = 1, option = "G",
direction = -1) + theme(axis.text.x = element_text(angle = 90,
hjust = 1))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg1)
ggsave(gg1, path = "./scripts/", device = grDevices::png, filename = "especies_por_sitio.png",
width = 10, height = 6, dpi = 300)
}
# Barras con proporcion de indiv por especie x sitio y por mes
<- aggregate(indiv ~ mes + Especie, data = dat, FUN = sum)
agg_dat3
# convertir a espanol
$mes_chr <- month.abb.esp[as.numeric(agg_dat3$mes)]
agg_dat3
$mes_chr <- factor(agg_dat3$mes_chr, levels = month.abb.esp)
agg_dat3
$Especie <- factor(agg_dat3$Especie, levels = tab_sp$Var1)
agg_dat3
# agregar meses faltantes
<- agg_dat3[1:2, ]
sub_agg $indiv <- 0
sub_agg$mes_chr <- c("Ene", "Feb")
sub_agg
<- rbind(agg_dat3, sub_agg)
agg_dat3
<- ggplot(agg_dat3, aes(x = mes_chr, y = indiv, fill = Especie)) +
gg2 geom_bar(stat = "identity", position = "fill") + labs(y = "Porcentaje\nde individuos",
x = "Mes") + theme_classic() + scale_y_continuous(labels = scales::percent,
n.breaks = 4) + scale_fill_viridis_d(alpha = 1, option = "G",
direction = -1) + theme(axis.text.x = element_text(angle = 90,
hjust = 1))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg2)
ggsave(gg2, path = "./scripts/", device = grDevices::png, filename = "especies_por_mes.png",
width = 10, height = 6, dpi = 300)
}
# Barras con proporcion de indiv por especie x sitio y por mes
<- aggregate(indiv ~ mes + Especie + Sitio, data = dat, FUN = sum)
agg_dat3
# convertir a espanol
$mes_chr <- month.abb.esp[as.numeric(agg_dat3$mes)]
agg_dat3
$mes_chr <- factor(agg_dat3$mes_chr, levels = month.abb.esp)
agg_dat3
$Especie <- factor(agg_dat3$Especie, levels = tab_sp$Var1)
agg_dat3
# agregar meses faltantes
<- agg_dat3[1:2, ]
sub_agg $indiv <- 0
sub_agg$mes_chr <- c("Ene", "Feb")
sub_agg
$Sitio <- factor(agg_dat3$Sitio, levels = c("La Guaria", "Chilamate",
agg_dat3"El Roble", "Tirimbina", "La Virgen", "San Miguel", "Cariblanco"))
<- rbind(agg_dat3, sub_agg)
agg_dat3
<- ggplot(agg_dat3, aes(x = mes_chr, y = indiv, fill = Especie)) +
gg3 geom_bar(stat = "identity", position = "fill") + labs(y = "Porcentaje de individuos",
x = "Mes") + theme_classic() + scale_y_continuous(labels = scales::percent,
n.breaks = 4) + scale_fill_viridis_d(alpha = 1, option = "G",
direction = -1) + theme(axis.text.x = element_text(angle = 90,
hjust = 1)) + facet_wrap(~Sitio, scales = "free_y", ncol = 2)
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg3)
ggsave(gg3, path = "./scripts/", device = grDevices::png, filename = "especies_por_mes_sitio.png",
width = 10, height = 6, dpi = 300)
}
Tamaño por especie y mes
<- all_spp_dat[all_spp_dat$Especie %in% c("Rhamdia laticauda",
dat2 "Dajaus monticola", "Joturus pichardi", "Tomocichla tuba"), ]
<- aggregate(`Long total cm` ~ mes + Especie, data = dat2,
agg_dat4 FUN = mean, na.rm = TRUE)
<- function(x) {
se <- na.omit(x)
x sd(x)/sqrt(length(x))
}
$se <- aggregate(`Long total cm` ~ mes + Especie, data = dat2,
agg_dat4FUN = se)$`Long total cm`
$mes_chr <- month.abb.esp[as.numeric(agg_dat4$mes)]
agg_dat4
$mes_chr <- factor(agg_dat4$mes_chr, levels = month.abb.esp)
agg_dat4
$Especie <- factor(agg_dat4$Especie, levels = unique(agg_dat4$Especie))
agg_dat4
<- ggplot(agg_dat4, aes(x = as.numeric(mes), y = `Long total cm`,
gg4 color = Especie)) + geom_line(size = 1, linetype = "dotdash") +
geom_errorbar(aes(ymin = `Long total cm` - se, ymax = `Long total cm` +
width = 0.2) + geom_point(size = 3) + labs(y = "Longitud total (cm)",
se), x = "Mes") + theme_classic() + scale_color_viridis_d(alpha = 1,
option = "G", direction = 1) + scale_x_continuous(breaks = 1:12,
labels = month.abb.esp) + theme(axis.text.x = element_text(angle = 90,
hjust = 1))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg4)
ggsave(gg4, path = "./scripts/", device = grDevices::png, filename = "talla_por_especie_mes.png",
width = 10, height = 4, dpi = 300)
}
datatable2(agg_dat4)
<- all_spp_dat[all_spp_dat$Especie %in% c("Brycon costaricensis",
dat2 "Gobiomorus dormitor", "Joturus pichardi", "Tomocichla tuba"),
]
<- aggregate(`Long total cm` ~ Sitio + Especie, data = dat2,
agg_dat4 FUN = mean, na.rm = TRUE)
<- function(x) {
se <- na.omit(x)
x sd(x)/sqrt(length(x))
}
$se <- aggregate(`Long total cm` ~ Sitio + Especie, data = dat2,
agg_dat4FUN = se)$`Long total cm`
# add missing rows for 'Cariblanco' with 0s
<- agg_dat4[rep(1, 3), ]
car_agg $`Long total cm` <- 0
car_agg$se <- NA
car_agg$Especie <- unique(agg_dat4$Especie)[-1]
car_agg
<- rbind(agg_dat4, car_agg)
agg_dat4
$Sitio <- factor(agg_dat4$Sitio, levels = c("La Guaria", "Chilamate",
agg_dat4"El Roble", "Tirimbina", "La Virgen", "San Miguel", "Cariblanco"))
$Especie <- factor(agg_dat4$Especie, levels = unique(agg_dat4$Especie))
agg_dat4
<- ggplot(agg_dat4, aes(x = Sitio, y = `Long total cm`, fill = Especie,
gg4 group = Especie)) + geom_bar(stat = "identity", position = "dodge",
width = 0.9) + geom_errorbar(aes(ymin = `Long total cm` - se,
ymax = `Long total cm` + se), width = 0.1, position = position_dodge(0.9)) +
labs(y = "Longitud total (cm)", x = "Sitio") + theme_classic() +
scale_color_viridis_d(alpha = 1, option = "G", direction = 1) +
scale_fill_viridis_d(alpha = 1, option = "G", direction = 1) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg4)
ggsave(gg4, path = "./scripts/", device = grDevices::png, filename = "longitud_por_especies_por_sitio.png",
width = 10, height = 4, dpi = 300)
}
datatable2(agg_dat4)
<- all_spp_dat[all_spp_dat$Especie %in% c("Brycon costaricensis",
dat2 "Gobiomorus dormitor", "Joturus pichardi", "Tomocichla tuba"),
]
<- aggregate(`Peso g` ~ Sitio + Especie, data = dat2, FUN = mean,
agg_dat4 na.rm = TRUE)
$se <- aggregate(`Peso g` ~ Sitio + Especie, data = dat2,
agg_dat4FUN = se)$`Peso g`
# add missing rows for 'Cariblanco' with 0s
<- agg_dat4[rep(1, 3), ]
car_agg $`Peso g` <- 0
car_agg$se <- NA
car_agg$Especie <- unique(agg_dat4$Especie)[-1]
car_agg
<- rbind(agg_dat4, car_agg)
agg_dat4
$Sitio <- factor(agg_dat4$Sitio, levels = c("La Guaria", "Chilamate",
agg_dat4"El Roble", "Tirimbina", "La Virgen", "San Miguel", "Cariblanco"))
$Especie <- factor(agg_dat4$Especie, levels = unique(agg_dat4$Especie))
agg_dat4
<- ggplot(agg_dat4, aes(x = Sitio, y = `Peso g`, fill = Especie,
gg4 group = Especie)) + geom_bar(stat = "identity", position = "dodge",
width = 0.9) + geom_errorbar(aes(ymin = `Peso g` - se, ymax = `Peso g` +
width = 0.1, position = position_dodge(0.9)) + labs(y = "Peso (g)",
se), x = "Sitio") + theme_classic() + scale_color_viridis_d(alpha = 1,
option = "G", direction = 1) + scale_fill_viridis_d(alpha = 1,
option = "G", direction = 1) + theme(axis.text.x = element_text(angle = 45,
hjust = 1))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg4)
ggsave(gg4, path = "./scripts/", device = grDevices::png, filename = "peso_por_especies_por_sitio.png",
width = 10, height = 4, dpi = 300)
}
datatable2(agg_dat4)
<- aggregate(`Long total cm` ~ Especie, data = all_spp_dat,
agg_dat0 FUN = mean, na.rm = TRUE)
$se <- aggregate(`Long total cm` ~ Especie, data = all_spp_dat,
agg_dat0FUN = se)$`Long total cm`
<- agg_dat0[order(agg_dat0$`Long total cm`, decreasing = FALSE),
agg_dat0
]
$Especie <- factor(agg_dat0$Especie, levels = agg_dat0$Especie)
agg_dat0
<- ggplot(agg_dat0, aes(x = Especie, y = `Long total cm`)) + geom_bar(stat = "identity",
gg0 position = "dodge", fill = "gray") + geom_errorbar(aes(ymin = `Long total cm` -
ymax = `Long total cm` + se), width = 0.2) + labs(y = "Longitud total (cm)",
se, x = "Especie") + theme_classic() + theme(axis.text.x = element_text(angle = 45,
hjust = 1))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg0)
ggsave(gg0, path = "./scripts/", device = grDevices::png, filename = "talla_por_especie.png",
width = 10, height = 8, dpi = 300)
}
datatable2(agg_dat0)
Peso por especie y mes
<- aggregate(`Peso g` ~ mes + Especie, data = dat2, FUN = mean,
agg_dat5 na.rm = TRUE)
$se <- aggregate(`Peso g` ~ mes + Especie, data = dat2, FUN = se)$`Peso g`
agg_dat5
$mes_chr <- month.abb.esp[as.numeric(agg_dat5$mes)]
agg_dat5
$mes_chr <- factor(agg_dat5$mes_chr, levels = month.abb.esp)
agg_dat5
$Especie <- factor(agg_dat5$Especie, levels = c("Brycon costaricensis",
agg_dat5"Gobiomorus dormitor", "Joturus pichardi", "Tomocichla tuba"))
<- ggplot(agg_dat5, aes(x = as.numeric(mes), y = `Peso g`, color = Especie)) +
gg5 geom_line(size = 1, linetype = "dotdash") + geom_point(size = 3) +
geom_errorbar(aes(ymin = `Peso g` - se, ymax = `Peso g` + se),
width = 0.2) + labs(y = "Peso (g)", x = "Mes") + theme_classic() +
scale_color_viridis_d(alpha = 1, option = "G", direction = 1) +
scale_x_continuous(breaks = 1:12, labels = month.abb.esp) + theme(axis.text.x = element_text(angle = 90,
hjust = 1))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg5)
ggsave(gg5, path = "./scripts/", device = grDevices::png, filename = "peso_por_especie_mes.png",
width = 10, height = 4, dpi = 300)
}
datatable2(agg_dat5)
<- aggregate(`Peso g` ~ Especie, data = all_spp_dat, FUN = mean,
agg_dat0 na.rm = TRUE)
$se <- aggregate(`Peso g` ~ Especie, data = all_spp_dat, FUN = se)$`Peso g`
agg_dat0
<- agg_dat0[order(agg_dat0$`Peso g`, decreasing = FALSE),
agg_dat0
]
$Especie <- factor(agg_dat0$Especie, levels = agg_dat0$Especie)
agg_dat0
<- ggplot(agg_dat0, aes(x = Especie, y = `Peso g`)) + geom_bar(stat = "identity",
gg0 position = "dodge", fill = "gray") + geom_errorbar(aes(ymin = `Peso g` -
ymax = `Peso g` + se), width = 0.2) + labs(y = "Peso (g)",
se, x = "Especie") + theme_classic() + theme(axis.text.x = element_text(angle = 45,
hjust = 1))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg0)
ggsave(gg0, path = "./scripts/", device = grDevices::png, filename = "peso_por_especie.png",
width = 10, height = 8, dpi = 300)
}
datatable2(agg_dat0)
Peso por especie y mes para las que tienen mas datos
<- aggregate(`Long total cm` ~ mes + Especie, data = dat[!dat$Especie %in%
agg_dat6 c("Awaus banana", "Rhamdia nicaraguensis"), ], FUN = mean, na.rm = TRUE)
$mes_chr <- month.abb.esp[as.numeric(agg_dat6$mes)]
agg_dat6
$mes_chr <- factor(agg_dat6$mes_chr, levels = month.abb.esp)
agg_dat6
$Especie <- factor(agg_dat6$Especie, levels = tab_sp$Var1)
agg_dat6
<- ggplot(agg_dat6, aes(x = as.numeric(mes), y = `Long total cm`,
gg6 color = Especie, shape = Especie)) + geom_line(size = 1) + geom_point(size = 2) +
labs(y = "Peso (g)", x = "Mes") + theme_classic() + scale_color_viridis_d(alpha = 1,
option = "G", direction = 1) + scale_x_continuous(breaks = 1:12,
labels = month.abb.esp) + theme(axis.text.x = element_text(angle = 90,
hjust = 1))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg6)
ggsave(gg6, path = "./scripts/", device = grDevices::png, filename = "peso_por_especie_mes_5sp.png",
width = 10, height = 4, dpi = 300)
}
<- read_xlsx("./data/raw/FECOP 2025 peces ICE.xlsx", sheet = "metodo")
dat_met
$Especie[dat_met$Especie == "RhamDía laticauda (rogersi)"] <- "Rhamdia laticauda"
dat_met
<- dat_met[dat_met$Especie %in% c("Dajaus monticola", "Astyanax orstedii",
dat_met "Rhamdia laticauda", "Tomocichla tuba"), ]
table(dat_met$Especie)
Astyanax orstedii Dajaus monticola Rhamdia laticauda Tomocichla tuba
4 4 4 4
# plot abundance by method and period using bars
<- ggplot(dat_met, aes(x = Especie, fill = periodo, y = indivs,
gg7 group = metodo)) + geom_bar(stat = "identity", color = "black",
position = "fill") + labs(y = "Abundancia", x = "Especie", fill = "Periodo") +
theme_classic() + scale_y_continuous(labels = scales::percent,
n.breaks = 4) + scale_fill_manual(values = c("black", "white")) +
facet_wrap(~metodo, ncol = 1) + theme(axis.text.x = element_text(angle = 25,
hjust = 1))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg7)
ggsave(gg7, path = "./scripts/", device = grDevices::png, filename = "abundancia_por_especie_metodo_y_periodo.png",
width = 10, height = 8, dpi = 300)
}
datatable2(dat_met)
<- read_xlsx("./data/raw/FECOP 2025 peces ICE.xlsx", sheet = "peso")
dat_peso $SE <- dat_peso$s/sqrt(dat_peso$N)
dat_peso# dat_met$Especie[dat_met$Especie == 'RhamDía laticauda
# (rogersi)'] <- 'Rhamdia laticauda'
<- dat_peso[dat_peso$Especie %in% c("Dajaus monticola", "Astyanax orstedii",
dat_peso "Poecilia mexicana", "Tomocichla tuba"), ]
$Especie <- factor(dat_peso$Especie, levels = c("Dajaus monticola",
dat_peso"Tomocichla tuba", "Astyanax orstedii", "Poecilia mexicana"))
table(dat_peso$Especie)
Dajaus monticola Tomocichla tuba Astyanax orstedii Poecilia mexicana
2 2 2 2
# plot abundance by method and period using bars
<- ggplot(dat_peso, aes(x = Especie, fill = Periodo, y = Media,
gg8 group = Periodo)) + geom_bar(stat = "identity", color = "black",
position = "dodge") + geom_errorbar(aes(ymin = Media - SE, ymax = Media +
width = 0.2, position = position_dodge(0.9)) + labs(y = "Peso (g)",
SE), x = "Especie", fill = "Periodo") + theme_classic() + scale_fill_manual(values = c("black",
"white")) + theme(axis.text.x = element_text(angle = 25, hjust = 1))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg8)
ggsave(gg8, path = "./scripts/", device = grDevices::png, filename = "peso_por_especie_periodo.png",
width = 10, height = 4, dpi = 300)
}
datatable2(dat_peso)
$año_mes <- paste0(format(as.Date(dat$fecha, format = "%d/%m/%Y"),
dat"%Y"), "-", dat$mes)
sort(unique(dat$año_mes))
<- aggregate(Especie ~ Sitio + año_mes, data = dat, FUN = function(x) length(unique(x))) div_agg
<- read_xls("./data/raw/muestreos ice FINAL para fecop ALL.xls")
bioq
$Sitio[bioq$Sitio == "Guaria"] <- "La Guaria"
bioq$Sitio[bioq$Sitio == "Virgen"] <- "La Virgen"
bioq$Sitio[bioq$Sitio == "Roble"] <- "El Roble"
bioq$Sitio[bioq$Sitio == "SM, bajo la chancha"] <- "San Miguel"
bioq$Sitio[bioq$Sitio == "Chancha"] <- "San Miguel"
bioq
<- aggregate(Especie ~ Sitio, data = all_spp_dat, FUN = function(x) length(unique(x)))
spp_sitio
# unique(bioq$Sitio) unique(dat$Sitio)
# setdiff(unique(bioq$Sitio), unique(dat$Sitio))
<- bioq[bioq$Sitio %in% unique(dat$Sitio), ]
bioq
<- merge(bioq, spp_sitio, by = "Sitio")
bioq
<- sapply(bioq, function(x) sum(is.na(x)))
nNA
# names(nNA)[nNA <= 5]
<- bioq[, c("ox mg/l", "Temp °C", "pH", "Conductividad",
sub_bioq "TDS", "Especie")]
<- sub_bioq[complete.cases(sub_bioq), ] sub_bioq
# random forest with caret predicting diversity based on
# bioquimica
set.seed(123)
<- train(Especie ~ ., data = sub_bioq, method = "rf", importance = TRUE,
rf_fit trControl = trainControl(method = "repeatedcv", number = 100,
repeats = 30))
saveRDS(rf_fit, file = "./data/processed/rf_fit_diversity_bioq.rds")
<- readRDS(file = "./data/processed/rf_fit_diversity_bioq.rds")
rf_fit
# importance
varImp(rf_fit)
rf variable importance
Overall
`Temp °C` 100.00
TDS 74.03
Conductividad 61.82
pH 32.33
`ox mg/l` 0.00
## importance with ggplot2
<- as.data.frame(varImp(rf_fit)$importance)
imp $Variable <- rownames(imp)
imp<- imp[order(imp$Overall), ]
imp $Variable <- factor(imp$Variable, levels = imp$Variable)
imp
<- ggplot(imp, aes(x = Variable, y = Overall)) + geom_bar(stat = "identity",
gg_imp fill = "gray") + coord_flip() + theme_classic() + labs(y = "Importancia (Random Forest)",
x = "Variable fisico-química") + scale_y_continuous(breaks = scales::pretty_breaks(n = 5))
# exclude when knitr is run
if (!isTRUE(getOption("knitr.in.progress"))) {
print(gg_imp)
ggsave(gg_imp, path = "./scripts/", device = grDevices::png, filename = "importancia.png",
width = 10, height = 4, dpi = 300)
}
<- all_spp_dat[all_spp_dat$Especie != "Aterinella", ]
all_spp_dat
# convert species name from 'Genus species' to 'G. species'
$Especie <- gsub("^(\\w)\\w+ ", "\\1. ", all_spp_dat$Especie)
all_spp_dat
$Especie[all_spp_dat$Especie == "D. monticola"] <- "D. monticola"
all_spp_dat
<- lapply(unique(all_spp_dat$Sitio), function(x) {
out
<- all_spp_dat[all_spp_dat$Sitio == x, ]
sub
<- as.data.frame(table(sub$Especie))
tab
# add missing species wtih 0s
<- setdiff(unique(all_spp_dat$Especie), tab$Var1)
missing_spp
if (length(missing_spp) > 0) {
<- data.frame(Var1 = missing_spp, Freq = 0)
add_tab
<- rbind(tab, add_tab)
tab
}
<- tab[order(tab$Var1), ]
tab
return(tab$Freq)
})
<- do.call(rbind, out)
fish_spec
colnames(fish_spec) <- sort(unique(all_spp_dat$Especie))
<- bioq[, c("ox mg/l", "Temp °C", "pH", "Conductividad",
sub_bioq2 "TDS", "Sitio")]
<- aggregate(. ~ Sitio, data = sub_bioq2, FUN = mean, na.rm = TRUE)
fish_chem
# order rows as in unique(all_spp_dat$Sitio)
<- fish_chem[match(unique(all_spp_dat$Sitio), fish_chem$Sitio),
fish_chem
]rownames(fish_spec) <- rownames(fish_chem) <- fish_chem$Sitio
<- fish_chem[, -1]
fish_chem
<- cca(fish_spec ~ `ox mg/l` + `Temp °C` + pH + Conductividad +
fish_cca data = fish_chem)
TDS,
summary(fish_cca)
Call:
cca(formula = fish_spec ~ `ox mg/l` + `Temp °C` + pH + Conductividad + TDS, data = fish_chem)
Partitioning of scaled Chi-square:
Inertia Proportion
Total 1.7195 1.0000
Constrained 1.5468 0.8996
Unconstrained 0.1727 0.1004
Eigenvalues, and their contribution to the scaled Chi-square
Importance of components:
CCA1 CCA2 CCA3 CCA4 CCA5 CA1
Eigenvalue 0.6717 0.4629 0.1922 0.13054 0.08941 0.1727
Proportion Explained 0.3907 0.2692 0.1118 0.07592 0.05200 0.1004
Cumulative Proportion 0.3907 0.6599 0.7716 0.84756 0.89955 1.0000
Accumulated constrained eigenvalues
Importance of components:
CCA1 CCA2 CCA3 CCA4 CCA5
Eigenvalue 0.6717 0.4629 0.1922 0.13054 0.08941
Proportion Explained 0.4343 0.2993 0.1242 0.08439 0.05780
Cumulative Proportion 0.4343 0.7336 0.8578 0.94220 1.00000
<- mako(3, alpha = 0.9, begin = 0.2, end = 0.8)
cols
<- ggvegan::fortify(fish_cca)
fish_cca_df
$score <- as.character(fish_cca_df$score)
fish_cca_df
$score[fish_cca_df$score == "sites"] <- "Sitio"
fish_cca_df$score[fish_cca_df$score == "species"] <- "Especie"
fish_cca_df
<- fish_cca_df[fish_cca_df$score == "Sitio", ]
site_cca_df <- fish_cca_df[fish_cca_df$score == "Especie", ]
sp_cca_df
$vjust <- c(-2.5, 2, 0.2, -1.5, 1.2, 1, 0.5)
site_cca_df$hjust <- c(0.2, 0.5, 1.4, 1, -0.5, 1, -0.1)
site_cca_df
$vjust <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3)
sp_cca_df$hjust <- c(1, 1, 1, 1, 1, 1, -0.1, 1, 1, 1, 1, 1, 1)
sp_cca_df
autoplot(fish_cca) + theme_classic() + xlim(c(-1.9, 2.7)) + ylim(c(-1.5,
2.7)) + labs(x = "CCA1 (43.4%)", y = "CCA2 (30.0%)", color = "",
shape = "") + scale_color_manual(values = cols[c(1, 3)], labels = c("Sitio",
"Especie")) + scale_shape_manual(values = c(20, 1), labels = c("Sitio",
"Especie")) + geom_text(data = site_cca_df, aes(label = label,
x = CCA1, y = CCA2, vjust = vjust, hjust = hjust), size = 3, color = cols[1]) +
geom_text(data = sp_cca_df, aes(label = label, x = CCA1, y = CCA2,
vjust = vjust, hjust = hjust), size = 3, color = cols[3]) +
theme(axis.text = element_text(size = 12), axis.title = element_text(size = 14)) +
theme(legend.position = c(0.85, 0.9), legend.background = element_rect(fill = alpha("white",
0.5)))
anova.cca(fish_cca)
Df | ChiSquare | F | Pr(>F) | |
---|---|---|---|---|
Model | 5 | 1.5467755 | 1.791102 | 0.217 |
Residual | 1 | 0.1727177 | NA | NA |
─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.5.0 (2025-04-11)
os Ubuntu 22.04.5 LTS
system x86_64, linux-gnu
ui X11
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz America/Costa_Rica
date 2025-09-11
pandoc 3.2 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/tools/x86_64/ (via rmarkdown)
quarto 1.7.31 @ /usr/local/bin/quarto
─ Packages ───────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
abind 1.4-8 2024-09-12 [1] CRAN (R 4.5.0)
backports 1.5.0 2024-05-23 [1] CRAN (R 4.5.0)
bayesplot 1.12.0 2025-04-10 [1] CRAN (R 4.5.0)
bridgesampling 1.1-2 2021-04-16 [1] CRAN (R 4.5.0)
brms * 2.22.0 2024-09-23 [1] CRAN (R 4.5.0)
Brobdingnag 1.2-9 2022-10-19 [1] CRAN (R 4.5.0)
bslib 0.9.0 2025-01-30 [1] CRAN (R 4.5.0)
cachem 1.1.0 2024-05-16 [1] CRAN (R 4.5.0)
caret * 7.0-1 2024-12-10 [1] CRAN (R 4.5.0)
cellranger 1.1.0 2016-07-27 [3] CRAN (R 4.0.1)
checkmate 2.3.3 2025-08-18 [1] CRAN (R 4.5.0)
class 7.3-23 2025-01-01 [4] CRAN (R 4.4.2)
cli 3.6.5 2025-04-23 [1] CRAN (R 4.5.0)
cluster 2.1.8.1 2025-03-12 [4] CRAN (R 4.4.3)
coda 0.19-4.1 2024-01-31 [1] CRAN (R 4.5.0)
codetools 0.2-20 2024-03-31 [4] CRAN (R 4.5.0)
crayon 1.5.3 2024-06-20 [1] CRAN (R 4.5.0)
crosstalk 1.2.1 2023-11-23 [3] CRAN (R 4.5.0)
data.table 1.17.0 2025-02-22 [3] CRAN (R 4.5.0)
devtools 2.4.5 2022-10-11 [1] CRAN (R 4.5.0)
digest 0.6.37 2024-08-19 [1] CRAN (R 4.5.0)
distributional 0.5.0 2024-09-17 [1] CRAN (R 4.5.0)
dplyr 1.1.4 2023-11-17 [1] CRAN (R 4.5.0)
DT * 0.33 2024-04-04 [3] CRAN (R 4.5.0)
ellipsis 0.3.2 2021-04-29 [3] CRAN (R 4.1.1)
emmeans 1.11.1 2025-05-04 [3] CRAN (R 4.5.0)
estimability 1.5.1 2024-05-12 [3] CRAN (R 4.5.0)
evaluate 1.0.4 2025-06-18 [1] CRAN (R 4.5.0)
farver 2.1.2 2024-05-13 [1] CRAN (R 4.5.0)
fastmap 1.2.0 2024-05-15 [1] CRAN (R 4.5.0)
foreach 1.5.2 2022-02-02 [3] CRAN (R 4.1.2)
formatR * 1.14 2023-01-17 [1] CRAN (R 4.5.0)
fs 1.6.6 2025-04-12 [1] CRAN (R 4.5.0)
future 1.49.0 2025-05-09 [1] CRAN (R 4.5.0)
future.apply 1.11.3 2024-10-27 [1] CRAN (R 4.5.0)
generics 0.1.4 2025-05-09 [1] CRAN (R 4.5.0)
ggplot2 * 3.5.2 2025-04-09 [1] CRAN (R 4.5.0)
ggrepel 0.9.6 2024-09-07 [1] CRAN (R 4.5.0)
ggvegan 0.1.999 2025-09-05 [1] Github (gavinsimpson/ggvegan@3056a43)
globals 0.18.0 2025-05-08 [1] CRAN (R 4.5.0)
glue 1.8.0 2024-09-30 [1] CRAN (R 4.5.0)
gower 1.0.2 2024-12-17 [3] CRAN (R 4.5.0)
gridExtra 2.3 2017-09-09 [1] CRAN (R 4.5.0)
gtable 0.3.6 2024-10-25 [1] CRAN (R 4.5.0)
hardhat 1.4.1 2025-01-31 [1] CRAN (R 4.5.0)
htmltools 0.5.8.1 2024-04-04 [1] CRAN (R 4.5.0)
htmlwidgets 1.6.4 2023-12-06 [1] RSPM (R 4.5.0)
httpuv 1.6.16 2025-04-16 [1] RSPM (R 4.5.0)
ipred 0.9-15 2024-07-18 [3] CRAN (R 4.5.0)
iterators 1.0.14 2022-02-05 [3] CRAN (R 4.1.2)
jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.5.0)
jsonlite 2.0.0 2025-03-27 [1] CRAN (R 4.5.0)
knitr * 1.50 2025-03-16 [1] CRAN (R 4.5.0)
labeling 0.4.3 2023-08-29 [1] CRAN (R 4.5.0)
later 1.4.2 2025-04-08 [1] RSPM (R 4.5.0)
lattice * 0.22-7 2025-04-02 [4] CRAN (R 4.5.0)
lava 1.8.1 2025-01-12 [3] CRAN (R 4.5.0)
lifecycle 1.0.4 2023-11-07 [1] CRAN (R 4.5.0)
listenv 0.9.1 2024-01-29 [1] CRAN (R 4.5.0)
loo 2.8.0 2024-07-03 [1] CRAN (R 4.5.0)
lubridate 1.9.4 2024-12-08 [3] CRAN (R 4.5.0)
magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.5.0)
MASS 7.3-65 2025-02-28 [4] CRAN (R 4.4.3)
Matrix 1.7-3 2025-03-11 [4] CRAN (R 4.4.3)
matrixStats 1.5.0 2025-01-07 [1] CRAN (R 4.5.0)
memoise 2.0.1 2021-11-26 [3] CRAN (R 4.1.2)
mgcv 1.9-3 2025-04-04 [4] CRAN (R 4.5.0)
mime 0.13 2025-03-17 [1] CRAN (R 4.5.0)
miniUI 0.1.2 2025-04-17 [3] CRAN (R 4.5.0)
ModelMetrics 1.2.2.2 2020-03-17 [3] CRAN (R 4.0.1)
multcomp 1.4-28 2025-01-29 [3] CRAN (R 4.5.0)
mvtnorm 1.3-3 2025-01-10 [1] CRAN (R 4.5.0)
nlme 3.1-168 2025-03-31 [4] CRAN (R 4.4.3)
nnet 7.3-20 2025-01-01 [4] CRAN (R 4.4.2)
packrat 0.9.3 2025-06-16 [1] CRAN (R 4.5.0)
parallelly 1.44.0 2025-05-07 [1] CRAN (R 4.5.0)
permute * 0.9-8 2025-06-25 [1] CRAN (R 4.5.0)
pillar 1.11.0 2025-07-04 [1] CRAN (R 4.5.0)
pkgbuild 1.4.8 2025-05-26 [1] CRAN (R 4.5.0)
pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.5.0)
pkgload 1.4.0 2024-06-28 [1] CRAN (R 4.5.0)
plyr 1.8.9 2023-10-02 [1] CRAN (R 4.5.0)
posterior 1.6.1 2025-02-27 [1] CRAN (R 4.5.0)
pROC 1.18.0 2021-09-03 [3] CRAN (R 4.1.1)
prodlim 2025.04.28 2025-04-28 [3] CRAN (R 4.5.0)
profvis 0.4.0 2024-09-20 [1] CRAN (R 4.5.0)
promises 1.3.3 2025-05-29 [1] RSPM (R 4.5.0)
purrr 1.1.0 2025-07-10 [1] CRAN (R 4.5.0)
R6 2.6.1 2025-02-15 [1] CRAN (R 4.5.0)
randomForest 4.7-1.2 2024-09-22 [3] CRAN (R 4.5.0)
RColorBrewer 1.1-3 2022-04-03 [1] CRAN (R 4.5.0)
Rcpp * 1.1.0 2025-07-02 [1] CRAN (R 4.5.0)
RcppParallel 5.1.10 2025-01-24 [1] CRAN (R 4.5.0)
readxl * 1.4.5 2025-03-07 [3] CRAN (R 4.5.0)
recipes 1.3.0 2025-04-17 [1] CRAN (R 4.5.0)
remotes 2.5.0 2024-03-17 [1] CRAN (R 4.5.0)
reshape2 1.4.4 2020-04-09 [1] CRAN (R 4.5.0)
rlang 1.1.6 2025-04-11 [1] CRAN (R 4.5.0)
rmarkdown 2.29 2024-11-04 [1] CRAN (R 4.5.0)
rpart 4.1.24 2025-01-07 [4] CRAN (R 4.4.2)
rstantools 2.4.0 2024-01-31 [1] CRAN (R 4.5.0)
rstudioapi 0.17.1 2024-10-22 [1] CRAN (R 4.5.0)
sandwich 3.1-1 2024-09-15 [3] CRAN (R 4.5.0)
sass 0.4.10 2025-04-11 [1] CRAN (R 4.5.0)
scales 1.4.0 2025-04-24 [1] CRAN (R 4.5.0)
sessioninfo 1.2.3 2025-02-05 [3] CRAN (R 4.5.0)
shiny 1.10.0 2024-12-14 [1] CRAN (R 4.5.0)
sketchy 1.0.5 2025-08-20 [1] CRANs (R 4.5.0)
stringi 1.8.7 2025-03-27 [1] CRAN (R 4.5.0)
stringr 1.5.1 2023-11-14 [1] CRAN (R 4.5.0)
survival 3.8-3 2024-12-17 [4] CRAN (R 4.4.2)
tensorA 0.36.2.1 2023-12-13 [1] CRAN (R 4.5.0)
TH.data 1.1-3 2025-01-17 [3] CRAN (R 4.5.0)
tibble 3.3.0 2025-06-08 [1] RSPM (R 4.5.0)
tidyr 1.3.1 2024-01-24 [1] CRAN (R 4.5.0)
tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.5.0)
timechange 0.3.0 2024-01-18 [3] CRAN (R 4.5.0)
timeDate 4041.110 2024-09-22 [3] CRAN (R 4.5.0)
urlchecker 1.0.1 2021-11-30 [1] CRAN (R 4.5.0)
usethis 3.1.0 2024-11-26 [3] CRAN (R 4.5.0)
vctrs 0.6.5 2023-12-01 [1] CRAN (R 4.5.0)
vegan * 2.7-1 2025-06-05 [1] CRAN (R 4.5.0)
viridis * 0.6.5 2024-01-29 [1] CRAN (R 4.5.0)
viridisLite * 0.4.2 2023-05-02 [1] CRAN (R 4.5.0)
withr 3.0.2 2024-10-28 [1] CRAN (R 4.5.0)
xaringanExtra 0.8.0 2024-05-19 [1] CRAN (R 4.5.0)
xfun 0.53 2025-08-19 [1] CRAN (R 4.5.0)
xtable 1.8-4 2019-04-21 [3] CRAN (R 4.0.1)
yaml 2.3.10 2024-07-26 [1] CRAN (R 4.5.0)
zoo 1.8-14 2025-04-10 [3] CRAN (R 4.5.0)
[1] /home/m/R/x86_64-pc-linux-gnu-library/4.5
[2] /usr/local/lib/R/site-library
[3] /usr/lib/R/site-library
[4] /usr/lib/R/library
* ── Packages attached to the search path.
──────────────────────────────────────────────────────────────────────────────