Objetivo 1: Estadística descriptiva

Author

Luis La Cruz & German Chacón

Published

September 5, 2024

library(readxl)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(egg)
Cargando paquete requerido: gridExtra

Adjuntando el paquete: 'gridExtra'

The following object is masked from 'package:dplyr':

    combine
library(tidyverse)
library(ggplot2)
library(ggpmisc)
Cargando paquete requerido: ggpp
Registered S3 methods overwritten by 'ggpp':
  method                  from   
  heightDetails.titleGrob ggplot2
  widthDetails.titleGrob  ggplot2

Adjuntando el paquete: 'ggpp'

The following object is masked from 'package:ggplot2':

    annotate
#install.packages("broom")
library(broom)
library(ggplot2)
library(patchwork)
library(egg)
library(ggpubr)

Adjuntando el paquete: 'ggpubr'

The following objects are masked from 'package:ggpp':

    as_npc, as_npcx, as_npcy

The following object is masked from 'package:egg':

    ggarrange
library(readxl)
library(tidyverse)
library(egg)
library(tidyverse)
library(ggplot2)


# Load the per-sample table used by every summary and figure below.
dat_clean0 <- read.csv("dat_clean_modified_zscore_especies.csv")

# Keep the file's original `group` column as `group2` and promote `Class`
# to be the grouping variable (`group`) used throughout the analysis.
dat_clean <- dat_clean0 %>%
  rename(group2 = "group") %>%
  rename(group = Class)

# Order the frequency bands from lowest to highest rather than alphabetically.
# (Labels equal the levels, so `labels =` is not needed.)
dat_clean$Banda <- factor(
  dat_clean$Banda,
  levels = c("35-45", "45-90", "90-170", "170-260")
)

# Linear-domain counterpart of the dB-scaled `Value` column.
# NOTE(review): rows with missing Value are kept (the NA filter was disabled).
dat_clean <- dat_clean %>%
  mutate(Value_linear = 10^(Value / 10))

# Common summary statistics of Value per group.
# rstatix is only attached further down the script, so the call is
# namespace-qualified; selecting `Value` directly avoids summarising every
# numeric column just to discard all but one of them.
descriptiva_clean <- dat_clean %>%
  group_by(group) %>%
  rstatix::get_summary_stats(Value, type = "common")

# Keep only n, min, max and median; the mean/sd are recomputed later.
dclass1 <- descriptiva_clean %>%
  select(-variable, -iqr, -mean, -sd, -se, -ci)

library(dplyr)
library(magrittr)  # Para el operador %>%

Adjuntando el paquete: 'magrittr'
The following object is masked from 'package:purrr':

    set_names
The following object is masked from 'package:tidyr':

    extract
library(seewave)

Adjuntando el paquete: 'seewave'
The following object is masked from 'package:lubridate':

    duration
The following object is masked from 'package:readr':

    spec
# Per-group dB mean and dB standard deviation (seewave), plus the standard
# deviation of the linear values. mutate() repeats each group's value on
# every row of that group.
# NOTE(review): meandB() runs with its default `level` while sddB() is given
# level = "SPL"; confirm that mixing the two conventions is intended.
descriptiva_clean2 <- dat_clean %>%
  group_by(group) %>%
  mutate(
    mean_dB = meandB(Value),
    sd_dB = sddB(Value, level = "SPL"),
    sd_lineal = sd(Value_linear)
  )

# Collapse to one row per group (the columns are constant within a group,
# so the mean simply recovers that constant).
summary_table <- descriptiva_clean2 %>%
  group_by(group) %>%
  summarise(
    mean_dB = mean(mean_dB),
    mean_sd_dB = mean(sd_dB),
    mean_sd_lineal = mean(sd_lineal)
  )

# Columns shown in the final table.
d2 <- summary_table %>%
  select(mean_dB, mean_sd_dB)

# Join on the group key instead of cbind(): cbind() pairs rows purely by
# position and would silently mix groups if the two tables were ever ordered
# differently.
# NOTE(review): the "Talla" header is kept as is, although `group` holds
# species names.
Tabla_D <- dclass1 %>%
  dplyr::left_join(
    dplyr::select(summary_table, group, mean_dB, mean_sd_dB),
    by = "group"
  ) %>%
  as.data.frame() %>%
  rename(
    Talla = "group", Mínimo = "min", Máximo = "max", Mediana = "median",
    Media = "mean_dB", sd = "mean_sd_dB"
  ) %>%
  mutate(across(where(is.numeric), ~ round(., 2)))

Tabla_D
         Talla      n Mínimo Máximo Mediana  Media    sd
1    Anchoveta 116908 -80.62 -24.35  -58.11 -48.32 12.05
2       Múnida  10414 -76.16 -42.56  -59.91 -57.20  5.42
3     Plancton  47096 -88.31 -43.35  -67.10 -62.71  7.37
4       Salpas   2675 -73.67 -59.05  -66.45 -65.72  2.46
5 Vinciguerria 155306 -89.28 -42.55  -66.01 -58.33  9.96
# Common summary statistics of Value for every group x frequency band.
# rstatix is not attached yet at this point of the script, hence the
# explicit namespace; `Value` is selected directly instead of filtering the
# summary of all numeric columns afterwards.
descriptiva_clean3 <- dat_clean %>%
  group_by(group, Banda) %>%
  rstatix::get_summary_stats(Value, type = "common")

# Keep n / min / max / median, apply the table headers and round to 2 dp.
d3 <- descriptiva_clean3 %>%
  select(-variable, -iqr, -mean, -sd, -se, -ci) %>%
  dplyr::rename(
    Talla = "group", Mínimo = "min", Máximo = "max", Mediana = "median"
  ) %>%
  mutate(across(where(is.numeric), ~ round(., 2)))
library(dplyr)
library(magrittr)  # Para el operador %>%
library(seewave)

# Per group x band dB mean and dB standard deviation (seewave), plus the
# standard deviation of the linear values, repeated on every row.
descriptiva_clean4 <- dat_clean %>%
  group_by(group, Banda) %>%
  mutate(
    mean_dB = meandB(Value),
    sd_dB = sddB(Value, level = "SPL"),
    sd_lineal = sd(Value_linear)
  )

# One row per group x band. `.groups = "drop_last"` is what summarise() does
# by default here (result stays grouped by `group`); stating it makes the
# grouping explicit and silences the informational message.
summary_table4 <- descriptiva_clean4 %>%
  group_by(group, Banda) %>%
  summarise(
    mean_dB = mean(mean_dB),
    mean_sd_dB = mean(sd_dB),
    mean_sd_lineal = mean(sd_lineal),
    .groups = "drop_last"
  )
`summarise()` has grouped output by 'group'. You can override using the
`.groups` argument.
# Columns needed for the band table. `summary_table4` is still grouped by
# `group`, so select() would re-add that column on its own ("Adding missing
# grouping variables"); naming it explicitly yields the same columns without
# relying on that implicit behaviour.
d4 <- summary_table4 %>%
  select(group, mean_dB, mean_sd_dB)
Adding missing grouping variables: `group`
# Attach the dB mean / sd to the band-level summary by matching on species
# AND band. The previous cbind() paired rows by position only and never
# checked the band, so any difference in row order would have produced a
# silently wrong table.
band_means <- summary_table4 %>%
  dplyr::ungroup() %>%
  dplyr::select(group, Banda, mean_dB, mean_sd_dB)

Tabla_D <- d3 %>%
  dplyr::left_join(
    band_means,
    by = c("Talla" = "group", "Banda" = "Banda")
  ) %>%
  # Keep a `group` column so Tabla_D has the same columns as before.
  dplyr::mutate(group = Talla, .before = mean_dB) %>%
  as.data.frame()

# Printed version: drop the duplicate key, apply headers, round to 2 dp.
Tabla_D %>%
  select(-group) %>%
  rename(Media = "mean_dB", sd = "mean_sd_dB") %>%
  mutate(across(where(is.numeric), ~ round(., 2)))
     Banda        Talla     n Mínimo Máximo Mediana  Media    sd
1    35-45    Anchoveta  5251 -72.61 -26.47  -51.21 -43.94  9.68
2    45-90    Anchoveta 20203 -76.79 -24.47  -56.25 -46.86 11.99
3   90-170    Anchoveta 37553 -79.77 -24.35  -57.79 -47.83 12.29
4  170-260    Anchoveta 53901 -80.62 -29.41  -59.44 -50.55 11.16
5    35-45       Múnida   492 -76.16 -53.53  -63.78 -62.17  4.19
6    45-90       Múnida  1886 -75.35 -46.60  -64.91 -62.57  5.45
7   90-170       Múnida  3034 -71.56 -42.56  -61.54 -58.34  5.79
8  170-260       Múnida  5002 -69.51 -43.92  -56.89 -55.51  4.03
9    35-45     Plancton  2160 -88.31 -53.12  -68.82 -64.84  6.57
10   45-90     Plancton  8278 -87.04 -43.59  -70.50 -65.63  8.24
11  90-170     Plancton 14701 -87.89 -43.77  -66.56 -62.12  7.38
12 170-260     Plancton 21957 -87.92 -43.35  -65.88 -62.19  6.87
13   35-45       Salpas    87 -68.16 -59.08  -61.72 -61.65  1.99
14   45-90       Salpas   465 -73.67 -59.07  -68.01 -67.41  2.90
15  90-170       Salpas   781 -69.08 -59.05  -66.42 -65.62  2.00
16 170-260       Salpas  1342 -70.61 -59.24  -66.11 -65.72  2.02
17   35-45 Vinciguerria  7209 -87.93 -42.56  -63.50 -56.71  8.74
18   45-90 Vinciguerria 27786 -89.28 -42.55  -67.88 -58.41 11.25
19  90-170 Vinciguerria 46489 -83.68 -42.55  -67.39 -58.44 10.81
20 170-260 Vinciguerria 73822 -82.09 -42.55  -65.04 -58.43  9.12
# Frequency response per group: individual traces coloured by group with a
# black GAM trend on top, one facet per group.
# `linewidth` replaces `size`/`lwd`, which are deprecated for line geoms
# since ggplot2 3.4.0; `FALSE` replaces the reassignable shorthand `F`.
ggplot(dat_clean, aes(y = Value, x = as.numeric(Frequency))) +
  geom_line(
    aes(color = group),
    alpha = 0.5, linewidth = 1, show.legend = FALSE
  ) +
  geom_smooth(
    linewidth = 1, method = "gam", color = "black", show.legend = FALSE
  ) +
  theme_presentation(base_size = 14) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  # Dashed guides at the band edges and every 10 dB.
  geom_vline(
    xintercept = c(38, 45, 90, 170, 260),
    linetype = c("dashed"), color = "gray"
  ) +
  geom_hline(
    yintercept = c(-80, -70, -60, -50, -40, -30, -20),
    linetype = c("dashed"), color = "gray"
  ) +
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.title = element_blank()) +
  facet_wrap(~group, nrow = 3)

# GAM trend only (no raw traces), coloured and faceted by group.
# `linewidth` replaces `size` (deprecated for lines since ggplot2 3.4.0).
ggplot(dat_clean, aes(y = Value, x = as.numeric(Frequency))) +
  geom_smooth(
    aes(color = group),
    linewidth = 1, method = "gam", show.legend = FALSE
  ) +
  theme_presentation(base_size = 14) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  geom_vline(
    xintercept = c(38, 45, 90, 170, 260),
    linetype = c("dashed"), color = "gray"
  ) +
  geom_hline(
    yintercept = c(-80, -70, -60, -50, -40, -30, -20),
    linetype = c("dashed"), color = "gray"
  ) +
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.title = element_blank()) +
  facet_wrap(~group, nrow = 3)

# Frequency response per group with a linear fit (black) and its equation
# printed in each facet; this is the figure exported as FM_modas_LM.png.
# `linewidth` replaces `size`/`lwd` (deprecated for lines since ggplot2 3.4.0).
firmas_group_LM <- ggplot(
  dat_clean, aes(y = Value, x = as.numeric(Frequency))
) +
  geom_line(
    aes(color = group),
    alpha = 0.5, linewidth = 1, show.legend = FALSE
  ) +
  geom_smooth(
    linewidth = 1, method = "lm", color = "black", show.legend = FALSE
  ) +
  stat_regline_equation() +
  theme_presentation(base_size = 18) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  geom_vline(
    xintercept = c(38, 45, 90, 170, 260),
    linetype = c("dashed"), color = "gray"
  ) +
  geom_hline(
    yintercept = c(-80, -70, -60, -50, -40, -30, -20),
    linetype = c("dashed"), color = "gray"
  ) +
  scale_color_viridis_d() +
  facet_wrap(~group, nrow = 3)

firmas_group_LM

# Linear trend only, coloured and faceted by group.
# `linewidth` replaces `size` (deprecated for lines since ggplot2 3.4.0).
ggplot(dat_clean, aes(y = Value, x = as.numeric(Frequency))) +
  geom_smooth(
    aes(color = group),
    linewidth = 1, method = "lm", show.legend = FALSE
  ) +
  theme_presentation(base_size = 14) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  geom_vline(
    xintercept = c(38, 45, 90, 170, 260),
    linetype = c("dashed"), color = "gray"
  ) +
  geom_hline(
    yintercept = c(-80, -70, -60, -50, -40, -30, -20),
    linetype = c("dashed"), color = "gray"
  ) +
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.title = element_blank()) +
  facet_wrap(~group, nrow = 3)

# Write the linear-fit figure to disk: PNG, 12 x 6 inches at 1000 dpi.
ggsave(
  "FM_modas_LM.png",
  plot = firmas_group_LM,
  device = "png",
  path = "F:/Tesis abordo/Tesis abordo/Figuras/Objetivo01/",
  width = 12,
  height = 6,
  dpi = 1000
)
# Linear trend of Sv against frequency for all groups in one panel (legend
# shown; this plot also supplies the legend of the combined figure).
#
# Fixes relative to the previous version:
# * The y-range is set with coord_cartesian() instead of scale limits.
#   Scale limits discard every observation outside [-65, -45] BEFORE the
#   smoother is fitted, so the regression lines were estimated on a
#   truncated data set; coord_cartesian() only zooms the view.
# * ggplot() has no `alpha` argument, so the stray `alpha = 0.5` was dropped.
# * `linewidth` replaces `size` (deprecated for lines since ggplot2 3.4.0).
w <- ggplot(dat_clean) +
  geom_smooth(
    aes(y = Value, x = as.numeric(Frequency), color = group),
    linewidth = 1.5, method = "lm", show.legend = TRUE, alpha = 0.5
  ) +
  coord_cartesian(ylim = c(-65, -45)) +
  theme_presentation(base_size = 16.5) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  geom_vline(
    xintercept = c(38, 45, 90, 170, 260),
    linetype = c("dashed"), color = "gray"
  ) +
  geom_hline(
    yintercept = c(-70, -60, -50, -40),
    linetype = c("dashed"), color = "gray"
  ) +
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.title = element_blank()) +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  ) +
  theme(
    panel.grid.major.x = element_line(color = "gray", linetype = "dashed")
  )



# GAM trend of Sv against frequency for all groups in one panel (no legend).
#
# The y-range is set with coord_cartesian() rather than scale limits: scale
# limits remove every observation outside [-65, -45] before the GAM is
# fitted, which biases the curves; coord_cartesian() only zooms the view.
# `linewidth` replaces `size` (deprecated for lines since ggplot2 3.4.0).
k <- ggplot(dat_clean) +
  geom_smooth(
    aes(y = Value, x = as.numeric(Frequency), color = group),
    linewidth = 1.5, method = "gam", show.legend = FALSE
  ) +
  theme_presentation(base_size = 16.5) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  coord_cartesian(ylim = c(-65, -45)) +
  scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  geom_vline(
    xintercept = c(38, 45, 90, 170, 260),
    linetype = c("dashed"), color = "gray"
  ) +
  geom_hline(
    yintercept = c(-70, -60, -50, -40),
    linetype = c("dashed"), color = "gray"
  ) +
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.title = element_blank()) +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  ) +
  theme(
    panel.grid.major.x = element_line(color = "gray", linetype = "dashed")
  )


library(cowplot)

# Two-panel figure: (a) GAM fit `k`, (b) linear fit `w`, with one shared
# legend in a narrow third column.

# Extract the legend from `w`. The legend text size has to be set on the
# plot BEFORE extraction: once the legend is a grob inside plot_grid(),
# adding theme() to the combined plot can no longer restyle it (the old
# post-hoc theme() call was a no-op). The legend title stays hidden, as
# defined in `w`. get_legend() is namespace-qualified because ggpubr exports
# a function with the same name.
legend_w <- cowplot::get_legend(
  w + theme(legend.text = element_text(size = 19))
)

# Assemble the panels; legends are hidden inside the panels themselves.
# `rel_heights` was dropped: the grid has a single row, so only the relative
# widths matter.
combined_plot <- cowplot::plot_grid(
  k + theme(legend.position = "none"),  # panel (a): GAM fit
  w + theme(legend.position = "none"),  # panel (b): linear fit
  legend_w,
  ncol = 3,
  rel_widths = c(1, 1, 0.35),
  labels = c("(a)", "(b)", ""),
  align = "h"
)

# Draw the combined figure.
print(combined_plot)

library(emuR)

# Mean spectrum per group: average the linear-domain values at each
# frequency. `.groups = "drop_last"` states the default regrouping (result
# grouped by `group`) explicitly and silences the informational message.
dat_mean <- dat_clean %>%
  group_by(group, Frequency) %>%
  summarise(track_value = mean(10^(Value / 10)), .groups = "drop_last")

# DCT-based reconstruction of each group's mean spectrum.
# NOTE(review): `m` is left at its default, which presumably uses every
# coefficient, so `reconstructed` may simply reproduce `track_value` —
# confirm against the emuR::dct documentation.
# `TRUE` replaces the reassignable shorthand `T`.
dat_mean2 <- dat_mean %>%
  group_by(group) %>%
  mutate(reconstructed = emuR::dct(track_value, fit = TRUE))


# Mean spectral signature per species (reconstructed spectrum back in dB).
#
# theme_presentation() is a complete theme, so it must come BEFORE the
# theme() tweaks; added last (as before) it overwrote the dashed grid-line
# settings. `linewidth` replaces `size` (deprecated since ggplot2 3.4.0).
firmas_tallas <- ggplot(dat_mean2) +
  geom_line(
    aes(
      x = as.numeric(Frequency),
      y = 10 * log10(reconstructed),
      color = group
    ),
    alpha = 0.7, linewidth = 2
  ) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  geom_vline(
    xintercept = c(38, 45, 90, 170, 260),
    linetype = c("dashed"), color = "gray"
  ) +
  geom_hline(
    yintercept = c(-70, -60, -50, -40),
    linetype = c("dashed"), color = "gray"
  ) +
  scale_color_viridis_d(name = "Especie") +
  theme_presentation() +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  ) +
  theme(
    panel.grid.major.x = element_line(color = "gray", linetype = "dashed")
  )

firmas_tallas

# Write the summary signature figure: PNG, 12 x 5 inches at 1000 dpi.
ggsave(
  "Resumen_firmas_modales3.png",
  plot = firmas_tallas,
  device = "png",
  path = "F:/Tesis abordo/Tesis abordo/Figuras/Objetivo01/",
  width = 12,
  height = 5,
  dpi = 1000
)
# Individual traces per group (coloured) with the group's mean spectrum from
# `dat_mean2` drawn on top, one facet per group.
# `linewidth` replaces `size`/`lwd` (deprecated for lines since
# ggplot2 3.4.0); `FALSE` replaces `F`.
firmas_group <- ggplot(
  dat_clean, aes(y = Value, x = as.numeric(Frequency))
) +
  geom_line(
    aes(color = group),
    alpha = 0.5, linewidth = 1, show.legend = FALSE
  ) +
  theme_presentation() +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  geom_vline(
    xintercept = c(38, 45, 90, 170, 260),
    linetype = c("dashed"), color = "gray"
  ) +
  geom_hline(
    yintercept = c(-80, -70, -60, -50, -40, -30, -20),
    linetype = c("dashed"), color = "gray"
  ) +
  scale_color_viridis_d() +
  # Mean spectrum overlay, converted back to dB.
  geom_line(
    data = dat_mean2,
    aes(x = as.numeric(Frequency), y = 10 * log10(reconstructed)),
    linewidth = 1
  ) +
  facet_wrap(~group, nrow = 3)


firmas_group

# Write the faceted signature figure: PNG, 10 x 10 inches at 1000 dpi.
ggsave(
  "6.FM_modas_espectral.png",
  plot = firmas_group,
  device = "png",
  path = "F:/Tesis abordo/Tesis abordo/Figuras/Objetivo01/",
  width = 10,
  height = 10,
  dpi = 1000
)
# DCT-smoothed versions of each group's mean spectrum, using coefficients
# k0..k1 up to k0..k6 (m = 0 alone cannot be requested).
#
# The DCT treats the values as an ordered sequence, so rows are sorted by
# numeric frequency within each group first; this is a no-op when the data
# are already in order, and it protects against text-sorted frequencies.
# Smoothing is done on linear values and can dip below zero, in which case
# the later log10() conversion yields NaN.
# `TRUE` replaces the reassignable shorthand `T`.
sS_dftlong.mean <- dat_mean2 %>%
  group_by(group) %>%
  arrange(as.numeric(Frequency), .by_group = TRUE) %>%
  mutate(
    smoothed_k0tok1 = emuR::dct(track_value, m = 1, fit = TRUE),
    smoothed_k0tok2 = emuR::dct(track_value, m = 2, fit = TRUE),
    smoothed_k0tok3 = emuR::dct(track_value, m = 3, fit = TRUE),
    smoothed_k0tok4 = emuR::dct(track_value, m = 4, fit = TRUE),
    smoothed_k0tok5 = emuR::dct(track_value, m = 5, fit = TRUE),
    smoothed_k0tok6 = emuR::dct(track_value, m = 6, fit = TRUE)
  )
# Draw one DCT-smoothed spectrum (in dB) per group.
#
# smoothed_col: name (string) of a `smoothed_k0tok*` column of
#   `sS_dftlong.mean`.
# Returns the ggplot object; called at top level it is printed.
#
# Replaces four copy-pasted plots that differed only in the column plotted.
# Also: `linewidth` replaces the deprecated `size`; the fill scale was
# dropped because no layer maps `fill`; the legend title now reads "Especie"
# because `group` holds species, not lengths.
plot_dct_smooth <- function(smoothed_col) {
  group_palette <- c(
    "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
    "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
  )
  ggplot(sS_dftlong.mean) +
    geom_line(
      aes(
        x = as.numeric(Frequency),
        y = 10 * log10(.data[[smoothed_col]]),
        color = group
      ),
      linewidth = 2
    ) +
    labs(x = "Frecuencia", y = "10*log10(Value)") +
    scale_color_manual(name = "Especie", values = group_palette)
}

plot_dct_smooth("smoothed_k0tok1")

plot_dct_smooth("smoothed_k0tok2")

plot_dct_smooth("smoothed_k0tok3")

# Negative smoothed values become NaN under log10() and are not drawn.
plot_dct_smooth("smoothed_k0tok4")
Warning in FUN(X[[i]], ...): Se han producido NaNs

#############

# Draw one DCT-smoothed spectrum (in dB) per group.
#
# smoothed_col: name (string) of a `smoothed_k0tok*` column of
#   `sS_dftlong.mean`.
# Returns the ggplot object; called at top level it is printed.
#
# Replaces two copy-pasted plots that differed only in the column plotted.
# Also: `linewidth` replaces the deprecated `size`; the fill scale was
# dropped because no layer maps `fill`; the legend title now reads "Especie"
# because `group` holds species, not lengths.
plot_dct_smooth <- function(smoothed_col) {
  group_palette <- c(
    "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
    "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
  )
  ggplot(sS_dftlong.mean) +
    geom_line(
      aes(
        x = as.numeric(Frequency),
        y = 10 * log10(.data[[smoothed_col]]),
        color = group
      ),
      linewidth = 2
    ) +
    labs(x = "Frecuencia", y = "10*log10(Value)") +
    scale_color_manual(name = "Especie", values = group_palette)
}

plot_dct_smooth("smoothed_k0tok5")

plot_dct_smooth("smoothed_k0tok6")

# Linear trend of Sv against frequency per group, one facet per frequency
# band (free x-axis per band).
#
# * coord_cartesian() replaces scale limits: limits drop all observations
#   outside [-65, -45] before the regression is fitted, so the lines were
#   estimated on truncated data; coord_cartesian() only zooms the view.
# * ggplot() has no `alpha` argument; the stray value was removed.
# * `linewidth` replaces `size` (deprecated for lines since ggplot2 3.4.0).
ggplot(dat_clean) +
  geom_smooth(
    aes(y = Value, x = as.numeric(Frequency), color = group),
    linewidth = 1.5, method = "lm", show.legend = TRUE, alpha = 0.5
  ) +
  coord_cartesian(ylim = c(-65, -45)) +
  theme_presentation(base_size = 16.5) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  geom_hline(
    yintercept = c(-70, -60, -50, -40),
    linetype = c("dashed"), color = "gray"
  ) +
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.title = element_blank()) +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  ) +
  theme(
    panel.grid.major.x = element_line(color = "gray", linetype = "dashed")
  ) +
  facet_wrap(facets = "Banda", ncol = 4, scales = "free_x")

# Empirical cumulative distribution of Sv per species; visual companion to
# the pairwise Kolmogorov-Smirnov tests.
# `linewidth` replaces `size` (deprecated for lines since ggplot2 3.4.0) and
# `TRUE` replaces the reassignable shorthand `T`.
Figura03 <- ggplot(
  dat_clean, aes(x = Value, group = group, color = group)
) +
  stat_ecdf(linewidth = 1.5, pad = TRUE, alpha = 0.7) +
  theme_presentation(base_size = 14) +
  scale_x_continuous(breaks = c(-90, -80, -70, -60, -50, -40, -30, -20)) +
  scale_color_viridis_d(name = "Especie") +
  xlab("Sv (dB)") +
  ylab("ECDF") +
  ggtitle("Prueba K-S") +
  theme(legend.position = "right") +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  ) +
  theme(
    panel.grid.major.x = element_line(color = "gray", linetype = "dashed")
  )

Figura03

# Write the ECDF figure: PNG, 8 x 4 inches at 1000 dpi.
ggsave(
  "Kolmo_Sv_modas.png",
  plot = Figura03,
  device = "png",
  path = "F:/Tesis abordo/Tesis abordo/Figuras/Objetivo01/",
  width = 8,
  height = 4,
  dpi = 1000
)
# Pairwise two-sample Kolmogorov-Smirnov tests of Value between all groups.
#
# Produces `results`: one row per unordered pair with columns group1,
# group2, D_value (K-S statistic) and p_value, both rounded to 2 decimals.
# A printed p_value of 0 therefore only means p < 0.005.
# NOTE(review): the p-values are not adjusted for multiple comparisons.
#
# Changes relative to the previous loop:
# * pairs come from combn() (same order as the nested loop) instead of
#   `1:(length(groups) - 1)`, which misbehaves with fewer than two groups;
# * rows are collected and bound once rather than rbind()-ed on every
#   iteration;
# * the statistic is unnamed, so row names no longer show up as D, D1, ...

# Unique group labels, in order of first appearance.
groups <- unique(dat_clean$group)

# Empty result with the final column layout (used when < 2 groups exist).
results <- data.frame(
  group1 = character(),
  group2 = character(),
  D_value = numeric(),
  p_value = numeric(),
  stringsAsFactors = FALSE
)

if (length(groups) >= 2) {
  group_pairs <- combn(as.character(groups), 2, simplify = FALSE)

  pair_rows <- lapply(group_pairs, function(pair) {
    # Values of the two groups being compared.
    values_first <- dat_clean$Value[dat_clean$group == pair[1]]
    values_second <- dat_clean$Value[dat_clean$group == pair[2]]

    test_result <- ks.test(values_first, values_second)

    data.frame(
      group1 = pair[1],
      group2 = pair[2],
      D_value = round(unname(test_result$statistic), 2),
      p_value = round(test_result$p.value, 2),
      stringsAsFactors = FALSE
    )
  })

  results <- do.call(rbind, pair_rows)
}

# Show the pairwise results.
print(results)
         group1       group2 D_value p_value
D      Plancton Vinciguerria    0.10       0
D1     Plancton       Salpas    0.29       0
D2     Plancton       Múnida    0.45       0
D3     Plancton    Anchoveta    0.51       0
D4 Vinciguerria       Salpas    0.27       0
D5 Vinciguerria       Múnida    0.38       0
D6 Vinciguerria    Anchoveta    0.43       0
D7       Salpas       Múnida    0.65       0
D8       Salpas    Anchoveta    0.70       0
D9       Múnida    Anchoveta    0.20       0
library(ggplot2)

# Boxplot of Sv per species.
# * The x-axis shows species, so its label now reads "Especie" (it
#   previously said "Talla (cm)"), matching the exported Kruskal-Wallis
#   figure.
# * `linewidth` replaces `size` (deprecated for box outlines since
#   ggplot2 3.4.0); `FALSE` replaces `F`.
ggplot(dat_clean) +
  geom_boxplot(
    aes(fill = group, y = Value, x = group),
    alpha = 0.5, linewidth = 0.75, show.legend = FALSE
  ) +
  theme_presentation(base_size = 16) +
  ylab("Sv (dB)") +
  xlab("Especie") +
  scale_fill_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.position = "top") +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  )

library(ggplot2)
library(dplyr)
library(rstatix)

# Omnibus Kruskal-Wallis test of Value (dB) across groups.
kruskal_result <- kruskal_test(dat_clean, Value ~ group)

kruskal_result
# A tibble: 1 × 6
  .y.        n statistic    df     p method        
* <chr>  <int>     <dbl> <int> <dbl> <chr>         
1 Value 332399    82584.     4     0 Kruskal-Wallis
# Pairwise Wilcoxon tests between groups, with bracket y-positions added so
# the table can be passed to stat_pvalue_manual().
df_wilcox_tallas <- add_y_position(
  pairwise_wilcox_test(dat_clean, Value ~ group),
  step.increase = 0.2
)

df_wilcox_tallas
# A tibble: 10 × 11
   .y.   group1  group2     n1     n2 statistic         p     p.adj p.adj.signif
   <chr> <chr>   <chr>   <int>  <int>     <dbl>     <dbl>     <dbl> <chr>       
 1 Value Anchov… Múnida 116908  10414   7.29e 8 2.51e-244 7.53e-244 ****        
 2 Value Anchov… Planc… 116908  47096   4.63e 9 0         0         ****        
 3 Value Anchov… Salpas 116908   2675   2.74e 8 0         0         ****        
 4 Value Anchov… Vinci… 116908 155306   1.42e10 0         0         ****        
 5 Value Múnida  Planc…  10414  47096   3.96e 8 0         0         ****        
 6 Value Múnida  Salpas  10414   2675   2.39e 7 0         0         ****        
 7 Value Múnida  Vinci…  10414 155306   1.19e 9 0         0         ****        
 8 Value Planct… Salpas  47096   2675   5.72e 7 1.48e- 15 2.96e- 15 ****        
 9 Value Planct… Vinci…  47096 155306   3.11e 9 0         0         ****        
10 Value Salpas  Vinci…   2675 155306   1.97e 8 4.58e-  6 4.58e-  6 ****        
# ℹ 2 more variables: y.position <dbl>, groups <named list>
# Same Kruskal-Wallis test on the linear-domain values. The test is rank
# based and dB -> linear is a monotone transform, so the statistic matches
# the one obtained on Value.
kruskal_result_lineal <- kruskal_test(dat_clean, Value_linear ~ group)

kruskal_result_lineal
# A tibble: 1 × 6
  .y.               n statistic    df     p method        
* <chr>         <int>     <dbl> <int> <dbl> <chr>         
1 Value_linear 332399    82584.     4     0 Kruskal-Wallis
# Pairwise Wilcoxon tests on the linear-domain values (rank based, so the
# test results mirror those obtained on the dB values).
df_wilcox_tallas_lineal <- add_y_position(
  pairwise_wilcox_test(dat_clean, Value_linear ~ group),
  step.increase = 0.2
)

df_wilcox_tallas_lineal
# A tibble: 10 × 11
   .y.    group1 group2     n1     n2 statistic         p     p.adj p.adj.signif
   <chr>  <chr>  <chr>   <int>  <int>     <dbl>     <dbl>     <dbl> <chr>       
 1 Value… Ancho… Múnida 116908  10414   7.29e 8 2.51e-244 7.53e-244 ****        
 2 Value… Ancho… Planc… 116908  47096   4.63e 9 0         0         ****        
 3 Value… Ancho… Salpas 116908   2675   2.74e 8 0         0         ****        
 4 Value… Ancho… Vinci… 116908 155306   1.42e10 0         0         ****        
 5 Value… Múnida Planc…  10414  47096   3.96e 8 0         0         ****        
 6 Value… Múnida Salpas  10414   2675   2.39e 7 0         0         ****        
 7 Value… Múnida Vinci…  10414 155306   1.19e 9 0         0         ****        
 8 Value… Planc… Salpas  47096   2675   5.72e 7 1.48e- 15 2.96e- 15 ****        
 9 Value… Planc… Vinci…  47096 155306   3.11e 9 0         0         ****        
10 Value… Salpas Vinci…   2675 155306   1.97e 8 4.58e-  6 4.58e-  6 ****        
# ℹ 2 more variables: y.position <dbl>, groups <named list>
# Boxplot of Sv per species annotated with the Kruskal-Wallis result and the
# pairwise Wilcoxon brackets from `df_wilcox_tallas`.
# `linewidth` replaces `size` (deprecated for box outlines since
# ggplot2 3.4.0); `FALSE` replaces `F`.
# NOTE(review): `label.y = 50` places the Kruskal-Wallis label far above the
# data range (Sv is negative); confirm this is where the label is wanted.
kw_total <- ggplot(dat_clean) +
  geom_boxplot(
    aes(fill = group, y = Value, x = group),
    alpha = 0.5, linewidth = 0.75, show.legend = FALSE
  ) +
  theme_presentation(base_size = 16) +
  scale_y_continuous(breaks = seq(-90, -20, 10)) +
  ylab("Sv (dB)") +
  xlab("Especie") +
  theme(legend.position = "none") +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  ) +
  stat_kruskal_test(
    aes(y = Value, x = group, group = group),
    p.adjust.method = "bonferroni", label.x = 1.25, label.y = 50
  ) +
  # Brackets are coloured and stacked by the first group of each pair.
  stat_pvalue_manual(
    df_wilcox_tallas,
    color = "group1", step.group.by = "group1",
    tip.length = 0, step.increase = 0.01
  ) +
  scale_color_viridis_d() +
  scale_fill_viridis_d()


kw_total

# Write the Kruskal-Wallis boxplot: PNG, 6 x 9 inches at 1000 dpi.
ggsave(
  "kw_total_sv_ob1.png",
  plot = kw_total,
  device = "png",
  path = "F:/Tesis abordo/Tesis abordo/Figuras/Objetivo01/",
  width = 6,
  height = 9,
  dpi = 1000
)
library(ggplot2)
library(dplyr)
library(rstatix)


# Kruskal-Wallis test of Value across groups, run separately per band.
kruskal_result_banda <- kruskal_test(
  group_by(dat_clean, Banda),
  Value ~ group
)

kruskal_result_banda
# A tibble: 4 × 7
  Banda   .y.        n statistic    df     p method        
* <fct>   <chr>  <int>     <dbl> <int> <dbl> <chr>         
1 35-45   Value  15199     7003.     4     0 Kruskal-Wallis
2 45-90   Value  58618    23328.     4     0 Kruskal-Wallis
3 90-170  Value 102558    29421.     4     0 Kruskal-Wallis
4 170-260 Value 156024    28335.     4     0 Kruskal-Wallis
# Pairwise Wilcoxon tests between groups within each frequency band, with
# bracket y-positions for stat_pvalue_manual().
df_wilcox_tallas_grupos <- add_y_position(
  pairwise_wilcox_test(group_by(dat_clean, Banda), Value ~ group),
  step.increase = 0.2
)

df_wilcox_tallas_grupos
# A tibble: 40 × 12
   Banda .y.   group1    group2          n1    n2 statistic         p     p.adj
   <fct> <chr> <chr>     <chr>        <int> <int>     <dbl>     <dbl>     <dbl>
 1 35-45 Value Anchoveta Múnida        5251   492  2387420  4.03e-213 2.82e-212
 2 35-45 Value Anchoveta Plancton      5251  2160 10906028. 0         0        
 3 35-45 Value Anchoveta Salpas        5251    87   414334. 7.19e- 39 3.60e- 38
 4 35-45 Value Anchoveta Vinciguerria  5251  7209 32691323  0         0        
 5 35-45 Value Múnida    Plancton       492  2160   773090. 4.98e- 56 2.99e- 55
 6 35-45 Value Múnida    Salpas         492    87    15392. 2.94e-  5 8.82e-  5
 7 35-45 Value Múnida    Vinciguerria   492  7209  1586736  9.13e-  5 1.83e-  4
 8 35-45 Value Plancton  Salpas        2160    87    30949  2.41e- 26 9.64e- 26
 9 35-45 Value Plancton  Vinciguerria  2160  7209  3912134. 2.41e-270 1.93e-269
10 35-45 Value Salpas    Vinciguerria    87  7209   357660. 2.4 e-  2 2.4 e-  2
# ℹ 30 more rows
# ℹ 3 more variables: p.adj.signif <chr>, y.position <dbl>, groups <named list>
# Per-band Kruskal-Wallis test on the linear-domain values (rank based, so
# the statistics equal those obtained on the dB values).
kruskal_result_banda_lineal <- kruskal_test(
  group_by(dat_clean, Banda),
  Value_linear ~ group
)

kruskal_result_banda_lineal
# A tibble: 4 × 7
  Banda   .y.               n statistic    df     p method        
* <fct>   <chr>         <int>     <dbl> <int> <dbl> <chr>         
1 35-45   Value_linear  15199     7003.     4     0 Kruskal-Wallis
2 45-90   Value_linear  58618    23328.     4     0 Kruskal-Wallis
3 90-170  Value_linear 102558    29421.     4     0 Kruskal-Wallis
4 170-260 Value_linear 156024    28335.     4     0 Kruskal-Wallis
# Per-band pairwise Wilcoxon tests on the linear-domain values.
df_wilcox_tallas_grupos_lineal <- add_y_position(
  pairwise_wilcox_test(group_by(dat_clean, Banda), Value_linear ~ group),
  step.increase = 0.2
)

df_wilcox_tallas_grupos_lineal
# A tibble: 40 × 12
   Banda .y.          group1    group2    n1    n2 statistic         p     p.adj
   <fct> <chr>        <chr>     <chr>  <int> <int>     <dbl>     <dbl>     <dbl>
 1 35-45 Value_linear Anchoveta Múnida  5251   492  2387420  4.03e-213 2.82e-212
 2 35-45 Value_linear Anchoveta Planc…  5251  2160 10906028. 0         0        
 3 35-45 Value_linear Anchoveta Salpas  5251    87   414334. 7.19e- 39 3.60e- 38
 4 35-45 Value_linear Anchoveta Vinci…  5251  7209 32691323  0         0        
 5 35-45 Value_linear Múnida    Planc…   492  2160   773090. 4.98e- 56 2.99e- 55
 6 35-45 Value_linear Múnida    Salpas   492    87    15392. 2.94e-  5 8.82e-  5
 7 35-45 Value_linear Múnida    Vinci…   492  7209  1586736  9.13e-  5 1.83e-  4
 8 35-45 Value_linear Plancton  Salpas  2160    87    30949  2.41e- 26 9.64e- 26
 9 35-45 Value_linear Plancton  Vinci…  2160  7209  3912134. 2.41e-270 1.93e-269
10 35-45 Value_linear Salpas    Vinci…    87  7209   357660. 2.4 e-  2 2.4 e-  2
# ℹ 30 more rows
# ℹ 3 more variables: p.adj.signif <chr>, y.position <dbl>, groups <named list>
######################################################################

# Boxplots of Sv per species, one facet per frequency band, annotated with
# the per-band Kruskal-Wallis result and the pairwise Wilcoxon brackets from
# `df_wilcox_tallas_grupos`.
# `linewidth` replaces `size` (deprecated for box outlines since
# ggplot2 3.4.0); `FALSE` replaces `F`.
# NOTE(review): the x-axis shows species while its label refers to the
# frequency bands (the facets); confirm the wording.
kw_total_sv_bandas <- ggplot(dat_clean) +
  geom_boxplot(
    aes(fill = group, y = Value, x = group),
    alpha = 0.5, linewidth = 0.75, show.legend = FALSE
  ) +
  theme_presentation(base_size = 13) +
  ylab("Sv (dB)") +
  xlab("Bandas de frecuencia (kHz)") +
  theme(legend.position = "top") +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  ) +
  scale_y_continuous(breaks = seq(-90, -20, 10)) +
  stat_kruskal_test(
    aes(y = Value, x = group, group = group),
    show.legend = FALSE, p.adjust.method = "bonferroni",
    label.x.npc = 0.45, label.y.npc = 0.45
  ) +
  # Brackets are coloured and stacked by the first group of each pair.
  stat_pvalue_manual(
    df_wilcox_tallas_grupos,
    color = "group1", step.group.by = "group1",
    tip.length = 0, step.increase = 0.01
  ) +
  scale_color_viridis_d(name = "Especie") +
  scale_fill_viridis_d(name = "Especie") +
  theme(axis.text.x = element_text(angle = 80, hjust = 1)) +
  facet_wrap(~Banda, ncol = 4)

kw_total_sv_bandas

# Write the per-band Kruskal-Wallis figure: PNG, 12 x 8 inches at 1000 dpi.
ggsave(
  "kw_total_sv_bandas_ob1.png",
  plot = kw_total_sv_bandas,
  device = "png",
  path = "F:/Tesis abordo/Tesis abordo/Figuras/Objetivo01/",
  width = 12,
  height = 8,
  dpi = 1000
)
# Boxplots of Sv per frequency band, one facet per group.
# `linewidth` replaces `size` (deprecated for box outlines since
# ggplot2 3.4.0); `FALSE` replaces the reassignable shorthand `F`.
ggplot(dat_clean) +
  geom_boxplot(
    aes(fill = group, y = Value, x = Banda),
    alpha = 0.5, linewidth = 0.75, show.legend = FALSE
  ) +
  theme_presentation(base_size = 14) +
  ylab("Sv (dB)") +
  xlab("Frecuencia (kHz)") +
  scale_fill_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.position = "top") +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  ) +
  facet_wrap(~group)