Objetivo 2: Estadística descriptiva

Author

Luis La Cruz & German Chacón

Published

September 2, 2024

library(readxl)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(egg)
Cargando paquete requerido: gridExtra

Adjuntando el paquete: 'gridExtra'

The following object is masked from 'package:dplyr':

    combine
library(tidyverse)
library(ggplot2)
library(ggpmisc)
Cargando paquete requerido: ggpp
Registered S3 methods overwritten by 'ggpp':
  method                  from   
  heightDetails.titleGrob ggplot2
  widthDetails.titleGrob  ggplot2

Adjuntando el paquete: 'ggpp'

The following object is masked from 'package:ggplot2':

    annotate
#install.packages("broom")
library(broom)
library(ggplot2)
library(patchwork)
library(egg)
library(ggpubr)

Adjuntando el paquete: 'ggpubr'

The following objects are masked from 'package:ggpp':

    as_npc, as_npcx, as_npcy

The following object is masked from 'package:egg':

    ggarrange
library(readxl)
library(tidyverse)
library(egg)
library(tidyverse)
library(ggplot2)
# Load the cleaned dataset from the working directory.
dat_clean <- read.csv("dat_clean_modified_zscore_anchoveta.csv")

# Fix the order of the size-class levels explicitly so tables and plots follow
# this order rather than alphabetical order. Labels default to the levels.
dat_clean$group <- factor(
  dat_clean$group,
  levels = c("3.5", "4", "5", "7.5", "10.5", "11", "12", "12.5", "13.5")
)

# Same for the frequency bands.
dat_clean$Banda <- factor(
  dat_clean$Banda,
  levels = c("35-45", "45-90", "90-170", "170-260")
)
#library(explore)
#explore(dat_clean)

# Summary statistics of `Value` for each size class.
# Only `Value` is passed to get_summary_stats(): previously every numeric
# column was summarised and all rows except `Value` were then thrown away.
# The filter is kept as a cheap guard so the result keeps exactly one row
# per size class.
descriptiva_clean <- dat_clean %>%
  group_by(group) %>%
  get_summary_stats(Value, type = "common") %>%
  dplyr::filter(variable == "Value")

# Keep only n, min, max and median; the mean and sd reported later are
# computed separately.
dclass1 <- descriptiva_clean %>%
  select(-variable, -iqr, -mean, -sd, -se, -ci)

library(dplyr)
library(magrittr)  # Para el operador %>%

Adjuntando el paquete: 'magrittr'
The following object is masked from 'package:purrr':

    set_names
The following object is masked from 'package:tidyr':

    extract
library(seewave)

Adjuntando el paquete: 'seewave'
The following object is masked from 'package:lubridate':

    duration
The following object is masked from 'package:readr':

    spec
# Mean and standard deviation in dB for each size class.
# mean_dB / sd_dB come from seewave::meandB() and seewave::sddB();
# sd_lineal is the plain sd of the `Value_linear` column.
# NOTE(review): meandB() runs with its default `level` while sddB() is given
# level = "SPL" -- confirm both use the dB convention intended for Sv.
descriptiva_clean2 <- dat_clean %>%
  group_by(group) %>%
  mutate(
    mean_dB = meandB(Value),
    sd_dB = sddB(Value, level = "SPL"),
    sd_lineal = sd(Value_linear)
  )

# The three statistics are constant within a group, so averaging them simply
# collapses the data to one row per size class.
summary_table <- descriptiva_clean2 %>%
  group_by(group) %>%
  summarise(
    mean_dB = mean(mean_dB),
    mean_sd_dB = mean(sd_dB),
    mean_sd_lineal = mean(sd_lineal)
  )

# Columns shown in the summary table.
d2 <- summary_table %>%
  select(mean_dB, mean_sd_dB)

# Combine with the order statistics by matching on the size class instead of
# binding by row position, which silently misaligns if the row order of the
# two tables ever differs. as.data.frame() keeps the plain data.frame
# printing that cbind() produced.
Tabla_D <- dclass1 %>%
  left_join(
    select(summary_table, group, mean_dB, mean_sd_dB),
    by = "group"
  ) %>%
  as.data.frame() %>%
  dplyr::rename(
    Talla = "group", Mínimo = "min", Máximo = "max", Mediana = "median",
    Media = "mean_dB", sd = "mean_sd_dB"
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 2)))

Tabla_D
  Talla     n Mínimo Máximo Mediana  Media    sd
1   3.5  4351 -76.76 -40.66  -57.96 -55.28  5.72
2     4  9860 -77.88 -42.14  -61.08 -56.46  7.05
3     5 49446 -80.62 -38.60  -59.60 -55.39  6.90
4   7.5  7080 -80.20 -37.38  -60.98 -53.42 10.44
5  10.5 10881 -70.45 -39.54  -58.61 -54.28  6.95
6    11 12397 -75.80 -34.15  -57.39 -50.78  8.76
7    12 11554 -78.24 -24.47  -52.76 -46.72  8.94
8  12.5  5166 -62.97 -26.84  -45.34 -42.87  5.65
9  13.5  6173 -56.76 -24.35  -42.08 -38.58  6.18
# Summary statistics of `Value` for each size class x frequency band.
# Only `Value` is summarised (previously every numeric column was summarised
# and then discarded); the filter remains as a guard.
descriptiva_clean3 <- dat_clean %>%
  group_by(group, Banda) %>%
  get_summary_stats(Value, type = "common") %>%
  dplyr::filter(variable == "Value")

# Keep n, min, max and median, rename them for the report and round to two
# decimals. across() replaces the superseded mutate_if().
d3 <- descriptiva_clean3 %>%
  select(-variable, -iqr, -mean, -sd, -se, -ci) %>%
  dplyr::rename(
    Talla = "group", Mínimo = "min", Máximo = "max", Mediana = "median"
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 2)))
library(dplyr)
library(magrittr)  # Para el operador %>%
library(seewave)

# Mean and standard deviation in dB for each size class x frequency band
# (seewave::meandB() / seewave::sddB()), plus the sd of `Value_linear`.
# NOTE(review): meandB() runs with its default `level` while sddB() is given
# level = "SPL" -- confirm both use the dB convention intended for Sv.
descriptiva_clean4 <- dat_clean %>%
  group_by(group, Banda) %>%
  mutate(
    mean_dB = meandB(Value),
    sd_dB = sddB(Value, level = "SPL"),
    sd_lineal = sd(Value_linear)
  )

# Collapse to one row per (size class, band). `.groups = "drop_last"` states
# explicitly what summarise() did by default (the result stays grouped by
# `group`, which later code relies on) and silences the grouping message.
summary_table4 <- descriptiva_clean4 %>%
  group_by(group, Banda) %>%
  summarise(
    mean_dB = mean(mean_dB),
    mean_sd_dB = mean(sd_dB),
    mean_sd_lineal = mean(sd_lineal),
    .groups = "drop_last"
  )
`summarise()` has grouped output by 'group'. You can override using the
`.groups` argument.
# Columns shown in the summary table. summary_table4 is still grouped by
# `group`, so select() keeps that column too (and reports that it did).
d4 <- select(summary_table4, mean_dB, mean_sd_dB)
Adding missing grouping variables: `group`
# Bind the order statistics (d3) to the dB mean / sd (d4) by row position.
# NOTE(review): this relies on d3 and d4 listing the (size class, band)
# combinations in the same order -- confirm if either table is ever reordered.
# This overwrites the per-size-class `Tabla_D` created earlier.
Tabla_D <- cbind(d3, d4)

# Print the report table: drop the duplicated `group` key brought in by d4,
# rename the dB columns and round. across() replaces the superseded
# mutate_if(); dplyr::rename matches the qualified call used to build d3.
Tabla_D %>%
  select(-group) %>%
  dplyr::rename(Media = "mean_dB", sd = "mean_sd_dB") %>%
  mutate(across(where(is.numeric), ~ round(.x, 2)))
     Banda Talla     n Mínimo Máximo Mediana  Media    sd
1    35-45   3.5   192 -72.61 -57.62  -65.92 -64.83  2.99
2    45-90   3.5   735 -76.76 -51.99  -66.92 -64.43  5.10
3   90-170   3.5  1472 -73.83 -40.66  -59.47 -55.12  6.37
4  170-260   3.5  1952 -69.88 -43.78  -54.89 -53.85  3.41
5    35-45     4   455 -62.34 -42.22  -49.15 -47.87  3.80
6    45-90     4  1761 -68.38 -42.14  -57.78 -54.49  5.61
7   90-170     4  2886 -71.41 -43.45  -61.24 -58.16  5.18
8  170-260     4  4758 -77.88 -47.23  -63.03 -60.54  4.49
9    35-45     5  2184 -71.04 -39.29  -54.57 -51.66  5.66
10   45-90     5  8340 -76.79 -38.60  -57.98 -53.46  7.25
11  90-170     5 16739 -79.77 -38.65  -59.32 -54.96  6.93
12 170-260     5 22183 -80.62 -45.47  -60.61 -57.78  5.62
13   35-45   7.5   324 -63.60 -40.98  -52.03 -50.10  4.55
14   45-90   7.5  1274 -76.14 -37.43  -58.30 -51.23 10.04
15  90-170   7.5  2068 -77.37 -37.38  -60.44 -53.72 10.00
16 170-260   7.5  3414 -80.20 -38.06  -63.16 -55.05 11.29
17   35-45  10.5   508 -59.65 -39.67  -46.87 -45.92  3.53
18   45-90  10.5  1945 -67.81 -39.54  -55.47 -52.25  5.89
19  90-170  10.5  3182 -68.46 -40.73  -58.26 -55.86  5.07
20 170-260  10.5  5246 -70.45 -44.88  -60.41 -58.28  4.49
21   35-45    11   549 -65.09 -34.36  -51.35 -44.90  7.81
22   45-90    11  2153 -70.48 -34.15  -55.52 -49.64  8.07
23  90-170    11  3961 -75.13 -34.29  -56.67 -50.88  7.79
24 170-260    11  5734 -75.80 -36.98  -59.12 -52.79  8.72
25   35-45    12   528 -65.36 -33.34  -46.45 -43.07  6.06
26   45-90    12  2024 -69.13 -24.47  -50.89 -44.66  9.56
27  90-170    12  3634 -73.66 -30.17  -52.42 -46.02  8.70
28 170-260    12  5368 -78.24 -36.19  -54.84 -49.53  7.83
29   35-45  12.5   228 -53.40 -34.12  -41.74 -40.87  3.84
30   45-90  12.5   872 -58.84 -26.84  -45.38 -41.76  6.98
31  90-170  12.5  1748 -62.97 -29.42  -44.84 -42.11  5.79
32 170-260  12.5  2318 -61.62 -35.30  -45.84 -44.49  4.52
33   35-45  13.5   283 -43.67 -26.47  -35.64 -34.07  3.62
34   45-90  13.5  1099 -52.30 -25.02  -41.42 -37.39  6.14
35  90-170  13.5  1863 -52.06 -24.35  -41.51 -37.80  6.14
36 170-260  13.5  2928 -56.76 -29.41  -43.70 -40.96  5.35
# Raw Sv-vs-frequency traces, one panel per size class, with a black GAM
# smooth on top. Line widths use `linewidth`: `size` is deprecated for lines
# since ggplot2 3.4.0.
ggplot(dat_clean, aes(x = as.numeric(Frequency), y = Value)) +
  geom_line(
    aes(color = group),
    alpha = 0.5, linewidth = 1, show.legend = FALSE
  ) +
  geom_smooth(
    method = "gam", color = "black", linewidth = 1, show.legend = FALSE
  ) +
  theme_presentation(base_size = 14) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  # Dashed guides at the frequency-band edges and every 10 dB.
  geom_vline(
    xintercept = c(38, 45, 90, 170, 260),
    linetype = "dashed", color = "gray"
  ) +
  geom_hline(
    yintercept = c(-80, -70, -60, -50, -40, -30, -20),
    linetype = "dashed", color = "gray"
  ) +
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.title = element_blank()) +
  facet_wrap(~group, nrow = 3)

# GAM smooth of Sv against frequency, coloured by size class, one panel per
# class. `linewidth` replaces `size`, which is deprecated for lines since
# ggplot2 3.4.0.
ggplot(dat_clean, aes(x = as.numeric(Frequency), y = Value)) +
  geom_smooth(
    aes(color = group),
    method = "gam", linewidth = 1, show.legend = FALSE
  ) +
  theme_presentation(base_size = 14) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  # Dashed guides at the frequency-band edges and every 10 dB.
  geom_vline(
    xintercept = c(38, 45, 90, 170, 260),
    linetype = "dashed", color = "gray"
  ) +
  geom_hline(
    yintercept = c(-80, -70, -60, -50, -40, -30, -20),
    linetype = "dashed", color = "gray"
  ) +
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.title = element_blank()) +
  facet_wrap(~group, nrow = 3)

# Raw Sv-vs-frequency traces per size class with a black linear fit and its
# regression equation (ggpubr::stat_regline_equation()).
# `linewidth` replaces `size`, which is deprecated for lines since ggplot2
# 3.4.0; the empty theme() call, which did nothing, was removed.
firmas_group_LM <- ggplot(
  dat_clean,
  aes(x = as.numeric(Frequency), y = Value)
) +
  geom_line(
    aes(color = group),
    alpha = 0.5, linewidth = 1, show.legend = FALSE
  ) +
  geom_smooth(
    method = "lm", color = "black", linewidth = 1, show.legend = FALSE
  ) +
  stat_regline_equation() +
  theme_presentation(base_size = 18) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  # Dashed guides at the frequency-band edges and every 10 dB.
  geom_vline(
    xintercept = c(38, 45, 90, 170, 260),
    linetype = "dashed", color = "gray"
  ) +
  geom_hline(
    yintercept = c(-80, -70, -60, -50, -40, -30, -20),
    linetype = "dashed", color = "gray"
  ) +
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  facet_wrap(~group, nrow = 3)

firmas_group_LM

# Linear fit of Sv against frequency, coloured by size class, one panel per
# class. `linewidth` replaces `size`, which is deprecated for lines since
# ggplot2 3.4.0.
ggplot(dat_clean, aes(x = as.numeric(Frequency), y = Value)) +
  geom_smooth(
    aes(color = group),
    method = "lm", linewidth = 1, show.legend = FALSE
  ) +
  # stat_regline_equation() +
  theme_presentation(base_size = 14) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  # Dashed guides at the frequency-band edges and every 10 dB.
  geom_vline(
    xintercept = c(38, 45, 90, 170, 260),
    linetype = "dashed", color = "gray"
  ) +
  geom_hline(
    yintercept = c(-80, -70, -60, -50, -40, -30, -20),
    linetype = "dashed", color = "gray"
  ) +
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.title = element_blank()) +
  facet_wrap(~group, nrow = 3)

# Write `firmas_group_LM` to disk as a PNG.
# NOTE(review): the target is an absolute path on drive F: -- presumably it
# must exist on the machine rendering this document; confirm before reuse.
ggsave(
  plot = firmas_group_LM,
  filename = "FM_modas_LM.png",
  path = "F:/Tesis abordo/Tesis abordo/Figuras/Objetivo02/",
  device = "png",
  width = 12,   # inches
  height = 6,   # inches
  dpi = 1000    # dots per inch
)
# Panels for the combined figure: `w` (linear fits) and `k` (GAM fits) of Sv
# against frequency, one curve per size class.
#
# The y-range is set with coord_cartesian() rather than
# scale_y_continuous(limits = ...). Scale limits discard every observation
# outside [-65, -45] dB *before* the smoother is fitted, so the curves were
# being estimated from truncated data; coord_cartesian() only zooms the view.
# The stray `alpha` argument to ggplot() (which it ignores) was removed, and
# `linewidth` replaces `size`, deprecated for lines since ggplot2 3.4.0.
w <- ggplot(dat_clean) +
  # geom_line(alpha = 0.1, aes(color = group), lwd = 1, show.legend = T) +
  geom_smooth(
    aes(x = as.numeric(Frequency), y = Value, color = group),
    method = "lm", linewidth = 1.5, alpha = 0.5, show.legend = TRUE
  ) +
  coord_cartesian(ylim = c(-65, -45)) +
  # stat_regline_equation() +
  theme_presentation(base_size = 16.5) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  geom_vline(
    xintercept = c(38, 45, 90, 170, 260),
    linetype = "dashed", color = "gray"
  ) +
  geom_hline(
    yintercept = c(-70, -60, -50, -40),
    linetype = "dashed", color = "gray"
  ) +
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.title = element_blank()) +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  ) +
  theme(
    panel.grid.major.x = element_line(color = "gray", linetype = "dashed")
  )



k <- ggplot(dat_clean) +
  # geom_line(alpha = 0.1, aes(color = group), lwd = 1, show.legend = T) +
  geom_smooth(
    aes(x = as.numeric(Frequency), y = Value, color = group),
    method = "gam", linewidth = 1.5, show.legend = FALSE
  ) +
  # stat_regline_equation() +
  theme_presentation(base_size = 16.5) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  coord_cartesian(ylim = c(-65, -45)) +
  scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  geom_vline(
    xintercept = c(38, 45, 90, 170, 260),
    linetype = "dashed", color = "gray"
  ) +
  geom_hline(
    yintercept = c(-70, -60, -50, -40),
    linetype = "dashed", color = "gray"
  ) +
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.title = element_blank()) +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  ) +
  theme(
    panel.grid.major.x = element_line(color = "gray", linetype = "dashed")
  )


library(cowplot)

# Combine the `k` and `w` panels side by side with one shared legend.

# Extract the legend from `w`. The legend text size has to be set here:
# get_legend() returns an already-built grob, so a theme added to the
# combined plot afterwards (as was done before) cannot restyle it.
# No legend-title size is set because `w` blanks its legend title.
legend_w <- get_legend(w + theme(legend.text = element_text(size = 19)))

# Three columns: panel (a) = k, panel (b) = w, then the narrower legend.
# `rel_heights` was dropped: the grid has a single row, so a three-element
# height vector had no meaning.
combined_plot <- plot_grid(
  k + theme(legend.position = "none"),  # hide the legend of panel k
  w + theme(legend.position = "none"),  # hide the legend of panel w
  legend_w,
  ncol = 3,
  rel_widths = c(1, 1, 0.35),
  labels = c("(a)", "(b)", ""),
  align = "h"
)

# Print the combined figure.
print(combined_plot)

library(emuR)

# Mean backscatter per size class and frequency, averaged in the linear
# domain (10^(Value / 10)) rather than on the dB values.
# `.groups = "drop_last"` makes summarise()'s default explicit: the result
# stays grouped by `group`.
dat_mean <- dat_clean %>%
  group_by(group, Frequency) %>%
  summarise(track_value = mean(10^(Value / 10)), .groups = "drop_last")

# DCT-based reconstruction of each size class's mean spectrum.
# NOTE(review): no `m` is given, so emuR::dct() presumably uses every
# coefficient and `reconstructed` would equal `track_value` (no smoothing) --
# confirm this is intended.
# NOTE(review): assumes the rows within each group are ordered by frequency
# (summarise() sorts by the grouping keys) -- confirm `Frequency` is numeric.
dat_mean2 <- dat_mean %>%
  group_by(group) %>%
  mutate(reconstructed = emuR::dct(track_value, fit = TRUE))


# Mean frequency response per size class: the linear-domain reconstruction
# converted back to dB with 10 * log10().
#
# theme_presentation() now comes BEFORE the panel-grid tweaks. It is a
# complete theme, so adding it last (as before) replaced the whole theme and
# discarded the dashed-grid settings. `linewidth` replaces `size`, which is
# deprecated for lines since ggplot2 3.4.0.
firmas_tallas <- ggplot(dat_mean2) +
  geom_line(
    aes(
      x = as.numeric(Frequency),
      y = 10 * log10(reconstructed),
      color = group
    ),
    alpha = 0.5, linewidth = 2
  ) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  # scale_y_continuous(limits = c(-65, -45)) +
  scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  geom_vline(
    xintercept = c(38, 45, 90, 170, 260),
    linetype = "dashed", color = "gray"
  ) +
  geom_hline(
    yintercept = c(-70, -60, -50, -40),
    linetype = "dashed", color = "gray"
  ) +
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme_presentation() +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  ) +
  theme(
    panel.grid.major.x = element_line(color = "gray", linetype = "dashed")
  )

firmas_tallas

# Write `firmas_tallas` to disk as a PNG.
# NOTE(review): the target is an absolute path on drive F: -- presumably it
# must exist on the machine rendering this document; confirm before reuse.
ggsave(
  plot = firmas_tallas,
  filename = "Resumen_firmas_modales3.png",
  path = "F:/Tesis abordo/Tesis abordo/Figuras/Objetivo02/",
  device = "png",
  width = 12,   # inches
  height = 5,   # inches
  dpi = 1000    # dots per inch
)
# Raw Sv-vs-frequency traces per size class, overlaid with the mean spectrum
# from `dat_mean2` (converted back to dB), drawn in the default line colour.
# `linewidth` replaces `size` / `lwd`; `size` is deprecated for lines since
# ggplot2 3.4.0.
firmas_group <- ggplot(
  dat_clean,
  aes(x = as.numeric(Frequency), y = Value)
) +
  geom_line(
    aes(color = group),
    alpha = 0.5, linewidth = 1, show.legend = FALSE
  ) +
  # geom_smooth(size = 1, method = "gam", color = "black", show.legend = F) +
  theme_presentation(base_size = 18) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  # Dashed guides at the frequency-band edges and every 10 dB.
  geom_vline(
    xintercept = c(38, 45, 90, 170, 260),
    linetype = "dashed", color = "gray"
  ) +
  geom_hline(
    yintercept = c(-80, -70, -60, -50, -40, -30, -20),
    linetype = "dashed", color = "gray"
  ) +
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  # This layer brings its own data and aesthetics; `group` in dat_mean2 sends
  # each mean curve to the matching facet.
  geom_line(
    data = dat_mean2,
    aes(x = as.numeric(Frequency), y = 10 * log10(reconstructed)),
    linewidth = 1
  ) +
  facet_wrap(~group, nrow = 3)


firmas_group

# Write `firmas_group` to disk as a PNG.
# NOTE(review): the file name mentions GAM, but the object saved here is
# `firmas_group` -- confirm the name still describes the figure.
# NOTE(review): the target is an absolute path on drive F: -- presumably it
# must exist on the machine rendering this document; confirm before reuse.
ggsave(
  plot = firmas_group,
  filename = "FM_modas_GAM.png",
  path = "F:/Tesis abordo/Tesis abordo/Figuras/Objetivo02/",
  device = "png",
  width = 12,   # inches
  height = 6,   # inches
  dpi = 1000    # dots per inch
)
# DCT reconstructions of each size class's mean spectrum using an increasing
# number of coefficients (m = 1 ... 6), one column per order, so the effect
# of the truncation order can be compared in the plots that follow.
# `TRUE` replaces the reassignable shorthand `T`.
sS_dftlong.mean <- dat_mean2 %>%
  group_by(group) %>%
  mutate(
    # m = 0 cannot be used to obtain k0 on its own
    smoothed_k0tok1 = emuR::dct(track_value, m = 1, fit = TRUE),
    smoothed_k0tok2 = emuR::dct(track_value, m = 2, fit = TRUE),
    smoothed_k0tok3 = emuR::dct(track_value, m = 3, fit = TRUE),
    smoothed_k0tok4 = emuR::dct(track_value, m = 4, fit = TRUE),
    smoothed_k0tok5 = emuR::dct(track_value, m = 5, fit = TRUE),
    smoothed_k0tok6 = emuR::dct(track_value, m = 6, fit = TRUE)
  )
# Reconstruction with m = 1, shown in dB.
# log10() returns NaN wherever the reconstruction is not positive; those
# points are dropped by geom_line() with a warning.
# `linewidth` replaces the deprecated `size`; the fill scale was removed
# because no layer maps a fill aesthetic.
ggplot(sS_dftlong.mean) +
  geom_line(
    aes(
      x = as.numeric(Frequency),
      y = 10 * log10(smoothed_k0tok1),
      color = group
    ),
    linewidth = 2
  ) +
  # geom_rect(data = bloqueos, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "gray", alpha = 0.75) +  # adjust colour and transparency as needed
  labs(x = "Frecuencia", y = "10*log10(Value)") +
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  )
Warning in FUN(X[[i]], ...): Se han producido NaNs
Warning: Removed 71 rows containing missing values or values outside the scale range
(`geom_line()`).

#############

# Reconstruction with m = 2, shown in dB.
# log10() returns NaN wherever the reconstruction is not positive; those
# points are dropped by geom_line() with a warning.
# `linewidth` replaces the deprecated `size`; the fill scale was removed
# because no layer maps a fill aesthetic.
ggplot(sS_dftlong.mean) +
  geom_line(
    aes(
      x = as.numeric(Frequency),
      y = 10 * log10(smoothed_k0tok2),
      color = group
    ),
    linewidth = 2
  ) +
  # geom_rect(data = bloqueos, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "gray", alpha = 0.75) +  # adjust colour and transparency as needed
  labs(x = "Frecuencia", y = "10*log10(Value)") +
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  )
Warning in FUN(X[[i]], ...): Se han producido NaNs
Warning: Removed 41 rows containing missing values or values outside the scale range
(`geom_line()`).

#############

# Reconstruction with m = 3, shown in dB.
# log10() returns NaN wherever the reconstruction is not positive; those
# points are dropped by geom_line() with a warning.
# `linewidth` replaces the deprecated `size`; the fill scale was removed
# because no layer maps a fill aesthetic.
ggplot(sS_dftlong.mean) +
  geom_line(
    aes(
      x = as.numeric(Frequency),
      y = 10 * log10(smoothed_k0tok3),
      color = group
    ),
    linewidth = 2
  ) +
  # geom_rect(data = bloqueos, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "gray", alpha = 0.75) +  # adjust colour and transparency as needed
  labs(x = "Frecuencia", y = "10*log10(Value)") +
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  )
Warning in FUN(X[[i]], ...): Se han producido NaNs
Warning: Removed 26 rows containing missing values or values outside the scale range
(`geom_line()`).

#############

# Reconstruction with m = 4, shown in dB.
# log10() returns NaN wherever the reconstruction is not positive.
# `linewidth` replaces the deprecated `size`; the fill scale was removed
# because no layer maps a fill aesthetic.
ggplot(sS_dftlong.mean) +
  geom_line(
    aes(
      x = as.numeric(Frequency),
      y = 10 * log10(smoothed_k0tok4),
      color = group
    ),
    linewidth = 2
  ) +
  # geom_rect(data = bloqueos, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "gray", alpha = 0.75) +  # adjust colour and transparency as needed
  labs(x = "Frecuencia", y = "10*log10(Value)") +
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  )
Warning in FUN(X[[i]], ...): Se han producido NaNs

#############

# Reconstruction with m = 5, shown in dB.
# log10() returns NaN wherever the reconstruction is not positive.
# `linewidth` replaces the deprecated `size`; the fill scale was removed
# because no layer maps a fill aesthetic.
ggplot(sS_dftlong.mean) +
  geom_line(
    aes(
      x = as.numeric(Frequency),
      y = 10 * log10(smoothed_k0tok5),
      color = group
    ),
    linewidth = 2
  ) +
  # geom_rect(data = bloqueos, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "gray", alpha = 0.75) +  # adjust colour and transparency as needed
  labs(x = "Frecuencia", y = "10*log10(Value)") +
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  )
Warning in FUN(X[[i]], ...): Se han producido NaNs

#############

# Reconstruction with m = 6, shown in dB.
# log10() returns NaN wherever the reconstruction is not positive.
# `linewidth` replaces the deprecated `size`; the fill scale was removed
# because no layer maps a fill aesthetic.
ggplot(sS_dftlong.mean) +
  geom_line(
    aes(
      x = as.numeric(Frequency),
      y = 10 * log10(smoothed_k0tok6),
      color = group
    ),
    linewidth = 2
  ) +
  # geom_rect(data = bloqueos, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "gray", alpha = 0.75) +  # adjust colour and transparency as needed
  labs(x = "Frecuencia", y = "10*log10(Value)") +
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  )
Warning in FUN(X[[i]], ...): Se han producido NaNs

# Linear fits of Sv against frequency per size class, one panel per
# frequency band (free x-axis per panel).
#
# The y-range is set with coord_cartesian() rather than
# scale_y_continuous(limits = ...): scale limits discard observations outside
# [-65, -45] dB *before* the fit, so the lines were being estimated from
# truncated data. The stray `alpha` argument to ggplot() (which it ignores)
# was removed, and `linewidth` replaces the deprecated `size`.
ggplot(dat_clean) +
  # geom_line(alpha = 0.1, aes(color = group), lwd = 1, show.legend = T) +
  geom_smooth(
    aes(x = as.numeric(Frequency), y = Value, color = group),
    method = "lm", linewidth = 1.5, alpha = 0.5, show.legend = TRUE
  ) +
  coord_cartesian(ylim = c(-65, -45)) +
  # stat_regline_equation() +
  theme_presentation(base_size = 16.5) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  # scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  # geom_vline(xintercept = c(38, 45, 90, 170, 260), linetype = "dashed", color = "gray") +
  geom_hline(
    yintercept = c(-70, -60, -50, -40),
    linetype = "dashed", color = "gray"
  ) +
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.title = element_blank()) +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  ) +
  theme(
    panel.grid.major.x = element_line(color = "gray", linetype = "dashed")
  ) +
  facet_wrap(facets = "Banda", ncol = 4, scales = "free_x")

# Empirical cumulative distribution of Sv for each size class, used to
# illustrate the Kolmogorov-Smirnov comparisons.
# `linewidth` replaces `size` (deprecated for lines since ggplot2 3.4.0) and
# `TRUE` replaces the reassignable shorthand `T`.
Figura03 <- ggplot(
  dat_clean,
  aes(x = Value, group = group, color = group)
) +
  stat_ecdf(linewidth = 1.5, pad = TRUE, alpha = 0.5) +
  theme_presentation(base_size = 14) +
  scale_x_continuous(breaks = c(-90, -80, -70, -60, -50, -40, -30, -20)) +
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  xlab("Sv (dB)") +
  ylab("ECDF") +
  ggtitle("Prueba K-S") +
  theme(legend.position = "right") +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  ) +
  theme(
    panel.grid.major.x = element_line(color = "gray", linetype = "dashed")
  )

Figura03

# Write `Figura03` to disk as a PNG.
# NOTE(review): the target is an absolute path on drive F: -- presumably it
# must exist on the machine rendering this document; confirm before reuse.
ggsave(
  plot = Figura03,
  filename = "Kolmo_Sv_modas.png",
  path = "F:/Tesis abordo/Tesis abordo/Figuras/Objetivo02/",
  device = "png",
  width = 8,    # inches
  height = 4,   # inches
  dpi = 1000    # dots per inch
)
#library(coin)

#oneway_test(Value ~ Class, data = dat_clean)

# Two-sample Kolmogorov-Smirnov test of `Value` for every pair of size
# classes. Requires `dat_clean` with columns `group` and `Value`.
#
# Changes from the previous nested loop:
#   * pairs come from combn() (same pair order), with a guard for fewer than
#     two groups, where `1:(length(groups) - 1)` would have iterated over
#     c(1, 0) and failed;
#   * rows are built in one pass and bound once, not with rbind() per
#     iteration;
#   * p-values keep three significant digits: round(p, 2) turned every
#     p < 0.005 into 0;
#   * the statistic's name is dropped, so rows are no longer labelled
#     "D", "D1", ...
# NOTE(review): p-values are not adjusted for multiple comparisons.

# Unique groups, in order of first appearance in the data.
groups <- unique(dat_clean$group)
group_labels <- as.character(groups)

# Run one K-S test for a pair of group labels and return a one-row data frame.
ks_for_pair <- function(pair) {
  test_result <- ks.test(
    dat_clean$Value[dat_clean$group == pair[1]],
    dat_clean$Value[dat_clean$group == pair[2]]
  )
  data.frame(
    group1 = pair[1],
    group2 = pair[2],
    D_value = round(unname(test_result$statistic), 2),
    p_value = signif(test_result$p.value, 3),
    stringsAsFactors = FALSE
  )
}

if (length(group_labels) < 2) {
  # Nothing to compare: return an empty table with the expected columns.
  results <- data.frame(
    group1 = character(),
    group2 = character(),
    D_value = numeric(),
    p_value = numeric(),
    stringsAsFactors = FALSE
  )
} else {
  group_pairs <- combn(group_labels, 2, simplify = FALSE)
  results <- do.call(rbind, lapply(group_pairs, ks_for_pair))
}

# Show the final table.
print(results)
    group1 group2 D_value p_value
D     13.5    7.5    0.87       0
D1    13.5   12.5    0.27       0
D2    13.5     11    0.77       0
D3    13.5     12    0.61       0
D4    13.5    3.5    0.89       0
D5    13.5   10.5    0.88       0
D6    13.5      4    0.91       0
D7    13.5      5    0.89       0
D8     7.5   12.5    0.75       0
D9     7.5     11    0.22       0
D10    7.5     12    0.40       0
D11    7.5    3.5    0.19       0
D12    7.5   10.5    0.20       0
D13    7.5      4    0.13       0
D14    7.5      5    0.10       0
D15   12.5     11    0.63       0
D16   12.5     12    0.40       0
D17   12.5    3.5    0.75       0
D18   12.5   10.5    0.74       0
D19   12.5      4    0.80       0
D20   12.5      5    0.75       0
D21     11     12    0.25       0
D22     11    3.5    0.12       0
D23     11   10.5    0.12       0
D24     11      4    0.25       0
D25     11      5    0.14       0
D26     12    3.5    0.36       0
D27     12   10.5    0.36       0
D28     12      4    0.45       0
D29     12      5    0.38       0
D30    3.5   10.5    0.10       0
D31    3.5      4    0.21       0
D32    3.5      5    0.11       0
D33   10.5      4    0.18       0
D34   10.5      5    0.11       0
D35      4      5    0.10       0
library(ggplot2)

# Box plot of Sv per size class.
# `linewidth` replaces `size` (deprecated for box outlines since ggplot2
# 3.4.0); `FALSE` replaces the reassignable shorthand `F`.
ggplot(dat_clean) +
  geom_boxplot(
    aes(x = group, y = Value, fill = group),
    alpha = 0.5, linewidth = 0.75, show.legend = FALSE
  ) +
  theme_presentation(base_size = 16) +
  ylab("Sv (dB)") +
  xlab("Talla (cm)") +
  scale_fill_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.position = "top") +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  )

library(ggplot2)
library(dplyr)
library(rstatix)

# Kruskal-Wallis test of `Value` across the size classes (rstatix).
kruskal_result <- kruskal_test(dat_clean, Value ~ group)

kruskal_result
# A tibble: 1 × 6
  .y.        n statistic    df     p method        
* <chr>  <int>     <dbl> <int> <dbl> <chr>         
1 Value 116908    31238.     8     0 Kruskal-Wallis
# Pairwise Wilcoxon tests of `Value` between size classes, with bracket
# y-positions added so the table can be passed to stat_pvalue_manual().
df_wilcox_tallas <- add_y_position(
  pairwise_wilcox_test(dat_clean, Value ~ group),
  step.increase = 0.2
)

df_wilcox_tallas
# A tibble: 36 × 11
   .y.   group1 group2    n1    n2  statistic         p     p.adj p.adj.signif
   <chr> <chr>  <chr>  <int> <int>      <dbl>     <dbl>     <dbl> <chr>       
 1 Value 3.5    4       4351  9860  26060158. 5.94e- 93 5.94e- 92 ****        
 2 Value 3.5    5       4351 49446 118797333  2.88e- 30 1.15e- 29 ****        
 3 Value 3.5    7.5     4351  7080  18618880. 1.21e- 78 1.09e- 77 ****        
 4 Value 3.5    10.5    4351 10881  23644386. 9.12e-  1 9.12e-  1 ns          
 5 Value 3.5    11      4351 12397  23904728  5.7 e- 29 1.71e- 28 ****        
 6 Value 3.5    12      4351 11554  15920789  4.27e-279 6.83e-278 ****        
 7 Value 3.5    12.5    4351  5166   1391016. 0         0         ****        
 8 Value 3.5    13.5    4351  6173    311828  0         0         ****        
 9 Value 4      5       9860 49446 216487780  3.83e- 69 2.68e- 68 ****        
10 Value 4      7.5     9860  7080  35930406. 1   e-  3 2   e-  3 **          
# ℹ 26 more rows
# ℹ 2 more variables: y.position <dbl>, groups <named list>
# Same Kruskal-Wallis test on the linear-domain column `Value_linear`.
# The printed statistic matches the one obtained for `Value`.
kruskal_result_lineal <- kruskal_test(dat_clean, Value_linear ~ group)

kruskal_result_lineal
# A tibble: 1 × 6
  .y.               n statistic    df     p method        
* <chr>         <int>     <dbl> <int> <dbl> <chr>         
1 Value_linear 116908    31238.     8     0 Kruskal-Wallis
# Pairwise Wilcoxon tests on `Value_linear`, with bracket y-positions added.
df_wilcox_tallas_lineal <- add_y_position(
  pairwise_wilcox_test(dat_clean, Value_linear ~ group),
  step.increase = 0.2
)

df_wilcox_tallas_lineal
# A tibble: 36 × 11
   .y.      group1 group2    n1    n2 statistic         p     p.adj p.adj.signif
   <chr>    <chr>  <chr>  <int> <int>     <dbl>     <dbl>     <dbl> <chr>       
 1 Value_l… 3.5    4       4351  9860    2.61e7 5.94e- 93 5.94e- 92 ****        
 2 Value_l… 3.5    5       4351 49446    1.19e8 2.88e- 30 1.15e- 29 ****        
 3 Value_l… 3.5    7.5     4351  7080    1.86e7 1.21e- 78 1.09e- 77 ****        
 4 Value_l… 3.5    10.5    4351 10881    2.36e7 9.12e-  1 9.12e-  1 ns          
 5 Value_l… 3.5    11      4351 12397    2.39e7 5.7 e- 29 1.71e- 28 ****        
 6 Value_l… 3.5    12      4351 11554    1.59e7 4.27e-279 6.83e-278 ****        
 7 Value_l… 3.5    12.5    4351  5166    1.39e6 0         0         ****        
 8 Value_l… 3.5    13.5    4351  6173    3.12e5 0         0         ****        
 9 Value_l… 4      5       9860 49446    2.16e8 3.83e- 69 2.68e- 68 ****        
10 Value_l… 4      7.5     9860  7080    3.59e7 1   e-  3 2   e-  3 **          
# ℹ 26 more rows
# ℹ 2 more variables: y.position <dbl>, groups <named list>
# Box plot of Sv per size class, annotated with the Kruskal-Wallis result and
# the pairwise Wilcoxon brackets from `df_wilcox_tallas`.
# `linewidth` replaces `size` (deprecated for box outlines since ggplot2
# 3.4.0); `FALSE` replaces the reassignable shorthand `F`.
kw_total <- ggplot(dat_clean) +
  geom_boxplot(
    aes(x = group, y = Value, fill = group),
    alpha = 0.5, linewidth = 0.75, show.legend = FALSE
  ) +
  theme_presentation(base_size = 16) +
  scale_y_continuous(breaks = seq(-90, -20, 10)) +
  ylab("Sv (dB)") +
  xlab("Longitud (cm)") +
  scale_fill_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.position = "none") +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  ) +
  # NOTE(review): label.y = 160 is far above the dB range of the boxes;
  # presumably chosen to clear the stacked brackets -- confirm.
  stat_kruskal_test(
    aes(x = group, y = Value, group = group),
    p.adjust.method = "bonferroni", label.x = 1.25, label.y = 160
  ) + # label = "as_detailed_italic"
  stat_pvalue_manual(
    df_wilcox_tallas,
    color = "group1", step.group.by = "group1",
    tip.length = 0, step.increase = 0.02
  ) +
  # Bracket colours, keyed on `group1`.
  # NOTE(review): the values are unnamed, so which colour goes with which
  # size class depends on the ordering of `group1` -- confirm they match the
  # fill palette.
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#ffff00", "#ff8000", "#ff00bf", "#ff0000",
      "#5f5f5f", "#0000ff", "#000080", "#00bf00"
    )
  )


kw_total

# Write `kw_total` to disk as a PNG.
# NOTE(review): the target is an absolute path on drive F: -- presumably it
# must exist on the machine rendering this document; confirm before reuse.
ggsave(
  plot = kw_total,
  filename = "kw_total_sv.png",
  path = "F:/Tesis abordo/Tesis abordo/Figuras/Objetivo02/",
  device = "png",
  width = 6,    # inches
  height = 9,   # inches
  dpi = 1000    # dots per inch
)
library(ggplot2)
library(dplyr)
library(rstatix)


# Kruskal-Wallis test of `Value` across size classes, run separately within
# each frequency band.
kruskal_result_banda <- kruskal_test(
  group_by(dat_clean, Banda),
  Value ~ group
)

kruskal_result_banda
# A tibble: 4 × 7
  Banda   .y.       n statistic    df     p method        
* <fct>   <chr> <int>     <dbl> <int> <dbl> <chr>         
1 35-45   Value  5251     2449.     8     0 Kruskal-Wallis
2 45-90   Value 20203     6692.     8     0 Kruskal-Wallis
3 90-170  Value 37553    10535.     8     0 Kruskal-Wallis
4 170-260 Value 53901    16675.     8     0 Kruskal-Wallis
# Pairwise Wilcoxon tests of `Value` between size classes within each
# frequency band, with bracket y-positions added for plotting.
df_wilcox_tallas_grupos <- add_y_position(
  pairwise_wilcox_test(group_by(dat_clean, Banda), Value ~ group),
  step.increase = 0.2
)

df_wilcox_tallas_grupos
# A tibble: 144 × 12
   Banda .y.   group1 group2    n1    n2 statistic        p    p.adj
   <fct> <chr> <chr>  <chr>  <int> <int>     <dbl>    <dbl>    <dbl>
 1 35-45 Value 3.5    4        192   455       96  1.45e-89 3.34e-88
 2 35-45 Value 3.5    5        192  2184    22242. 5.71e-94 1.54e-92
 3 35-45 Value 3.5    7.5      192   324      325  7.48e-79 1.65e-77
 4 35-45 Value 3.5    10.5     192   508        7  9.50e-93 2.47e-91
 5 35-45 Value 3.5    11       192   549      694. 2.95e-92 7.37e-91
 6 35-45 Value 3.5    12       192   528      544. 9.04e-92 2.17e-90
 7 35-45 Value 3.5    12.5     192   228        0  8.30e-70 1.58e-68
 8 35-45 Value 3.5    13.5     192   283        0  1.88e-76 3.95e-75
 9 35-45 Value 4      5        455  2184   767840. 5.04e-75 1.01e-73
10 35-45 Value 4      7.5      455   324    99446. 9.28e-17 7.42e-16
# ℹ 134 more rows
# ℹ 3 more variables: p.adj.signif <chr>, y.position <dbl>, groups <named list>
# Per-band Kruskal-Wallis test on the linear-domain column `Value_linear`.
# The printed statistics match those obtained for `Value`.
kruskal_result_banda_lineal <- kruskal_test(
  group_by(dat_clean, Banda),
  Value_linear ~ group
)

kruskal_result_banda_lineal
# A tibble: 4 × 7
  Banda   .y.              n statistic    df     p method        
* <fct>   <chr>        <int>     <dbl> <int> <dbl> <chr>         
1 35-45   Value_linear  5251     2449.     8     0 Kruskal-Wallis
2 45-90   Value_linear 20203     6692.     8     0 Kruskal-Wallis
3 90-170  Value_linear 37553    10535.     8     0 Kruskal-Wallis
4 170-260 Value_linear 53901    16675.     8     0 Kruskal-Wallis
# Per-band pairwise Wilcoxon tests on `Value_linear`, with bracket
# y-positions added.
df_wilcox_tallas_grupos_lineal <- add_y_position(
  pairwise_wilcox_test(group_by(dat_clean, Banda), Value_linear ~ group),
  step.increase = 0.2
)

df_wilcox_tallas_grupos_lineal
# A tibble: 144 × 12
   Banda .y.          group1 group2    n1    n2 statistic        p    p.adj
   <fct> <chr>        <chr>  <chr>  <int> <int>     <dbl>    <dbl>    <dbl>
 1 35-45 Value_linear 3.5    4        192   455       96  1.45e-89 3.34e-88
 2 35-45 Value_linear 3.5    5        192  2184    22242. 5.71e-94 1.54e-92
 3 35-45 Value_linear 3.5    7.5      192   324      325  7.48e-79 1.65e-77
 4 35-45 Value_linear 3.5    10.5     192   508        7  9.50e-93 2.47e-91
 5 35-45 Value_linear 3.5    11       192   549      694. 2.95e-92 7.37e-91
 6 35-45 Value_linear 3.5    12       192   528      544. 9.04e-92 2.17e-90
 7 35-45 Value_linear 3.5    12.5     192   228        0  8.30e-70 1.58e-68
 8 35-45 Value_linear 3.5    13.5     192   283        0  1.88e-76 3.95e-75
 9 35-45 Value_linear 4      5        455  2184   767840. 5.04e-75 1.01e-73
10 35-45 Value_linear 4      7.5      455   324    99446. 9.28e-17 7.42e-16
# ℹ 134 more rows
# ℹ 3 more variables: p.adj.signif <chr>, y.position <dbl>, groups <named list>
# Box plots of Sv per size class, one panel per frequency band, annotated
# with the per-band Kruskal-Wallis result and the pairwise Wilcoxon brackets
# from `df_wilcox_tallas_grupos`.
# `linewidth` replaces `size` (deprecated for box outlines since ggplot2
# 3.4.0), `FALSE` replaces `F`, and the blank continuation lines were removed.
kw_total_sv_bandas <- ggplot(dat_clean) +
  geom_boxplot(
    aes(x = group, y = Value, fill = group),
    alpha = 0.5, linewidth = 0.75, show.legend = FALSE
  ) +
  theme_presentation(base_size = 13) +
  ylab("Sv (dB)") +
  xlab("Longitud (cm)") +
  scale_fill_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.position = "none") +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  ) +
  scale_y_continuous(breaks = seq(-90, -20, 10)) +
  stat_kruskal_test(
    aes(x = group, y = Value, group = group),
    p.adjust.method = "bonferroni", label.x.npc = 0.45, label.y.npc = 0.25
  ) + # label = "as_detailed_italic"
  stat_pvalue_manual(
    df_wilcox_tallas_grupos,
    color = "group1", step.group.by = "group1",
    tip.length = 0, step.increase = 0.02
  ) +
  # Bracket colours, keyed on `group1`.
  # NOTE(review): the values are unnamed, so which colour goes with which
  # size class depends on the ordering of `group1` -- confirm they match the
  # fill palette.
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#ffff00", "#ff8000", "#ff00bf", "#ff0000",
      "#5f5f5f", "#0000ff", "#000080", "#00bf00"
    )
  ) +
  facet_wrap(~Banda, ncol = 4)


kw_total_sv_bandas

# Write `kw_total_sv_bandas` to disk as a PNG.
# NOTE(review): the target is an absolute path on drive F: -- presumably it
# must exist on the machine rendering this document; confirm before reuse.
ggsave(
  plot = kw_total_sv_bandas,
  filename = "kw_total_sv_bandas.png",
  path = "F:/Tesis abordo/Tesis abordo/Figuras/Objetivo02/",
  device = "png",
  width = 12,   # inches
  height = 8,   # inches
  dpi = 1000    # dots per inch
)
# Box plots of Sv per frequency band, one panel per size class.
# `linewidth` replaces `size` (deprecated for box outlines since ggplot2
# 3.4.0); `FALSE` replaces the reassignable shorthand `F`.
ggplot(dat_clean) +
  geom_boxplot(
    aes(x = Banda, y = Value, fill = group),
    alpha = 0.5, linewidth = 0.75, show.legend = FALSE
  ) +
  theme_presentation(base_size = 14) +
  ylab("Sv (dB)") +
  xlab("Frecuencia (kHz)") +
  scale_fill_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#ffff00",
      "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.position = "top") +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  ) +
  facet_wrap(~group)