Estadística descriptiva

Author

Luis La Cruz & German Chacón

library(readxl)
library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(egg)

Loading required package: gridExtra

Attaching package: 'gridExtra'

The following object is masked from 'package:dplyr':

    combine

library(tidyverse)
library(ggplot2)
library(ggpmisc)

Loading required package: ggpp
Registered S3 methods overwritten by 'ggpp':
  method                  from   
  heightDetails.titleGrob ggplot2
  widthDetails.titleGrob  ggplot2

Attaching package: 'ggpp'

The following object is masked from 'package:ggplot2':

    annotate

Registered S3 method overwritten by 'ggpmisc':
  method                  from   
  as.character.polynomial polynom

#install.packages("broom")
library(broom)
library(ggplot2)
library(patchwork)
library(egg)
library(ggpubr)


Attaching package: 'ggpubr'

The following objects are masked from 'package:ggpp':

    as_npc, as_npcx, as_npcy

The following object is masked from 'package:egg':

    ggarrange

library(readxl)
library(tidyverse)
library(egg)
library(tidyverse)
library(ggplot2)
# Load the cleaned data set (path is relative to the working directory).
dat_clean <- read.csv("dat_clean_modified_zscore_anchoveta.csv")

# Recode the modal-length class as a factor whose levels follow increasing
# length; each level is relabelled with the bare number.
dat_clean$group <- factor(
  dat_clean$Subclass_n,
  levels = c(
    "Moda 3.5 cm", "Moda 4 cm", "Moda 5 cm", "Moda 7.5 cm", "Moda 10.5 cm",
    "Moda 11 cm", "Moda 12 cm", "Moda 12.5 cm", "Moda 13 cm", "Moda 13.5 cm"
  ),
  labels = c(
    "3.5", "4", "5", "7.5", "10.5",
    "11", "12", "12.5", "13", "13.5"
  )
)

# Fix the order of the frequency bands. The labels are the same as the
# levels, so only the levels are given.
dat_clean$Banda <- factor(
  dat_clean$Banda,
  levels = c("35-45", "45-90", "90-170", "170-260")
)

#library(explore)
#explore(dat_clean)

# Common summary statistics of Value for each size class. Value is named
# explicitly so the other numeric columns are not summarised only to be
# filtered out afterwards.
descriptiva_clean <- dat_clean %>%
  group_by(group) %>%
  get_summary_stats(Value, type = "common")

# Drop the variable label and the statistics that are not reported, leaving
# the size class with n, min, max and median.
dclass1 <- descriptiva_clean %>%
  dplyr::select(-c(variable, iqr, mean, sd, se, ci))

library(dplyr)
library(magrittr)  # Para el operador %>%


Attaching package: 'magrittr'

The following object is masked from 'package:purrr':

    set_names

The following object is masked from 'package:tidyr':

    extract

library(seewave)


Attaching package: 'seewave'

The following object is masked from 'package:lubridate':

    duration

The following object is masked from 'package:readr':

    spec

# Mean and standard deviation of Value per size class using seewave's dB
# helpers, plus the standard deviation of Value_linear; the group statistics
# are attached to every row.
# NOTE(review): meandB() runs with its default `level` while sddB() is given
# level = "SPL" -- confirm that both should not use the same level.
descriptiva_clean2 <- dat_clean %>%
  group_by(group) %>%
  mutate(
    mean_dB = meandB(Value),
    sd_dB = sddB(Value, level = "SPL"),
    sd_lineal = sd(Value_linear)
  )

# Collapse to one row per size class. Each statistic is constant within a
# class, so mean() simply recovers that value.
summary_table <- descriptiva_clean2 %>%
  group_by(group) %>%
  summarise(
    mean_dB = mean(mean_dB),
    mean_sd_dB = mean(sd_dB),
    mean_sd_lineal = mean(sd_lineal)
  )

# dB mean and standard deviation only.
d2 <- summary_table %>%
  select(mean_dB, mean_sd_dB)

# Final table: join on the size class instead of binding columns by row
# position, so a change in row order or a missing class cannot pair the
# wrong rows. as.data.frame() keeps the plain data-frame print layout.
Tabla_D <- dclass1 %>%
  left_join(
    select(summary_table, group, mean_dB, mean_sd_dB),
    by = "group"
  ) %>%
  rename(
    Talla = "group", Mínimo = "min", Máximo = "max", Mediana = "median",
    Media = "mean_dB", sd = "mean_sd_dB"
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 2))) %>%
  as.data.frame()

Tabla_D

   Talla     n Mínimo Máximo Mediana  Media    sd
1    3.5 12220 -77.67 -32.23  -54.77 -51.40  6.65
2      4 13965 -77.88 -33.38  -58.70 -51.73  8.92
3      5 50538 -81.07 -37.82  -59.45 -54.70  7.38
4    7.5 12188 -80.20 -25.30  -57.26 -46.73 12.25
5   10.5 15232 -82.76 -33.05  -58.47 -51.60  8.93
6     11 12397 -75.80 -34.15  -57.39 -50.78  8.76
7     12 12062 -80.11 -24.47  -53.26 -46.91  9.23
8   12.5  9812 -72.99 -25.48  -50.48 -45.11  7.99
9     13   250 -56.00 -37.94  -50.42 -47.66  4.75
10  13.5 14340 -67.25 -18.98  -46.17 -39.42  9.35

# Common summary statistics of Value for each size class within each
# frequency band. Value is named explicitly so the other numeric columns are
# not summarised only to be filtered out afterwards.
descriptiva_clean3 <- dat_clean %>%
  group_by(group, Banda) %>%
  get_summary_stats(Value, type = "common")

# Keep n, min, max and median, rename the columns for display and round the
# numeric columns to two decimals.
d3 <- descriptiva_clean3 %>%
  dplyr::select(-c(variable, iqr, mean, sd, se, ci)) %>%
  dplyr::rename(
    Talla = "group", Mínimo = "min", Máximo = "max", Mediana = "median"
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 2)))

library(dplyr)
library(magrittr)  # Para el operador %>%
library(seewave)

# Mean and standard deviation of Value per size class and frequency band
# using seewave's dB helpers, plus the standard deviation of Value_linear;
# the group statistics are attached to every row.
# NOTE(review): meandB() runs with its default `level` while sddB() is given
# level = "SPL" -- confirm that both should not use the same level.
descriptiva_clean4 <- dat_clean %>%
  group_by(group, Banda) %>%
  mutate(
    mean_dB = meandB(Value),
    sd_dB = sddB(Value, level = "SPL"),
    sd_lineal = sd(Value_linear)
  )

# Collapse to one row per class and band. `.groups = "drop_last"` states the
# grouping summarise() would otherwise pick implicitly (with a message): the
# result stays grouped by `group`.
summary_table4 <- descriptiva_clean4 %>%
  group_by(group, Banda) %>%
  summarise(
    mean_dB = mean(mean_dB),
    mean_sd_dB = mean(sd_dB),
    mean_sd_lineal = mean(sd_lineal),
    .groups = "drop_last"
  )

`summarise()` has grouped output by 'group'. You can override using the
`.groups` argument.

# dB mean and standard deviation per class and band. `group` is selected
# explicitly instead of relying on dplyr re-adding the grouping column, so
# the columns stay (group, mean_dB, mean_sd_dB) whether or not
# summary_table4 is still grouped.
d4 <- summary_table4 %>%
  select(group, mean_dB, mean_sd_dB)

Adding missing grouping variables: `group`

# Put the range statistics (d3) and the dB mean/sd (d4) side by side.
# NOTE(review): cbind() pairs rows by position, so d3 and d4 must list the
# class/band combinations in the same order.
Tabla_D <- cbind(d3, d4)

# Display version: drop the duplicated class column brought in by d4, rename
# the dB columns and round every numeric column to two decimals.
Tabla_D %>%
  select(-group) %>%
  rename(Media = "mean_dB", sd = "mean_sd_dB") %>%
  mutate(across(where(is.numeric), ~ round(.x, 2)))

     Banda Talla     n Mínimo Máximo Mediana  Media    sd
1    35-45   3.5   527 -77.64 -37.01  -62.05 -49.69  9.70
2    45-90   3.5  2063 -77.67 -32.23  -62.68 -50.66 10.64
3   90-170   3.5  4140 -74.40 -34.17  -55.44 -51.01  6.94
4  170-260   3.5  5490 -76.83 -41.22  -53.95 -52.30  4.13
5    35-45     4   660 -64.54 -35.77  -47.17 -44.96  4.92
6    45-90     4  2525 -71.09 -33.38  -55.58 -49.49  8.44
7   90-170     4  4070 -75.42 -33.89  -58.55 -52.56  8.26
8  170-260     4  6710 -77.88 -37.86  -60.89 -54.91  7.81
9    35-45     5  2232 -71.04 -38.73  -54.42 -51.23  5.88
10   45-90     5  8526 -76.79 -37.85  -57.78 -52.80  7.65
11  90-170     5 17099 -81.00 -37.82  -59.18 -54.26  7.43
12 170-260     5 22681 -81.07 -40.68  -60.47 -56.98  6.31
13   35-45   7.5   576 -63.60 -28.70  -48.40 -41.75  8.70
14   45-90   7.5  2204 -76.14 -25.30  -55.10 -45.51 11.06
15  90-170   7.5  3552 -77.37 -25.88  -56.95 -46.97 11.79
16 170-260   7.5  5856 -80.20 -29.09  -59.48 -48.29 13.05
17   35-45  10.5   720 -81.55 -34.96  -46.94 -44.89  5.29
18   45-90  10.5  2753 -82.76 -33.05  -55.45 -49.37  8.80
19  90-170  10.5  4439 -75.59 -34.07  -58.23 -52.48  8.08
20 170-260  10.5  7320 -75.89 -35.73  -60.05 -54.67  7.70
21   35-45    11   549 -65.09 -34.36  -51.35 -44.90  7.81
22   45-90    11  2153 -70.48 -34.15  -55.52 -49.64  8.07
23  90-170    11  3961 -75.13 -34.29  -56.67 -50.88  7.79
24 170-260    11  5734 -75.80 -36.98  -59.12 -52.79  8.72
25   35-45    12   552 -80.11 -33.34  -46.76 -43.27  6.44
26   45-90    12  2116 -80.11 -24.47  -51.18 -44.85  9.91
27  90-170    12  3782 -74.39 -30.17  -52.88 -46.19  8.98
28 170-260    12  5612 -78.24 -36.19  -55.44 -49.72  8.09
29   35-45  12.5   444 -64.23 -34.12  -45.19 -42.65  5.65
30   45-90  12.5  1702 -67.59 -25.48  -49.73 -43.67  9.24
31  90-170  12.5  3152 -70.08 -29.42  -50.05 -44.38  8.00
32 170-260  12.5  4514 -72.99 -35.30  -51.77 -47.00  7.00
33   35-45    13    12 -48.52 -41.33  -43.78 -43.80  2.27
34   45-90    13    44 -54.00 -40.12  -51.02 -49.62  3.45
35  90-170    13    81 -54.66 -37.94  -50.60 -46.72  5.68
36 170-260    13   113 -56.00 -38.55  -49.90 -48.54  3.96
37   35-45  13.5  1303 -59.95 -26.47  -46.35 -39.74  8.04
38   45-90  13.5  1189 -58.71 -19.33  -41.58 -37.00  7.21
39  90-170  13.5  8835 -67.25 -18.98  -48.62 -39.34 11.04
40 170-260  13.5  3013 -63.09 -29.41  -43.88 -41.06  5.41

# Value against frequency, one panel per size class: the raw series coloured
# by class with a black GAM smooth drawn over it.
ggplot(dat_clean, aes(x = as.numeric(Frequency), y = Value)) +
  # Line widths are set with `linewidth`; `size` for lines is deprecated
  # since ggplot2 3.4.0.
  geom_line(
    aes(color = group),
    alpha = 0.5, linewidth = 1, show.legend = FALSE
  ) +
  geom_smooth(
    method = "gam", color = "black", linewidth = 1, show.legend = FALSE
  ) +
  theme_presentation(base_size = 14) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  # Dashed grey reference lines.
  geom_vline(
    xintercept = c(38, 45, 90, 170, 260),
    linetype = "dashed", color = "gray"
  ) +
  geom_hline(
    yintercept = c(-80, -70, -60, -50, -40, -30, -20),
    linetype = "dashed", color = "gray"
  ) +
  # One colour per size class, in factor-level order.
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#008000",
      "#ffff00", "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.title = element_blank()) +
  facet_wrap(~group, nrow = 3)

# GAM smooth of Value against frequency, one panel per size class, with the
# smooth coloured by class.
ggplot(dat_clean, aes(x = as.numeric(Frequency), y = Value)) +
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
  geom_smooth(
    aes(color = group),
    method = "gam", linewidth = 1, show.legend = FALSE
  ) +
  theme_presentation(base_size = 14) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  # Dashed grey reference lines.
  geom_vline(
    xintercept = c(38, 45, 90, 170, 260),
    linetype = "dashed", color = "gray"
  ) +
  geom_hline(
    yintercept = c(-80, -70, -60, -50, -40, -30, -20),
    linetype = "dashed", color = "gray"
  ) +
  # One colour per size class, in factor-level order.
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#008000",
      "#ffff00", "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.title = element_blank()) +
  facet_wrap(~group, nrow = 3)

# Value against frequency, one panel per size class: the raw series coloured
# by class, a black linear fit and its regression equation.
ggplot(dat_clean, aes(x = as.numeric(Frequency), y = Value)) +
  # Line widths are set with `linewidth`; `size` for lines is deprecated
  # since ggplot2 3.4.0.
  geom_line(
    aes(color = group),
    alpha = 0.5, linewidth = 1, show.legend = FALSE
  ) +
  geom_smooth(
    method = "lm", color = "black", linewidth = 1, show.legend = FALSE
  ) +
  stat_regline_equation() +
  theme_presentation(base_size = 14) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  # Dashed grey reference lines.
  geom_vline(
    xintercept = c(38, 45, 90, 170, 260),
    linetype = "dashed", color = "gray"
  ) +
  geom_hline(
    yintercept = c(-80, -70, -60, -50, -40, -30, -20),
    linetype = "dashed", color = "gray"
  ) +
  # One colour per size class, in factor-level order.
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#008000",
      "#ffff00", "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.title = element_blank()) +
  facet_wrap(~group, nrow = 3)

# Linear fit of Value against frequency, one panel per size class, with the
# fit coloured by class and its regression equation printed.
ggplot(dat_clean, aes(x = as.numeric(Frequency), y = Value)) +
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
  geom_smooth(
    aes(color = group),
    method = "lm", linewidth = 1, show.legend = FALSE
  ) +
  stat_regline_equation() +
  theme_presentation(base_size = 14) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  # Dashed grey reference lines.
  geom_vline(
    xintercept = c(38, 45, 90, 170, 260),
    linetype = "dashed", color = "gray"
  ) +
  geom_hline(
    yintercept = c(-80, -70, -60, -50, -40, -30, -20),
    linetype = "dashed", color = "gray"
  ) +
  # One colour per size class, in factor-level order.
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#008000",
      "#ffff00", "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.title = element_blank()) +
  facet_wrap(~group, nrow = 3)

# Panel w: linear fit of Value against frequency for every size class in a
# single panel, with the legend shown.
# (The `alpha` argument formerly passed to ggplot() was dropped: ggplot()
# does not use it.)
w <- ggplot(dat_clean) +
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
  geom_smooth(
    aes(x = as.numeric(Frequency), y = Value, color = group),
    method = "lm", linewidth = 1.5, alpha = 0.5, show.legend = TRUE
  ) +
  # Zoom with coord_cartesian() rather than scale limits: limits set on the
  # scale discard every observation outside [-65, -45] before the model is
  # fitted, so the lines would be fitted to truncated data.
  coord_cartesian(ylim = c(-65, -45)) +
  theme_presentation(base_size = 16.5) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  # Dashed grey reference lines.
  geom_vline(
    xintercept = c(38, 45, 90, 170, 260),
    linetype = "dashed", color = "gray"
  ) +
  geom_hline(
    yintercept = c(-70, -60, -50, -40),
    linetype = "dashed", color = "gray"
  ) +
  # One colour per size class, in factor-level order.
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#008000",
      "#ffff00", "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.title = element_blank()) +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  ) +
  theme(
    panel.grid.major.x = element_line(color = "gray", linetype = "dashed")
  )



# Panel k: GAM smooth of Value against frequency for every size class in a
# single panel, without a legend.
k <- ggplot(dat_clean) +
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
  geom_smooth(
    aes(x = as.numeric(Frequency), y = Value, color = group),
    method = "gam", linewidth = 1.5, show.legend = FALSE
  ) +
  theme_presentation(base_size = 16.5) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)") +
  # Zoom with coord_cartesian() rather than scale limits: limits set on the
  # scale discard every observation outside [-65, -45] before the model is
  # fitted, so the smooths would be fitted to truncated data.
  coord_cartesian(ylim = c(-65, -45)) +
  scale_x_continuous(breaks = c(38, 70, 90, 120, 170, 200, 260)) +
  # Dashed grey reference lines.
  geom_vline(
    xintercept = c(38, 45, 90, 170, 260),
    linetype = "dashed", color = "gray"
  ) +
  geom_hline(
    yintercept = c(-70, -60, -50, -40),
    linetype = "dashed", color = "gray"
  ) +
  # One colour per size class, in factor-level order.
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#008000",
      "#ffff00", "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.title = element_blank()) +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  ) +
  theme(
    panel.grid.major.x = element_line(color = "gray", linetype = "dashed")
  )


library(cowplot)

# Combine panels k and w side by side with a single shared legend taken
# from w.

# Style the legend text before extracting the legend: a theme() added to the
# assembled grid cannot restyle a legend that has already been turned into a
# grob. legend.title is not set here because w blanks its legend title.
legend_w <- get_legend(
  w + theme(legend.text = element_text(size = 19))
)

combined_plot <- plot_grid(
  k + theme(legend.position = "none"),  # panel (a): k without a legend
  w + theme(legend.position = "none"),  # panel (b): w without its legend
  legend_w,
  ncol = 3,
  rel_widths = c(1, 1, 0.35),  # narrow third column for the legend
  labels = c("(a)", "(b)", ""),
  align = "h"  # align the panels horizontally
)

# Print the combined figure.
print(combined_plot)

# Empirical cumulative distribution of Value for each size class.
ggplot(dat_clean, aes(x = Value, group = group, color = group)) +
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
  stat_ecdf(linewidth = 1.5, pad = TRUE, alpha = 0.5) +
  theme_presentation(base_size = 14) +
  scale_x_continuous(breaks = c(-90, -80, -70, -60, -50, -40, -30, -20)) +
  # One colour per size class, in factor-level order.
  scale_color_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#008000",
      "#ffff00", "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  xlab("Sv (dB)") +
  ylab("ECDF") +
  ggtitle("Prueba K-S") +
  theme(legend.position = "right") +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  ) +
  theme(
    panel.grid.major.x = element_line(color = "gray", linetype = "dashed"),
    legend.title = element_blank()
  )

#library(coin)

#oneway_test(Value ~ Class, data = dat_clean)

# Two-sample Kolmogorov-Smirnov test of Value for every pair of size
# classes. dat_clean must already be defined.

# Size classes in order of first appearance in the data.
groups <- unique(dat_clean$group)

# Empty result, returned as-is when there are fewer than two classes.
results <- data.frame(
  group1 = character(),
  group2 = character(),
  D_value = numeric(),
  p_value = numeric(),
  stringsAsFactors = FALSE
)

if (length(groups) >= 2) {
  # combn() lists the index pairs (1,2), (1,3), ..., (n-1,n), the same order
  # a nested i < j loop would visit them in.
  pair_index <- combn(seq_along(groups), 2, simplify = FALSE)

  # One single-row data frame per pair, bound once at the end instead of
  # growing `results` with rbind() on every iteration.
  ks_rows <- lapply(pair_index, function(idx) {
    group1 <- groups[idx[1]]
    group2 <- groups[idx[2]]

    test_result <- ks.test(
      dat_clean$Value[dat_clean$group == group1],
      dat_clean$Value[dat_clean$group == group2]
    )

    # NOTE(review): rounding to two decimals prints very small p-values
    # as 0.
    data.frame(
      group1 = group1,
      group2 = group2,
      D_value = round(test_result$statistic, 2),
      p_value = round(test_result$p.value, 2)
    )
  })

  results <- do.call(rbind, ks_rows)
}

# Show the final table.
print(results)

    group1 group2 D_value p_value
D     13.5    7.5    0.47       0
D1    13.5   12.5    0.22       0
D2    13.5     13    0.38       0
D3    13.5     11    0.54       0
D4    13.5     12    0.33       0
D5    13.5    3.5    0.50       0
D6    13.5   10.5    0.56       0
D7    13.5      4    0.57       0
D8    13.5      5    0.65       0
D9     7.5   12.5    0.28       0
D10    7.5     13    0.58       0
D11    7.5     11    0.07       0
D12    7.5     12    0.17       0
D13    7.5    3.5    0.14       0
D14    7.5   10.5    0.09       0
D15    7.5      4    0.10       0
D16    7.5      5    0.18       0
D17   12.5     13    0.33       0
D18   12.5     11    0.35       0
D19   12.5     12    0.15       0
D20   12.5    3.5    0.29       0
D21   12.5   10.5    0.36       0
D22   12.5      4    0.36       0
D23   12.5      5    0.45       0
D24     13     11    0.63       0
D25     13     12    0.42       0
D26     13    3.5    0.48       0
D27     13   10.5    0.67       0
D28     13      4    0.68       0
D29     13      5    0.75       0
D30     11     12    0.23       0
D31     11    3.5    0.15       0
D32     11   10.5    0.07       0
D33     11      4    0.09       0
D34     11      5    0.13       0
D35     12    3.5    0.17       0
D36     12   10.5    0.25       0
D37     12      4    0.26       0
D38     12      5    0.34       0
D39    3.5   10.5    0.20       0
D40    3.5      4    0.21       0
D41    3.5      5    0.28       0
D42   10.5      4    0.03       0
D43   10.5      5    0.09       0
D44      4      5    0.10       0

library(ggplot2)

# Boxplot of Value for each size class.
ggplot(dat_clean) +
  # `linewidth` replaces `size` for the box outlines (deprecated for lines
  # since ggplot2 3.4.0).
  geom_boxplot(
    aes(x = group, y = Value, fill = group),
    alpha = 0.5, linewidth = 0.75, show.legend = FALSE
  ) +
  theme_presentation(base_size = 16) +
  ylab("Sv (dB)") +
  xlab("Talla (cm)") +
  # One fill colour per size class, in factor-level order.
  scale_fill_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#008000",
      "#ffff00", "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.position = "top") +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  )

library(ggplot2)
library(dplyr)
library(rstatix)

# Kruskal-Wallis test of Value across the size classes.
kruskal_result <- kruskal_test(dat_clean, Value ~ group)

kruskal_result

# A tibble: 1 × 6
  .y.        n statistic    df     p method        
* <chr>  <int>     <dbl> <int> <dbl> <chr>         
1 Value 153004    30247.     9     0 Kruskal-Wallis

# Pairwise Wilcoxon tests of Value between size classes, with y positions
# added for drawing the comparison brackets on a plot.
df_wilcox_tallas <- add_y_position(
  pairwise_wilcox_test(dat_clean, Value ~ group),
  step.increase = 0.2
)

df_wilcox_tallas

# A tibble: 45 × 11
   .y.   group1 group2    n1    n2  statistic         p     p.adj p.adj.signif
   <chr> <chr>  <chr>  <int> <int>      <dbl>     <dbl>     <dbl> <chr>       
 1 Value 3.5    4      12220 13965 103331800  2.42e-191 6.05e-190 ****        
 2 Value 3.5    5      12220 50538 410178798  0         0         ****        
 3 Value 3.5    7.5    12220 12188  81403280. 2.14e- 36 1.93e- 35 ****        
 4 Value 3.5    10.5   12220 15232 111735838. 5.35e-180 1.28e-178 ****        
 5 Value 3.5    11     12220 12397  86071956. 1.34e- 76 2.41e- 75 ****        
 6 Value 3.5    12     12220 12062  65551944. 2.55e- 50 3.06e- 49 ****        
 7 Value 3.5    12.5   12220  9812  39884065  0         0         ****        
 8 Value 3.5    13     12220   250    685315  1.64e- 50 2.13e- 49 ****        
 9 Value 3.5    13.5   12220 14340  30884547  0         0         ****        
10 Value 4      5      13965 50538 384119700  6.93e- 58 9.70e- 57 ****        
# ℹ 35 more rows
# ℹ 2 more variables: y.position <dbl>, groups <named list>

# Kruskal-Wallis test of Value_linear across the size classes.
# NOTE(review): if Value_linear is a monotone transform of Value, this
# rank-based test gives the same result as the one on Value -- confirm the
# repeat is needed.
kruskal_result_lineal <- kruskal_test(dat_clean, Value_linear ~ group)

kruskal_result_lineal

# A tibble: 1 × 6
  .y.               n statistic    df     p method        
* <chr>         <int>     <dbl> <int> <dbl> <chr>         
1 Value_linear 153004    30247.     9     0 Kruskal-Wallis

# Pairwise Wilcoxon tests of Value_linear between size classes, with y
# positions added for drawing the comparison brackets on a plot.
df_wilcox_tallas_lineal <- add_y_position(
  pairwise_wilcox_test(dat_clean, Value_linear ~ group),
  step.increase = 0.2
)

df_wilcox_tallas_lineal

# A tibble: 45 × 11
   .y.      group1 group2    n1    n2 statistic         p     p.adj p.adj.signif
   <chr>    <chr>  <chr>  <int> <int>     <dbl>     <dbl>     <dbl> <chr>       
 1 Value_l… 3.5    4      12220 13965    1.03e8 2.42e-191 6.05e-190 ****        
 2 Value_l… 3.5    5      12220 50538    4.10e8 0         0         ****        
 3 Value_l… 3.5    7.5    12220 12188    8.14e7 2.14e- 36 1.93e- 35 ****        
 4 Value_l… 3.5    10.5   12220 15232    1.12e8 5.35e-180 1.28e-178 ****        
 5 Value_l… 3.5    11     12220 12397    8.61e7 1.34e- 76 2.41e- 75 ****        
 6 Value_l… 3.5    12     12220 12062    6.56e7 2.55e- 50 3.06e- 49 ****        
 7 Value_l… 3.5    12.5   12220  9812    3.99e7 0         0         ****        
 8 Value_l… 3.5    13     12220   250    6.85e5 1.64e- 50 2.13e- 49 ****        
 9 Value_l… 3.5    13.5   12220 14340    3.09e7 0         0         ****        
10 Value_l… 4      5      13965 50538    3.84e8 6.93e- 58 9.70e- 57 ****        
# ℹ 35 more rows
# ℹ 2 more variables: y.position <dbl>, groups <named list>

# Colour of each size class, keyed by class label. Naming the entries lets
# the fill scale (factor `group`) and the bracket colour scale (character
# `group1`, which sorts alphabetically) pick the same colour for the same
# class. The positional bracket palette repeated "#008000" in the slot that
# falls on class "13".
talla_colors <- c(
  "3.5" = "#5f5f5f", "4" = "#0000ff", "5" = "#000080", "7.5" = "#00bf00",
  "10.5" = "#008000", "11" = "#ffff00", "12" = "#ff8000",
  "12.5" = "#ff00bf", "13" = "#ff0000", "13.5" = "#a6533c"
)

# Boxplot of Value per size class, annotated with the Kruskal-Wallis result
# and the pairwise Wilcoxon brackets from df_wilcox_tallas.
ggplot(dat_clean) +
  # `linewidth` replaces `size` for the box outlines (deprecated for lines
  # since ggplot2 3.4.0).
  geom_boxplot(
    aes(x = group, y = Value, fill = group),
    alpha = 0.5, linewidth = 0.75, show.legend = FALSE
  ) +
  theme_presentation(base_size = 16) +
  scale_y_continuous(breaks = seq(-90, -20, 10)) +
  ylab("Sv (dB)") +
  xlab("Talla (cm)") +
  scale_fill_manual(name = "Longitud (cm)", values = talla_colors) +
  theme(legend.position = "none") +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  ) +
  # Alternative label format: label = "as_detailed_italic".
  stat_kruskal_test(
    aes(y = Value, x = group, group = group),
    p.adjust.method = "bonferroni", label.x = 1.25, label.y = 160
  ) +
  # Brackets coloured and stacked by their first size class.
  stat_pvalue_manual(
    df_wilcox_tallas,
    color = "group1", step.group.by = "group1",
    tip.length = 0, step.increase = 0.02
  ) +
  scale_color_manual(name = "Longitud (cm)", values = talla_colors)

library(ggplot2)
library(dplyr)
library(rstatix)


# Kruskal-Wallis test of Value across size classes, run separately within
# each frequency band.
kruskal_result_banda <- kruskal_test(
  group_by(dat_clean, Banda),
  Value ~ group
)

kruskal_result_banda

# A tibble: 4 × 7
  Banda   .y.       n statistic    df     p method        
* <fct>   <chr> <int>     <dbl> <int> <dbl> <chr>         
1 35-45   Value  7575     2021.     9     0 Kruskal-Wallis
2 45-90   Value 25275     4833.     9     0 Kruskal-Wallis
3 90-170  Value 53111    11729.     9     0 Kruskal-Wallis
4 170-260 Value 67043    15016.     9     0 Kruskal-Wallis

# Pairwise Wilcoxon tests of Value between size classes within each
# frequency band, with y positions added for drawing comparison brackets.
df_wilcox_tallas_grupos <- add_y_position(
  pairwise_wilcox_test(group_by(dat_clean, Banda), Value ~ group),
  step.increase = 0.2
)

df_wilcox_tallas_grupos

# A tibble: 180 × 12
   Banda .y.   group1 group2    n1    n2 statistic         p     p.adj
   <fct> <chr> <chr>  <chr>  <int> <int>     <dbl>     <dbl>     <dbl>
 1 35-45 Value 3.5    4        527   660    69188. 3.10e- 71 1.18e- 69
 2 35-45 Value 3.5    5        527  2232   437696  5.90e- 20 1.71e- 18
 3 35-45 Value 3.5    7.5      527   576    66630. 2.10e- 58 7.56e- 57
 4 35-45 Value 3.5    10.5     527   720    88562. 2.42e- 58 8.47e- 57
 5 35-45 Value 3.5    11       527   549    76694  1.39e- 40 4.45e- 39
 6 35-45 Value 3.5    12       527   552    64648. 3.54e- 56 1.20e- 54
 7 35-45 Value 3.5    12.5     527   444    40330  2.08e- 69 7.70e- 68
 8 35-45 Value 3.5    13       527    12      646. 2.40e-  6 4.56e-  5
 9 35-45 Value 3.5    13.5     527  1303   114149  4.85e-111 2.04e-109
10 35-45 Value 4      5        660  2232  1197416  4.49e-132 1.98e-130
# ℹ 170 more rows
# ℹ 3 more variables: p.adj.signif <chr>, y.position <dbl>, groups <named list>

# Kruskal-Wallis test of Value_linear across size classes, run separately
# within each frequency band.
# NOTE(review): if Value_linear is a monotone transform of Value, this
# rank-based test gives the same result as the one on Value -- confirm the
# repeat is needed.
kruskal_result_banda_lineal <- kruskal_test(
  group_by(dat_clean, Banda),
  Value_linear ~ group
)

kruskal_result_banda_lineal

# A tibble: 4 × 7
  Banda   .y.              n statistic    df     p method        
* <fct>   <chr>        <int>     <dbl> <int> <dbl> <chr>         
1 35-45   Value_linear  7575     2021.     9     0 Kruskal-Wallis
2 45-90   Value_linear 25275     4833.     9     0 Kruskal-Wallis
3 90-170  Value_linear 53111    11729.     9     0 Kruskal-Wallis
4 170-260 Value_linear 67043    15016.     9     0 Kruskal-Wallis

# Pairwise Wilcoxon tests of Value_linear between size classes within each
# frequency band, with y positions added for drawing comparison brackets.
df_wilcox_tallas_grupos_lineal <- add_y_position(
  pairwise_wilcox_test(group_by(dat_clean, Banda), Value_linear ~ group),
  step.increase = 0.2
)

df_wilcox_tallas_grupos_lineal

# A tibble: 180 × 12
   Banda .y.          group1 group2    n1    n2 statistic         p     p.adj
   <fct> <chr>        <chr>  <chr>  <int> <int>     <dbl>     <dbl>     <dbl>
 1 35-45 Value_linear 3.5    4        527   660    69188. 3.10e- 71 1.18e- 69
 2 35-45 Value_linear 3.5    5        527  2232   437696  5.90e- 20 1.71e- 18
 3 35-45 Value_linear 3.5    7.5      527   576    66630. 2.10e- 58 7.56e- 57
 4 35-45 Value_linear 3.5    10.5     527   720    88562. 2.42e- 58 8.47e- 57
 5 35-45 Value_linear 3.5    11       527   549    76694  1.39e- 40 4.45e- 39
 6 35-45 Value_linear 3.5    12       527   552    64648. 3.54e- 56 1.20e- 54
 7 35-45 Value_linear 3.5    12.5     527   444    40330  2.08e- 69 7.70e- 68
 8 35-45 Value_linear 3.5    13       527    12      646. 2.40e-  6 4.56e-  5
 9 35-45 Value_linear 3.5    13.5     527  1303   114149  4.85e-111 2.04e-109
10 35-45 Value_linear 4      5        660  2232  1197416  4.49e-132 1.98e-130
# ℹ 170 more rows
# ℹ 3 more variables: p.adj.signif <chr>, y.position <dbl>, groups <named list>

# Colour of each size class, keyed by class label. Naming the entries lets
# the fill scale (factor `group`) and the bracket colour scale (character
# `group1`, which sorts alphabetically) pick the same colour for the same
# class. The positional bracket palette repeated "#008000" in the slot that
# falls on class "13".
talla_colors <- c(
  "3.5" = "#5f5f5f", "4" = "#0000ff", "5" = "#000080", "7.5" = "#00bf00",
  "10.5" = "#008000", "11" = "#ffff00", "12" = "#ff8000",
  "12.5" = "#ff00bf", "13" = "#ff0000", "13.5" = "#a6533c"
)

# Boxplot of Value per size class, one panel per frequency band, annotated
# with the Kruskal-Wallis result and the pairwise Wilcoxon brackets from
# df_wilcox_tallas_grupos.
ggplot(dat_clean) +
  # `linewidth` replaces `size` for the box outlines (deprecated for lines
  # since ggplot2 3.4.0).
  geom_boxplot(
    aes(x = group, y = Value, fill = group),
    alpha = 0.5, linewidth = 0.75, show.legend = FALSE
  ) +
  theme_presentation(base_size = 13) +
  ylab("Sv (dB)") +
  xlab("Talla (cm)") +
  scale_fill_manual(name = "Longitud (cm)", values = talla_colors) +
  theme(legend.position = "none") +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  ) +
  scale_y_continuous(breaks = seq(-90, -20, 10)) +
  # Alternative label format: label = "as_detailed_italic".
  stat_kruskal_test(
    aes(y = Value, x = group, group = group),
    p.adjust.method = "bonferroni", label.x.npc = 0.45, label.y.npc = 0.25
  ) +
  # Brackets coloured and stacked by their first size class.
  stat_pvalue_manual(
    df_wilcox_tallas_grupos,
    color = "group1", step.group.by = "group1",
    tip.length = 0, step.increase = 0.02
  ) +
  scale_color_manual(name = "Longitud (cm)", values = talla_colors) +
  facet_wrap(~Banda, ncol = 4)

# Boxplot of Value per frequency band, one panel per size class with free
# axis scales.
ggplot(dat_clean) +
  # `linewidth` replaces `size` for the box outlines (deprecated for lines
  # since ggplot2 3.4.0).
  geom_boxplot(
    aes(x = Banda, y = Value, fill = group),
    alpha = 0.5, linewidth = 0.75, show.legend = FALSE
  ) +
  theme_presentation(base_size = 14) +
  ylab("Sv (dB)") +
  xlab("Frecuencia (kHz)") +
  # One fill colour per size class, in factor-level order.
  scale_fill_manual(
    name = "Longitud (cm)",
    values = c(
      "#5f5f5f", "#0000ff", "#000080", "#00bf00", "#008000",
      "#ffff00", "#ff8000", "#ff00bf", "#ff0000", "#a6533c"
    )
  ) +
  theme(legend.position = "top") +
  theme(
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed")
  ) +
  facet_wrap(~group, scales = "free")