Objetivo 2: Segmentación

Published

September 2, 2024

library(readxl)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(egg)
Cargando paquete requerido: gridExtra

Adjuntando el paquete: 'gridExtra'

The following object is masked from 'package:dplyr':

    combine
library(tidyverse)
library(ggplot2)
library(ggpmisc)
Cargando paquete requerido: ggpp
Registered S3 methods overwritten by 'ggpp':
  method                  from   
  heightDetails.titleGrob ggplot2
  widthDetails.titleGrob  ggplot2

Adjuntando el paquete: 'ggpp'

The following object is masked from 'package:ggplot2':

    annotate
library(broom)
library(ggplot2)
library(patchwork)
library(egg)
library(ggpubr)

Adjuntando el paquete: 'ggpubr'

The following objects are masked from 'package:ggpp':

    as_npc, as_npcx, as_npcy

The following object is masked from 'package:egg':

    ggarrange
library(readxl)
library(tidyverse)
library(egg)
library(tidyverse)
library(ggplot2)
dat_clean=read.csv("dat_clean_modified_zscore_anchoveta.csv")


dat_clean$group <- factor(dat_clean$group,      # Reordering group factor levels

levels = c("3.5", "4", "5", "7.5","10.5","11","12","12.5","13.5"),labels = c("3.5", "4", "5", "7.5","10.5","11","12","12.5","13.5"))

dat_clean$Banda <- factor(dat_clean$Banda,
  levels = c("35-45","45-90","90-170","170-260"),labels = c("35-45","45-90","90-170","170-260"))
Postlarva1 <- dat_clean %>%
  filter(group %in% c("3.5"))%>%
  mutate(Segmentacion=rep("3.5"))


Postlarva2 <- dat_clean %>%
  filter(group %in% c("4", "5"))%>%
  mutate(Segmentacion=rep("4-5"))

Juvenil <- dat_clean %>%
  filter(group %in% c("7.5", "10.5", "11"))%>%
  mutate(Segmentacion=rep("7.5-11"))

Adulto <- dat_clean %>%
  filter(group %in% c("12", "12.5","13.5"))%>%
  mutate(Segmentacion=rep("12-13.5"))




dat_clean2=rbind(Postlarva1,Postlarva2,Juvenil,Adulto)


dat_clean2$Segmentacion <- factor(dat_clean2$Segmentacion,
  levels = c("3.5","4-5","7.5-11","12-13.5"),labels = c("3.5","4 a 5","7.5 a 11","12 a 13.5"))

dat_clean2$Banda <- factor(dat_clean2$Banda,
  levels = c("35-45","45-90","90-170","170-260"),labels = c("35-45","45-90","90-170","170-260"))


dat=dat_clean2

Sin linealizar

descriptiva_segmentacion=dat_clean2 %>% 
  group_by(Segmentacion) %>%
  get_summary_stats(type = "common")  %>%
  dplyr::filter(variable=="Value")

descriptiva_segmentacion
# A tibble: 4 × 11
  Segmentacion variable     n   min   max median   iqr  mean    sd    se    ci
  <fct>        <fct>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 3.5          Value     4351 -76.8 -40.7  -58.0  8.57 -58.7  5.95 0.09  0.177
2 4 a 5        Value    59306 -80.6 -38.6  -59.8  8.06 -59.9  6.16 0.025 0.05 
3 7.5 a 11     Value    30358 -80.2 -34.2  -58.6  8.5  -58.3  6.71 0.039 0.075
4 12 a 13.5    Value    22893 -78.2 -24.4  -47.4 11.5  -48.9  8.86 0.059 0.115

linealizado

#library(explore)
#explore(dat_clean)

descriptiva_clean=dat_clean2 %>% 
  group_by(Segmentacion) %>%
  get_summary_stats(type = "common")  %>%
  dplyr::filter(variable=="Value")

dclass1=descriptiva_clean%>% 
  select(-variable,-iqr,-mean,-sd,-se,-ci)

library(dplyr)
library(magrittr)  # Para el operador %>%

Adjuntando el paquete: 'magrittr'
The following object is masked from 'package:purrr':

    set_names
The following object is masked from 'package:tidyr':

    extract
library(seewave)

Adjuntando el paquete: 'seewave'
The following object is masked from 'package:lubridate':

    duration
The following object is masked from 'package:readr':

    spec
# Calcular la media en dB por grupo
descriptiva_clean2 <- dat_clean2 %>%
  group_by(Segmentacion) %>%
  mutate(mean_dB = meandB(Value), sd_dB=sddB(Value, level="SPL"),
    sd_lineal=sd(Value_linear),
    count_Detect_school = sum(!is.na(Detect_school)))  # Calcular la media en dB usando seewave::meandB()

summary_table <- descriptiva_clean2 %>%
  group_by(Segmentacion) %>%
  summarise(mean_dB = mean(mean_dB),mean_sd_dB = mean(sd_dB),mean_sd_lineal = mean(sd_lineal),
    count_Detect_school = mean(count_Detect_school))  # Calcular el promedio de mean_dB por grupo

# Mostrar la tabla resumen
d2=summary_table %>%
  select(mean_dB, mean_sd_dB,count_Detect_school)

Tabla_D=cbind(dclass1,d2)%>%
  rename(Mínimo="min",Máximo="max",Mediana="median",Media="mean_dB", sd="mean_sd_dB")%>%
  mutate_if(is.numeric, ~round(., 2))

Tabla_D
  Segmentacion     n Mínimo Máximo Mediana  Media   sd count_Detect_school
1          3.5  4351 -76.76 -40.66  -57.96 -55.28 5.72                4351
2        4 a 5 59306 -80.62 -38.60  -59.84 -55.55 6.94               59306
3     7.5 a 11 30358 -80.20 -34.15  -58.65 -52.36 8.84               30358
4    12 a 13.5 22893 -78.24 -24.35  -47.37 -42.24 8.36               22893

Sin linealizar segmentacion x banda

descriptiva_segmentacion_banda=dat_clean2 %>% 
  group_by(Segmentacion,Banda) %>%
  get_summary_stats(type = "common")  %>%
  dplyr::filter(variable=="Value")

descriptiva_segmentacion_banda
# A tibble: 16 × 12
   Banda  Segmentacion variable     n   min   max median   iqr  mean    sd    se
   <fct>  <fct>        <fct>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
 1 35-45  3.5          Value      192 -72.6 -57.6  -65.9  4.12 -65.8  2.84 0.205
 2 45-90  3.5          Value      735 -76.8 -52.0  -66.9  4.88 -66.7  4.01 0.148
 3 90-170 3.5          Value     1472 -73.8 -40.7  -59.5  6.1  -58.6  5.05 0.132
 4 170-2… 3.5          Value     1952 -69.9 -43.8  -54.9  4.00 -55.1  3.24 0.073
 5 35-45  4 a 5        Value     2639 -71.0 -39.3  -53.6  8.42 -54.0  5.86 0.114
 6 45-90  4 a 5        Value    10101 -76.8 -38.6  -57.9  8.07 -57.9  6.01 0.06 
 7 90-170 4 a 5        Value    19625 -79.8 -38.6  -59.6  8.35 -59.6  6.20 0.044
 8 170-2… 4 a 5        Value    26941 -80.6 -45.5  -61.0  7.33 -61.3  5.62 0.034
 9 35-45  7.5 a 11     Value     1381 -65.1 -34.4  -49.6  8.51 -49.7  5.76 0.155
10 45-90  7.5 a 11     Value     5372 -76.1 -34.2  -56.2  7.49 -55.8  6.14 0.084
11 90-170 7.5 a 11     Value     9211 -77.4 -34.3  -58.0  7.78 -57.8  6.16 0.064
12 170-2… 7.5 a 11     Value    14394 -80.2 -37.0  -60.6  7.93 -60.3  6.31 0.053
13 35-45  12 a 13.5    Value     1039 -65.4 -26.5  -41.9 10.2  -42.9  7.35 0.228
14 45-90  12 a 13.5    Value     3995 -69.1 -24.5  -45.8  9.91 -47.3  8.13 0.129
15 90-170 12 a 13.5    Value     7245 -73.7 -24.4  -46.6 11.6  -48.1  8.80 0.103
16 170-2… 12 a 13.5    Value    10614 -78.2 -29.4  -49.0 12.1  -50.5  8.89 0.086
# ℹ 1 more variable: ci <dbl>
descriptiva_clean3=dat_clean2 %>% 
  group_by(group,Banda) %>%
  get_summary_stats(type = "common") %>%
  dplyr::filter(variable=="Value")

d3=descriptiva_clean3%>% 
  select(-variable,-iqr,-mean,-sd,-se,-ci)%>%
   dplyr::rename(Talla="group",Mínimo="min",Máximo="max",Mediana="median")%>%
  mutate_if(is.numeric, ~round(., 2))


library(dplyr)
library(magrittr)  # Para el operador %>%
library(seewave)

# Calcular la media en dB por grupo
descriptiva_clean4 <- dat_clean %>%
  group_by(group, Banda) %>%
  mutate(mean_dB = meandB(Value), sd_dB=sddB(Value, level="SPL"),
    sd_lineal=sd(Value_linear))  # Calcular la media en dB usando 

summary_table4 <- descriptiva_clean4 %>%
  group_by(group, Banda) %>%
  summarise(mean_dB = mean(mean_dB),mean_sd_dB = mean(sd_dB),mean_sd_lineal = mean(sd_lineal))  # Calcular el promedio de mean_dB por grupo
`summarise()` has grouped output by 'group'. You can override using the
`.groups` argument.
# Mostrar la tabla resumen
d4=summary_table4 %>%
  select(mean_dB, mean_sd_dB)
Adding missing grouping variables: `group`
Tabla_D=cbind(d3,d4)


Tabla_D%>%
  select(-group)%>%
 rename(Media="mean_dB", sd="mean_sd_dB")%>%
  mutate_if(is.numeric, ~round(., 2))
     Banda Talla     n Mínimo Máximo Mediana  Media    sd
1    35-45   3.5   192 -72.61 -57.62  -65.92 -64.83  2.99
2    45-90   3.5   735 -76.76 -51.99  -66.92 -64.43  5.10
3   90-170   3.5  1472 -73.83 -40.66  -59.47 -55.12  6.37
4  170-260   3.5  1952 -69.88 -43.78  -54.89 -53.85  3.41
5    35-45     4   455 -62.34 -42.22  -49.15 -47.87  3.80
6    45-90     4  1761 -68.38 -42.14  -57.78 -54.49  5.61
7   90-170     4  2886 -71.41 -43.45  -61.24 -58.16  5.18
8  170-260     4  4758 -77.88 -47.23  -63.03 -60.54  4.49
9    35-45     5  2184 -71.04 -39.29  -54.57 -51.66  5.66
10   45-90     5  8340 -76.79 -38.60  -57.98 -53.46  7.25
11  90-170     5 16739 -79.77 -38.65  -59.32 -54.96  6.93
12 170-260     5 22183 -80.62 -45.47  -60.61 -57.78  5.62
13   35-45   7.5   324 -63.60 -40.98  -52.03 -50.10  4.55
14   45-90   7.5  1274 -76.14 -37.43  -58.30 -51.23 10.04
15  90-170   7.5  2068 -77.37 -37.38  -60.44 -53.72 10.00
16 170-260   7.5  3414 -80.20 -38.06  -63.16 -55.05 11.29
17   35-45  10.5   508 -59.65 -39.67  -46.87 -45.92  3.53
18   45-90  10.5  1945 -67.81 -39.54  -55.47 -52.25  5.89
19  90-170  10.5  3182 -68.46 -40.73  -58.26 -55.86  5.07
20 170-260  10.5  5246 -70.45 -44.88  -60.41 -58.28  4.49
21   35-45    11   549 -65.09 -34.36  -51.35 -44.90  7.81
22   45-90    11  2153 -70.48 -34.15  -55.52 -49.64  8.07
23  90-170    11  3961 -75.13 -34.29  -56.67 -50.88  7.79
24 170-260    11  5734 -75.80 -36.98  -59.12 -52.79  8.72
25   35-45    12   528 -65.36 -33.34  -46.45 -43.07  6.06
26   45-90    12  2024 -69.13 -24.47  -50.89 -44.66  9.56
27  90-170    12  3634 -73.66 -30.17  -52.42 -46.02  8.70
28 170-260    12  5368 -78.24 -36.19  -54.84 -49.53  7.83
29   35-45  12.5   228 -53.40 -34.12  -41.74 -40.87  3.84
30   45-90  12.5   872 -58.84 -26.84  -45.38 -41.76  6.98
31  90-170  12.5  1748 -62.97 -29.42  -44.84 -42.11  5.79
32 170-260  12.5  2318 -61.62 -35.30  -45.84 -44.49  4.52
33   35-45  13.5   283 -43.67 -26.47  -35.64 -34.07  3.62
34   45-90  13.5  1099 -52.30 -25.02  -41.42 -37.39  6.14
35  90-170  13.5  1863 -52.06 -24.35  -41.51 -37.80  6.14
36 170-260  13.5  2928 -56.76 -29.41  -43.70 -40.96  5.35
library(ggplot2)

ggplot(dat)+
  geom_boxplot(alpha=0.5,size=0.35, aes(fill=Segmentacion,y = Value, x=Segmentacion, alpha=0.5), show.legend = F)+
  theme_presentation(base_size = 12) +
  ylab("Sv (dB)")+

        scale_fill_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  #scale_fill_manual(values =c("#5f5f5f","#000080","#008000","#ff00bf","#a6533c"))+ 
 #scale_fill_viridis_d(option = "C",direction = -1,begin = 0.30,end = 1,alpha = 0.5)+
  labs(x="Categoría")+
  scale_x_discrete()+
  theme(legend.position = "top")+
  theme(panel.grid.major.y = element_line(color = "gray", linetype = "dashed"))

ggplot(dat)+
  geom_boxplot(alpha=0.5,size=0.35, aes(fill=Segmentacion,y = Value, x=Segmentacion, alpha=0.5), show.legend = F)+
  theme_presentation(base_size = 12) +
  ylab("Sv (dB)")+
      scale_fill_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  #scale_fill_manual(values =c("#5f5f5f","#000080","#008000","#ff00bf","#a6533c"))+ 
 #scale_fill_viridis_d(option = "C",direction = -1,begin = 0.30,end = 1,alpha = 0.5)+
  labs(x="Categoría")+
  scale_x_discrete()+
  theme(legend.position = "top")+
  theme(panel.grid.major.y = element_line(color = "gray", linetype = "dashed"))+
  facet_wrap(~Banda)

Figuras

ggplot(dat, aes(y = Value, x=as.numeric(Frequency)))+
  geom_line(alpha=0.5, aes(color=Segmentacion),lwd=1, show.legend = F)+
  geom_smooth(size=1,method = "gam", color="black", show.legend = F)+
  theme_presentation(base_size = 12) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)")+
  scale_x_continuous(breaks = c(38,70,90,120,170,200,260))+
  geom_vline(xintercept = c(38,45,90,170,260),linetype = c("dashed"),color="gray")+
  geom_hline(yintercept = c(-80,-70,-60,-50,-40,-30,-20),linetype = c("dashed"),color="gray")+
  
      scale_color_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  
        scale_fill_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  
theme(legend.title=element_blank())+
  facet_wrap(~Segmentacion, ncol=5)

  #facet_wrap(c("Year", "Class), labeller = "label_both")


######################################################

ggplot(dat, aes(y = Value, x=as.numeric(Frequency)))+
  #geom_line(alpha=0.5, aes(color=N_Catch_Year),lwd=3)+
  geom_smooth(size=1,method = "gam", aes(color=Segmentacion), show.legend = F)+

  theme_presentation(base_size = 12) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)")+
  
  scale_x_continuous(breaks = c(38,70,90,120,170,200,260))+
  geom_vline(xintercept = c(38,45,90,170,260),linetype = c("dashed"),color="gray")+
  geom_hline(yintercept = c(-80,-70,-60,-50,-40,-30,-20),linetype = c("dashed"),color="gray")+
  
      scale_color_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  
        scale_fill_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  theme(legend.title=element_blank())+
  facet_wrap(~Segmentacion, ncol=5)

  #facet_wrap(c("Year", "Class), labeller = "label_both")


######################################################


ggplot(dat, aes(y = Value, x=as.numeric(Frequency)))+
  geom_line(alpha=0.5, aes(color=Segmentacion),lwd=1, show.legend = F)+
  geom_smooth(size=1,method = "lm", color="black", show.legend = F)+
  stat_regline_equation()+
  theme_presentation(base_size = 12) +
  
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)")+
  
  scale_x_continuous(breaks = c(38,70,90,120,170,200,260))+
  geom_vline(xintercept = c(38,45,90,170,260),linetype = c("dashed"),color="gray")+
  geom_hline(yintercept = c(-80,-70,-60,-50,-40,-30,-20),linetype = c("dashed"),color="gray")+

  
      scale_color_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  
        scale_fill_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  
theme(legend.title=element_blank())+
  facet_wrap(~Segmentacion,ncol=5)

  #facet_wrap(c("Year", "Class), labeller = "label_both")


######################################################

ggplot(dat, aes(y = Value, x=as.numeric(Frequency)))+
  #geom_line(alpha=0.5, aes(color=N_Catch_Year),lwd=3)+
  geom_smooth(size=1,method = "lm", aes(color=Segmentacion),show.legend = F)+
  stat_regline_equation()+
  theme_presentation(base_size = 12) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)")+
  
  scale_x_continuous(breaks = c(38,70,90,120,170,200,260))+
  geom_vline(xintercept = c(38,45,90,170,260),linetype = c("dashed"),color="gray")+
  geom_hline(yintercept = c(-80,-70,-60,-50,-40,-30,-20),linetype = c("dashed"),color="gray")+
  
      scale_color_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  
        scale_fill_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  #ggtitle("")+
  theme(legend.title=element_blank())+
  facet_wrap(~Segmentacion, ncol=5)

  #facet_wrap(c("Year", "Class), labeller = "label_both")


######################################################

ggplot(dat, aes(y = Value, x=as.numeric(Frequency)))+
  #geom_line(alpha=0.5, aes(color=N_Catch_Year),lwd=3)+
  geom_smooth(size=1,method = "glm", aes(color=Segmentacion),show.legend = F)+
  stat_regline_equation()+
  theme_presentation(base_size = 12) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)")+
  
  scale_x_continuous(breaks = c(38,70,90,120,170,200,260))+
  geom_vline(xintercept = c(38,45,90,170,260),linetype = c("dashed"),color="gray")+
  geom_hline(yintercept = c(-80,-70,-60,-50,-40,-30,-20),linetype = c("dashed"),color="gray")+
  
      scale_color_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  
        scale_fill_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  #ggtitle("")+
  theme(legend.title=element_blank())+
  facet_wrap(~Segmentacion, ncol=5)

  #facet_wrap(c("Year", "Class), labeller = "label_both")

Regresión en dB

ggplot(dat, aes(y = Value, x=as.numeric(Frequency)))+
  geom_line(alpha=0.5, aes(color=Segmentacion),lwd=1.5, show.legend = F)+
  #geom_smooth(size=1,method = "lm", color="black", show.legend = F)+
    xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)")+
  
  scale_x_continuous(breaks = c(38,70,90,120,170,200,260))+
  geom_vline(xintercept = c(38,45,90,170,260),linetype = c("dashed"),color="gray")+
  geom_hline(yintercept = c(-80,-70,-60,-50,-40,-30,-20),linetype = c("dashed"),color="gray")+
      scale_color_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  
        scale_fill_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  #stat_regline_equation(size=10)+
  theme_presentation() +
  facet_wrap(~Segmentacion, ncol=2)

ggplot(dat, aes(y = Value, x=log10(as.numeric(Frequency))))+
  geom_line(alpha=0.5, aes(color=Segmentacion),lwd=1.5, show.legend = F)+
  geom_smooth(size=1,method = "glm", color="black", show.legend = F)+
    xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)")+
  
  #scale_x_continuous(breaks = c(38,70,90,120,170,200,260))+
  #geom_vline(xintercept = c(38,45,90,170,260),linetype = c("dashed"),color="gray")+
  geom_hline(yintercept = c(-80,-70,-60,-50,-40,-30,-20),linetype = c("dashed"),color="gray")+
      scale_color_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  
        scale_fill_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  stat_regline_equation(size=10)+
  theme_presentation() +
  facet_wrap(~Segmentacion, ncol=2)

Regresion en dominio lineal

ggplot(dat, aes(y = 10^(Value/10), x=as.numeric(Frequency)))+
  geom_line(alpha=0.5, aes(color=Segmentacion),lwd=1.5, show.legend = F)+
  geom_smooth(size=1,method = "lm", color="black", show.legend = F)+
  
      scale_color_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  
        scale_fill_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  stat_regline_equation(size=7)+
  theme_presentation() +
    facet_wrap(~Segmentacion, ncol=2, scale="free")

w=ggplot(dat, aes(y = Value, x=as.numeric(Frequency),color=Segmentacion) )+
  #geom_line(alpha=0.1, aes(color=group),lwd=1, show.legend = T)+
  geom_smooth(size=2.25,method = "lm", show.legend = T,se = T)+
  scale_y_continuous(limits = c(-65,-45))+
      scale_color_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  
        scale_fill_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  #stat_regline_equation()+
  theme_presentation(base_size = 14) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)")+
  scale_x_continuous(breaks = c(38,70,90,120,170,200,260))+
  geom_vline(xintercept = c(38,45,90,170,260),linetype = c("dashed"),color="gray")+
  geom_hline(yintercept = c(-70,-60,-50,-40),linetype = c("dashed"),color="gray")+
  #scale_color_viridis_d(option = "C")+
  theme(legend.title=element_blank())+
   theme(panel.grid.major.y = element_line(color = "gray", linetype = "dashed"))+
       theme(panel.grid.major.x = element_line(color = "gray", linetype = "dashed"))



k=ggplot(dat, aes(y = Value, x=as.numeric(Frequency),color=Segmentacion) )+
  #geom_line(alpha=0.1, aes(color=group),lwd=1, show.legend = T)+
  geom_smooth(size=2.25,method = "gam", show.legend = F,se=T)+
      scale_color_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  
        scale_fill_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  #stat_regline_equation()+
  theme_presentation(base_size = 14) +
  xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)")+
  scale_y_continuous(limits = c(-65,-45))+
  scale_x_continuous(breaks = c(38,70,90,120,170,200,260))+
  geom_vline(xintercept = c(38,45,90,170,260),linetype = c("dashed"),color="gray")+
  geom_hline(yintercept = c(-70,-60,-50,-40),linetype = c("dashed"),color="gray")+
  #scale_color_viridis_d(option = "C")+
  theme(legend.title=element_blank())+
   theme(panel.grid.major.y = element_line(color = "gray", linetype = "dashed"))+
     theme(panel.grid.major.x = element_line(color = "gray", linetype = "dashed"))


library(cowplot)

# Crear las gráficas w y k (código que proporcionaste)

# Obtener la leyenda de una de las gráficas (por ejemplo, w)
legend_w <- get_legend(w)

# Combinar las dos gráficas y agregar la leyenda
combined_plot <- plot_grid(
  k + theme(legend.position = "none"),  # Ocultar la leyenda de la gráfica w
  w + theme(legend.position = "none"),  # Ocultar la leyenda de la gráfica k
  legend_w,
  ncol = 3, rel_heights = c(1, 1, 1),rel_widths = c(1,1,0.35),  # Ajustar las alturas relativas
  labels = c("(a)", "(b)", ""),  # Etiquetas de enumeración
  align = "h"  # Alinear horizontalmente las partes
)

# Ajustar el tamaño de la leyenda
combined_plot <- combined_plot + theme(
  legend.text = element_text(size = 19),  # Tamaño del texto de la leyenda
  legend.title = element_text(size = 19)  # Tamaño del título de la leyenda
)

# Imprimir la figura combinada
print(combined_plot)

library(coin)
Cargando paquete requerido: survival
oneway_test(Value ~ group, data = dat)

    Asymptotic K-Sample Fisher-Pitman Permutation Test

data:  Value by
     group (3.5, 4, 5, 7.5, 10.5, 11, 12, 12.5, 13.5)
chi-squared = 41872, df = 8, p-value < 2.2e-16

Análisis Espectral

Análisis de datos espectrales (uni-muenchen.de)

www.phonetik.uni-muenchen.de/~jmh/lehre/sem/ws1920/WP_4.1/SpectralAnalysis.html

library(emuR)

Adjuntando el paquete: 'emuR'
The following object is masked from 'package:seewave':

    mel
The following object is masked from 'package:base':

    norm
library(tidyverse)


ggplot(dat, aes(y = Value, x=as.numeric(Frequency)))+
  geom_line(alpha=0.5, aes(color=Segmentacion),lwd=1, show.legend = F)+facet_wrap(~Segmentacion, ncol=5)

ggplot(dat)+
aes(color=Segmentacion,x=as.numeric(Frequency),y = Value)+
  geom_area()+
  facet_wrap(~Segmentacion, ncol=5)

##########################################
dat_mean=dat %>%
  group_by(Segmentacion, Frequency) %>%
  summarise(track_value = mean(10^(Value/10)))
`summarise()` has grouped output by 'Segmentacion'. You can override using the
`.groups` argument.
ggplot(dat_mean) +
  aes(x = as.numeric(Frequency), y = 10*log10(track_value), color = Segmentacion) +
  geom_line() 

bloqueos <- data.frame(
  xmin = c(40,85, 155,250),
  xmax = c(55,100, 171,270)
)

ggplot(dat_mean) +
  geom_line(size=2,aes(x = as.numeric(Frequency), y = 10*log10(track_value), color = Segmentacion) ) +
  geom_rect(data = bloqueos, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "gray", alpha = 0.75) +  # Ajusta color y transparencia según necesites
  labs(x = "Frecuencia", y = "10*log10(Value)") +
  
        scale_color_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  
        scale_fill_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))

names(dat_mean)
[1] "Segmentacion" "Frequency"    "track_value" 

1 ¿Cómo cuantificar las diferencias entre espectros?

Cuando observamos la figura anterior, que muestra los espectros promedio en los puntos medios temporales de dos categorías fricativas, parece que una manera fácil de distinguir entre las dos sería concentrarse en las diferencias en las amplitudes en el rango de 2000 a 3000 Hz. Podríamos, por ejemplo, tomar la media en ese rango de frecuencia en todos los tokens de los dos tipos para comprobar si es consistente el caso de que el alveolar contiene mucha menos energía en ese rango de frecuencia que su contraparte postalveolar:

sS2to3thousandHz = dat %>%
  filter(Frequency >= 100 & Frequency <= 150) %>%
  group_by(Segmentacion,Detect_school) %>%
  summarise(amplitudes_2000_3000Hz = mean(10^(Value/10)))

ggplot(sS2to3thousandHz)+
  aes(x = Segmentacion, y = 10*log10(amplitudes_2000_3000Hz),
    fill = Segmentacion)+
  geom_boxplot()+
          scale_color_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  
        scale_fill_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))

##
sS2to3thousandHz = dat %>%
  filter(Frequency >= 171 & Frequency <= 250) %>%
  group_by(Segmentacion,Detect_school) %>%
  summarise(amplitudes_2000_3000Hz = mean(10^(Value/10)))

ggplot(sS2to3thousandHz)+
  aes(x = Segmentacion, y = 10*log10(amplitudes_2000_3000Hz),
    fill = Segmentacion)+
  geom_boxplot()+
          scale_color_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  
        scale_fill_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))

1.1 Transformada discreta de coseno (DCT)

(Véase también este documento.)

Una transformada discreta de coseno (DCT) expresa una secuencia finita de n puntos de datos en términos de una suma de funciones de coseno que oscilan a diferentes frecuencias.

Las amplitudes de las funciones del coseno, k0, k1, k2, … kn-1, se denominan coeficientes DCT.

  • k0: la amplitud de un coseno con una frecuencia de 0

  • k1: la amplitud de un coseno con una frecuencia de 0,5

  • k2: la amplitud de un coseno con una frecuencia de 1

  • kn-1: la amplitud de un coseno con una frecuencia de 0,5*(n-1)

Si suma todos estos coeficientes DCT, reconstruirá exactamente la misma señal que se ingresó para el análisis DCT.

Los coeficientes DCT más altos corresponden a los detalles de la “secuencia finita de n puntos de datos”, mientras que los coeficientes más bajos representan las características más generales. Al menos las tres más bajas, k0, k1 y k2, corresponden (pero no son iguales) a las siguientes tres características descriptivas estadísticas: k0 está relacionada linealmente con la media de la secuencia, k1 con la pendiente de la secuencia y k2 con su curvatura. Véase, por ejemplo:

dat_mean2= dat_mean %>%
  group_by(Segmentacion)%>%
  mutate(reconstructed = emuR::dct(track_value,fit=T))


ggplot(dat_mean2) +
  geom_line(size=2,aes(x = as.numeric(Frequency), y = 10*log10(reconstructed ), color = Segmentacion) ) +
  geom_rect(data = bloqueos, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "gray", alpha = 0.75) +  # Ajusta color y transparencia según necesites
  labs(x = "Frecuencia", y = "10*log10(Value)") +
  
      scale_color_manual(name="Longitud (cm)",values =c("royalblue1","orange","green3","darkorchid4"))+
  
        scale_fill_manual(name="Longitud (cm)",values =c("royalblue1","orange","green3","darkorchid4"))

sS_dftlong.mean = dat_mean %>%
  group_by(Segmentacion,) %>%
  mutate(#you can't use m=0 in order to calculate k0 only
    smoothed_k0tok1 = emuR::dct(track_value,m=1,fit=T),
    smoothed_k0tok2 = emuR::dct(track_value,m=2,fit=T),
    smoothed_k0tok3 = emuR::dct(track_value,m=3,fit=T),
    smoothed_k0tok4 = emuR::dct(track_value,m=4,fit=T),
    smoothed_k0tok5 = emuR::dct(track_value,m=5,fit=T),
    smoothed_k0tok6 = emuR::dct(track_value,m=6,fit=T))
ggplot(sS_dftlong.mean) +
  geom_line(size=2,aes(x = as.numeric(Frequency), y = 10*log10(smoothed_k0tok1), color = Segmentacion) ) +
  geom_rect(data = bloqueos, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "gray", alpha = 0.75) +  # Ajusta color y transparencia según necesites
  labs(x = "Frecuencia", y = "10*log10(Value)") +
  
        scale_color_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  
        scale_fill_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
    ggtitle("Smoothed with 2 DCT-coefficients (m=1)")

#############

ggplot(sS_dftlong.mean) +
  geom_line(size=2,aes(x = as.numeric(Frequency), y = 10*log10(smoothed_k0tok2), color = Segmentacion) ) +
  geom_rect(data = bloqueos, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "gray", alpha = 0.75) +  # Ajusta color y transparencia según necesites
  labs(x = "Frecuencia", y = "10*log10(Value)") +
  
        scale_color_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  
        scale_fill_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
    ggtitle("Smoothed with 3 DCT-coefficients (m=2)")
Warning in FUN(X[[i]], ...): Se han producido NaNs
Warning: Removed 41 rows containing missing values or values outside the scale range
(`geom_line()`).

#############

ggplot(sS_dftlong.mean) +
  geom_line(size=2,aes(x = as.numeric(Frequency), y = 10*log10(smoothed_k0tok3), color = Segmentacion) ) +
  geom_rect(data = bloqueos, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "gray", alpha = 0.75) +  # Ajusta color y transparencia según necesites
  labs(x = "Frecuencia", y = "10*log10(Value)") +
  
        scale_color_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
  
        scale_fill_manual(name="Longitud (cm)",values =c("#5f5f5f","#0000ff","#00bf00","#ff00bf"))+
    ggtitle("Smoothed with 4 DCT-coefficients (m=3)")

#############

ggplot(sS_dftlong.mean) +
  geom_line(size=2,aes(x = as.numeric(Frequency), y = 10*log10(smoothed_k0tok4), color = Segmentacion) ) +
  geom_rect(data = bloqueos, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "gray", alpha = 0.75) +  # Ajusta color y transparencia según necesites
  labs(x = "Frecuencia", y = "10*log10(Value)") +
  
      scale_color_manual(name="Longitud (cm)",values =c("royalblue1","orange","green3","darkorchid4"))+
  
        scale_fill_manual(name="Longitud (cm)",values =c("royalblue1","orange","green3","darkorchid4"))
Warning in FUN(X[[i]], ...): Se han producido NaNs

#############

ggplot(sS_dftlong.mean) +
  geom_line(size=2,aes(x = as.numeric(Frequency), y = 10*log10(smoothed_k0tok5), color = Segmentacion) ) +
  geom_rect(data = bloqueos, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "gray", alpha = 0.75) +  # Ajusta color y transparencia según necesites
  labs(x = "Frecuencia", y = "10*log10(Value)") +
  
      scale_color_manual(name="Longitud (cm)",values =c("royalblue1","orange","green3","darkorchid4"))+
  
        scale_fill_manual(name="Longitud (cm)",values =c("royalblue1","orange","green3","darkorchid4"))

#############

ggplot(sS_dftlong.mean) +
  geom_line(size=2,aes(x = as.numeric(Frequency), y = 10*log10(smoothed_k0tok6), color = Segmentacion) ) +
  geom_rect(data = bloqueos, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "gray", alpha = 0.75) +  # Ajusta color y transparencia según necesites
  labs(x = "Frecuencia", y = "10*log10(Value)") +
  
      scale_color_manual(name="Longitud (cm)",values =c("royalblue1","orange","green3","darkorchid4"))+
  
        scale_fill_manual(name="Longitud (cm)",values =c("royalblue1","orange","green3","darkorchid4"))

Recuerde que la última figura muestra solo dos funciones de coseno (invertidas) de una cierta amplitud con frecuencia 0.5. Obviamente, esta no es la mejor representación de los espectros de /s/ y /ʃ/. Tenemos que encontrar un compromiso entre demasiada y muy poca información. En este caso específico, (= 5 coeficientes DCT) parece ser el mejor compromiso.m = 4

Podemos, por supuesto, aplicar la función -también a los datos no promediados:dct

segmentacion=ggplot(dat, aes(y = Value, x=as.numeric(Frequency)))+
  geom_line(alpha=0.5, aes(color=Segmentacion),lwd=1.5, show.legend = F)+

  
    
geom_line(data=sS_dftlong.mean,aes(x = as.numeric(Frequency), y = 10*log10(smoothed_k0tok3), color = Segmentacion),lwd = 1.2) +
  
  
  #geom_smooth(size=1,method = "lm", color="black", show.legend = F)+
    xlab("Frecuencia (kHz)") +
  ylab("Sv (dB)")+
  
  scale_x_continuous(breaks = c(38,70,90,120,170,200,260))+
  geom_vline(xintercept = c(38,45,90,170,260),linetype = c("dashed"),color="gray")+
  geom_hline(yintercept = c(-80,-70,-60,-50,-40,-30,-20),linetype = c("dashed"),color="gray")+
      scale_color_manual(name="Longitud (cm)",values =c("royalblue1","orange","green3","darkorchid4"))+
  
        scale_fill_manual(name="Longitud (cm)",values =c("royalblue1","orange","green3","darkorchid4"))+
  #stat_regline_equation(size=10)+
  theme_presentation(base_size = 20) +
      ggtitle("Smoothed with 4 DCT-coefficients (m=3)")+
  facet_wrap(~Segmentacion, ncol=2)

segmentacion

ggsave(filename = "Segmentacion.png",
  plot = segmentacion,     
  height = 6,             # Specifies the height of the plot in inches
       width = 12,              # Specifies the width of the plot in inches
       dpi = 1000,             # Specifies the resolution in dots per inch
       path = "F:/Tesis abordo/Tesis abordo/Figuras/Objetivo02/",device = "png") 
# dat_mean=dat %>%
#   group_by(Segmentacion, Frequency) %>%
#   summarise(track_value = mean(10^(Value/10)))


dat_mean_school= dat %>%
  group_by(Segmentacion,Detect_school)%>%
  summarise(track_value = mean(10^(Value/10)))
`summarise()` has grouped output by 'Segmentacion'. You can override using the
`.groups` argument.
#Suavizado para cada school
sS_dftlong.mean_school = dat_mean_school %>%
  group_by(Segmentacion) %>%
  mutate(smoothed_k0tok3 = emuR::dct(track_value,m=3,fit=T))


sS_dctCoefficients =
sS_dftlong.mean_school  %>%
  group_by(Segmentacion) %>%
  do(tibble(DCT = emuR::dct(.$track_value, m = 1, fit = F)))%>%
  mutate(DCTCOEF = paste0("k", 0:1))%>%
  tidyr::spread(DCTCOEF, DCT)



tabla <- sS_dctCoefficients %>%
  mutate(k0db = round(10 * log10(k0), 2)) %>%
  mutate(k1db = round(10 * log10(abs(k1)), 2))

tabla
# A tibble: 4 × 5
# Groups:   Segmentacion [4]
  Segmentacion         k0           k1  k0db  k1db
  <fct>             <dbl>        <dbl> <dbl> <dbl>
1 3.5          0.00000420 -0.000000629 -53.8 -62.0
2 4 a 5        0.00000389  0.000000249 -54.1 -66.0
3 7.5 a 11     0.00000872  0.000000580 -50.6 -62.4
4 12 a 13.5    0.0000844   0.00000813  -40.7 -50.9
library(ggplot2)

# Graficar figura de barras
ggplot(tabla, aes(x = Segmentacion, y = k0db, fill = Segmentacion)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  labs(x = "Segmentacion", y = "k0 en dB") +
  theme_minimal()

ggplot(tabla, aes(x = Segmentacion, y = k1db, fill = Segmentacion)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  labs(x = "Segmentacion", y = "k0 en dB") +
  theme_minimal()