Song sexual dimorphism

Author
Published

December 8, 2023

Install/load packages

Code
source("https://raw.githubusercontent.com/maRce10/sketchy/main/R/load_packages.R")
source("https://raw.githubusercontent.com/maRce10/sketchy/main/R/internal_functions.R")

x <- c("remotes", "dtw", "warbleR", "knitr", "MASS", "parallel", github = "maRce10/PhenotypeSpace",
    "emstreeR", "DT")

load_packages(x)

1 Element-level

1.1 Format data with element-level acoustic features

Code
# read element level data
elm_measures <- read.csv("M-F song selections for FINAL analysis - elmt data.csv",
    header = TRUE, stringsAsFactors = FALSE, na.strings = c("", " ",
        "NA"))

# remove this species
elm_measures <- elm_measures[!elm_measures$species %in% c("Cisticola aridulus",
    "Cisticola brunnescens", "Cisticola lais", "Passerculus sandwichensis",
    "Troglodytes sissonii", "Troglodytes tanneri", "Thryophilus sinaloa"),
    ]

# remove these two Neotropical wren songs with very high
# frequency ranges detected during data exploration with Neil
# (mis-measured; upper frequency bound placed too high)
elm_measures <- elm_measures[grep("CB-003_F|CB-003_M|G-49_M|G-49_F",
    elm_measures$song.number, invert = TRUE), ]

# PR20130611_KJO_R1095_MaleSolo_AB-R_3.wav_684. This selection
# is the length of whole song.
elm_measures <- elm_measures[elm_measures$sound.files_selec != "PR20130611_KJO_R1095_MaleSolo_AB-R_3.wav_684",
    ]


# Meliphagidae_Anthornis melanura_DB0275_REC0002275.wav_2131
# Peak frequency = 0; Remove this note.
elm_measures <- elm_measures[elm_measures$sound.files_selec != "Meliphagidae_Anthornis melanura_DB0275_REC0002275.wav_2131",
    ]

# Donacobiidae_Donacobius atricapilla_ML45932.wav_1766. This is
# an outlier: This note has a lot of noise in the background.
# This note was measured by mistake. Remove this note (can leave
# the other notes in the song)
elm_measures <- elm_measures[elm_measures$sound.files_selec != "Donacobiidae_Donacobius atricapilla_ML45932.wav_1766",
    ]

# Cisticolidae_Cisticola chubbi_MB0221_REC0000145.wav_862. This
# is a mistake. There is no signal here and the bounds are too
# high. Remove this note but keep other notes in the song.
elm_measures <- elm_measures[elm_measures$sound.files_selec != "Cisticolidae_Cisticola chubbi_MB0221_REC0000145.wav_862",
    ]

# save data
write.csv(elm_measures, "element_level_acoustic_features.csv", row.names = FALSE)

1.2 Describe data

Code
elm_measures <- read.csv("element_level_acoustic_features.csv")
  • 131 species from 40 families

  • Sample size per species:

Code
agg_elm <- aggregate(selec ~ family.name + species, data = elm_measures,
    length)

names(agg_elm)[ncol(agg_elm)] <- "annotations"

agg_elm$songs <- aggregate(song ~ family.name + species, data = elm_measures,
    function(x) length(unique(x)))[, 3]

agg_elm$sound.files <- aggregate(sound.files ~ family.name + species,
    data = elm_measures, function(x) length(unique(x)))[, 3]

DT::datatable(agg_elm, options = list(pageLength = 50, autoWidth = TRUE),
    rownames = FALSE)

1.3 Measure dynamic time warping distance on elements (in process)

Code
# read data
elm_measures <- read.csv("element_level_acoustic_features.csv")

# replace missing frequency contour values with peak frequency
elm_measures$Peak.Freq.Contour..Hz. <- ifelse(is.na(elm_measures$Peak.Freq.Contour..Hz.),
    elm_measures$Peak.Freq..Hz., elm_measures$Peak.Freq.Contour..Hz.)

# Add frequency contours
fcts <- extract_ts(X = elm_measures, ts.column = "Peak.Freq.Contour..Hz.",
    equal.length = TRUE, length.out = 50)

dtw.dist <- dfDTW(ts.df = fcts, length.out = 50, parallel = 20)

mds.dtw <- cmdscale(dtw.dist, k = 10)


colnames(mds.dtw) <- paste("dtw.mds", 1:ncol(mds.dtw), sep = "-")


elm_measures <- data.frame(elm_measures, mds.dtw)

# save data
write.csv(elm_measures, "element_level_acoustic_features.csv", row.names = FALSE)

The output data (‘element_level_acoustic_features.csv’) looks like this (not yet done):

2 Song-level

2.1 Measure song-level acoustic features

Code
elm_measures <- read.csv("element_level_acoustic_features.csv")

# use warbleR package to create song-level dataframe, excluding
# DTW columns
song_measures <- warbleR::song_param(X = elm_measures[, grep("dtw.mds",
    names(elm_measures), invert = TRUE)], song_colm = "song", min_colm = "Freq.5...Hz.",
    max_colm = "Freq.95...Hz.", na.rm = TRUE, sd = FALSE)

# create frequency range column
song_measures$freq.range.Min5toMax95 <- song_measures$max.Freq.95...Hz. -
    song_measures$min.Freq.5...Hz.

song_measures <- merge(x = song_measures, y = elm_measures[!duplicated(elm_measures$song),
    c("song", "species", "family.name", "sex")], by = "song")

write.csv(song_measures, "song_level_acoustic_features.csv", row.names = FALSE)

2.2 Measure song-level element diversity

  • Project acoustic space with PCA
  • Measure minimum spanning tree as a metric of diversity
Code
# read song data
song_measures <- read.csv("song_level_acoustic_features.csv")

# read element data
elm_measures <- read.csv("element_level_acoustic_features.csv")

# define element level features to be used for measuring element
# diversity
target_elm_acoustic_measures <- c("Dur.90...s.", "IQR.Dur..s.", "Q1.Freq..Hz.",
    "Q3.Freq..Hz.", "Center.Freq..Hz.", "Freq.5...Hz.", "Freq.95...Hz.",
    "IQR.BW..Hz.", "BW.90...Hz.", "Max.Freq..Hz.", "Peak.Freq..Hz.",
    "PFC.Num.Inf.Pts", "Avg.Entropy..bits.", "Agg.Entropy..bits.",
    "Min.Entropy..bits.", "Max.Entropy..bits.", "duration", "meanfreq",
    "sd", "freq.median", "freq.Q25", "freq.Q75", "freq.IQR", "time.median",
    "time.Q25", "time.Q75", "time.IQR", "skew", "kurt", "sp.ent",
    "time.ent", "entropy", "sfm", "meanpeakf")

# run PCA
pca <- prcomp(scale(elm_measures[, target_elm_acoustic_measures]))

## add PCs to element data
elm_measures$PC1 <- summary(pca)$x[, 1]
elm_measures$PC2 <- summary(pca)$x[, 2]

# calculate acoustic areas and mst branch lengths
set.seed(123)

# get which song have 2 or more observations
tab <- table(elm_measures$song)
target_songs <- names(tab[tab > 1])

# calculate mst space size
song_msts <- PhenotypeSpace::space_size(X = elm_measures[elm_measures$song %in%
    target_songs, ], group = "song", dimensions = c("PC1", "PC2"),
    type = "mst")

# add to song data
song_measures$element.diversity <- sapply(song_measures$song, FUN = function(x) {
    song_mst <- song_msts$size[song_msts$group == x]

    if (length(song_mst) == 0)
        song_mst <- 0

    return(song_mst)
})

# save song data
write.csv(song_measures, "song_level_acoustic_features.csv", row.names = FALSE)

The output data (‘song_level_acoustic_features.csv’) looks like this:

song sound.files selec start end top.freq bottom.freq Channel File.Offset..s. new.selection Q1.Time..s. Q3.Time..s. Time.5…s. Time.95…s. Delta.Time..s. Dur.90…s. IQR.Dur..s. Max.Time..s. Peak.Time..s. Center.Time..s. Q1.Freq..Hz. Q3.Freq..Hz. Center.Freq..Hz. Delta.Freq..Hz. IQR.BW..Hz. BW.90…Hz. Max.Freq..Hz. Peak.Freq..Hz. PFC.Min.Freq..Hz. PFC.Min.Slope..Hz.ms. PFC.Max.Freq..Hz. PFC.Max.Slope..Hz.ms. PFC.Num.Inf.Pts Energy..dB. Avg.Entropy..bits. Agg.Entropy..bits. Min.Entropy..bits. Max.Entropy..bits. original.selection duration meanfreq sd freq.median freq.Q25 freq.Q75 freq.IQR time.median time.Q25 time.Q75 time.IQR skew kurt sp.ent time.ent entropy sfm meandom mindom maxdom dfrange modindx startdom enddom dfslope meanpeakf min.Freq.5…Hz. max.Freq.95…Hz. num.elms elm.duration freq.range song.duration song.rate gap.duration freq.range.Min5toMax95 species family.name sex element.diversity
063114W014_lorentzi_male.wav_NA_M 063114W014_lorentzi_male.wav 1 6.175 10.66850 7.8584 1.9573 1 8.492362 86323.5 851823.1 851823.1 851823.1 851823.1 0.0431437 0.0261225 0.0087075 851823.1 851823.1 851823.1 5114.507 5552.598 5374.383 2545.583 438.0914 1185.072 5420.428 5420.428 4486.319 -113.33276 5736.753 152.72586 1.3103448 92.40862 3.087672 3.641948 2.345017 3.741207 44526.5 0.0431437 5.203888 0.4867636 5.242143 4.886113 5.556606 0.6704937 0.0205525 0.0129951 0.0293820 0.0163868 1.494390 5.057079 0.8836390 0.9488844 0.8382200 0.5088950 5.164286 4.202172 5.986448 1.784276 5.493428 5.515034 5.398448 -3.756625 5.575862 3186.9 7321.3 58 0.0431438 5.9011 4.49350 13.016158 0.0349326 4134.4 Malurus alboscapulatus Maluridae M 15.287414
063114W015_lorentzi_male.wav_NA_M 063114W015_lorentzi_male.wav 1 1.736 6.86536 7.4494 2.0741 1 4.376269 86391.5 851834.2 851834.2 851834.2 851834.2 0.0404982 0.0282808 0.0122054 851834.2 851834.2 851834.2 4685.403 5080.728 4915.086 2041.996 395.3308 1008.205 4979.137 4979.137 4367.372 -78.18846 5130.421 61.83077 0.7948718 83.48333 2.685064 3.366936 2.066795 3.405141 44594.5 0.0404983 4.766903 0.4027421 4.811570 4.480760 5.068369 0.5876073 0.0186380 0.0105113 0.0281086 0.0175972 1.536056 5.655952 0.8786656 0.9539454 0.8380362 0.5182423 4.788138 3.969000 5.401308 1.432308 5.188877 4.926385 4.700231 -6.154439 5.058308 2584.0 7149.0 78 0.0404986 5.3753 5.12936 15.321155 0.0255905 4565.0 Malurus alboscapulatus Maluridae M 16.882941
1-001.wav_1-001_F 1-001.wav 1 2.354 7.28950 4.5930 0.8105 1 NA 107994.0 855027.5 855027.5 855027.5 855027.5 0.1110302 0.0818505 0.0319275 855027.5 855027.5 855027.5 2153.330 2919.890 2472.020 3512.250 766.6000 2428.950 2342.840 2342.840 1240.340 -192.90000 3712.310 241.86000 5.9000000 108.71000 3.927600 4.773000 2.821300 4.650500 7336.5 0.1110302 2.701598 0.7410328 2.626396 2.187754 3.253675 1.0659189 0.0679572 0.0450104 0.0885836 0.0435732 1.254295 4.705025 0.9410353 0.8956927 0.8428793 0.5935633 2.505610 1.176000 4.174800 2.998800 17.902732 2.116800 2.499000 3.531038 2.528400 1292.0 3962.1 10 0.1110310 3.7825 4.93550 2.075119 0.4250211 2670.1 Cantorchilus leucotis Troglodytidae F 1.124199
1-001.wav_1-001_M 1-001.wav 1 0.459 7.09347 4.3228 0.7805 1 NA 107990.5 855026.5 855026.6 855026.5 855026.6 0.2058162 0.1683450 0.0513519 855026.5 855026.5 855026.5 2352.092 3094.146 2617.115 3300.985 742.0692 1510.646 2438.231 2438.231 1126.331 -212.29231 3663.954 117.56923 5.7692308 119.96923 2.589846 4.220923 1.521077 4.225769 7365.0 0.2058162 2.414369 0.5308540 2.445397 2.191609 2.687635 0.4960276 0.1336904 0.0943088 0.1677989 0.0734899 2.685867 10.836700 0.8981494 0.8864010 0.7961203 0.4079136 2.524062 1.029000 3.923769 2.894769 12.575215 1.277769 3.516692 10.851756 2.793000 1808.8 3617.6 13 0.2058181 3.5423 6.63447 2.020202 0.3299029 1808.8 Cantorchilus leucotis Troglodytidae M 1.438351
1-007.wav_1-007_F 1-007.wav 1 0.682 4.55847 4.3211 0.5934 1 NA 108022.5 855032.8 855032.8 855032.8 855032.8 0.0713712 0.0406350 0.0141900 855032.8 855032.8 855032.8 1737.000 2201.156 1947.572 2394.161 464.1722 1421.211 1952.339 1952.339 1287.217 -102.22222 2493.067 81.61111 1.8333333 110.30000 2.955000 3.902889 2.120722 3.855556 7391.5 0.0713712 2.070505 0.4552149 2.025226 1.755829 2.376977 0.6211472 0.0324270 0.0211386 0.0446769 0.0235382 1.078599 3.268228 0.8927546 0.9310627 0.8296004 0.5093177 1.840791 0.980000 2.972667 1.992667 6.572923 1.764000 1.519000 -10.565836 1.992667 1205.9 3703.7 18 0.0713713 3.7277 3.87647 4.789782 0.1524581 2497.8 Cantorchilus leucotis Troglodytidae F 5.627068
1-007.wav_1-007_M 1-007.wav 1 0.434 4.38575 4.5711 1.1690 1 NA 108020.5 855032.7 855032.7 855032.6 855032.7 0.1048324 0.0786900 0.0264450 855032.7 855032.7 855032.7 2244.239 2985.939 2555.278 2666.061 741.6944 1689.156 2698.833 2698.833 1545.617 -72.53333 3411.811 129.42778 0.8888889 115.24444 2.542722 4.325556 1.576611 3.764611 7418.5 0.1048325 2.538987 0.5334437 2.535069 2.168331 2.899826 0.7314934 0.0696966 0.0401170 0.0874664 0.0473493 1.062557 3.413206 0.9236994 0.9039251 0.8351149 0.5392376 2.424793 1.470000 3.691333 2.221333 4.187654 1.862000 2.776667 11.796325 2.450000 1464.3 4048.2 18 0.1048342 3.4021 3.95175 4.728132 0.1214550 2583.9 Cantorchilus leucotis Troglodytidae M 5.358270

2.3 Measure species-level song dimorphism

  • Project acoustic space with PCA
  • Measure dimorphism as:
    1. Kernel density overlap of the acoustic space of sexes
    2. Distance between centroids of the acoustic space of sexes
Code
# read song data
song_measures <- read.csv("song_level_acoustic_features.csv")

### song level parameters to include
target_song_acoustic_measures <- c("freq.range.Min5toMax95", "num.elms",
    "elm.duration", "song.duration", "song.rate", "gap.duration",
    "Q1.Freq..Hz.", "Q3.Freq..Hz.", "element.diversity", "meanfreq",
    "sd", "BW.90...Hz.", "sp.ent", "time.ent", "entropy", "modindx")


# scale all numeric variables
scaled_song_measures <- data.frame(scale(song_measures[, target_song_acoustic_measures]))

# replace missing values with 0
scaled_song_measures$song.rate[is.na(scaled_song_measures$song.rate)] <- scaled_song_measures$gap.duration[is.na(scaled_song_measures$gap.duration)] <- 0

sapply(scaled_song_measures, anyNA)

# run pca and save pcs
pca <- prcomp(scaled_song_measures)

pcs <- summary(pca)$x

## add PCs to element data
song_measures$PC1 <- summary(pca)$x[, 1]
song_measures$PC2 <- summary(pca)$x[, 2]

# calculate acoustic space overlap of sexes by specie
species_dimorphism_list <- pbapply::pblapply(unique(song_measures$species),
    cl = 10, function(x) {

        sex_overlap <- warbleR::try_na(PhenotypeSpace::rarefact_space_similarity(X = song_measures[song_measures$species ==
            x, ], group = "sex", dimensions = c("PC1", "PC2"), type = "mean.density.overlap",
            pb = FALSE)$mean.overlap)

        sex_distance <- warbleR::try_na(rarefact_space_similarity(X = song_measures[song_measures$species ==
            x, ], group = "sex", dimensions = c("PC1", "PC2"), type = "centroid.distance",
            pb = FALSE)$mean.distance)

        species_dimorph <- data.frame(species = x, overlap.dimorphism = sex_overlap,
            distance.dimorphism = sex_distance)

        return(species_dimorph)
    })

# put into a data frame
species_dimorphism <- do.call(rbind, species_dimorphism_list)

# remove species in which overlap cannot be calculated (less
# than 6 observation for at least 1 sex)
species_dimorphism <- species_dimorphism[!is.na(species_dimorphism$overlap.dimorphism),
    ]

# save dimorphism data
write.csv(species_dimorphism, "species_dimorphism.csv", row.names = FALSE)

The output data (‘species_dimorphism.csv’) looks like this:

species overlap.dimorphism distance.dimorphism
Malurus alboscapulatus 0.8333954 0.1428006
Cantorchilus leucotis 0.5365640 1.0417153
Pheugopedius rutilus 0.0650704 1.1112888
Acanthiza pusilla 0.7379409 0.3163981
Malurus splendens 0.6232183 0.1753111
Malurus assimilis 0.6693665 0.5636030

Session information

R version 4.3.1 (2023-06-16)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.2 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3 
LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=pt_BR.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=pt_BR.UTF-8    
 [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=pt_BR.UTF-8   
 [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       

time zone: America/Costa_Rica
tzcode source: system (glibc)

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] DT_0.30              emstreeR_3.1.2       PhenotypeSpace_0.1.0
 [4] MASS_7.3-60          warbleR_1.1.29       NatureSounds_1.0.4  
 [7] knitr_1.45           seewave_2.2.3        tuneR_1.4.5         
[10] dtw_1.23-1           proxy_0.4-27         remotes_2.4.2.1     

loaded via a namespace (and not attached):
 [1] tidyselect_1.2.0      viridisLite_0.4.2     dplyr_1.1.3          
 [4] viridis_0.6.4         bitops_1.0-7          fastmap_1.1.1        
 [7] RCurl_1.98-1.13       spatstat.geom_3.2-7   digest_0.6.33        
[10] rpart_4.1.21          lifecycle_1.0.4       cluster_2.1.4        
[13] ellipsis_0.3.2        spatstat.data_3.0-3   terra_1.7-55         
[16] magrittr_2.0.3        compiler_4.3.1        sass_0.4.7           
[19] rlang_1.1.2           tools_4.3.1           utf8_1.2.4           
[22] yaml_2.3.7            htmlwidgets_1.6.2     sp_2.1-1             
[25] abind_1.4-5           grid_4.3.1            polyclip_1.10-6      
[28] fansi_1.0.5           colorspace_2.1-0      ggplot2_3.4.4        
[31] scales_1.2.1          spatstat.utils_3.0-4  signal_0.7-7         
[34] cli_3.6.1             rmarkdown_2.25        vegan_2.6-4          
[37] generics_0.1.3        rstudioapi_0.15.0     rjson_0.2.21         
[40] cachem_1.0.8          pbapply_1.7-2         fftw_1.0-7           
[43] splines_4.3.1         formatR_1.14          vctrs_0.6.4          
[46] Matrix_1.6-2          jsonlite_1.8.7        tensor_1.5           
[49] crosstalk_1.2.0       testthat_3.2.0        jquerylib_0.1.4      
[52] goftest_1.2-3         glue_1.6.2            spatstat.random_3.2-1
[55] spatstat.core_2.4-4   codetools_0.2-19      gtable_0.3.4         
[58] deldir_1.0-9          raster_3.6-26         munsell_0.5.0        
[61] tibble_3.2.1          pillar_1.9.0          htmltools_0.5.7      
[64] brio_1.1.3            R6_2.5.1              evaluate_0.23        
[67] lattice_0.22-5        bslib_0.5.1           Rcpp_1.0.11          
[70] gridExtra_2.3         nlme_3.1-163          permute_0.9-7        
[73] spatstat.sparse_3.0-3 mgcv_1.9-0            xfun_0.41            
[76] rgeos_0.6-4           pkgconfig_2.0.3