Load packages

## vector with package names
x <- c( "pbapply", "parallel", "ggplot2", "warbleR", "Rraven", "viridis", "readxl")

aa <- lapply(x, function(y) {
  
  # check if installed, if not then install 
  if (!y %in% installed.packages()[,"Package"]) 
    install.packages(y) 

  # load package
  try(require(y, character.only = T), silent = T)
})


detec_saturation <- function(X, parallel = 1, bit = 16, max_amplitude = NULL, path = NULL){
  
  if (is.null(max_amplitude))
  max_amplitude <- max((2 ^ bit) / 2) - 1
  
  out <- pblapply(1:nrow(X), cl = parallel, function(x){
    
    wv <- read_sound_file(X, index = x, path = path)
  
    out_df <- data.frame(sound.files = X$sound.files[x], selec = X$selec[x], prop.saturated = sum(wv@left == max_amplitude) / length(wv@left))
    
    return(out_df) 
})

 sat_df <- do.call(rbind, out)
 
 
 if (any(sat_df$prop.saturated > 0.2))
 cat(crayon::magenta(paste(sum(sat_df$prop.saturated > 0.2), "selections look saturated (i.e. > 20% of amplitude samples reached the highest value)"))) 

  if (any(all(sat_df$prop.saturated < 0.2) & any(sat_df$prop.saturated > 0.05)))
 cat(crayon::magenta(paste(sum(sat_df$prop.saturated > 0.05), "selections with some degree of saturation (i.e. < 20% but  > 5% of amplitude samples reached the highest value)"))) 

  if (all(sat_df$prop.saturated < 0.05))
 cat(crayon::silver("no saturation detected (all selections < 5% of amplitude samples reached the highest value)")) 

 
  return(sat_df)

}

Functions and parameters

#functions and parameters
knitr::opts_knit$set(root.dir = normalizePath(".."))

knitr::opts_chunk$set(dpi = 50, fig.width = 12, message = FALSE, warning = FALSE, comment = FALSE) 

# ggplot2 theme
# theme_set(theme_classic(base_size = 20))

cut_path <-  "./data/raw/cuts"

treatments <- c("Calibration", "Regular_sining", "Coordination", "After_chase",  
 "Before_playback", "After_playback", "Before_interaction", "After_interaction", "Before_noise", "After_noise")

warbleR_options(wav.path = cut_path, parallel = 20)

sel_tabs <- imp_raven(path = "./data/raw/selections", warbler.format = TRUE, all.data = TRUE)

sf <- strsplit(sel_tabs$sound.files, "\\", fixed = TRUE)

sel_tabs$sound.files <- sapply(sf, function(x) x[length(x)])

sel_tabs$Treatment[grep("Coord", sel_tabs$Treatment)] <- "Coordination"
sel_tabs$Treatment[grep("BIN", sel_tabs$Treatment)] <- "Before_interaction"
sel_tabs$Treatment[grep("AIN", sel_tabs$Treatment)] <- "After_interaction"
sel_tabs$Treatment[grep("BN", sel_tabs$Treatment)] <- "Before_noise"
sel_tabs$Treatment[grep("AN", sel_tabs$Treatment)] <- "After_noise"
sel_tabs$Treatment[grep("Interaction", sel_tabs$Treatment)] <- "After_chase"
sel_tabs$Treatment[grep("BPB", sel_tabs$Treatment)] <- "Before_playback"
sel_tabs$Treatment[grep("APB", sel_tabs$Treatment)] <- "After_playback"
sel_tabs$Treatment[grep("Normal", sel_tabs$Treatment)] <- "Regular_sining"

sel_tabs$`Ind Color` <- NULL

sel_tabs$ID <- substr(sel_tabs$selec.file, 0 , 3)

sel_tabs$ID[grep("mp3$", sel_tabs$sound.files)] <- sapply(grep("mp3$", sel_tabs$sound.files, value = TRUE), function(x) strsplit(x, split = "\\.")[[1]][1])

sel_tabs$year <- ifelse(grepl("2021", sel_tabs$sound.files), 2021, 2019)

sel_tabs$year[grep("mp3$", sel_tabs$sound.files)] <- sapply(grep("mp3$", sel_tabs$sound.files, value = TRUE), function(x) strsplit(x, split = "\\.")[[1]][3])


# table(sel_tabs$year, sel_tabs$Treatment)

Automatic detection of LBH songs on flac files

warbleR_options(wl = 300, parallel = 10, flim = c(0, 11))

sel_tabs_wavs <- sel_tabs[grep("wav$", sel_tabs$sound.files, ignore.case = TRUE), ]


cs <- check_sels(sel_tabs_wavs, pb = FALSE, path = "~/Dropbox/Recordings/LBH/")

song_sels <- sel_tabs_wavs[sel_tabs_wavs$Treatment != "Calibration", ]

cut_sels(song_sels, path = "~/Dropbox/Recordings/LBH/", dest.path = cut_path, overwrite = FALSE)

 

wav_2_flac(path = cut_path)

ad <- auto_detec(threshold = 15, bp = c(1, 9), ssmooth = 300, hold.time = 0.1, output = "data.frame", path = cut_path, thinning = 0.05, flist = list.files(path = cut_path, pattern = "flac$")) 

ad$org.sound.file <- sapply(1:nrow(ad), function(x) song_sels$sound.files[song_sels$cuts == ad$sound.files[x]][1])

exp_raven(X = ad, path = cut_path, sound.file.path = normalizePath(cut_path), file.name = "song_detection_sel_tab_wavs.txt")

sel_tabs_mp3s <- sel_tabs[grep("mp3$", sel_tabs$sound.files, ignore.case = TRUE), ]

sel_tabs_mp3s$org.sound.file <- sel_tabs_mp3s$sound.files

cs <- check_sels(sel_tabs_mp3s, pb = FALSE, path = cut_path)

exp_raven(X = sel_tabs_mp3s, path = cut_path, sound.file.path = normalizePath(cut_path), file.name = "song_detection_sel_tab_mp3s.txt")

Measure amplitude

sels <- imp_raven(warbler.format = TRUE, all.data = TRUE, path = "./data/raw", pb = FALSE, files = c("song_detection_sel_tab_mp3s.txt","song_detection_sel_tab_wavs.txt"))

pos <- regexpr("\\.([[:alnum:]]+)$",  sel_tabs$sound.files)
sel_tabs$extsn <- tolower(ifelse(pos > -1L, substring(sel_tabs$sound.files,pos + 1L), ""))

sel_tabs$extsn[extsn == "wav"] <- "flac"


sel_tabs$cuts <- paste0(paste(gsub(".WAV", "", sel_tabs$sound.files, ignore.case = TRUE), sel_tabs$selec, sep = "-"), ".", extsn)

sel_tabs$cuts[grep("mp3$",sel_tabs$cuts)] <- sel_tabs$sound.files[grep("mp3$",sel_tabs$cuts)]

names(sel_tabs)[names(sel_tabs) == "Ind Color" ] <- "ID"


pos <- regexpr("\\.([[:alnum:]]+)$",  sels$sound.files)
sels$extsn <- tolower(ifelse(pos > -1L, substring(sels$sound.files,pos + 1L), ""))

sels$sound.files[sels$sound.files == "424_SUR_05-Feb-2021_07.58_09.52_A41.flac"] <- "424_SUR_05-Feb-2021_07.58_09.52_A41A.flac"

sels$year <- ifelse(grepl("2021", sels$sound.files), 2021, 2019)

sels$year[grep("mp3$", sels$sound.files)]  <- as.numeric(sapply(grep("mp3$", sels$sound.files, value = TRUE), function(x) strsplit(x, split = "\\.")[[1]][3]))

sels$Treatment[sels$extsn == "flac"] <- sapply(which(sels$extsn == "flac"), function(x){
  y <- sel_tabs$Treatment[sel_tabs$cuts == sels$sound.files[x]][1]
  if(length(y) == 0) return(NA) else return(y)
  })

sels$ID <- sapply(1:nrow(sels), function(x){
  y <- sel_tabs$ID[sel_tabs$cuts == sels$sound.files[x]][1]
  if(length(y) == 0) return(NA) else return(y)
  })


# table(sels$year, sels$Treatment)


am_set <- as.data.frame(read_excel(path = "./data/raw/audiomoth_recording_settings.xlsx"))

sels$org.sound.file <- gsub(".wav", "", sels$org.sound.file, ignore.case = TRUE)

sels$Audiomoth <- sapply(1:nrow(sels), function(x){
  y <- am_set$Audiomoth[am_set$`Original audio file` == sels$org.sound.file[x]]
  if(length(y) == 0) return(NA) else return(y)
  })

sels$Calibration <- sapply(1:nrow(sels), function(x){
  y <- am_set$Calibration[am_set$`Original audio file` == sels$org.sound.file[x]]
  if(length(y) == 0) return(NA) else return(y)
  })

sels$`Raven selections file` <- sapply(1:nrow(sels), function(x){
  y <- am_set$`Raven selections file`[am_set$`Original audio file` == sels$org.sound.file[x]]
  if(length(y) == 0) return(NA) else return(y)
  })

sels$Gain <- sapply(1:nrow(sels), function(x){
  y <- am_set$Gain[am_set$`Original audio file` == sels$org.sound.file[x]]
  if(length(y) == 0) return(NA) else return(y)
  })

# sels2 <- merge(sels, am_set[, c("Raven selections file", "Audiomoth", "Gain", "Calibration", "Original audio file")], by.x = "org.sound.file", by.y = "Original audio file")


sels$duration <- sels$old.selec <- sels$View <- sels$Channel <- sels$`Begin Path` <- sels$`File Offset (s)` <- NULL

sels$cut.label <- 1




sat <- detec_saturation(sels, parallel = 20, bit = 16, path = cut_path)

sels$prop.saturated <- sat$prop.saturated


# measure uncalibrated SPL
sels$uncal.spl <- pbsapply(1:nrow(sels), cl = 20, function(x) {

  signal <- read_sound_file(sels, index = x, path = cut_path)
  
   # signal <- ffilter(signal, f = signal@samp.rate, from = bp[1] * 1000, to = bp[2] * 1000, bandpass = TRUE, output = "Wave")
   
  sigamp <- seewave::rms(seewave::env(signal, envt = "abs", plot = FALSE))
  signaldb <- 20 * log10(sigamp)
  
    return(signaldb)
  }
)

cal <- read.csv("./output/spl_constant_for_audiomoth_calibration.csv")

cal <- cal[cal$sound.files != "soundmeter", ]

cal$Audiomoth <- substr(sapply(cal$sound.files, function(x) strsplit(x, split = "_")[[1]][3]), 2, 3)

sels$Audiomoth[is.na(sels$Audiomoth)] <- "non-calibrated"
sels$Gain[is.na(sels$Gain)] <- "non-calibrated"

cal$Audiomoth[cal$Audiomoth == "41"] <- "41A"

cal$gain <- sapply(cal$sound.files, function(x) strsplit(x, split = "_")[[1]][2])


amp <- read.csv("./output/calibrated_amplitude_all_songs.csv")


cal_spl_for_calibrated_l <- pblapply(unique(sels$org.sound.file[sels$Audiomoth != "non-calibrated"]), cl = 10, function(x){
  X <- sels[sels$org.sound.file == x, ]
  X$cal.spl <- X$uncal.spl + cal$mean.diff[cal$Audiomoth == X$Audiomoth[1] & cal$gain == X$Gain[1]]
return(X)
    }
)

cal_spl_for_calibrated <- do.call(rbind, cal_spl_for_calibrated_l)

# speudo-calibration for non-calibrated audiomoth recordings forcing them to have a mean SPL equals to the mean SPL of all calibrated songs
cal.spl_for_non_calibrated_l <- pblapply(unique(sels$org.sound.file[sels$Audiomoth == "non-calibrated"]), cl = 10, function(x){
  X <- sels[sels$org.sound.file == x, ]
  
  X$cal.spl <- X$uncal.spl + mean(cal_spl_for_calibrated$cal.spl) - mean(X$uncal.spl)
  return(X)
  }
)

cal.spl_for_non_calibrated <- do.call(rbind, cal.spl_for_non_calibrated_l)

sels <- rbind(cal.spl_for_non_calibrated, cal_spl_for_calibrated)

# extract hour
sels$time <- as.numeric(substr(sels$sound.files, 21, 22))

sels$time[grep("mp3$", sels$sound.files)]  <- as.numeric(sapply(grep("mp3$", sels$sound.files, value = TRUE), function(x) strsplit(x, split = "\\.")[[1]][6]))


sels$time[sels$org.sound.file == "448_CCL_26-Feb-2021_00.00_00.00_AC8"] <- 8

sels$period <- ifelse(sels$time > 12, "afternoon", "morning")

sels$ID <- substr(sels$sound.files, 0 , 3)

sels$ID[grep("mp3$", sels$sound.files)] <- sapply(grep("mp3$", sels$sound.files, value = TRUE), function(x) strsplit(x, split = "\\.")[[1]][1])

# add lek
sels$lek <- substr(sels$sound.files, 5, 7)

sels$lek[grep("mp3$", sels$sound.files)] <- sapply(grep("mp3$", sels$sound.files, value = TRUE), function(x) strsplit(x, split = "\\.")[[1]][2])


sels$bottom.freq[is.na(sels$bottom.freq)] <- mean(sels$bottom.freq, na.rm = TRUE)
sels$top.freq[is.na(sels$top.freq)] <- mean(sels$top.freq, na.rm = TRUE)

# add song type
songtype_df <- read.csv("./output/songtype_classification.csv")

sels$songtype <- sapply(1:nrow(sels), function(x) songtype_df$songtype[songtype_df$ID == sels$ID[x]][1])

sels$duration <- sels$end - sels$start
sels <- sels[sels$duration > 0.1 & sels$duration < 0.18, ]


hist(sels$duration[sels$sound.files == "395_SUR_26-Jul-2019_08.46_09.06_AEZ-5.flac"])
hist(sels$duration)
range(sels$duration)
quantile(sels$duration)

cs <- checksels(sels)

full_spectrograms(X = sels, sxrow = 5, rows = 13, fast.spec = TRUE, horizontal = TRUE, path = cut_path, parallel = 20, flim = c(1, 12), dest.path =  "./data/processed/full_spectrograms_autodetec")

# write.csv(sels, "./output/calibrated_amplitude_all_songs.csv", row.names = FALSE)

Descriptive stats

Number of recorded individuals = 36
Sound file sample size for each treatment:

# read data
sels <- read.csv("./output/calibrated_amplitude_all_songs.csv")

sub_sels <- sels[!duplicated(sels[, c("sound.files", "Treatment")]), ]

agg <- aggregate(sound.files ~ Treatment, sub_sels, length)

# order levels
agg <- agg[match(treatments[treatments %in% agg$Treatment], agg$Treatment), ]

kable(agg)

	Treatment	sound.files
9	Regular_sining	124
8	Coordination	52
1	After_chase	6
7	Before_playback	24
4	After_playback	29
5	Before_interaction	24
2	After_interaction	24
6	Before_noise	1
3	After_noise	1

Individual sample size for each treatment:

sub_sels <- sels[!duplicated(sels[, c("ID", "Treatment")]), ]

agg <- aggregate(sound.files ~ Treatment, sub_sels, length)

# order levels
agg <- agg[match(treatments[treatments %in% agg$Treatment], agg$Treatment), ]


kable(agg)

	Treatment	sound.files
9	Regular_sining	20
8	Coordination	11
1	After_chase	3
7	Before_playback	17
4	After_playback	17
5	Before_interaction	17
2	After_interaction	17
6	Before_noise	1
3	After_noise	1

Individual sample size for each treatment by year (recordings from years before 2019 were extracted from videos in mp3 format):

sub_sels <- sels[!duplicated(sels[, c("ID", "year", "Treatment")]), ]

sub_sels$year <- as.factor(sub_sels$year)

agg <- as.data.frame.matrix(table(sub_sels$Treatment, sub_sels$year))


# order levels
agg <- agg[match(treatments[treatments %in% rownames(agg)], rownames(agg)), ]

agg[agg == 0] <- ""

kable(agg)

	2013	2014	2019	2021
Regular_sining				20
Coordination				11
After_chase				3
Before_playback			8	10
After_playback			8	10
Before_interaction	10	1	3	4
After_interaction	10	1	3	4
Before_noise				1
After_noise				1

Total number of songs = 25400
Mean number of songs per individual = 706
Songs per treatment:

agg <- aggregate(selec ~ Treatment, sels, length)

# order levels
agg <- agg[match(treatments[-c(1, 9, 10)], agg$Treatment), ]

kable(agg)

	Treatment	selec
9	Regular_sining	14542
8	Coordination	2645
1	After_chase	131
7	Before_playback	3374
4	After_playback	3989
5	Before_interaction	291
2	After_interaction	302

Maximum number of saturated songs = 0.006071

amp <- read.csv("./output/calibrated_amplitude_all_songs.csv")

# options for warbleR
warbleR_options(wav.path = cut_path, wl = 400)
check_sels(amp)

# measure signal to noise ratio
snr <- sig2noise(amp, 0.1, path = cut_path)


# get the 3 songs with the highest SNR for each individual
examp_l <- lapply(unique(snr$ID), function(x) {
  
  X <- snr[snr$ID == x, ]
  
  X <- X[order(X$SNR, decreasing = TRUE), ]
  
  return(X[1:3, ])
})

# make it a data frame
examp <- do.call(rbind, examp_l)

# add lek label
examp$lek <- substr(examp$org.sound.file, 5, 7)

examp$lek[grep("mp3$", examp$sound.files)] <- sapply(grep("mp3$", examp$sound.files, value = TRUE), function(x) strsplit(x, split = "\\.")[[1]][2])

examp_est <- selection_table(examp, extended = TRUE, confirm.extended = FALSE)

# make catalog
catalog(examp_est, flim = c(0, 14), ovlp = 95, nrow = 10, ncol = 6, wl = 400, pal = viridis, collevels = seq(-100, 0, 1), labels = c("ID", "lek"), width = 15.5, height = 8.5, res = 200, tags = c("lek"), tag.pal = list(magma), mar = 0.005, hatching = 2, parallel = 1)

move_imgs(from = cut_path, to = "./data/raw", overwrite = TRUE)


songtype_df <- data.frame(ID = c(397, 400, 403, 413, 415, 416, 417, 419, 422, 423, 424, 432, 433, 435, 437, 438, 444, 446, 448, 449, 0, 108, 124, 146, 176, 178, 179, 36, 54, 9, 384, 390, 391, 395, 398, 402), songtype = c("a", "b", "f", "c", "f", "c", "c",  "g",  "e", "c", "d", "h", "i", "j", "i", "j", "l", "k", "l", "m", "n", "l", "o", "p", "q", "q", "q", "r", "q", "q", "s", "a", "a", "a", "c", "b"))


write.csv(songtype_df, "./output/songtype_classification.csv", row.names = FALSE)

Session information

sessionInfo()

FALSE R version 4.0.3 (2020-10-10)
FALSE Platform: x86_64-pc-linux-gnu (64-bit)
FALSE Running under: Ubuntu 20.04.1 LTS
FALSE 
FALSE Matrix products: default
FALSE BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
FALSE LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
FALSE 
FALSE locale:
FALSE  [1] LC_CTYPE=pt_PT.UTF-8       LC_NUMERIC=C              
FALSE  [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=pt_PT.UTF-8    
FALSE  [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=pt_PT.UTF-8   
FALSE  [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
FALSE  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
FALSE [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       
FALSE 
FALSE attached base packages:
FALSE [1] parallel  stats     graphics  grDevices utils     datasets  methods  
FALSE [8] base     
FALSE 
FALSE other attached packages:
FALSE  [1] readxl_1.3.1       viridis_0.5.1      viridisLite_0.3.0  Rraven_1.0.12     
FALSE  [5] warbleR_1.1.27     NatureSounds_1.0.3 knitr_1.31         seewave_2.1.6     
FALSE  [9] tuneR_1.3.3        ggplot2_3.3.3      pbapply_1.4-3     
FALSE 
FALSE loaded via a namespace (and not attached):
FALSE  [1] Rcpp_1.0.6        highr_0.8         cellranger_1.1.0  pillar_1.4.6     
FALSE  [5] compiler_4.0.3    bitops_1.0-6      tools_4.0.3       digest_0.6.27    
FALSE  [9] evaluate_0.14     lifecycle_0.2.0   tibble_3.0.4      gtable_0.3.0     
FALSE [13] fftw_1.0-6        pkgconfig_2.0.3   rlang_0.4.10      yaml_2.2.1       
FALSE [17] xfun_0.22         gridExtra_2.3     withr_2.4.1       stringr_1.4.0    
FALSE [21] dplyr_1.0.2       generics_0.1.0    vctrs_0.3.6       grid_4.0.3       
FALSE [25] tidyselect_1.1.0  glue_1.4.2        R6_2.5.0          dtw_1.22-3       
FALSE [29] rmarkdown_2.4     purrr_0.3.4       magrittr_2.0.1    scales_1.1.1     
FALSE [33] ellipsis_0.3.1    htmltools_0.5.1.1 MASS_7.3-53       colorspace_2.0-0 
FALSE [37] proxy_0.4-25      stringi_1.5.3     RCurl_1.98-1.3    signal_0.7-6     
FALSE [41] munsell_0.5.0     rjson_0.2.20      crayon_1.4.1

Detecting songs

Long-billed hermit song amplitude

Marcelo Araya-Salas, PhD & Melanie Talavera

18-04-2021

Load packages

Functions and parameters

Automatic detection of LBH songs on flac files

Measure amplitude

Descriptive stats

Session information