## vector with package names
x <- c( "pbapply", "parallel", "ggplot2", "warbleR", "Rraven", "viridis", "readxl")
aa <- lapply(x, function(y) {
# check if installed, if not then install
if (!y %in% installed.packages()[,"Package"])
install.packages(y)
# load package
try(require(y, character.only = T), silent = T)
})
detec_saturation <- function(X, parallel = 1, bit = 16, max_amplitude = NULL, path = NULL){
if (is.null(max_amplitude))
max_amplitude <- max((2 ^ bit) / 2) - 1
out <- pblapply(1:nrow(X), cl = parallel, function(x){
wv <- read_sound_file(X, index = x, path = path)
out_df <- data.frame(sound.files = X$sound.files[x], selec = X$selec[x], prop.saturated = sum(wv@left == max_amplitude) / length(wv@left))
return(out_df)
})
sat_df <- do.call(rbind, out)
if (any(sat_df$prop.saturated > 0.2))
cat(crayon::magenta(paste(sum(sat_df$prop.saturated > 0.2), "selections look saturated (i.e. > 20% of amplitude samples reached the highest value)")))
if (any(all(sat_df$prop.saturated < 0.2) & any(sat_df$prop.saturated > 0.05)))
cat(crayon::magenta(paste(sum(sat_df$prop.saturated > 0.05), "selections with some degree of saturation (i.e. < 20% but > 5% of amplitude samples reached the highest value)")))
if (all(sat_df$prop.saturated < 0.05))
cat(crayon::silver("no saturation detected (all selections < 5% of amplitude samples reached the highest value)"))
return(sat_df)
}
#functions and parameters
knitr::opts_knit$set(root.dir = normalizePath(".."))
knitr::opts_chunk$set(dpi = 50, fig.width = 12, message = FALSE, warning = FALSE, comment = FALSE)
# ggplot2 theme
# theme_set(theme_classic(base_size = 20))
cut_path <- "./data/raw/cuts"
treatments <- c("Calibration", "Regular_sining", "Coordination", "After_chase",
"Before_playback", "After_playback", "Before_interaction", "After_interaction", "Before_noise", "After_noise")
warbleR_options(wav.path = cut_path, parallel = 20)
sel_tabs <- imp_raven(path = "./data/raw/selections", warbler.format = TRUE, all.data = TRUE)
sf <- strsplit(sel_tabs$sound.files, "\\", fixed = TRUE)
sel_tabs$sound.files <- sapply(sf, function(x) x[length(x)])
sel_tabs$Treatment[grep("Coord", sel_tabs$Treatment)] <- "Coordination"
sel_tabs$Treatment[grep("BIN", sel_tabs$Treatment)] <- "Before_interaction"
sel_tabs$Treatment[grep("AIN", sel_tabs$Treatment)] <- "After_interaction"
sel_tabs$Treatment[grep("BN", sel_tabs$Treatment)] <- "Before_noise"
sel_tabs$Treatment[grep("AN", sel_tabs$Treatment)] <- "After_noise"
sel_tabs$Treatment[grep("Interaction", sel_tabs$Treatment)] <- "After_chase"
sel_tabs$Treatment[grep("BPB", sel_tabs$Treatment)] <- "Before_playback"
sel_tabs$Treatment[grep("APB", sel_tabs$Treatment)] <- "After_playback"
sel_tabs$Treatment[grep("Normal", sel_tabs$Treatment)] <- "Regular_sining"
sel_tabs$`Ind Color` <- NULL
sel_tabs$ID <- substr(sel_tabs$selec.file, 0 , 3)
sel_tabs$ID[grep("mp3$", sel_tabs$sound.files)] <- sapply(grep("mp3$", sel_tabs$sound.files, value = TRUE), function(x) strsplit(x, split = "\\.")[[1]][1])
sel_tabs$year <- ifelse(grepl("2021", sel_tabs$sound.files), 2021, 2019)
sel_tabs$year[grep("mp3$", sel_tabs$sound.files)] <- sapply(grep("mp3$", sel_tabs$sound.files, value = TRUE), function(x) strsplit(x, split = "\\.")[[1]][3])
# table(sel_tabs$year, sel_tabs$Treatment)
warbleR_options(wl = 300, parallel = 10, flim = c(0, 11))
sel_tabs_wavs <- sel_tabs[grep("wav$", sel_tabs$sound.files, ignore.case = TRUE), ]
cs <- check_sels(sel_tabs_wavs, pb = FALSE, path = "~/Dropbox/Recordings/LBH/")
song_sels <- sel_tabs_wavs[sel_tabs_wavs$Treatment != "Calibration", ]
cut_sels(song_sels, path = "~/Dropbox/Recordings/LBH/", dest.path = cut_path, overwrite = FALSE)
wav_2_flac(path = cut_path)
ad <- auto_detec(threshold = 15, bp = c(1, 9), ssmooth = 300, hold.time = 0.1, output = "data.frame", path = cut_path, thinning = 0.05, flist = list.files(path = cut_path, pattern = "flac$"))
ad$org.sound.file <- sapply(1:nrow(ad), function(x) song_sels$sound.files[song_sels$cuts == ad$sound.files[x]][1])
exp_raven(X = ad, path = cut_path, sound.file.path = normalizePath(cut_path), file.name = "song_detection_sel_tab_wavs.txt")
sel_tabs_mp3s <- sel_tabs[grep("mp3$", sel_tabs$sound.files, ignore.case = TRUE), ]
sel_tabs_mp3s$org.sound.file <- sel_tabs_mp3s$sound.files
cs <- check_sels(sel_tabs_mp3s, pb = FALSE, path = cut_path)
exp_raven(X = sel_tabs_mp3s, path = cut_path, sound.file.path = normalizePath(cut_path), file.name = "song_detection_sel_tab_mp3s.txt")
sels <- imp_raven(warbler.format = TRUE, all.data = TRUE, path = "./data/raw", pb = FALSE, files = c("song_detection_sel_tab_mp3s.txt","song_detection_sel_tab_wavs.txt"))
pos <- regexpr("\\.([[:alnum:]]+)$", sel_tabs$sound.files)
sel_tabs$extsn <- tolower(ifelse(pos > -1L, substring(sel_tabs$sound.files,pos + 1L), ""))
sel_tabs$extsn[extsn == "wav"] <- "flac"
sel_tabs$cuts <- paste0(paste(gsub(".WAV", "", sel_tabs$sound.files, ignore.case = TRUE), sel_tabs$selec, sep = "-"), ".", extsn)
sel_tabs$cuts[grep("mp3$",sel_tabs$cuts)] <- sel_tabs$sound.files[grep("mp3$",sel_tabs$cuts)]
names(sel_tabs)[names(sel_tabs) == "Ind Color" ] <- "ID"
pos <- regexpr("\\.([[:alnum:]]+)$", sels$sound.files)
sels$extsn <- tolower(ifelse(pos > -1L, substring(sels$sound.files,pos + 1L), ""))
sels$sound.files[sels$sound.files == "424_SUR_05-Feb-2021_07.58_09.52_A41.flac"] <- "424_SUR_05-Feb-2021_07.58_09.52_A41A.flac"
sels$year <- ifelse(grepl("2021", sels$sound.files), 2021, 2019)
sels$year[grep("mp3$", sels$sound.files)] <- as.numeric(sapply(grep("mp3$", sels$sound.files, value = TRUE), function(x) strsplit(x, split = "\\.")[[1]][3]))
sels$Treatment[sels$extsn == "flac"] <- sapply(which(sels$extsn == "flac"), function(x){
y <- sel_tabs$Treatment[sel_tabs$cuts == sels$sound.files[x]][1]
if(length(y) == 0) return(NA) else return(y)
})
sels$ID <- sapply(1:nrow(sels), function(x){
y <- sel_tabs$ID[sel_tabs$cuts == sels$sound.files[x]][1]
if(length(y) == 0) return(NA) else return(y)
})
# table(sels$year, sels$Treatment)
am_set <- as.data.frame(read_excel(path = "./data/raw/audiomoth_recording_settings.xlsx"))
sels$org.sound.file <- gsub(".wav", "", sels$org.sound.file, ignore.case = TRUE)
sels$Audiomoth <- sapply(1:nrow(sels), function(x){
y <- am_set$Audiomoth[am_set$`Original audio file` == sels$org.sound.file[x]]
if(length(y) == 0) return(NA) else return(y)
})
sels$Calibration <- sapply(1:nrow(sels), function(x){
y <- am_set$Calibration[am_set$`Original audio file` == sels$org.sound.file[x]]
if(length(y) == 0) return(NA) else return(y)
})
sels$`Raven selections file` <- sapply(1:nrow(sels), function(x){
y <- am_set$`Raven selections file`[am_set$`Original audio file` == sels$org.sound.file[x]]
if(length(y) == 0) return(NA) else return(y)
})
sels$Gain <- sapply(1:nrow(sels), function(x){
y <- am_set$Gain[am_set$`Original audio file` == sels$org.sound.file[x]]
if(length(y) == 0) return(NA) else return(y)
})
# sels2 <- merge(sels, am_set[, c("Raven selections file", "Audiomoth", "Gain", "Calibration", "Original audio file")], by.x = "org.sound.file", by.y = "Original audio file")
sels$duration <- sels$old.selec <- sels$View <- sels$Channel <- sels$`Begin Path` <- sels$`File Offset (s)` <- NULL
sels$cut.label <- 1
sat <- detec_saturation(sels, parallel = 20, bit = 16, path = cut_path)
sels$prop.saturated <- sat$prop.saturated
# measure uncalibrated SPL
sels$uncal.spl <- pbsapply(1:nrow(sels), cl = 20, function(x) {
signal <- read_sound_file(sels, index = x, path = cut_path)
# signal <- ffilter(signal, f = signal@samp.rate, from = bp[1] * 1000, to = bp[2] * 1000, bandpass = TRUE, output = "Wave")
sigamp <- seewave::rms(seewave::env(signal, envt = "abs", plot = FALSE))
signaldb <- 20 * log10(sigamp)
return(signaldb)
}
)
cal <- read.csv("./output/spl_constant_for_audiomoth_calibration.csv")
cal <- cal[cal$sound.files != "soundmeter", ]
cal$Audiomoth <- substr(sapply(cal$sound.files, function(x) strsplit(x, split = "_")[[1]][3]), 2, 3)
sels$Audiomoth[is.na(sels$Audiomoth)] <- "non-calibrated"
sels$Gain[is.na(sels$Gain)] <- "non-calibrated"
cal$Audiomoth[cal$Audiomoth == "41"] <- "41A"
cal$gain <- sapply(cal$sound.files, function(x) strsplit(x, split = "_")[[1]][2])
amp <- read.csv("./output/calibrated_amplitude_all_songs.csv")
cal_spl_for_calibrated_l <- pblapply(unique(sels$org.sound.file[sels$Audiomoth != "non-calibrated"]), cl = 10, function(x){
X <- sels[sels$org.sound.file == x, ]
X$cal.spl <- X$uncal.spl + cal$mean.diff[cal$Audiomoth == X$Audiomoth[1] & cal$gain == X$Gain[1]]
return(X)
}
)
cal_spl_for_calibrated <- do.call(rbind, cal_spl_for_calibrated_l)
# speudo-calibration for non-calibrated audiomoth recordings forcing them to have a mean SPL equals to the mean SPL of all calibrated songs
cal.spl_for_non_calibrated_l <- pblapply(unique(sels$org.sound.file[sels$Audiomoth == "non-calibrated"]), cl = 10, function(x){
X <- sels[sels$org.sound.file == x, ]
X$cal.spl <- X$uncal.spl + mean(cal_spl_for_calibrated$cal.spl) - mean(X$uncal.spl)
return(X)
}
)
cal.spl_for_non_calibrated <- do.call(rbind, cal.spl_for_non_calibrated_l)
sels <- rbind(cal.spl_for_non_calibrated, cal_spl_for_calibrated)
# extract hour
sels$time <- as.numeric(substr(sels$sound.files, 21, 22))
sels$time[grep("mp3$", sels$sound.files)] <- as.numeric(sapply(grep("mp3$", sels$sound.files, value = TRUE), function(x) strsplit(x, split = "\\.")[[1]][6]))
sels$time[sels$org.sound.file == "448_CCL_26-Feb-2021_00.00_00.00_AC8"] <- 8
sels$period <- ifelse(sels$time > 12, "afternoon", "morning")
sels$ID <- substr(sels$sound.files, 0 , 3)
sels$ID[grep("mp3$", sels$sound.files)] <- sapply(grep("mp3$", sels$sound.files, value = TRUE), function(x) strsplit(x, split = "\\.")[[1]][1])
# add lek
sels$lek <- substr(sels$sound.files, 5, 7)
sels$lek[grep("mp3$", sels$sound.files)] <- sapply(grep("mp3$", sels$sound.files, value = TRUE), function(x) strsplit(x, split = "\\.")[[1]][2])
sels$bottom.freq[is.na(sels$bottom.freq)] <- mean(sels$bottom.freq, na.rm = TRUE)
sels$top.freq[is.na(sels$top.freq)] <- mean(sels$top.freq, na.rm = TRUE)
# add song type
songtype_df <- read.csv("./output/songtype_classification.csv")
sels$songtype <- sapply(1:nrow(sels), function(x) songtype_df$songtype[songtype_df$ID == sels$ID[x]][1])
sels$duration <- sels$end - sels$start
sels <- sels[sels$duration > 0.1 & sels$duration < 0.18, ]
hist(sels$duration[sels$sound.files == "395_SUR_26-Jul-2019_08.46_09.06_AEZ-5.flac"])
hist(sels$duration)
range(sels$duration)
quantile(sels$duration)
cs <- checksels(sels)
full_spectrograms(X = sels, sxrow = 5, rows = 13, fast.spec = TRUE, horizontal = TRUE, path = cut_path, parallel = 20, flim = c(1, 12), dest.path = "./data/processed/full_spectrograms_autodetec")
# write.csv(sels, "./output/calibrated_amplitude_all_songs.csv", row.names = FALSE)
Number of recorded individuals = 36
Sound file sample size for each treatment:
# read data
sels <- read.csv("./output/calibrated_amplitude_all_songs.csv")
sub_sels <- sels[!duplicated(sels[, c("sound.files", "Treatment")]), ]
agg <- aggregate(sound.files ~ Treatment, sub_sels, length)
# order levels
agg <- agg[match(treatments[treatments %in% agg$Treatment], agg$Treatment), ]
kable(agg)
Treatment | sound.files | |
---|---|---|
9 | Regular_sining | 124 |
8 | Coordination | 52 |
1 | After_chase | 6 |
7 | Before_playback | 24 |
4 | After_playback | 29 |
5 | Before_interaction | 24 |
2 | After_interaction | 24 |
6 | Before_noise | 1 |
3 | After_noise | 1 |
sub_sels <- sels[!duplicated(sels[, c("ID", "Treatment")]), ]
agg <- aggregate(sound.files ~ Treatment, sub_sels, length)
# order levels
agg <- agg[match(treatments[treatments %in% agg$Treatment], agg$Treatment), ]
kable(agg)
Treatment | sound.files | |
---|---|---|
9 | Regular_sining | 20 |
8 | Coordination | 11 |
1 | After_chase | 3 |
7 | Before_playback | 17 |
4 | After_playback | 17 |
5 | Before_interaction | 17 |
2 | After_interaction | 17 |
6 | Before_noise | 1 |
3 | After_noise | 1 |
sub_sels <- sels[!duplicated(sels[, c("ID", "year", "Treatment")]), ]
sub_sels$year <- as.factor(sub_sels$year)
agg <- as.data.frame.matrix(table(sub_sels$Treatment, sub_sels$year))
# order levels
agg <- agg[match(treatments[treatments %in% rownames(agg)], rownames(agg)), ]
agg[agg == 0] <- ""
kable(agg)
2013 | 2014 | 2019 | 2021 | |
---|---|---|---|---|
Regular_sining | 20 | |||
Coordination | 11 | |||
After_chase | 3 | |||
Before_playback | 8 | 10 | ||
After_playback | 8 | 10 | ||
Before_interaction | 10 | 1 | 3 | 4 |
After_interaction | 10 | 1 | 3 | 4 |
Before_noise | 1 | |||
After_noise | 1 |
Total number of songs = 25400
Mean number of songs per individual = 706
Songs per treatment:
agg <- aggregate(selec ~ Treatment, sels, length)
# order levels
agg <- agg[match(treatments[-c(1, 9, 10)], agg$Treatment), ]
kable(agg)
Treatment | selec | |
---|---|---|
9 | Regular_sining | 14542 |
8 | Coordination | 2645 |
1 | After_chase | 131 |
7 | Before_playback | 3374 |
4 | After_playback | 3989 |
5 | Before_interaction | 291 |
2 | After_interaction | 302 |
amp <- read.csv("./output/calibrated_amplitude_all_songs.csv")
# options for warbleR
warbleR_options(wav.path = cut_path, wl = 400)
check_sels(amp)
# measure signal to noise ratio
snr <- sig2noise(amp, 0.1, path = cut_path)
# get the 3 songs with the highest SNR for each individual
examp_l <- lapply(unique(snr$ID), function(x) {
X <- snr[snr$ID == x, ]
X <- X[order(X$SNR, decreasing = TRUE), ]
return(X[1:3, ])
})
# make it a data frame
examp <- do.call(rbind, examp_l)
# add lek label
examp$lek <- substr(examp$org.sound.file, 5, 7)
examp$lek[grep("mp3$", examp$sound.files)] <- sapply(grep("mp3$", examp$sound.files, value = TRUE), function(x) strsplit(x, split = "\\.")[[1]][2])
examp_est <- selection_table(examp, extended = TRUE, confirm.extended = FALSE)
# make catalog
catalog(examp_est, flim = c(0, 14), ovlp = 95, nrow = 10, ncol = 6, wl = 400, pal = viridis, collevels = seq(-100, 0, 1), labels = c("ID", "lek"), width = 15.5, height = 8.5, res = 200, tags = c("lek"), tag.pal = list(magma), mar = 0.005, hatching = 2, parallel = 1)
move_imgs(from = cut_path, to = "./data/raw", overwrite = TRUE)
songtype_df <- data.frame(ID = c(397, 400, 403, 413, 415, 416, 417, 419, 422, 423, 424, 432, 433, 435, 437, 438, 444, 446, 448, 449, 0, 108, 124, 146, 176, 178, 179, 36, 54, 9, 384, 390, 391, 395, 398, 402), songtype = c("a", "b", "f", "c", "f", "c", "c", "g", "e", "c", "d", "h", "i", "j", "i", "j", "l", "k", "l", "m", "n", "l", "o", "p", "q", "q", "q", "r", "q", "q", "s", "a", "a", "a", "c", "b"))
write.csv(songtype_df, "./output/songtype_classification.csv", row.names = FALSE)
sessionInfo()
FALSE R version 4.0.3 (2020-10-10)
FALSE Platform: x86_64-pc-linux-gnu (64-bit)
FALSE Running under: Ubuntu 20.04.1 LTS
FALSE
FALSE Matrix products: default
FALSE BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
FALSE LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
FALSE
FALSE locale:
FALSE [1] LC_CTYPE=pt_PT.UTF-8 LC_NUMERIC=C
FALSE [3] LC_TIME=es_CR.UTF-8 LC_COLLATE=pt_PT.UTF-8
FALSE [5] LC_MONETARY=es_CR.UTF-8 LC_MESSAGES=pt_PT.UTF-8
FALSE [7] LC_PAPER=es_CR.UTF-8 LC_NAME=C
FALSE [9] LC_ADDRESS=C LC_TELEPHONE=C
FALSE [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C
FALSE
FALSE attached base packages:
FALSE [1] parallel stats graphics grDevices utils datasets methods
FALSE [8] base
FALSE
FALSE other attached packages:
FALSE [1] readxl_1.3.1 viridis_0.5.1 viridisLite_0.3.0 Rraven_1.0.12
FALSE [5] warbleR_1.1.27 NatureSounds_1.0.3 knitr_1.31 seewave_2.1.6
FALSE [9] tuneR_1.3.3 ggplot2_3.3.3 pbapply_1.4-3
FALSE
FALSE loaded via a namespace (and not attached):
FALSE [1] Rcpp_1.0.6 highr_0.8 cellranger_1.1.0 pillar_1.4.6
FALSE [5] compiler_4.0.3 bitops_1.0-6 tools_4.0.3 digest_0.6.27
FALSE [9] evaluate_0.14 lifecycle_0.2.0 tibble_3.0.4 gtable_0.3.0
FALSE [13] fftw_1.0-6 pkgconfig_2.0.3 rlang_0.4.10 yaml_2.2.1
FALSE [17] xfun_0.22 gridExtra_2.3 withr_2.4.1 stringr_1.4.0
FALSE [21] dplyr_1.0.2 generics_0.1.0 vctrs_0.3.6 grid_4.0.3
FALSE [25] tidyselect_1.1.0 glue_1.4.2 R6_2.5.0 dtw_1.22-3
FALSE [29] rmarkdown_2.4 purrr_0.3.4 magrittr_2.0.1 scales_1.1.1
FALSE [33] ellipsis_0.3.1 htmltools_0.5.1.1 MASS_7.3-53 colorspace_2.0-0
FALSE [37] proxy_0.4-25 stringi_1.5.3 RCurl_1.98-1.3 signal_0.7-6
FALSE [41] munsell_0.5.0 rjson_0.2.20 crayon_1.4.1