Load packages

## packages prefixed with 'username/' are installed from GitHub
x <- c("devtools", "maRce10/warbleR", "bioacoustics", "pbapply", "Rraven", "parallel", "viridis", "ggplot2", "knitr", "kableExtra", "maRce10/ohun", "DT", "formatR", "ranger", "chron")

aa <- lapply(x, function(y) {
  
  # get package name
  pkg <- strsplit(y, "/")[[1]]
  pkg <- pkg[length(pkg)]
  
  # check if installed; if not, install from GitHub or CRAN
  if (!pkg %in% installed.packages()[, "Package"]) {
    if (grepl("/", y)) devtools::install_github(y, force = TRUE) else install.packages(y)
  }

  # load package
  try(require(pkg, character.only = TRUE), silent = TRUE)
})

opts_knit$set(root.dir = "..")

Apply automatic detection to all data

cnsd_df <- consolidate(path = "/media/m/CHIRINO/Grabaciones SM4 2019/", dest.path = "/media/m/Agalychnis/Agalychnis_lemur/original_recordings",
    parallel = 14)

fix_wavs(path = "/media/m/Agalychnis/Agalychnis_lemur/original_recordings", samp.rate = 10)
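# optional check (not part of the original workflow): confirm that the converted
# files share the expected 10 kHz sampling rate; assumes fix_wavs() wrote them to
# a 'converted_sound_files' subfolder and that info_sound_files() returns a
# 'sample.rate' column
file_info <- info_sound_files(path = file.path("/media/m/Agalychnis/Agalychnis_lemur/original_recordings",
    "converted_sound_files"))
table(file_info$sample.rate)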


# move converted_sound_files manually
templates_est <- readRDS("./data/processed/mean_acoustic_parameter_templates_est.RDS")

corr_templ <- template_correlator(templates = templates_est[templates_est$sound.files ==
    "mean.PC1", , drop = FALSE], path = "/media/m/Agalychnis/Agalychnis_lemur/10_kHz_recordings/",
    parallel = 1, hop.size = 11.6, ovlp = 70)

saveRDS(corr_templ, "./data/processed/template_correlations_all_data.RDS")
corr_templ <- readRDS("./data/processed/template_correlations_all_data.RDS")

all_detec <- template_detector(template.correlations = corr_templ, threshold = 0.43,
    parallel = 10, pb = TRUE)

all_detec$template <- NULL
all_detec$sound.files <- as.factor(all_detec$sound.files)

saveRDS(all_detec, "./data/processed/template_detections_all_data.RDS")
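# quick sanity check (not part of the original workflow): distribution of template
# correlation scores and number of detections per recording; assumes template_detector()
# keeps the correlation score in a 'scores' column
summary(all_detec$scores)
head(sort(table(all_detec$sound.files), decreasing = TRUE))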

sukia_detec <- all_detec[grep("SUKIA", all_detec$sound.files), ]

rownames(sukia_detec) <- 1:nrow(sukia_detec)
saveRDS(sukia_detec, "./data/processed/template_detections_sukia_data.RDS")

chimu_detec <- all_detec[grep("SUKIA", all_detec$sound.files, invert = TRUE), ]

rownames(chimu_detec) <- 1:nrow(chimu_detec)

saveRDS(chimu_detec, "./data/processed/template_detections_chimu_data.RDS")
sukia_detec <- readRDS("./data/processed/template_detections_sukia_data.RDS")

rfm <- readRDS("./data/processed/data_and_model_random_forest_0.43_threshold_only_spectro_parameters.RDS")$rfm

dates <- unique(sapply(as.character(sukia_detec$sound.files), function(x) strsplit(x,
    "_")[[1]][2]))


# 388 dates
out <- warbleR:::pblapply_wrblr_int(dates, pbar = TRUE, function(y) {

    rds_file <- file.path("./data/detections", paste0("sukia_", y, ".RDS"))

    if (!file.exists(rds_file))
        out <- try(pred_FUN(x = y, file = rds_file, X = sukia_detec), silent = TRUE)

})
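The helper pred_FUN() is defined elsewhere in the project. The sketch below shows its assumed behavior, based on how it is called here and on the random forest model (rfm) loaded above: classify the template detections from a given date and save the predicted calls to file. The spectrographic predictors, the 1-3.5 kHz band (borrowed from the Raven export step further down) and the "true.positive" class label are assumptions, not the project's actual definition.

# minimal sketch of pred_FUN (assumed behavior; the actual definition lives
# elsewhere in the project and may differ)
pred_FUN <- function(x, file, X) {

    # keep detections from recordings of date 'x'
    Y <- X[grepl(x, X$sound.files), ]

    # measure spectrographic parameters (the predictors rfm appears to use,
    # judging by the model file name); band limits follow the 1-3.5 kHz range
    # used for the Raven export below
    sp <- spectro_analysis(Y, bp = c(1, 3.5), parallel = 1, pb = FALSE)

    # classify detections with the random forest model
    preds <- predict(rfm, data = sp[, !names(sp) %in% c("sound.files", "selec")])$predictions

    # keep detections classified as calls ("true.positive" label is an assumption)
    Y <- Y[preds == "true.positive", ]

    saveRDS(Y, file)

    return(Y)
}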
chimu_detec <- readRDS("./data/processed/template_detections_chimu_data.RDS")

rfm <- readRDS("./data/processed/data_and_model_random_forest_0.43_threshold_only_spectro_parameters.RDS")$rfm

dates <- unique(sapply(as.character(chimu_detec$sound.files), function(x) strsplit(x,
    "_")[[1]][2]))

out <- warbleR:::pblapply_wrblr_int(dates, pbar = TRUE, function(y) {

    rds_file <- file.path("./data/detections", paste0("chimu_", y, ".RDS"))

    if (!file.exists(rds_file))
        out <- try(pred_FUN(x = y, file = rds_file, X = chimu_detec), silent = TRUE)
    # pred_FUN(x)
})
detec_rds_files <- list.files(path = "./data/detections", full.names = TRUE)

detec_list <- pblapply(detec_rds_files, function(x) readRDS(x)[, c("sound.files",
    "selec", "start", "end")])

detects <- do.call(rbind, detec_list)

# add metadata
detects$site <- sapply(as.character(detects$sound.files), function(x) strsplit(x,
    "_")[[1]][1])

detects$date <- sapply(as.character(detects$sound.files), function(x) strsplit(x,
    "_")[[1]][2])
detects$recording.time <- gsub("\\.wav", "", sapply(as.character(detects$sound.files),
    function(x) strsplit(x, "_")[[1]][3]))
detects$date_time <- paste(detects$date, detects$recording.time, sep = "_")

detects$date <- strptime(x = detects$date, format = "%Y%m%d")

detects$recording.time <- format(strptime(x = detects$recording.time, format = "%H%M%S"),
    format = "%H%M%S")
detects$call.time <- format(strptime(x = detects$recording.time, format = "%H%M%S") +
    apply(detects[, c("start", "end")], 1, mean), format = "%H%M%S")

detects$rec_date_hour <- paste(sapply(as.character(detects$sound.files), function(x) strsplit(x,
    "_")[[1]][2]), substr(as.character(detects$call.time), 0, 2), sep = "_")


detects$recording.time <- times(detects$recording.time, format = "HMS")
detects$call.time <- times(detects$call.time, format = "HMS")

detects$call.time.num <- as.numeric(detects$call.time) * 24

write.csv(detects, "./data/detections_all_files.csv")
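As a worked check of the time arithmetic above (illustrative values only): a detection spanning 81.2 to 82.0 s into a file recorded at 19:00:00 gets a call time of 19:01:21.

# illustrative example of the call-time calculation (not actual data)
format(strptime("190000", format = "%H%M%S") + mean(c(81.2, 82)), format = "%H%M%S")
## [1] "190121"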

Get calls per hour (per site)

detects <- read.csv("./data/detections_all_files.csv")

# list all recordings
all_files <- list.files(path = .Options$warbleR$path)
site_date_time <- paste(sapply(as.character(all_files), function(x) strsplit(x, "_")[[1]][1]),
    sapply(as.character(all_files), function(x) strsplit(x, "_")[[1]][2]), gsub("\\.wav",
        "", sapply(as.character(all_files), function(x) strsplit(x, "_")[[1]][3])),
    sep = "_")

# for each site/date/hour combination get the call rate, based on the proportion
# of the hour that was recorded and the number of calls detected in that period

out <- pblapply(unique(substr(site_date_time, 0, 20)), cl = 1, function(x) {

    # get number of calls
    n_call <- sum(detects$rec_date_hour == paste(strsplit(x, "_")[[1]][2], substr(strsplit(x,
        "_")[[1]][3], 0, 2), sep = "_") & grepl(substr(strsplit(x, "_")[[1]][1],
        0, 8), x = detects$sound.files))

    files <- grep(x, all_files, value = TRUE)

    if (length(files) > 0) {

        durs <- duration_sound_files(files = files)

        prop_hour_recorded <- sum(durs$duration)/3600

        call_rate <- if (n_call > 0)
            n_call * prop_hour_recorded else 0
    } else {
        call_rate <- NA
        prop_hour_recorded <- NA
    }

    # rec_time in minutes
    output <- data.frame(site_date_hour = paste0(x, "0000"), call_rate = call_rate,
        n_call = n_call, rec_time = if (!is.na(prop_hour_recorded))
            prop_hour_recorded * 60 else NA)

    return(output)
})


call_rates <- do.call(rbind, out)

call_rates$hour <- as.numeric(substr(sapply(call_rates$site_date_hour, function(x) strsplit(x,
    "_")[[1]][3]), 0, 2))
call_rates$month <- as.numeric(substr(sapply(call_rates$site_date_hour, function(x) strsplit(x,
    "_")[[1]][2]), 5, 6))
call_rates$year <- as.numeric(substr(sapply(call_rates$site_date_hour, function(x) strsplit(x,
    "_")[[1]][2]), 0, 4))
call_rates$site <- sapply(call_rates$site_date_hour, function(x) strsplit(x, "_")[[1]][1])

write.csv(call_rates, "./data/processed/call_rate_per_date_time_and_site.csv", row.names = FALSE)
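Total recording effort per site can be summarized from the table just written (rec_time is in minutes, as noted above; hour slots with no recordings are dropped by aggregate()):

# total recorded hours per site (a quick summary, not part of the original workflow)
aggregate(rec_time ~ site, data = call_rates, FUN = function(x) round(sum(x) / 60, 1))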

Call rate across hours of the day by site

Removing 5 pm data

call_rates <- read.csv("./data/processed/call_rate_per_date_time_and_site.csv")

# remove 5 pm data
call_rates <- call_rates[call_rates$hour != 17, ]


agg_hour_rate <- aggregate(call_rate ~ hour + site, data = call_rates, mean)

agg_hour_rate$sd <- aggregate(call_rate ~ hour + site, data = call_rates, sd)[, 3]

agg_hour_rate$hour_factor <- factor(agg_hour_rate$hour, levels = c("17", "18", "19",
    "20", "21", "22", "23", "0", "1", "2", "3", "4"))

agg_hour_rate$site <- factor(agg_hour_rate$site, levels = c("SUKIA", "LAGCHIMU"))


ggplot(agg_hour_rate, aes(x = hour_factor, y = call_rate, fill = site)) + geom_bar(stat = "identity",
    position = "identity", show.legend = FALSE) + geom_errorbar(aes(ymin = call_rate,
    ymax = call_rate + sd), width = 0) + scale_fill_viridis_d(alpha = 0.9, begin = 0.2,
    end = 0.7) + facet_wrap(~site)

Call rate across hours of the day by site and month

agg_hour_rate_month <- aggregate(call_rate ~ hour + month + site, data = call_rates,
    mean)

agg_hour_rate_month$sd <- aggregate(call_rate ~ hour + month + site, data = call_rates,
    sd)[, 4]

agg_hour_rate_month$hour_factor <- factor(agg_hour_rate_month$hour, levels = c("17",
    "18", "19", "20", "21", "22", "23", "0", "1", "2", "3", "4"))

agg_hour_rate_month$month_factor <- as.factor(month.abb[agg_hour_rate_month$month])

agg_hour_rate_month$month_factor <- factor(agg_hour_rate_month$month_factor, levels = month.abb)


ggplot(agg_hour_rate_month, aes(x = hour_factor, y = call_rate, fill = site)) + geom_bar(stat = "identity",
    position = "identity") + geom_errorbar(aes(ymin = call_rate, ymax = call_rate +
    sd), width = 0) + scale_fill_viridis_d(alpha = 0.9, begin = 0.2, end = 0.7) +
    labs(y = "Call rate (calls/ hour)", x = "Hour") + facet_grid(month_factor ~ site) +
    theme_classic()

ggplot(agg_hour_rate_month, aes(x = hour_factor, y = call_rate, fill = site)) + geom_bar(stat = "identity",
    position = "identity") + scale_fill_viridis_d(alpha = 0.9, begin = 0.2, end = 0.7) +
    labs(y = "Call rate (calls/ hour)", x = "Hour") + facet_grid(month_factor ~ site) +
    theme_classic()

Call rate across hours of the day by site and year

agg_hour_rate_month <- aggregate(call_rate ~ hour + month + site + year, data = call_rates,
    mean)

agg_hour_rate_month$sd <- aggregate(call_rate ~ hour + month + site + year, data = call_rates,
    sd)[, 5]

agg_hour_rate_month$hour_factor <- factor(agg_hour_rate_month$hour, levels = c("17",
    "18", "19", "20", "21", "22", "23", "0", "1", "2", "3", "4"))

agg_hour_rate_month$month_factor <- as.factor(month.abb[agg_hour_rate_month$month])

agg_hour_rate_month$month_factor <- factor(agg_hour_rate_month$month_factor, levels = month.abb)

ggplot(agg_hour_rate_month, aes(x = hour_factor, y = call_rate, fill = site)) + geom_bar(stat = "identity",
    position = "identity") + scale_fill_viridis_d(alpha = 0.9, begin = 0.2, end = 0.7) +
    labs(y = "Call rate (calls/ hour)", x = "Hour") + facet_grid(month_factor ~ site +
    year) + theme_classic()

ggplot(agg_hour_rate_month[agg_hour_rate_month$year != "2021", ], aes(x = hour_factor,
    y = call_rate, fill = site)) + geom_bar(stat = "identity", position = "identity") +
    scale_fill_viridis_d(alpha = 0.9, begin = 0.2, end = 0.7) + labs(y = "Call rate (calls/ hour)",
    x = "Hour") + facet_grid(month_factor ~ site + year) + theme_classic()

Normalize to a maximum of 1 within each site (to make call rates comparable across sites)

agg_hour_rate_month$norm_call_rate <- ifelse(agg_hour_rate_month$site == "SUKIA",
    agg_hour_rate_month$call_rate/max(agg_hour_rate_month$call_rate[agg_hour_rate_month$site ==
        "SUKIA"]), agg_hour_rate_month$call_rate/max(agg_hour_rate_month$call_rate[agg_hour_rate_month$site ==
        "LAGCHIMU"]))

ggplot(agg_hour_rate_month[agg_hour_rate_month$year != "2021", ], aes(x = hour_factor,
    y = norm_call_rate, fill = site)) + geom_bar(stat = "identity", position = "identity") +
    scale_fill_viridis_d(alpha = 0.9, begin = 0.2, end = 0.7) + labs(y = "Normalized call rate",
    x = "Hour") + facet_grid(month_factor ~ site + year) + theme_classic()

Number of recordings per month and site

all_files <- list.files(path = .Options$warbleR$path)

site_month <- data.frame(site = sapply(as.character(all_files), function(x) strsplit(x,
    "_")[[1]][1]), month = as.numeric(substr(sapply(as.character(all_files), function(x) strsplit(x,
    "_")[[1]][2]), 5, 6)), year = as.numeric(substr(sapply(as.character(all_files),
    function(x) strsplit(x, "_")[[1]][2]), 0, 4)))

write.csv(site_month, "./data/processed/recordings_per_month_and_site.csv", row.names = FALSE)
site_month <- read.csv("./data/processed/recordings_per_month_and_site.csv")

site_month$month_factor <- as.factor(month.abb[site_month$month])

site_month$month_factor <- factor(site_month$month_factor, levels = month.abb)

agg_site_month <- aggregate(month ~ month_factor + site + year, site_month, length)


ggplot(agg_site_month, aes(x = month_factor, y = month, fill = site)) + geom_bar(stat = "identity",
    position = "identity", show.legend = FALSE) + scale_fill_viridis_d(alpha = 0.9,
    begin = 0.2, end = 0.7) + labs(y = "Number of recordings", x = "Month") + facet_wrap(~site +
    year) + theme_classic()
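The counts in the plot can be cross-checked with a plain contingency table:

# cross-check: number of recordings per site, year and month
with(site_month, table(site, year, month_factor))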

Export Raven selections to double-check detections

# get those from sukia at 5pm and specific months
sub_detects <- detects[grepl("SUKIA", detects$sound.files) & substr(detects$rec_date_hour,
    10, 11) == "17" & substr(detects$rec_date_hour, 5, 6) %in% c("01", "02", "04",
    "05", "06") & substr(detects$rec_date_hour, 0, 4) == "2020" | grepl("SUKIA",
    detects$sound.files) & substr(detects$rec_date_hour, 10, 11) == "17" & substr(detects$rec_date_hour,
    5, 6) %in% c("12") & substr(detects$rec_date_hour, 0, 4) == "2019", ]

sub_detects$bottom.freq <- 1
sub_detects$top.freq <- 3.5

exp_raven(X = sub_detects, sound.file.path = .Options$warbleR$path, path = "./data/processed/selection_tables/detection_double-check",
    file.name = "double_checking_detections")

# fix_path(path = './data/processed/selection_tables/detection_double-check',
# new.begin.path =
# '/Users/fabiolachirino/Dropbox/Fabiola/proyecto_lemur/data/raw/sound_files/10_kHz_5_min_cuts/',
# sound.file.col = 'Begin File')
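After the detections are reviewed manually in Raven, the corrected selection tables could be read back into R with Rraven::imp_raven() (a sketch; assumes the reviewed tables are saved back to the same folder):

# re-import manually reviewed Raven selections (sketch)
checked_detec <- imp_raven(path = "./data/processed/selection_tables/detection_double-check",
    warbler.format = TRUE, all.data = FALSE)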

Next steps

  • Align acoustic and climatic data by hour (see the sketch below)
  • Stats!
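
As a rough starting point for the first step, hourly climate records could be joined to the call rates via the site_date_hour key built above (the climate file name and its columns are hypothetical):

# hypothetical sketch: join hourly climate data to call rates by site, date and hour
# (assumes a climate table with a matching 'site_date_hour' key; file name is made up)
clim <- read.csv("./data/climate_hourly.csv")
acou_clim <- merge(call_rates, clim, by = "site_date_hour", all.x = TRUE)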

Session information

## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
## 
## locale:
##  [1] LC_CTYPE=pt_BR.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=pt_BR.UTF-8    
##  [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=pt_BR.UTF-8   
##  [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] chron_2.3-56       ranger_0.13.1      formatR_1.11       DT_0.18           
##  [5] ohun_0.1.0         kableExtra_1.3.1   ggplot2_3.3.5      viridis_0.6.2     
##  [9] viridisLite_0.4.0  Rraven_1.0.13      pbapply_1.5-0      bioacoustics_0.2.8
## [13] warbleR_1.1.27     NatureSounds_1.0.4 knitr_1.39         seewave_2.2.0     
## [17] tuneR_1.4.0        devtools_2.4.2     usethis_2.0.1     
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-7      fs_1.5.0          webshot_0.5.2     httr_1.4.2       
##  [5] rprojroot_2.0.2   tools_4.1.0       bslib_0.2.5.1     utf8_1.2.2       
##  [9] R6_2.5.1          DBI_1.1.1         colorspace_2.0-2  withr_2.4.3      
## [13] tidyselect_1.1.1  gridExtra_2.3     prettyunits_1.1.1 processx_3.5.2   
## [17] moments_0.14.1    compiler_4.1.0    cli_3.1.0         rvest_1.0.0      
## [21] xml2_1.3.2        desc_1.3.0        labeling_0.4.2    sass_0.4.0       
## [25] scales_1.1.1      callr_3.7.0       proxy_0.4-26      dtw_1.22-3       
## [29] stringr_1.4.0     digest_0.6.29     rmarkdown_2.13    pkgconfig_2.0.3  
## [33] htmltools_0.5.2   sessioninfo_1.1.1 highr_0.9         fastmap_1.1.0    
## [37] htmlwidgets_1.5.3 rlang_1.0.2       rstudioapi_0.13   farver_2.1.0     
## [41] jquerylib_0.1.4   generics_0.1.0    jsonlite_1.8.0    dplyr_1.0.7      
## [45] RCurl_1.98-1.6    magrittr_2.0.3    Matrix_1.3-4      Rcpp_1.0.8.3     
## [49] munsell_0.5.0     fansi_1.0.0       lifecycle_1.0.1   stringi_1.7.6    
## [53] yaml_2.3.5        MASS_7.3-54       pkgbuild_1.2.0    grid_4.1.0       
## [57] crayon_1.5.1      lattice_0.20-44   ps_1.6.0          pillar_1.6.4     
## [61] rjson_0.2.21      fftw_1.0-7        pkgload_1.2.1     glue_1.6.2       
## [65] evaluate_0.15     remotes_2.4.0     vctrs_0.3.8       testthat_3.0.4   
## [69] gtable_0.3.0      purrr_0.3.4       assertthat_0.2.1  cachem_1.0.5     
## [73] xfun_0.31         signal_0.7-7      tibble_3.1.6      memoise_2.0.0    
## [77] ellipsis_0.3.2