Detecting rat ultrasonic vocalizations

Curating count data

Author
Published

February 25, 2026

Objetive

  • Curate and double check OH data

Load packages

Code
## add 'developer/' to packages to install from github
x <- c(
    "parallel",
    "kableExtra",
    "knitr",
    "DT",
    "viridis",
    "ggplot2",
    "warbleR"
)

sketchy::load_packages(x)

opts_knit$set(root.dir = "..")

options(knitr.kable.NA = '')

print <- function(x) {
    kb <- kable(x, row.names = FALSE, digits = 4, "html")
    kb <- kable_styling(kb,
                        bootstrap_options = c("striped", "hover", "condensed", "responsive"))
    scroll_box(kb, width = "100%")
}

theme_set(theme_classic())

options(
    DT.options = list(
        pageLength = 200,
        scrollX = TRUE,
        scrollY = "800px",
        dom = 'Bfrtip',
        buttons = c('copy', 'csv', 'excel')
    )
)

datatable2 <- function(x, ...) datatable(data = x, extensions = 'Buttons', height = 200, ...)

source("~/Dropbox/R_package_testing/warbleR/R/acoustic_activity.R")
source("~/Dropbox/R_package_testing/warbleR/R/internal_functions.R")

1 Read data

Code
# read bout 1
 bout1_dat <- readRDS(
   file.path(
        "/home/m/Dropbox/Projects/rat_vocalization_alcohol/data/processed/",
        "bout1_USV_counts_per_minute_and_detections.RDS"
    )
)

 # read bout 2
 bout2_dat <- readRDS(
   file.path(
        "/home/m/Dropbox/Projects/rat_vocalization_alcohol/data/processed/",
        "bout2_USV_counts_per_minute_and_detections.RDS"
    )
)
 
# table(bout2_dat$wide_count_min$experiment) 
# table(bout1_dat$wide_count_min$experiment) 

obesidad_counts <- bout2_dat$wide_count_min[bout2_dat$wide_count_min$experiment == "Audios Obesidad 3", ]

oh_counts <- rbind(bout1_dat$wide_count_min, bout2_dat$wide_count_min[bout2_dat$wide_count_min$experiment != "Audios Obesidad 3", ])

oh_metadata <- rbind(bout1_dat$metadata, bout2_dat$metadata[grep("Obesidad 3", bout2_dat$metadata$original_dir, invert = TRUE), ])


# remove those shorter than 1 min
oh_counts <- oh_counts[oh_counts$sound.file.duration >= 60, ] 

# table(oh_counts$experiment)

2 Add metadata

Observations:

  • bout 172 only has 2 cages
Code
oh_counts$sound.file.duration.min <- round(oh_counts$sound.file.duration/ 60, 1)

oh_counts$date <- sapply(oh_counts$sound.files, function(x) {
    # extract date
    date <- strsplit(split = "202",
                     x = x,
                     fixed = TRUE)[[1]]
    date <- ifelse(grepl("EXP2_OH", date[length(date)]), date[2], date[length(date)])
    date <- paste0("202", substr(date, 1, 7))
    date
})

# unique(oh_counts$date)
oh_counts$date <- as.Date(oh_counts$date)

oh_counts$time <- sapply(oh_counts$sound.files, function(x) {
    # extract date
    date <- strsplit(split = "202",
                     x = x,
                     fixed = TRUE)[[1]]
    date <- ifelse(grepl("EXP2_OH", date[length(date)]), date[2], date[length(date)])
    date <- substr(date, 9, 16)
    date
})
# unique(oh_counts$time)

oh_counts <- oh_counts[order(oh_counts$date, oh_counts$time), ]

oh_counts$directory <- sapply(oh_counts$sound.files, function(x){
 oh_metadata$original_dir[oh_metadata$new_name == x]
  }
)

oh_counts$cage <- substr(oh_counts$sound.files, 1, 1)
oh_counts$cage[oh_counts$cage == "E"] <- NA

oh_counts$cage <- ifelse(oh_counts$cage == "2",substr(basename(oh_counts$directory),1, 1), oh_counts$cage)

oh_counts$cage <- ifelse(grepl("CT_PRUEBA",oh_counts$sound.files),substr(oh_counts$sound.files, 11, 11), oh_counts$cage)

# table(oh_counts$cage)

num_time <- as.numeric(gsub("-", "", oh_counts$time))


oh_counts$bout <- NA
oh_counts$bout[1] <- 1

for (i in 2:nrow(oh_counts)) {
  
    if (!is.na(num_time[i])){
    if (num_time[i] - num_time[i - 1] <= 41 & oh_counts$date[i] == oh_counts$date[i - 1]){
  oh_counts$bout[i] <- oh_counts$bout[i - 1]
  } else {
    oh_counts$bout[i] <- oh_counts$bout[i - 1] + 1
  }
        } else{ oh_counts$bout[i] <- NA}
  }
tab <- table(oh_counts$bout)

# all(tab == 4)
# 
# which(tab < 4)

num_time <- as.numeric(gsub("-", "", oh_counts$time))


oh_counts$day.bout[1] <- NA

oh_counts$day.bout[1] <- 1

for (i in 2:nrow(oh_counts)) {
    if (!is.na(oh_counts$date[i])){
  if (oh_counts$date[i] != oh_counts$date[i - 1]) {
    oh_counts$day.bout[i] <- 1
  } else {
  
  if (num_time[i] - num_time[i - 1] <= 41 & oh_counts$date[i] == oh_counts$date[i - 1]){
  oh_counts$day.bout[i] <- oh_counts$day.bout[i - 1]
  } else {
    oh_counts$day.bout[i] <- oh_counts$day.bout[i - 1] + 1
  }
    }
        } else{ oh_counts$day.bout[i] <- NA}
  }


tab <- table(oh_counts$day.bout, oh_counts$date)

# all(tab <= 4)


oh_counts$period <- ifelse(oh_counts$sound.file.duration < 660, "pre", "post")
oh_counts$period <- ifelse(grepl("Día 0", oh_counts$day), "day 0", oh_counts$period)

# table(oh_counts$period)


min_cols <- paste("min", 1:30)

# order columns
oh_counts <- oh_counts[,c(
setdiff(names(oh_counts), min_cols),
min_cols
)]

3 Graphs summarizing data

Code
agg <- aggregate(total ~ period + day + experiment, data = oh_counts, length)

agg$day_num <- sapply(strsplit(x = gsub("Día ", "", agg$day), split = " ", fixed = TRUE), "[[", 1)

agg$day_num <- factor(agg$day_num, levels = sort(as.numeric(unique(agg$day_num))))

agg$period <- factor(agg$period, levels = c("day 0", "pre", "post"))

# bar graph colored by period
ggplot(agg, aes(x = day_num, y = total, fill = period)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Total recordings per day",
    x = "Day",
    y = "# of recordings"
  ) +
  scale_fill_viridis_d(option = "G", alpha = 0.9) +
    facet_wrap(~ experiment, ncol = 2)

Code
agg <- aggregate(total ~ period + day + experiment, data = oh_counts, sum)

agg$day_num <- sapply(strsplit(x = gsub("Día ", "", agg$day), split = " ", fixed = TRUE), "[[", 1)

agg$day_num <- factor(agg$day_num, levels = sort(as.numeric(unique(agg$day_num))))

agg$period <- factor(agg$period, levels = c("day 0", "pre", "post"))

# bar graph colored by period
ggplot(agg, aes(x = day_num, y = total, fill = period)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Total USVs counts per day all minutes",
    x = "Day",
    y = "# of USVs"
  ) +
  scale_fill_viridis_d(option = "G", alpha = 0.9) +
    facet_wrap(~ experiment, ncol = 2)

Code
post_last15 <- oh_counts[oh_counts$period == "post", c("experiment", "day", paste("min", 15:30))]

post_last15$total <- rowSums(post_last15[, paste("min", 15:30)], na.rm = TRUE)

agg <- aggregate(total ~ day  + experiment, data = post_last15, sum)

agg$day_num <- sapply(strsplit(x = gsub("Día ", "", agg$day), split = " ", fixed = TRUE), "[[", 1)

agg$day_num <- factor(agg$day_num, levels = sort(as.numeric(unique(agg$day_num))))


# bar graph colored by period
ggplot(agg, aes(x = day_num, y = total)) +
  geom_bar(stat = "identity", position = "dodge", fill = "#54C9ADB3") +
  labs(
    title = "Total USV counts per day",
    x = "Day",
    y = "Total USV counts"
  )  + 
    facet_wrap(~ experiment, ncol = 2)

4 Activity by minute

Code
detections <- rbind(bout1_dat$detections, bout2_dat$detections)
file_durations <- rbind(bout1_dat$wide_count_min[, c("sound.files", "sound.file.duration")], bout2_dat$wide_count_min[, c("sound.files", "sound.file.duration")])

names(file_durations)[2] <- "duration"

# counting
count_high_min <- acoustic_activity(
    X = detections,
    time.window = 60,
    hop.size = 1,
    Y = file_durations
)

# rate per minute
count_high_min$rate <- count_high_min$rate * 60

## add metadata
all(count_high_min$sound.files %in% oh_counts$sound.files)
[1] FALSE
Code
count_high_min <- count_high_min[count_high_min$sound.files %in% oh_counts$sound.files,]

setdiff(unique(count_high_min$sound.files), unique(oh_counts$sound.files))
character(0)
Code
count_high_min$experiment <- sapply(count_high_min$sound.files, function(x) {
    oh_counts$experiment[oh_counts$sound.files == x][1]
})

count_high_min$day <- sapply(count_high_min$sound.files, function(x) {
    oh_counts$day[oh_counts$sound.files == x][1]
})

count_high_min$period <- sapply(count_high_min$sound.files, function(x) {
    oh_counts$period[oh_counts$sound.files == x][1]
})

# get highest per sound file
sub_count_high_min <- count_high_min[count_high_min$duration == 60, ]


sub_count_high_min_list <- lapply(unique(sub_count_high_min$sound.files), function(x){
    Y <- sub_count_high_min[sub_count_high_min$sound.files == x,]
    Y$norm.rate <- Y$rate / max(Y$rate, na.rm = TRUE) 
    return(Y)
    })

sub_count_high_min <- do.call(rbind, sub_count_high_min_list)

sub_count_high_min$period <- factor(sub_count_high_min$period, levels = c("day 0", "pre", "post"))


# aggregate by start, day and period
agg_count <- aggregate(norm.rate ~ start + period + experiment, data = sub_count_high_min, mean)

tab <- table(agg_count$start)
agg_count <- agg_count[agg_count$start %in% names(tab)[tab > 3], ]

sub_count_high_min <- sub_count_high_min[sub_count_high_min$start %in% names(tab)[tab > 3],]

sub_count_high_min$experiment <- factor(sub_count_high_min$experiment, levels = c("2020 OH3 M-SENS - USVs", "2022 OH5 H-SENS - USVs", "2022 OH4 M-INCUB - USVs", "2023 OH6 H-INCUB - USVs"))

# ggplot of rate per start faceted by sound file
ggplot(sub_count_high_min, aes(x = start / 60, y = norm.rate, group = sound.files, color = period)) +
    geom_line() +
    facet_wrap( ~  experiment+ period, scales = "free_y", ncol = 3) +
    labs(
        title = "Rate of USV detections across recordings",
        x = "Time (min)",
        y = "Normalized rate (USVs/min)"
    ) + 
    geom_line(data = agg_count, aes(x = start / 60, y = norm.rate), size = 1, inherit.aes = FALSE) +
    scale_color_viridis_d(option = "G", alpha = 0.5, end = 0.8, guide = NULL) +
    theme_classic()
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
Warning: Removed 362 rows containing missing values or values outside the scale range
(`geom_line()`).

Highest min at the first, second and third segments of the file:

Code
count_sections_list <- lapply(unique(count_high_min$sound.files), function(x){
    
count_high_min_file <- count_high_min[
  count_high_min$sound.files == x,
]

max_dur <- max(count_high_min_file$start)

cut_points <- c(0, max_dur / 3, 2 * max_dur / 3, max_dur)

out_sections <- lapply(1:(length(cut_points) - 1), function(i) {
    
    count_high_min_file_segment <- count_high_min_file[
        count_high_min_file$start >= cut_points[i] & count_high_min_file$start < cut_points[i + 1], 
    ]
    
    Y <- count_high_min_file_segment[which.max(count_high_min_file_segment$rate),]
    
    Y$section <- ifelse(i == 1, "begin", ifelse(i == 2, "middle", "end")) 
    
    return(Y)
    })

 out_sections <- do.call(rbind, out_sections)

    return(out_sections)
    
})

count_sections <- do.call(rbind, count_sections_list)

Session information

Click to see
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.5.2 (2025-10-31)
 os       Ubuntu 22.04.5 LTS
 system   x86_64, linux-gnu
 ui       X11
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       America/Costa_Rica
 date     2026-02-25
 pandoc   3.2 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/tools/x86_64/ (via rmarkdown)
 quarto   1.7.31 @ /usr/local/bin/quarto

─ Packages ───────────────────────────────────────────────────────────────────
 package       * version   date (UTC) lib source
 bitops          1.0-9     2024-10-03 [3] CRAN (R 4.5.0)
 brio            1.1.5     2024-04-24 [1] CRAN (R 4.5.0)
 bslib           0.10.0    2026-01-26 [1] CRAN (R 4.5.2)
 cachem          1.1.0     2024-05-16 [1] CRAN (R 4.5.0)
 cli             3.6.5     2025-04-23 [1] CRAN (R 4.5.0)
 crayon          1.5.3     2024-06-20 [1] CRAN (R 4.5.0)
 crosstalk       1.2.2     2025-08-26 [1] CRAN (R 4.5.0)
 curl            7.0.0     2025-08-19 [1] CRAN (R 4.5.0)
 devtools        2.4.5     2022-10-11 [1] CRAN (R 4.5.0)
 digest          0.6.39    2025-11-19 [1] CRAN (R 4.5.0)
 dplyr           1.1.4     2023-11-17 [1] CRAN (R 4.5.0)
 DT            * 0.33      2024-04-04 [3] CRAN (R 4.5.0)
 dtw             1.23-1    2022-09-19 [1] CRAN (R 4.5.0)
 ellipsis        0.3.2     2021-04-29 [3] CRAN (R 4.1.1)
 evaluate        1.0.5     2025-08-27 [1] CRAN (R 4.5.0)
 farver          2.1.2     2024-05-13 [1] CRAN (R 4.5.0)
 fastmap         1.2.0     2024-05-15 [1] CRAN (R 4.5.0)
 fftw            1.0-9     2024-09-20 [3] CRAN (R 4.5.0)
 fs              1.6.6     2025-04-12 [1] CRAN (R 4.5.0)
 generics        0.1.4     2025-05-09 [1] CRAN (R 4.5.0)
 ggplot2       * 4.0.2     2026-02-03 [1] CRAN (R 4.5.2)
 glue            1.8.0     2024-09-30 [1] CRAN (R 4.5.0)
 gridExtra       2.3       2017-09-09 [1] CRAN (R 4.5.0)
 gtable          0.3.6     2024-10-25 [1] CRAN (R 4.5.0)
 htmltools       0.5.9     2025-12-04 [1] CRAN (R 4.5.0)
 htmlwidgets     1.6.4     2023-12-06 [1] RSPM (R 4.5.0)
 httpuv          1.6.16    2025-04-16 [1] RSPM (R 4.5.0)
 httr            1.4.7     2023-08-15 [1] CRAN (R 4.5.0)
 jquerylib       0.1.4     2021-04-26 [1] CRAN (R 4.5.0)
 jsonlite        2.0.0     2025-03-27 [1] CRAN (R 4.5.0)
 kableExtra    * 1.4.0     2024-01-24 [1] CRAN (R 4.5.0)
 knitr         * 1.51      2025-12-20 [1] CRAN (R 4.5.2)
 labeling        0.4.3     2023-08-29 [1] CRAN (R 4.5.0)
 later           1.4.2     2025-04-08 [1] RSPM (R 4.5.0)
 lifecycle       1.0.5     2026-01-08 [1] CRAN (R 4.5.2)
 magrittr        2.0.4     2025-09-12 [1] CRAN (R 4.5.0)
 MASS            7.3-65    2025-02-28 [4] CRAN (R 4.4.3)
 memoise         2.0.1     2021-11-26 [1] CRAN (R 4.5.0)
 mime            0.13      2025-03-17 [1] CRAN (R 4.5.0)
 miniUI          0.1.2     2025-04-17 [3] CRAN (R 4.5.0)
 NatureSounds  * 1.0.5     2025-01-17 [1] CRAN (R 4.5.0)
 packrat         0.9.3     2025-06-16 [1] CRAN (R 4.5.2)
 pbapply         1.7-4     2025-07-20 [1] CRAN (R 4.5.0)
 pillar          1.11.1    2025-09-17 [1] CRAN (R 4.5.0)
 pkgbuild        1.4.8     2025-05-26 [1] CRAN (R 4.5.0)
 pkgconfig       2.0.3     2019-09-22 [1] CRAN (R 4.5.0)
 pkgload         1.4.1     2025-09-23 [1] CRAN (R 4.5.0)
 profvis         0.4.0     2024-09-20 [1] CRAN (R 4.5.0)
 promises        1.3.3     2025-05-29 [1] RSPM (R 4.5.0)
 proxy           0.4-29    2025-12-29 [1] CRAN (R 4.5.2)
 purrr           1.2.0     2025-11-04 [1] CRAN (R 4.5.0)
 R6              2.6.1     2025-02-15 [1] CRAN (R 4.5.0)
 RColorBrewer    1.1-3     2022-04-03 [1] CRAN (R 4.5.0)
 Rcpp            1.1.1     2026-01-10 [1] CRAN (R 4.5.2)
 RCurl           1.98-1.17 2025-03-22 [1] CRAN (R 4.5.0)
 remotes         2.5.0     2024-03-17 [1] CRAN (R 4.5.0)
 rjson           0.2.23    2024-09-16 [1] CRAN (R 4.5.0)
 rlang           1.1.7     2026-01-09 [1] CRAN (R 4.5.2)
 rmarkdown       2.30      2025-09-28 [1] CRAN (R 4.5.0)
 rstudioapi      0.17.1    2024-10-22 [1] CRAN (R 4.5.0)
 S7              0.2.1     2025-11-14 [1] CRAN (R 4.5.0)
 sass            0.4.10    2025-04-11 [1] CRAN (R 4.5.0)
 scales          1.4.0     2025-04-24 [1] CRAN (R 4.5.0)
 seewave       * 2.2.4     2025-08-19 [1] CRAN (R 4.5.0)
 sessioninfo     1.2.3     2025-02-05 [3] CRAN (R 4.5.0)
 shiny           1.10.0    2024-12-14 [1] CRAN (R 4.5.0)
 signal          1.8-1     2024-06-26 [1] CRAN (R 4.5.0)
 sketchy         1.0.7     2026-01-28 [1] CRANs (R 4.5.2)
 stringi         1.8.7     2025-03-27 [1] CRAN (R 4.5.0)
 stringr         1.6.0     2025-11-04 [1] CRAN (R 4.5.0)
 svglite         2.2.2     2025-10-21 [1] CRAN (R 4.5.0)
 systemfonts     1.3.1     2025-10-01 [1] CRAN (R 4.5.0)
 testthat        3.2.3     2025-01-13 [1] CRAN (R 4.5.0)
 textshaping     1.0.4     2025-10-10 [1] CRAN (R 4.5.0)
 tibble          3.3.0     2025-06-08 [1] RSPM (R 4.5.0)
 tidyselect      1.2.1     2024-03-11 [1] CRAN (R 4.5.0)
 tuneR         * 1.4.7     2024-04-17 [1] CRAN (R 4.5.0)
 urlchecker      1.0.1     2021-11-30 [1] CRAN (R 4.5.0)
 usethis         3.1.0     2024-11-26 [3] CRAN (R 4.5.0)
 vctrs           0.7.1     2026-01-23 [1] CRAN (R 4.5.2)
 viridis       * 0.6.5     2024-01-29 [1] CRAN (R 4.5.0)
 viridisLite   * 0.4.3     2026-02-04 [1] CRAN (R 4.5.2)
 warbleR       * 1.1.37    2025-10-22 [1] CRAN (R 4.5.0)
 withr           3.0.2     2024-10-28 [1] CRAN (R 4.5.0)
 xaringanExtra   0.8.0     2024-05-19 [1] CRAN (R 4.5.0)
 xfun            0.56      2026-01-18 [1] CRAN (R 4.5.2)
 xml2            1.5.2     2026-01-17 [1] CRAN (R 4.5.2)
 xtable          1.8-4     2019-04-21 [3] CRAN (R 4.0.1)
 yaml            2.3.12    2025-12-10 [1] CRAN (R 4.5.2)

 [1] /home/m/R/x86_64-pc-linux-gnu-library/4.5
 [2] /usr/local/lib/R/site-library
 [3] /usr/lib/R/site-library
 [4] /usr/lib/R/library
 * ── Packages attached to the search path.

──────────────────────────────────────────────────────────────────────────────