Checking annotation status

Bird song evolution

Author
Published

July 14, 2025

Source code, data and annotation protocol found at https://github.com/maRce10/bird_song_evolution

Purpose

  • Double-check annotations

 

Load packages

Code
# knitr is required for creating html/pdf/word reports; formatR is
# used for soft-wrapping code

# install/load packages
sketchy::load_packages(packages = c("knitr", "formatR", "rprojroot",
    "viridis", "googlesheets4", "warbleR", "maRce10/Rraven", "googledrive",
    "maRce10/ohun"))


# folder holding the consolidated sound files (machine-specific absolute
# path)
path_sound_files <- "/home/m/OneDrive/bird_song_evolution/bird_song_recordings/consolidated_sound_files/"

# disabled alternative location of the annotations:
# annotation_path <- '/home/m/Insync/marceloa27@gmail.com/Google
# Drive/bird_song_evolution/raven_selections/'
# folder holding the annotation files, relative to the working directory
annotation_path <- "./data/processed/annotations/"

# disabled: check annotations warbleR_options(wav.path =
# 'run/user/1000/gvfs/smb-share:server=cinnas.local,share=neurobiología/marcelo_araya/bird_song_evolution/audios/consolidated_sound_files')


# folder holding the annotated spectrogram images (machine-specific
# absolute path)
ann_spec_path <- "/home/m/OneDrive/bird_song_evolution/annotated_spectrograms/"

1 Consolidate sound files

Code
# cns <- consolidate(path =
# '/home/m/OneDrive/bird_song_evolution/bird_song_recordings/',
# dest.path =
# '/media/m/Expansion/Projects/Ongoing/bird_song_evolution/consolidated_sound_files/',
# parallel = 10)

2 Check sound files

Code
# Check every consolidated sound file, including its header
csf <- check_sound_files(path = path_sound_files, check.header = TRUE,
    parallel = 20)

# files whose check result mentions "corrupted"
csf2 <- csf[grep("corrupted", csf$result), ]

# Only repair and re-check when something was flagged: with no corrupted
# files there is nothing to convert and (presumably) no
# 'converted_sound_files' folder to re-check -- TODO confirm against
# fix_wavs() documentation
if (nrow(csf2) > 0) {
    fix_wavs(path = path_sound_files, files = csf2$sound.files)

    csf3 <- check_sound_files(path = file.path(path_sound_files, "converted_sound_files"),
        check.header = TRUE, parallel = 20)
}
Code
### CHANGES IN ANNOTATION FILES #########
# Compare the current content of the annotation folder (file list and md5
# checksums) against the snapshot saved by a previous run
prev_snapshot <- readRDS("./data/processed/prev_raven_annotations_snapshot.RDS")

new_snapshot <- fileSnapshot(annotation_path, md5sum = TRUE, recursive = TRUE)

changes <- changedFiles(before = prev_snapshot, after = new_snapshot)

# only save if something changed; the comparison is applied to the number
# of files (not inside length()) and scalar operators are used because
# `if` needs a single TRUE/FALSE
something_changed <- length(changes$added) > 0 || length(changes$changed) > 0 ||
    length(changes$deleted) > 0

# nothing is written when knitr reports HTML output
if (something_changed && !knitr::is_html_output(excludes = "markdown")) {
    saveRDS(new_snapshot, "./data/processed/prev_raven_annotations_snapshot.RDS")
    saveRDS(changes, "./data/processed/changes_in_annotation_files.RDS")
}


### CHECK ANNOTATIONS ################# if not reading from
### google docs
# the sheet is read without authentication
googlesheets4::gs4_deauth()
# googlesheets4::gs4_auth()

rec_data <- read_sheet("https://docs.google.com/spreadsheets/d/16ukhyf37hm13f1FXB2JQ-tCfHoxXy2qj_TX2xWvTVu8/edit#gid=148139271")

# the text "NA" in the assignee column is turned into a real missing value
# (%in% never returns NA, so already-missing entries are left alone)
rec_data$assigned_to[rec_data$assigned_to %in% "NA"] <- NA

# remove data with no family: both truly missing values and the text "NA".
# Testing is.na() first matters because an NA in a logical row index
# would add an all-NA row instead of dropping the record
rec_data <- rec_data[!is.na(rec_data$family) & rec_data$family != "NA", ]

# re run manually

# prev_status <-
# readRDS('./data/processed/annotation_status_results.RDS')
# names(prev_status) <- paste0('prev_', names(prev_status))
# saveRDS(prev_status,
# './data/processed/previous_annotation_status_results.RDS')

# Import all selection files found (recursively) under the annotation
# folder
anns <- imp_raven(path = annotation_path, recursive = TRUE, pb = TRUE,
    ext.case = "lower", name.from.file = TRUE, warbler.format = TRUE,
    all.data = TRUE)

# grep('.Table.1 (1)', anns$sound.files, fixed = TRUE, value =
# T)

# the folder part of the selection file path is used as the family and the
# file name alone is kept afterwards
anns$family <- dirname(anns$selec.file)
anns$selec.file <- basename(anns$selec.file)

# species = text before the first "-" of the file name, underscores
# replaced by spaces
anns$species <- gsub("_", " ", vapply(strsplit(anns$selec.file, "-"),
    function(parts) parts[1], character(1)))


# get those with more than 1 annotation file: selection files that still
# appear more than once after keeping a single row per 'Begin Path'
dup_anns <- local({
    files_per_path <- table(anns$selec.file[!duplicated(anns$`Begin Path`)])
    repeated_files <- names(files_per_path[files_per_path > 1])

    # one row per distinct sound file / selection file / path combination
    unique(anns[anns$selec.file %in% repeated_files, c("sound.files",
        "selec.file", "Begin Path")])
})


# All annotation files: those imported plus the file names stored in the
# 'Rraven' option (these are labelled "Empty file" below)
ann_files <- data.frame(file = unique(c(anns$selec.file, unlist(.Options$Rraven))))

# species = text before the first "-" of the file name, underscores
# replaced by spaces
ann_files$species <- gsub("_", " ", vapply(strsplit(ann_files$file,
    "-"), function(parts) parts[1], character(1)))

# family of the first annotation found for each species (NA when the
# species has no imported annotation)
ann_files$family <- anns$family[match(ann_files$species, anns$species)]

# one row per sound file / selection file combination
undup_anns <- anns[!duplicated(paste(anns$sound.files, anns$selec.file)),
    ]
# seq_len() also works when there are no rows (1:0 would not)
rownames(undup_anns) <- seq_len(nrow(undup_anns))

# get those in which the names inside the text file and in the
# txt file name don't match. The sound file name (without its trailing
# ".wav") is searched literally, so characters such as "." or "(" in a
# file name are not interpreted as regular expression syntax
name_in_file <- vapply(seq_len(nrow(undup_anns)), function(i) {
    grepl(sub("\\.wav$", "", undup_anns$sound.files[i]), undup_anns$selec.file[i],
        fixed = TRUE)
}, logical(1))

file_name_no_match <- na.omit(undup_anns$selec.file[!name_in_file])

# get those in which the name doesn't match the expected one
weird_files <- ann_files[!grepl("Table.1.selections.txt", ann_files$file,
    fixed = TRUE) | ann_files$file %in% c(unlist(.Options$Rraven), file_name_no_match),
    ]

# default label first, then the more specific ones; "Empty file" is
# assigned last so it takes precedence. rep() keeps the column a
# character vector even when no file is flagged
weird_files$problem <- rep("file name", nrow(weird_files))
weird_files$problem[weird_files$file %in% file_name_no_match] <- "Species name in txt file name and Begin column dont match"
weird_files$problem[weird_files$file %in% unlist(.Options$Rraven)] <- "Empty file"

# keep only those with 'good' names
ann_files <- ann_files[!ann_files$file %in% weird_files$file, ]

# counts per family: number of records in the recording sheet
fam_count <- aggregate(species ~ family, data = rec_data, length)
names(fam_count)[2] <- "total"

# number of (well-named) annotation files per family; na.rm keeps a single
# file with unknown family from turning every count into NA
fam_count$annotated <- vapply(fam_count$family, function(x) sum(ann_files$family ==
    x, na.rm = TRUE), integer(1), USE.NAMES = FALSE)

fam_count$prop.annotated <- round(fam_count$annotated/fam_count$total,
    2)

# Distinct assignees of each family pasted with "/". vapply() always
# returns a character vector, also for empty input (sapply() would return
# an empty list and create a list column)
.assignees_of <- function(families) {
    vapply(families, function(x) paste(unique(rec_data$assigned_to[rec_data$family ==
        x]), collapse = "/"), character(1), USE.NAMES = FALSE)
}

fam_count$assinged.to <- .assignees_of(fam_count$family)

# a missing assignee is pasted as the text "NA"
fam_count$assinged.to[fam_count$assinged.to == "NA"] <- "not assigned"

weird_files$assinged.to <- .assignees_of(weird_files$family)


# check_sound_files()
# keep only the columns used from here on
anns <- anns[, c("sound.files", "selec", "start", "end", "bottom.freq",
    "top.freq", "selec.file", "family", "species", "element", "song")]

# keep only annotations coming from well-named annotation files
anns <- anns[anns$selec.file %in% ann_files$file, ]

# anns$family <- sapply(anns$species, function(x)
# rec_data$family[rec_data$species == x][1])

# Assignee(s) of each annotation's family. The family column is spelled
# out (no partial matching of `$f`) and the pasted label is computed once
# per family instead of once per annotation
ann_families <- unique(anns$family)
assignee_by_family <- vapply(ann_families, function(x) paste(unique(rec_data$assigned_to[rec_data$family ==
    x]), collapse = "/"), character(1), USE.NAMES = FALSE)
anns$assinged.to <- assignee_by_family[match(anns$family, ann_families)]

# printed check: are all annotated species listed in the recording sheet?
all(ann_files$species %in% rec_data$species)

cs <- check_sels(anns, parallel = 20, pb = TRUE, fix.selec = TRUE,
    path = path_sound_files)


file_info <- info_sound_files(parallel = 20, path = path_sound_files,
    skip.error = TRUE)

# species = text before the first "-" of the file name, underscores
# replaced by spaces
file_info$species <- gsub("_", " ", vapply(strsplit(file_info$sound.files,
    "-"), function(parts) parts[1], character(1)))

# family of the first matching species in the recording sheet
file_info$family <- rec_data$family[match(file_info$species, rec_data$species)]

# printed summaries: count of each non-OK check result and the sound files
# reported as not found
as.data.frame(table(cs$check.res[cs$check.res != "OK"]))

unique(cs$sound.files[cs$check.res == "sound file not found"])

# exp_raven(cs, path = './data/processed', sound.file.path =
# '/media/m/Seagate Portable
# Drive/bird_song_recordings/consolidated_files',file.name =
# 'combined_annotations_22-03-2023')

saveRDS(list(cs = cs, anns = anns, rec_data = rec_data, ann_files = ann_files,
    weird_files = weird_files, fam_count = fam_count, file_info = file_info,
    dup_anns = dup_anns), "./data/processed/annotation_status_results.RDS")

### FIND PROBLEMATIC ANNOTATIONS ########### re run manually
# keep a copy of the previous report; skipped when no previous report
# exists yet
if (file.exists("./data/processed/problematic_annotations.csv")) {
    prev_prob <- read.csv("./data/processed/problematic_annotations.csv")
    write.csv(prev_prob, "./data/processed/prev_problematic_annotations.csv",
        row.names = FALSE)
}

# Rows of `cs` for which `is_problem` is TRUE, with `check.res` replaced by
# `label`. which() drops NA comparisons (an NA row index would add an
# all-NA row) and the label is only assigned when rows exist, because
# assigning a single value to a column of a 0-row data frame is an error
.flag_sels <- function(is_problem, label) {
    flagged <- cs[which(is_problem), ]
    if (nrow(flagged) > 0) flagged$check.res <- label
    flagged
}

# selections that failed the selection check
problematic_sels <- cs[which(cs$check.res != "OK"), ]

# weird duration or frequency range (three families are exempt from the
# duration limit)
long_anns <- .flag_sels(cs$end - cs$start > 10 & !cs$family %in% c("Tinamidae",
    "Accipitridae", "Falconidae"), "longer than 10 s")

broad_anns <- .flag_sels(cs$top.freq - cs$bottom.freq > 10, "frequency range wider than 10 kHz")

# missing song or element labels
song_na <- .flag_sels(is.na(cs$song), "NAs in 'song'")

elem_na <- .flag_sels(is.na(cs$element), "NAs in 'element'")

problematic_sels <- rbind(problematic_sels, long_anns, broad_anns,
    song_na, elem_na)

problematic_sels <- problematic_sels[, c("sound.files", "family",
    "assinged.to", "selec", "check.res")]

# rep() also works when no selection is problematic
problematic_sels$spectrograms <- rep("", nrow(problematic_sels))
names(problematic_sels) <- c("sound.file", "family", "assinged.to",
    "selection", "problem", "spectrograms")

problematic_sels <- problematic_sels[order(problematic_sels$family,
    problematic_sels$sound.file, problematic_sels$selection), ]

# percent-encoded sound file names
# NOTE(review): `sf` is not used again in the code visible here -- confirm
# whether it is still needed
sf <- gsub("\\.wav", "", problematic_sels$sound.file)
sf <- gsub("_", "%5F", sf)
sf <- gsub("-", "%2D", sf)

# final order: by sound file and selection
problematic_sels <- problematic_sels[order(problematic_sels$sound.file,
    problematic_sels$selection), ]


# find overlapping songs
song_anns <- song_analysis(X = anns, parallel = 14)
song_anns <- check_sels(song_anns, parallel = 14, fix.selec = TRUE,
    path = path_sound_files)

# song_anns <- song_anns[song_anns$check.res == 'OK',]

# keep songs with positive duration; which() drops NA comparisons, which
# would otherwise add all-NA rows
song_anns <- song_anns[which(song_anns$end - song_anns$start > 0), ]

ovlp_songs <- overlapping_sels(song_anns[which(song_anns$check.res == "OK" &
    !is.na(song_anns$song)), ], parallel = 14)

ovlp_songs <- ovlp_songs[!is.na(ovlp_songs$ovlp.sels), ]

# family and assignee of each sound file, taken from its first annotation
first_ann <- match(ovlp_songs$sound.files, anns$sound.files)
ovlp_songs$family <- anns$family[first_ann]
ovlp_songs$assinged.to <- anns$assinged.to[first_ann]
# rep() also works when no song overlaps
ovlp_songs$problem <- rep("overlapping songs", nrow(ovlp_songs))

# One row per sound file and problem. The selections are pasted into a
# single string so the column is always character (unique() alone can
# yield list or matrix columns that as.character() cannot flatten)
if (nrow(problematic_sels) > 0) {
    agg_prob_sels <- aggregate(selection ~ sound.file + family + assinged.to +
        problem, data = problematic_sels, FUN = function(x) paste(unique(x),
        collapse = ", "))

    names(agg_prob_sels)[5] <- "selections"

    # number of problematic rows of each sound file
    agg_prob_sels$sel.count <- vapply(agg_prob_sels$sound.file, function(x) sum(problematic_sels$sound.file ==
        x), integer(1), USE.NAMES = FALSE)
} else {
    # aggregate() fails on a data frame without rows
    agg_prob_sels <- data.frame(sound.file = character(0), family = character(0),
        assinged.to = character(0), problem = character(0), selections = character(0),
        sel.count = integer(0))
}

if (nrow(ovlp_songs) > 0) {
    # one row per sound file with overlapping songs
    ovlp_song_probs <- aggregate(song ~ sound.files + family + assinged.to +
        problem, data = ovlp_songs, FUN = function(x) length(unique(x)))

    ovlp_song_probs$song <- NULL

    # Selections belonging to the overlapping songs of each sound file.
    # The lookup uses the sound file of the aggregated row itself rather
    # than the same row number in `ovlp_songs`, which is a different table
    # with a different number of rows
    ovlp_song_probs$selections <- vapply(as.character(ovlp_song_probs$sound.files),
        function(x) {
            songs <- unique(ovlp_songs$song[ovlp_songs$sound.files == x])
            paste(na.omit(anns$selec[which(anns$sound.files == x & anns$song %in%
                songs)]), collapse = ", ")
        }, character(1), USE.NAMES = FALSE)

    # number of overlapping songs of each sound file
    ovlp_song_probs$sel.count <- vapply(as.character(ovlp_song_probs$sound.files),
        function(x) sum(ovlp_songs$sound.files == x), integer(1), USE.NAMES = FALSE)

    names(ovlp_song_probs)[1] <- "sound.file"

    agg_prob_sels <- rbind(agg_prob_sels, ovlp_song_probs)
} else {
    # keep the object defined with the expected columns
    ovlp_song_probs <- agg_prob_sels[0, ]
}

agg_prob_sels <- data.frame(lapply(agg_prob_sels, as.character), stringsAsFactors = FALSE)

# empty column, presumably filled in by hand once a problem is solved --
# TODO confirm
agg_prob_sels$fixed <- rep("", nrow(agg_prob_sels))


# Copy problematic to folder in google drive

#### MAKE SURE SPECTROGRAMS EXISTS FOR ALL ANNOTATED FILES (if
#### not run spectrogram creator chunk below) search the jpeg
#### image files that match the species name and copy those
#### files to a new folder out <-
#### warbleR:::pblapply_wrblr_int(unique(agg_prob_sels$sound.file),
#### function(x){ jpegs <- list.files(path = ann_spec_path,
#### pattern = gsub('.wav', '', x), recursive = TRUE, full.names
#### = TRUE) file.copy(from = jpegs,
#### file.path('/home/m/Insync/marceloa27@gmail.com/Google
#### Drive/bird_song_evolution/annotated_spectrograms_problematic/',
#### basename(jpegs)), overwrite = TRUE) } ) problm_image_drive
#### <- googledrive::drive_ls(path =
#### 'bird_song_evolution/annotated_spectrograms_problematic/')
#### problm_image_drive



# image file page 1 agg_prob_sels$jpeg_p1 <- gsub('.wav',
# '--p1.jpeg',agg_prob_sels$sound.file)

# google drive link agg_prob_sels$gd_id <-
# sapply(agg_prob_sels$sound.file, function(x){
# as.vector(problm_image_drive$id[grep(gsub('.wav', '--p', x),
# problm_image_drive$name)])[1] }) agg_prob_sels$gd_link <-
# ifelse(!is.na(agg_prob_sels$gd_id),
# paste0('https://drive.google.com/file/d/',
# agg_prob_sels$gd_id, '/view?usp=drive_link'), NA)

# names of the jpeg files in the pooled annotated spectrograms folder
sptrgm_files <- list.files(pattern = ".jpeg$", full.names = FALSE,
    path = "~/Dropbox/Projects/bird_song_evolution/data/processed/annotated_spectrograms/pooled/")

# agg_prob_sels$spectrograms <- sapply(agg_prob_sels$sound.file,
# function(x){ img <- grep(gsub('.wav$', '', x), sptrgm_files,
# value = TRUE)[1] out <- if (length(img) > 0)
# paste0('file:///home/m/Dropbox/Projects/bird_song_evolution/data/processed/annotated_spectrograms/pooled/',
# x) else NA return(out) })

# HTML link that runs a OneDrive search for the text before the first "-"
# of each sound file name
agg_prob_sels$files <- local({
    # "_" is percent-encoded as %5F for the query string
    search_term <- gsub("_", "%5F", vapply(strsplit(agg_prob_sels$sound.file,
        "-"), function(parts) parts[1], character(1)))

    search_url <- paste0("https://6f33fa7f78ea46e2aaca-my.sharepoint.com/personal/marcelo_araya_ucr_ac_cr/_layouts/15/onedrive.aspx?q=",
        search_term, "&view=7&id=%2Fpersonal%2Fmarcelo%5Faraya%5Fucr%5Fac%5Fcr%2FDocuments&searchScope=folder")

    kableExtra::cell_spec("link", "html", new_tab = TRUE, link = search_url)
})


# save the report of problematic annotations
write.csv(agg_prob_sels, row.names = FALSE, file = "./data/processed/problematic_annotations.csv")

3 Changes in annotation files

Code
# Load the record of added/changed/deleted annotation files saved by the
# snapshot comparison
changes <- readRDS("./data/processed/changes_in_annotation_files.RDS")
  • 7343 files unchanged
  • 27 files added
  • 1 files deleted
  • 4 files updated

3.1 New files by family

Code
# Added annotation files split into folder (used as family) and file name
added_df <- data.frame(family = dirname(changes$added), file = basename(changes$added))

# number of added files per family, with display-ready column names
fam_count_added <- setNames(as.data.frame(table(added_df$family)), c("Family",
    "Added files"))


# HTML table with bootstrap styling
fam_count_added_kbl <- kableExtra::kable_styling(kableExtra::kbl(fam_count_added,
    format = "html", escape = FALSE, row.names = FALSE), bootstrap_options = c("striped",
    "hover", "condensed", "responsive"), full_width = FALSE, font_size = 12)

fam_count_added_kbl
Family Added files
Alaudidae 4
Cnemophilidae 2
Coraciidae 1
Dulidae 1
Eulacestomatidae 1
Eupetidae 1
Hylocitreidae 1
Hypocoliidae 1
Melampittidae 2
Melanocharitidae 2
Panuridae 1
Paramythiidae 2
Pityriasidae 1
Platylophidae 1
Pluvianidae 1
Rhagologidae 1
Rhynochetidae 1
Rostratulidae 2
Struthionidae 1

4 Descriptive stats

Code
# Make the elements of the saved results list available as objects of the
# current environment. Unlike attach(), this does not add an entry to the
# search path, and same-named objects left over in the workspace cannot
# silently mask the saved ones. invisible() keeps the returned environment
# from being printed in the report
invisible(list2env(readRDS("./data/processed/annotation_status_results.RDS"),
    envir = environment()))
  • 7373 species from 241 families already annotated (73% of all available species; 353.18 recording hours; 222181 annotations)

  • 128 families with 90% of all available species annotated (51% of all families)

  • 2716 species have not been annotated (26.91%; 90 recording hours; 21% of the total recording hours)

5 Species per family

All recordings in these families have been annotated (90 families, 417 recordings, 25.36 recording hours)

Code
# Families whose (rounded) proportion of annotated recordings equals 1
sub_fam_count <- fam_count[fam_count$prop.annotated == 1, ]

# numeric copy of the proportions; the column itself is replaced by HTML
# below
prop_analzyed <- sub_fam_count$prop.annotated

# proportion shown as a coloured cell: green above 0.9, yellow above 0.5,
# red otherwise
sub_fam_count$prop.annotated <- local({
    coloured_cell <- function(text_colour, fill_colour) {
        kableExtra::cell_spec(prop_analzyed, "html", color = text_colour,
            background = fill_colour, bold = TRUE, font_size = 12, new_tab = TRUE)
    }

    ifelse(prop_analzyed > 0.9, coloured_cell("white", "green"), ifelse(prop_analzyed >
        0.5, coloured_cell("black", "yellow"), coloured_cell("white", "red")))
})

# families labelled "not assigned" are highlighted in red
sub_fam_count$assinged.to <- local({
    assignee <- sub_fam_count$assinged.to

    ifelse(assignee == "not assigned", kableExtra::cell_spec(assignee, "html",
        color = "white", background = "red", bold = TRUE, font_size = 12,
        new_tab = TRUE), kableExtra::cell_spec(assignee, "html", new_tab = TRUE))
})


# link to the OneDrive folder holding the recordings of each family
sub_fam_count$recordings <- kableExtra::cell_spec("link", "html", new_tab = TRUE,
    link = paste0("https://6f33fa7f78ea46e2aaca-my.sharepoint.com/personal/marcelo_araya_ucr_ac_cr/_layouts/15/onedrive.aspx?ga=1&id=%2Fpersonal%2Fmarcelo%5Faraya%5Fucr%5Fac%5Fcr%2FDocuments%2Fbird%5Fsong%5Fevolution%2Fbird%5Fsong%5Frecordings%2F",
        sub_fam_count$family))

# link to the annotated spectrograms, only where the proportion annotated
# is above 0
sub_fam_count$spectrograms <- ifelse(prop_analzyed > 0, kableExtra::cell_spec("link",
    "html", new_tab = TRUE, link = paste0("https://6f33fa7f78ea46e2aaca-my.sharepoint.com/personal/marcelo_araya_ucr_ac_cr/_layouts/15/onedrive.aspx?ga=1&id=%2Fpersonal%2Fmarcelo%5Faraya%5Fucr%5Fac%5Fcr%2FDocuments%2Fbird%5Fsong%5Fevolution%2Fannotated%5Fspectrograms%2F",
        sub_fam_count$family)), "")

# sub_fam_count_kbl <- kableExtra::row_spec( kable_input =
# sub_fam_count_kbl, row = which(prop_analzyed == 1), background
# = grDevices::adjustcolor('#6DCD59FF', alpha.f = 0.3) )

# HTML table (cells already contain HTML, hence escape = FALSE) with
# bootstrap styling
sub_fam_count_kbl <- kableExtra::kable_styling(kableExtra::kbl(sub_fam_count,
    format = "html", escape = FALSE, row.names = FALSE), bootstrap_options = c("striped",
    "hover", "condensed", "responsive"), full_width = FALSE, font_size = 12)

sub_fam_count_kbl
family total annotated prop.annotated assinged.to recordings spectrograms
Acanthisittidae 2 2 1 Daniela link link
Aegithalidae 10 10 1 Daniela link link
Aegithinidae 4 4 1 Daniela link link
Aegothelidae 6 6 1 Daniela link link
Anhimidae 3 3 1 Daniela link link
Anhingidae 4 4 1 Daniela link link
Anseranatidae 1 1 1 Daniela link link
Aramidae 1 1 1 Daniela link link
Atrichornithidae 2 2 1 Daniela link link
Balaenicipitidae 1 1 1 Daniela link link
Bombycillidae 3 3 1 Daniela link link
Brachypteraciidae 6 6 1 Daniela link link
Bucorvidae 2 2 1 Daniela link link
Buphagidae 2 2 1 Daniela link link
Capitonidae 15 15 1 Daniela link link
Cariamidae 2 2 1 Daniela link link
Cathartidae 7 7 1 Daniela link link
Chaetopidae 2 2 1 Daniela link link
Chionidae 1 1 1 Daniela link link
Chloropseidae 10 10 1 Daniela link link
Cinclidae 5 5 1 Daniela link link
Cnemophilidae 2 2 1 Daniela link link
Coraciidae 13 13 1 Daniela link link
Corcoracidae 2 2 1 Daniela link link
Dasyornithidae 3 3 1 Daniela link link
Donacobiidae 1 1 1 Daniela link link
Dromadidae 1 1 1 Daniela link link
Dulidae 1 1 1 Daniela link link
Elachuridae 1 1 1 Daniela link link
Eulacestomatidae 1 1 1 Daniela link link
Eupetidae 1 1 1 Daniela link link
Eurypygidae 1 1 1 Daniela link link
Formicariidae 12 12 1 Daniela link link
Gaviidae 5 5 1 Carlos link link
Grallariidae 67 67 1 Carlos link link
Heliornithidae 2 2 1 Carlos link link
Hydrobatidae 15 15 1 Carlos link link
Hylocitreidae 1 1 1 Paula link link
Hypocoliidae 1 1 1 Paula link link
Ibidorhynchidae 1 1 1 Carlos link link
Leptosomidae 1 1 1 Paula link link
Melampittidae 2 2 1 Daniela link link
Melanopareiidae 4 4 1 Daniela link link
Mesitornithidae 3 3 1 Paula link link
Mimidae 34 34 1 Paula link link
Mitrospingidae 4 4 1 Paula link link
Modulatricidae 3 3 1 Paula link link
Mohoidae 1 1 1 Paula link link
Mohouidae 3 3 1 Paula link link
Nesospingidae 1 1 1 Paula link link
Nicatoridae 3 3 1 Paula link link
Notiomystidae 1 1 1 Paula link link
Opisthocomidae 1 1 1 Paula link link
Pandionidae 1 1 1 Paula link link
Panuridae 1 1 1 Paula link link
Paramythiidae 2 2 1 Paula link link
Pardalotidae 4 4 1 Paula link link
Pedionomidae 1 1 1 Paula link link
Peucedramidae 1 1 1 Paula link link
Phaethontidae 3 3 1 Paula link link
Pityriasidae 1 1 1 Paula link link
Platylophidae 1 1 1 Paula link link
Pnoepygidae 5 5 1 Paula link link
Polioptilidae 21 21 1 Paula link link
Promeropidae 2 2 1 Paula link link
Psophiidae 3 3 1 Paula link link
Psophodidae 4 4 1 Paula link link
Ptiliogonatidae 4 4 1 Paula link link
Recurvirostridae 9 9 1 Paula link link
Rhagologidae 1 1 1 Paula link link
Rheidae 2 2 1 Paula link link
Rhodinocichlidae 1 1 1 Paula link link
Rhynochetidae 1 1 1 Paula link link
Rostratulidae 2 2 1 Paula link link
Sagittariidae 1 1 1 Paula link link
Sapayoidae 1 1 1 Paula link link
Scopidae 1 1 1 Paula link link
Semnornithidae 2 2 1 Paula link link
Spindalidae 4 4 1 Paula link link
Steatornithidae 1 1 1 Paula link link
Stercorariidae 7 7 1 Paula link link
Strigopidae 3 3 1 Paula link link
Struthionidae 1 1 1 Paula link link
Teretistridae 2 2 1 Paula link link
Tichodromidae 1 1 1 Paula link link
Tityridae 33 33 1 Paula link link
Todidae 5 5 1 Paula link link
Upupidae 2 2 1 Paula link link
Urocynchramidae 1 1 1 Paula link link
Zeledoniidae 1 1 1 Paula link link

90%-99% of recordings in these families have been annotated (37 families, 2135 recordings, 105.45 recording hours)

Code
# Families with a (rounded) proportion annotated of at least 0.9 but
# below 1
sub_fam_count <- fam_count[with(fam_count, prop.annotated >= 0.9 & prop.annotated <
    1), ]

# numeric copy of the proportions; the column itself is replaced by HTML
# below
prop_analzyed <- sub_fam_count$prop.annotated

# proportion shown as a coloured cell: green above 0.9, yellow above 0.5,
# red otherwise
sub_fam_count$prop.annotated <- local({
    coloured_cell <- function(text_colour, fill_colour) {
        kableExtra::cell_spec(prop_analzyed, "html", color = text_colour,
            background = fill_colour, bold = TRUE, font_size = 12, new_tab = TRUE)
    }

    ifelse(prop_analzyed > 0.9, coloured_cell("white", "green"), ifelse(prop_analzyed >
        0.5, coloured_cell("black", "yellow"), coloured_cell("white", "red")))
})

# families labelled "not assigned" are highlighted in red
sub_fam_count$assinged.to <- local({
    assignee <- sub_fam_count$assinged.to

    ifelse(assignee == "not assigned", kableExtra::cell_spec(assignee, "html",
        color = "white", background = "red", bold = TRUE, font_size = 12,
        new_tab = TRUE), kableExtra::cell_spec(assignee, "html", new_tab = TRUE))
})


# link to the OneDrive folder holding the recordings of each family
sub_fam_count$recordings <- kableExtra::cell_spec("link", "html", new_tab = TRUE,
    link = paste0("https://6f33fa7f78ea46e2aaca-my.sharepoint.com/personal/marcelo_araya_ucr_ac_cr/_layouts/15/onedrive.aspx?ga=1&id=%2Fpersonal%2Fmarcelo%5Faraya%5Fucr%5Fac%5Fcr%2FDocuments%2Fbird%5Fsong%5Fevolution%2Fbird%5Fsong%5Frecordings%2F",
        sub_fam_count$family))

# link to the annotated spectrograms, only where the proportion annotated
# is above 0
sub_fam_count$spectrograms <- ifelse(prop_analzyed > 0, kableExtra::cell_spec("link",
    "html", new_tab = TRUE, link = paste0("https://6f33fa7f78ea46e2aaca-my.sharepoint.com/personal/marcelo_araya_ucr_ac_cr/_layouts/15/onedrive.aspx?ga=1&id=%2Fpersonal%2Fmarcelo%5Faraya%5Fucr%5Fac%5Fcr%2FDocuments%2Fbird%5Fsong%5Fevolution%2Fannotated%5Fspectrograms%2F",
        sub_fam_count$family)), "")

# HTML table (cells already contain HTML, hence escape = FALSE); rows with
# a proportion of exactly 1 get a translucent green background, then
# bootstrap styling is applied
sub_fam_count_kbl <- kableExtra::kable_styling(kableExtra::row_spec(kable_input = kableExtra::kbl(sub_fam_count,
    format = "html", escape = FALSE, row.names = FALSE), row = which(prop_analzyed ==
    1), background = grDevices::adjustcolor("#6DCD59FF", alpha.f = 0.3)),
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE, font_size = 12)

sub_fam_count_kbl
family total annotated prop.annotated assinged.to recordings spectrograms
Acanthizidae 64 59 0.92 Daniela link link
Accipitridae 224 215 0.96 Daniela link link
Alaudidae 89 80 0.9 Daniela link link
Alcidae 23 21 0.91 Daniela link link
Ardeidae 56 53 0.95 Daniela link link
Artamidae 21 20 0.95 Daniela link link
Bernieridae 11 10 0.91 Daniela link link
Bucerotidae 59 56 0.95 Daniela link link
Burhinidae 10 9 0.9 Daniela link link
Cacatuidae 24 23 0.96 Daniela link link
Cardinalidae 49 48 0.98 Daniela link link
Certhiidae 10 9 0.9 Daniela link link
Charadriidae 65 63 0.97 Daniela link link
Cinclosomatidae 12 11 0.92 Daniela link link
Corvidae 127 123 0.97 Daniela link link
Cuculidae 144 132 0.92 Daniela link link
Diomedeidae 13 12 0.92 Daniela link link
Falconidae 66 60 0.91 Daniela link link
Furnariidae 309 291 0.94 Daniela link link
Galbulidae 18 17 0.94 Carlos link link
Gruidae 15 14 0.93 Carlos/Paula link link
Haematopodidae 11 10 0.91 Carlos/Paula link link
Icteridae 105 95 0.9 Daniela/Carlos link link
Laridae 97 87 0.9 Daniela link link
Meropidae 29 27 0.93 Daniela/Paula link link
Momotidae 14 13 0.93 Paula link link
Musophagidae 22 20 0.91 Paula link link
Odontophoridae 33 31 0.94 Paula link link
Parulidae 113 106 0.94 Paula link link
Passerellidae 130 126 0.97 Paula link link
Podicipedidae 20 19 0.95 Paula link link
Prunellidae 12 11 0.92 Paula link link
Ramphastidae 36 34 0.94 Paula link link
Rhinocryptidae 66 64 0.97 Paula link link
Sylviidae 69 64 0.93 Paula link link
Tinamidae 46 43 0.93 Paula link link
Vireonidae 62 59 0.95 Paula link link

1%-90% of recordings in these families have been annotated (113 families, 4819 recordings, 222.34 recording hours)

Code
# Families with a (rounded) proportion annotated of at least 1e-04 but
# below 0.9
sub_fam_count <- fam_count[with(fam_count, prop.annotated >= 1e-04 & prop.annotated <
    0.9), ]

# numeric copy of the proportions; the column itself is replaced by HTML
# below
prop_analzyed <- sub_fam_count$prop.annotated

# proportion shown as a coloured cell: green above 0.9, yellow above 0.5,
# red otherwise
sub_fam_count$prop.annotated <- local({
    coloured_cell <- function(text_colour, fill_colour) {
        kableExtra::cell_spec(prop_analzyed, "html", color = text_colour,
            background = fill_colour, bold = TRUE, font_size = 12, new_tab = TRUE)
    }

    ifelse(prop_analzyed > 0.9, coloured_cell("white", "green"), ifelse(prop_analzyed >
        0.5, coloured_cell("black", "yellow"), coloured_cell("white", "red")))
})

# families labelled "not assigned" are highlighted in red
sub_fam_count$assinged.to <- local({
    assignee <- sub_fam_count$assinged.to

    ifelse(assignee == "not assigned", kableExtra::cell_spec(assignee, "html",
        color = "white", background = "red", bold = TRUE, font_size = 12,
        new_tab = TRUE), kableExtra::cell_spec(assignee, "html", new_tab = TRUE))
})


# link to the OneDrive folder holding the recordings of each family
sub_fam_count$recordings <- kableExtra::cell_spec("link", "html", new_tab = TRUE,
    link = paste0("https://6f33fa7f78ea46e2aaca-my.sharepoint.com/personal/marcelo_araya_ucr_ac_cr/_layouts/15/onedrive.aspx?ga=1&id=%2Fpersonal%2Fmarcelo%5Faraya%5Fucr%5Fac%5Fcr%2FDocuments%2Fbird%5Fsong%5Fevolution%2Fbird%5Fsong%5Frecordings%2F",
        sub_fam_count$family))

# link to the annotated spectrograms, only where the proportion annotated
# is above 0
sub_fam_count$spectrograms <- ifelse(prop_analzyed > 0, kableExtra::cell_spec("link",
    "html", new_tab = TRUE, link = paste0("https://6f33fa7f78ea46e2aaca-my.sharepoint.com/personal/marcelo_araya_ucr_ac_cr/_layouts/15/onedrive.aspx?ga=1&id=%2Fpersonal%2Fmarcelo%5Faraya%5Fucr%5Fac%5Fcr%2FDocuments%2Fbird%5Fsong%5Fevolution%2Fannotated%5Fspectrograms%2F",
        sub_fam_count$family)), "")

# HTML table (cells already contain HTML, hence escape = FALSE); rows with
# a proportion of exactly 1 get a translucent green background, then
# bootstrap styling is applied
sub_fam_count_kbl <- kableExtra::kable_styling(kableExtra::row_spec(kable_input = kableExtra::kbl(sub_fam_count,
    format = "html", escape = FALSE, row.names = FALSE), row = which(prop_analzyed ==
    1), background = grDevices::adjustcolor("#6DCD59FF", alpha.f = 0.3)),
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE, font_size = 12)

sub_fam_count_kbl
family total annotated prop.annotated assinged.to recordings spectrograms
Acrocephalidae 48 34 0.71 Daniela link link
Alcedinidae 99 88 0.89 Daniela link link
Anatidae 159 136 0.86 Daniela link link
Apodidae 90 47 0.52 Daniela link link
Apterygidae 5 4 0.8 Daniela link link
Bucconidae 36 31 0.86 Daniela link link
Calcariidae 6 5 0.83 Daniela link link
Callaeidae 3 2 0.67 Daniela link link
Calyptomenidae 6 5 0.83 Daniela link link
Calyptophilidae 2 1 0.5 Daniela link link
Campephagidae 77 67 0.87 Daniela link link
Casuariidae 4 3 0.75 Daniela link link
Ciconiidae 15 12 0.8 Daniela link link
Cisticolidae 152 133 0.88 Daniela link link
Climacteridae 7 6 0.86 Daniela link link
Coliidae 6 3 0.5 Daniela link link
Columbidae 292 250 0.86 Daniela link link
Conopophagidae 12 4 0.33 Daniela link link
Cotingidae 62 52 0.84 Daniela link link
Cracidae 56 45 0.8 Daniela link link
Dicaeidae 41 25 0.61 Daniela link link
Dicruridae 28 13 0.46 Daniela link link
Emberizidae 43 31 0.72 Daniela link link
Estrildidae 108 67 0.62 Daniela link link
Eurylaimidae 8 1 0.12 Daniela link link
Falcunculidae 3 1 0.33 Daniela link link
Fregatidae 4 2 0.5 Daniela link link
Fringillidae 200 170 0.85 Daniela link link
Glareolidae 14 11 0.79 Paula/Carlos link link
Hemiprocnidae 4 1 0.25 Paula/Carlos link link
Hirundinidae 80 68 0.85 Carlos/Paula link link
Hyliotidae 4 3 0.75 Carlos/Paula link link
Indicatoridae 16 10 0.62 Daniela/Carlos/Paula link link
Irenidae 2 1 0.5 Daniela link link
Jacanidae 7 5 0.71 Daniela link link
Laniidae 32 25 0.78 Daniela link link
Leiothrichidae 140 84 0.6 Daniela link link
Locustellidae 59 32 0.54 Daniela link link
Lybiidae 38 30 0.79 Daniela/Paula link link
Machaerirhynchidae 2 1 0.5 Daniela link link
Macrosphenidae 19 15 0.79 Daniela/Paula link link
Malaconotidae 51 43 0.84 Daniela/Paula link link
Maluridae 30 11 0.37 Daniela/Paula link link
Megalaimidae 34 12 0.35 Daniela link link
Megapodiidae 15 3 0.2 Daniela/Paula link link
Melanocharitidae 7 2 0.29 Daniela link link
Meliphagidae 151 119 0.79 Daniela/Paula link link
Menuridae 2 1 0.5 Daniela link link
Monarchidae 85 25 0.29 Paula link link
Motacillidae 68 59 0.87 Paula link link
Muscicapidae 329 225 0.68 Paula link link
Nectariniidae 129 86 0.67 Paula link link
Neosittidae 3 2 0.67 Paula link link
Numididae 6 4 0.67 Paula link link
Nyctibiidae 8 7 0.88 Paula link link
Oceanitidae 5 4 0.8 Paula link link
Oreoicidae 3 1 0.33 Paula link link
Oriolidae 31 15 0.48 Paula link link
Orthonychidae 3 2 0.67 Paula link link
Otididae 20 15 0.75 Paula link link
Oxyruncidae 7 6 0.86 Paula link link
Pachycephalidae 53 16 0.3 Paula link link
Paradisaeidae 39 4 0.1 Paula link link
Paridae 63 55 0.87 Paula link link
Passeridae 39 33 0.85 Paula link link
Pelecanidae 6 5 0.83 Paula link link
Pellorneidae 59 25 0.42 Paula link link
Petroicidae 46 24 0.52 Paula link link
Phaenicophilidae 4 3 0.75 Paula link link
Phalacrocoracidae 28 19 0.68 Paula link link
Phasianidae 167 107 0.64 Paula link link
Philepittidae 4 3 0.75 Paula link link
Phoenicopteridae 7 5 0.71 Paula link link
Phoeniculidae 9 8 0.89 Paula link link
Phylloscopidae 80 60 0.75 Paula link link
Picidae 224 180 0.8 Paula link link
Pipridae 55 46 0.84 Paula link link
Pittidae 43 11 0.26 Paula link link
Platysteiridae 31 18 0.58 Paula link link
Ploceidae 101 75 0.74 Paula link link
Podargidae 16 5 0.31 Paula link link
Pomatostomidae 5 4 0.8 Paula link link
Procellariidae 69 48 0.7 Paula link link
Psittaculidae 151 61 0.4 Paula link link
Pteroclidae 16 14 0.88 Paula link link
Ptilonorhynchidae 21 10 0.48 Paula link link
Pycnonotidae 141 81 0.57 Paula link link
Rallidae 117 90 0.77 Paula link link
Regulidae 6 5 0.83 Paula link link
Remizidae 11 8 0.73 Paula link link
Rhipiduridae 47 10 0.21 Paula link link
Sarothruridae 9 7 0.78 Paula link link
Scolopacidae 89 78 0.88 Paula link link
Scotocercidae 34 24 0.71 Paula link link
Sittidae 28 22 0.79 Paula link link
Spheniscidae 17 14 0.82 Paula link link
Stenostiridae 9 8 0.89 Paula link link
Strigidae 218 143 0.66 Paula link link
Sturnidae 104 63 0.61 Paula link link
Sulidae 9 8 0.89 Paula link link
Thamnophilidae 237 205 0.86 Paula link link
Thinocoridae 4 3 0.75 Daniela link link
Thraupidae 376 333 0.89 Daniela/Paula link link
Threskiornithidae 33 29 0.88 Paula link link
Timaliidae 57 31 0.54 Paula link link
Trogonidae 43 33 0.77 Paula link link
Turdidae 164 136 0.83 Paula link link
Turnicidae 12 9 0.75 Paula link link
Tyrannidae 453 398 0.88 Paula link link
Tytonidae 11 8 0.73 Paula link link
Vangidae 38 27 0.71 Paula link link
Viduidae 14 8 0.57 Paula link link
Zosteropidae 116 33 0.28 Paula link link

0 recordings in these families have been annotated (8 families)

Code
# Families whose (rounded) proportion of annotated recordings is 0
sub_fam_count <- fam_count[fam_count$prop.annotated == 0, ]

# numeric copy of the proportions; the column itself is replaced by HTML
# below
prop_analzyed <- sub_fam_count$prop.annotated

# proportion shown as a coloured cell: green above 0.9, yellow above 0.5,
# red otherwise
sub_fam_count$prop.annotated <- local({
    coloured_cell <- function(text_colour, fill_colour) {
        kableExtra::cell_spec(prop_analzyed, "html", color = text_colour,
            background = fill_colour, bold = TRUE, font_size = 12, new_tab = TRUE)
    }

    ifelse(prop_analzyed > 0.9, coloured_cell("white", "green"), ifelse(prop_analzyed >
        0.5, coloured_cell("black", "yellow"), coloured_cell("white", "red")))
})

# families labelled "not assigned" are highlighted in red
sub_fam_count$assinged.to <- local({
    assignee <- sub_fam_count$assinged.to

    ifelse(assignee == "not assigned", kableExtra::cell_spec(assignee, "html",
        color = "white", background = "red", bold = TRUE, font_size = 12,
        new_tab = TRUE), kableExtra::cell_spec(assignee, "html", new_tab = TRUE))
})


# link to the OneDrive folder holding the recordings of each family
sub_fam_count$recordings <- kableExtra::cell_spec("link", "html", new_tab = TRUE,
    link = paste0("https://6f33fa7f78ea46e2aaca-my.sharepoint.com/personal/marcelo_araya_ucr_ac_cr/_layouts/15/onedrive.aspx?ga=1&id=%2Fpersonal%2Fmarcelo%5Faraya%5Fucr%5Fac%5Fcr%2FDocuments%2Fbird%5Fsong%5Fevolution%2Fbird%5Fsong%5Frecordings%2F",
        sub_fam_count$family))

# link to the annotated spectrograms, only where the proportion annotated
# is above 0 (so the cell is left empty for the families selected here)
sub_fam_count$spectrograms <- ifelse(prop_analzyed > 0, kableExtra::cell_spec("link",
    "html", new_tab = TRUE, link = paste0("https://6f33fa7f78ea46e2aaca-my.sharepoint.com/personal/marcelo_araya_ucr_ac_cr/_layouts/15/onedrive.aspx?ga=1&id=%2Fpersonal%2Fmarcelo%5Faraya%5Fucr%5Fac%5Fcr%2FDocuments%2Fbird%5Fsong%5Fevolution%2Fannotated%5Fspectrograms%2F",
        sub_fam_count$family)), "")

# HTML table (cells already contain HTML, hence escape = FALSE); rows with
# a proportion of exactly 1 get a translucent green background, then
# bootstrap styling is applied
sub_fam_count_kbl <- kableExtra::kable_styling(kableExtra::row_spec(kable_input = kableExtra::kbl(sub_fam_count,
    format = "html", escape = FALSE, row.names = FALSE), row = which(prop_analzyed ==
    1), background = grDevices::adjustcolor("#6DCD59FF", alpha.f = 0.3)),
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE, font_size = 12)

sub_fam_count_kbl
family total annotated prop.annotated assinged.to recordings spectrograms
Caprimulgidae 87 0 0 MARCELO GETS ANNOTATIONS link
Icteriidae 1 0 0 Daniela link
Ifritidae 1 0 0 Daniela link
Picathartidae 1 0 0 Paula link
Pluvianidae 1 0 0 Paula link
Psittacidae 167 0 0 MARCELO GETS ANNOTATIONS link
Trochilidae 347 0 0 MARCELO GETS ANNOTATIONS link
Troglodytidae 86 0 0 MARCELO GETS ANNOTATIONS link

6 Double-checking annotations

  • 0 weirdly named file(s) and 0 empty file(s):
Code
# print the table of weirdly named / empty annotation files (the object
# is created outside this chunk)
weird_files
file species family problem assinged.to
  • Duplicated annotation files
Code
# render the duplicated annotation files as a styled table
kable(dup_anns) |>
    kableExtra::kable_styling(
        bootstrap_options = c("striped", "hover", "condensed", "responsive"),
        full_width = FALSE,
        font_size = 12
    )
sound.files selec.file Begin Path
NA NA NA
:----------- :---------- :----------
Code
# load the table of problematic annotations
agg_prob_sels <- read.csv(file = "./data/processed/problematic_annotations.csv")

# discard the rows whose problem mentions "not found" or "read"
agg_prob_sels <- agg_prob_sels[!grepl("not found|read", agg_prob_sels$problem), ]

# show missing 'fixed' entries as empty cells instead of NA
agg_prob_sels$fixed <- ifelse(
    is.na(agg_prob_sels$fixed),
    "",
    agg_prob_sels$fixed
)

# (disabled) turn the 'files' column into clickable links
# agg_prob_sels$files <- ifelse(!is.na(agg_prob_sels$files),
# kableExtra::cell_spec('link', 'html', link =
# agg_prob_sels$files, new_tab = TRUE), '')

The following table shows selections (‘selections’ column) within sound files (‘sound.file’ column) that are problematic (the problem is described in the ‘problem’ column). Please check them and fix them accordingly. If a Raven selection table is fixed, please upload it to Google Drive, but make sure the old copy is removed first. After fixing annotations, please report it in this data sheet.

  • 32 files
Code
# sort the problematic selections by family, then sound file, then problem
agg_prob_sels <- agg_prob_sels[
    with(agg_prob_sels, order(family, sound.file, problem)),
]

# wide frequency ranges are not reported in this table
agg_prob_sels <- agg_prob_sels[
    agg_prob_sels$problem != "frequency range wider than 10 kHz",
]

# build the HTML table (escape = FALSE keeps any HTML in the cells) and
# apply the bootstrap styling in a single step
problematic_sels_kbl <- kableExtra::kable_styling(
    kableExtra::kbl(
        agg_prob_sels[, c("sound.file", "family", "problem", "selections", "files")],
        row.names = FALSE,
        escape = FALSE,
        format = "html",
        digits = 3
    ),
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    font_size = 12
)

problematic_sels_kbl
sound.file family problem selections files
Aythya_collaris-ML130832.wav Anatidae overlapping songs 1, 2, 3, 5 link
Euphonia_jamaica-ML164921.wav Fringillidae NAs in 'element' c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151) link
Spinus_psaltria-ML22869.wav Fringillidae NAs in 'song' 416 link
Melanocharis_longicauda-ML100636.wav Melanocharitidae overlapping songs 4, 6, 7, 8, 9, 10, 11, 12, 13 link
Pluvianus_aegyptius-ML138466241.wav Pluvianellidae overlapping songs 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 29 link

7 Create annotated spectrograms

Code
# make warbleR functions look for sound files in the consolidated folder
warbleR_options(wav.path = path_sound_files)

# load full_spectrograms() from a local copy of the warbleR source
# (presumably a development version; confirm before sharing this report)
source("~/Dropbox/R_package_testing/warbleR/R/full_spectrograms.R")

# create one output folder per family
for (i in unique(rec_data$family)) {
    if (!file.exists(file.path(ann_spec_path, i))) {
        dir.create(file.path(ann_spec_path, i))
    }
}

# sound files whose selection table was added or changed since the
# previous snapshot; the suffix is matched literally (fixed = TRUE) so
# the dots are not treated as regex wildcards
new_and_changed_files <- gsub(".Table.1.selections.txt", ".wav",
    basename(c(changes$added, changes$changed)), fixed = TRUE)

# also include the sound files listed in 'prev_prob'
new_and_changed_files <- unique(c(new_and_changed_files, prev_prob$sound.file))


# Create the annotated spectrograms, one sound file at a time. Each
# element of 'out' is the value returned by try(): a "try-error" object
# when full_spectrograms() failed for that sound file.
# out <- warbleR:::pblapply_wrblr_int(agg_prob_sels$sound.file, pbar = TRUE, cl = 10, function(x)
out <- warbleR:::pblapply_wrblr_int(unique(cs$sound.files), pbar = TRUE, cl = 1, function(x) {
    # annotations of the current sound file
    sub_anns <- cs[cs$sound.files == x, ]

    # frequency limits of the plot (presumably in kHz -- confirm): start
    # 1 unit below the lowest annotated frequency, but never below 0
    frq_range <- range(c(sub_anns$bottom.freq, sub_anns$top.freq))
    frq_range[1] <- frq_range[1] - 1
    if (frq_range[1] < 0) {
        frq_range[1] <- 0
    }

    # add a third of the range as head room, then keep the upper limit
    # between 6 and 22.05
    frq_range[2] <- frq_range[2] + ((frq_range[2] - frq_range[1]) / 3)
    if (frq_range[2] < 6) {
        frq_range[2] <- 6
    }
    if (frq_range[2] > 22.05) {
        frq_range[2] <- 22.05
    }

    # label each selection as "<selec>-<element>"
    sub_anns$selec <- paste(sub_anns$selec, sub_anns$element, sep = "-")

    # to skip sound files that already have images, wrap the call below in:
    # if(!any(file.exists(file.path(ann_spec_path, sub_anns$family[1], paste0(gsub(".wav", "", x), paste0("--p",1:40,".jpeg"))))))
    try(
        full_spectrograms(
            X = sub_anns,
            flim = frq_range,
            sxrow = 4,
            rows = 10,
            ovlp = 25,
            collevels = seq(-100, 0, 5),
            parallel = 1,
            overwrite = TRUE,
            dest.path = file.path(ann_spec_path, sub_anns$family[1]),
            # dest.path = "/home/m/Dropbox/Projects/bird_song_evolution/data/processed/annotated_spectrograms/problematic/",
            song = "song",
            fast.spec = TRUE,
            horizontal = TRUE,
            pb = FALSE,
            only.annotated = TRUE,
            path = path_sound_files
        ),
        silent = TRUE
    )
})

# errors are silenced inside the loop, so report how many sound files failed
failed_spectrograms <- vapply(out, inherits, logical(1), what = "try-error")

if (any(failed_spectrograms)) {
    warning(sum(failed_spectrograms),
        " sound file(s) failed in full_spectrograms(); inspect 'out'",
        call. = FALSE)
}

# number of annotated sound files without any spectrogram image in their
# family folder. Images are named "<sound file name>--p<page>.jpeg", so
# the page suffix is stripped from the images found on disk and compared
# against the "<family>/<sound file name>" expected for each sound file
# (testing file.exists() on the bare "--p" prefix can never succeed).
first_row <- !duplicated(cs$sound.files)

expected_imgs <- file.path(cs$family[first_row],
    sub("\\.wav$", "", cs$sound.files[first_row]))

family_imgs <- list.files(path = ann_spec_path, pattern = "--p[0-9]+\\.jpeg$",
    recursive = TRUE)

sum(!expected_imgs %in% sub("--p[0-9]+\\.jpeg$", "", family_imgs))


# consolidate: pool the images of all family folders in a single folder
cns_imgs <- consolidate(path = "/home/m/Dropbox/Projects/bird_song_evolution/data/processed/annotated_spectrograms/by_family/", dest.path = "/home/m/Dropbox/Projects/bird_song_evolution/data/processed/annotated_spectrograms/pooled/", save.csv = FALSE, file.ext = ".jpeg$")


# check if all annotations have images
ann_spec_files <- list.files(path = "/home/m/Dropbox/Projects/bird_song_evolution/data/processed/annotated_spectrograms/pooled/", pattern = "\\.jpeg$")

# extract the sound file name, removing the page suffix (--p1.jpeg,
# --p2.jpeg and so on)
spec_spp <- unique(sub("--p[0-9]+\\.jpeg$", "", ann_spec_files))


# sound files with annotations but no pooled image
missing <- setdiff(unique(cs$sound.files), paste0(spec_spp, ".wav"))

8 Add new recordings

Code
# metadata of the candidate recordings, one spreadsheet per source
new_data_no_cuts <- readxl::read_excel(
    path = "./data/raw/Marcelo Files - no Production cut.xlsx"
)

new_data_cuts <- readxl::read_excel(
    path = "./data/raw/Marcelo Files - Production cuts.xlsx"
)

# production cuts: keep only species absent from the current data set
new_data_cuts <- new_data_cuts[
    !(new_data_cuts$`Scientific Name` %in% rec_data$species),
]

nrow(new_data_cuts)

# same filter for the other spreadsheet, which stores the species name
# in the 'Parent Species' column
new_data_no_cuts <- new_data_no_cuts[
    !(new_data_no_cuts$`Parent Species` %in% rec_data$species),
]

nrow(new_data_no_cuts)

head(new_data_no_cuts)

# label the origin of each table
new_data_no_cuts$source <- "not publication cut"

new_data_cuts$source <- "publication cut"

# Clements checklist, keeping only the species-level rows
clm <- read.csv("./data/raw/NEW_Clements-Checklist-v2022-October-2022.csv")
clm.sp <- clm[clm$category == "species", ]

# Text before the first match of 'split' in each element of 'x' (NA stays
# NA). vapply() keeps the result a character vector even for empty input.
text_before <- function(x, split) {
    vapply(strsplit(as.character(x), split), function(parts) parts[1],
        character(1), USE.NAMES = FALSE)
}

# Family of each genus taken from the first checklist row of that genus,
# dropping whatever follows " (" in the family name; NA when the genus is
# missing or not in the checklist.
family_of_genus <- function(genus, checklist) {
    first_hit <- match(genus, checklist$genus, incomparables = NA)
    text_before(checklist$family[first_hit], " \\(")
}

# genus = first word of the scientific name
clm.sp$genus <- text_before(clm.sp$scientific.name, " ")

new_data_cuts$genus <- text_before(new_data_cuts$`Scientific Name`, " ")

new_data_cuts$family <- family_of_genus(new_data_cuts$genus, clm.sp)

new_data_no_cuts$genus <- text_before(new_data_no_cuts$`Parent Species`, " ")

new_data_no_cuts$family <- family_of_genus(new_data_no_cuts$genus, clm.sp)


# give both tables the same species-name columns
new_data_cuts$Scientific.Name <- new_data_cuts$`Scientific Name`
new_data_cuts$Orginal.Scientific.Name <- new_data_cuts$`Scientific Name`

new_data_no_cuts$Orginal.Scientific.Name <- new_data_no_cuts$`Parent Species`

# the column with a space in its name is no longer needed
new_data_cuts$`Scientific Name` <- NULL
new_data_no_cuts$`Scientific Name` <- NULL

new_data_no_cuts$Scientific.Name <- new_data_no_cuts$`Parent Species`

# links to the recording (Macaulay Library) and to the species (eBird)
new_data_no_cuts$file_url <- paste0(
    "https://macaulaylibrary.org/asset/",
    new_data_no_cuts$`ML Catalog Number`
)
new_data_no_cuts$species_ebird_url <- paste0(
    "https://ebird.org/species/",
    new_data_no_cuts$`eBird Species Code`
)

new_data_cuts$file_url <- paste0(
    "https://macaulaylibrary.org/asset/",
    new_data_cuts$`ML Catalog Number`
)
new_data_cuts$species_ebird_url <- paste0(
    "https://ebird.org/species/",
    new_data_cuts$SpeciesCode
)

# columns of the production cuts that are named differently or absent
new_data_cuts$`Common Name` <- new_data_cuts$`English Name`
new_data_cuts$Behaviors <- NA

names(new_data_no_cuts)
names(new_data_cuts)

new_data_cuts$Recordist <- paste(new_data_cuts$FirstName, new_data_cuts$LastName)

# placeholder columns so the production cuts share these names with the
# other table
new_data_cuts$`Taxon Category` <- NA
new_data_cuts$`Country State County` <- NA
new_data_cuts$`Loc ID` <- NA
new_data_cuts$Locality <- NA
new_data_cuts$Day <- NA
new_data_cuts$Month <- NA
new_data_cuts$Year <- NA

# stack both tables using only the columns they have in common
common_colums <- intersect(names(new_data_no_cuts), names(new_data_cuts))

second_batch <- rbind(
    new_data_no_cuts[, common_colums],
    new_data_cuts[, common_colums]
)

# sound file name: "<Genus_species>-ML<catalog number>.wav"
second_batch$sound.files <- paste0(
    gsub(" ", "_", second_batch$Scientific.Name, fixed = TRUE),
    "-ML",
    second_batch$`ML Catalog Number`,
    ".wav"
)


# ohun::feature_acoustic_data(path = '~/Downloads/combined/')
# warbleR::info_sound_files(path = '~/Downloads/combined/')


# NOTE: the code below is exploratory and still needs to be organized
fls <- list.files(path = "~/Downloads/combined/")

# number of files per extension (last 4 characters of the file name)
table(substr(fls, nchar(fls) - 3, nchar(fls)))

# ML catalog number of each downloaded file: the file name without its
# audio extension. The dot is escaped and the pattern anchored so only a
# trailing ".m4a", ".mp3" or ".wav" is removed.
fls_catalog_number <- sub("\\.(m4a|mp3|wav)$", "", fls)

# keep only the metadata of recordings that were actually downloaded
second_batch <- second_batch[second_batch$`ML Catalog Number` %in%
    fls_catalog_number, ]

nrow(second_batch)

# downloaded files that have metadata
sub_fls <- fls[fls_catalog_number %in% second_batch$`ML Catalog Number`]


table(substr(sub_fls, nchar(sub_fls) - 3, nchar(sub_fls)))


# sanity check: recordings in the metadata without a downloaded file
sum(!second_batch$`ML Catalog Number` %in% fls_catalog_number)

# convert the mp3 files to wav (44.1 kHz, 16 bits) in the same folder
mp3_2_wav(samp.rate = 44.1, bit.depth = 16, path = "~/Downloads/combined/",
    overwrite = TRUE, dest.path = "~/Downloads/combined/")

# delete mp3s


# convert the m4a files to wav with ffmpeg. 'fls' only holds file names,
# so the folder is prepended (the working directory is not changed) and
# both paths are shell-quoted to survive spaces or special characters.
m4a_dir <- path.expand("~/Downloads/combined")

for (i in grep("m4a$", fls, value = TRUE)) {
    cll <- paste("ffmpeg -i", shQuote(file.path(m4a_dir, i)),
        shQuote(file.path(m4a_dir, gsub("m4a$", "wav", i))))

    # system() returns the exit status of the command (0 = success)
    if (system(cll) != 0) {
        warning("ffmpeg could not convert ", i, call. = FALSE)
    }
}

# save the metadata with all the shared columns
write.csv(
    x = second_batch,
    file = "./data/raw/sound_files_and_extended_metadata_second_batch.csv",
    row.names = FALSE
)

# keep the core columns only and save them as a second file
second_batch <- second_batch[, c(
    "family", "Scientific.Name", "species_ebird_url", "ML Catalog Number",
    "sound.files", "Common Name", "Orginal.Scientific.Name", "Behaviors",
    "file_url"
)]

write.csv(
    x = second_batch,
    file = "./data/raw/sound_files_metadata_second_batch.csv",
    row.names = FALSE
)

fix_wavs(samp.rate = 44.1, bit.depth = 16, path = "~/Downloads/combined/")


# rename the converted files from "<catalog number>.wav" to the names
# stored in the 'sound.files' column
converted_dir <- "~/Downloads/combined/converted_sound_files"

fr <- file.rename(
    from = file.path(converted_dir, paste0(second_batch$`ML Catalog Number`, ".wav")),
    to = file.path(converted_dir, second_batch$sound.files)
)

# number of recordings, and whether every file was renamed
nrow(second_batch)
all(fr)


# copy the renamed sound files into one folder per family on the shared
# drive
by_family_path <- "/run/user/1000/gvfs/smb-share:server=cinnas.local,share=neurobiología/marcelo_araya/bird_song_evolution/by_family"

for (i in na.omit(unique(second_batch$family))) {
    print(i)

    # create the family folder if it does not exist yet
    family_dir <- file.path(by_family_path, i)
    if (!dir.exists(family_dir)) {
        dir.create(family_dir)
    }

    # sound files of this family; which() skips rows with missing family
    family_files <- second_batch$sound.files[which(second_batch$family == i)]
    family_files <- family_files[!is.na(family_files)]

    frm <- file.path("~/Downloads/combined/converted_sound_files", family_files)
    tu <- file.path(family_dir, family_files)

    # file.copy() returns FALSE for files it did not copy (e.g. missing
    # source, or destination already present since overwrite is off)
    fr <- file.copy(frm, to = tu)

    if (!all(fr)) {
        warning(sum(!fr), " file(s) not copied for family ", i, call. = FALSE)
    }
}

Takeaways

  • Making good progress

 


 

Session information

R version 4.5.0 (2025-04-11)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 22.04.5 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0  LAPACK version 3.10.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       

time zone: America/Costa_Rica
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] ohun_1.0.2          googledrive_2.1.1   Rraven_1.0.14      
 [4] warbleR_1.1.35      NatureSounds_1.0.5  seewave_2.2.3      
 [7] tuneR_1.4.7         googlesheets4_1.1.1 viridis_0.6.5      
[10] viridisLite_0.4.2   rprojroot_2.0.4     formatR_1.14       
[13] knitr_1.50         

loaded via a namespace (and not attached):
 [1] gtable_0.3.6        rjson_0.2.23        xfun_0.52          
 [4] ggplot2_3.5.2       htmlwidgets_1.6.4   remotes_2.5.0      
 [7] gargle_1.5.2        vctrs_0.6.5         tools_4.5.0        
[10] bitops_1.0-9        generics_0.1.4      curl_6.4.0         
[13] parallel_4.5.0      tibble_3.3.0        proxy_0.4-27       
[16] pkgconfig_2.0.3     KernSmooth_2.23-26  checkmate_2.3.2    
[19] RColorBrewer_1.1-3  lifecycle_1.0.4     compiler_4.5.0     
[22] farver_2.1.2        stringr_1.5.1       brio_1.1.5         
[25] sketchy_1.0.5       class_7.3-23        htmltools_0.5.8.1  
[28] RCurl_1.98-1.17     yaml_2.3.10         pillar_1.11.0      
[31] crayon_1.5.3        MASS_7.3-65         classInt_0.4-11    
[34] tidyselect_1.2.1    packrat_0.9.2       digest_0.6.37      
[37] stringi_1.8.7       sf_1.0-20           dplyr_1.1.4        
[40] purrr_1.0.4         fastmap_1.2.0       grid_4.5.0         
[43] cli_3.6.5           magrittr_2.0.3      e1071_1.7-16       
[46] scales_1.4.0        backports_1.5.0     rmarkdown_2.29     
[49] httr_1.4.7          signal_1.8-1        igraph_2.1.4       
[52] gridExtra_2.3       cellranger_1.1.0    kableExtra_1.4.0   
[55] pbapply_1.7-2       evaluate_1.0.3      dtw_1.23-1         
[58] fftw_1.0-9          testthat_3.2.3      rlang_1.1.6        
[61] Rcpp_1.0.14         DBI_1.2.3           glue_1.8.0         
[64] xaringanExtra_0.8.0 xml2_1.3.8          svglite_2.1.3      
[67] rstudioapi_0.17.1   jsonlite_2.0.0      R6_2.6.1           
[70] systemfonts_1.2.3   units_0.8-7         fs_1.6.6