Checking annotation status

Bird song evolution

Author
Published

May 30, 2025

Source code, data and annotation protocol found at https://github.com/maRce10/bird_song_evolution

Purpose

  • Double-check annotations

 

Load packages

Code
# Packages used by this report. knitr is required for creating the
# html/pdf/word reports and formatR for soft-wrapping code. Entries of
# the form "user/repo" are presumably GitHub sources (handled by
# sketchy::load_packages()).
report_packages <- c("knitr", "formatR", "rprojroot", "viridis",
    "googlesheets4", "warbleR", "maRce10/Rraven", "googledrive",
    "maRce10/ohun")

# install / load packages
sketchy::load_packages(packages = report_packages)

# folder with the Raven selection tables (annotations)
# previous location:
# '/home/m/Insync/marceloa27@gmail.com/Google Drive/bird_song_evolution/raven_selections/'
annotation_path <- "./data/processed/annotations/"

# folder with the consolidated sound files
path_sound_files <- "/media/m/Expansion/Projects/Ongoing/bird_song_evolution/consolidated_sound_files/"

# alternative (disabled): set the sound file folder globally
# warbleR_options(wav.path =
# 'run/user/1000/gvfs/smb-share:server=cinnas.local,share=neurobiología/marcelo_araya/bird_song_evolution/audios/consolidated_sound_files')

1 Consolidate sound files

Code
# Gather the sound files stored in per-family folders into a single
# destination folder (consolidate() is presumably warbleR's); `cns`
# keeps whatever the call returns.
by_family_dir <- "/media/m/Campylopterus/bird_song_evolution/by_family"
consolidated_dir <- "/run/user/1000/gvfs/smb-share:server=cinnas.local,share=neurobiología/marcelo_araya/bird_song_evolution/consolidated_sound_files"

cns <- consolidate(path = by_family_dir, dest.path = consolidated_dir,
    parallel = 10)

2 Changes in annotation files

Code
# state of the annotation folder right now (md5 checksums included so
# that edited files are detected)
new_snapshot <- fileSnapshot(path = annotation_path, md5sum = TRUE,
    recursive = TRUE)

# state stored the last time the baseline was saved
prev_snapshot <- readRDS("./data/processed/prev_raven_annotations_snapshot.RDS")

# files added, deleted or modified between the two snapshots
changes <- changedFiles(before = prev_snapshot, after = new_snapshot)

# to list the new files: changes$added

# run manually to make the current state the new baseline:
# saveRDS(new_snapshot,
# './data/processed/prev_raven_annotations_snapshot.RDS')
  • 0 files unchanged
  • 50 files added
  • 4 files deleted
  • 7289 files updated

2.1 New files by family

Code
# Table with the number of newly added annotation files per family and
# the person each family is assigned to.

# the sheet is read without authentication; use gs4_auth() instead if
# the sheet is not readable with the link alone
googlesheets4::gs4_deauth()
# googlesheets4::gs4_auth()

rec_data <- read_sheet("https://docs.google.com/spreadsheets/d/16ukhyf37hm13f1FXB2JQ-tCfHoxXy2qj_TX2xWvTVu8/edit#gid=148139271")

# the literal text "NA" in the sheet stands for a missing annotator
rec_data$assigned_to[rec_data$assigned_to %in% "NA"] <- NA

# remove rows with no family: both the literal text "NA" and true
# missing values (a bare `!=` would turn true NAs into all-NA rows)
rec_data <- rec_data[!is.na(rec_data$family) & rec_data$family != "NA",
    ]

# new files added, counted by the folder that holds them (the folder
# name is used as the family)
new_file_fams <- as.data.frame(table(dirname(changes$added)))

names(new_file_fams) <- c("Family", "New files")

# first annotator listed for each family in the recordings sheet (NA if
# the family is not in the sheet)
new_file_fams$By <- rec_data$assigned_to[match(as.character(new_file_fams$Family),
    rec_data$family)]

sub_fam_count_kbl <- kableExtra::kbl(new_file_fams, row.names = FALSE,
    escape = FALSE, format = "html")

sub_fam_count_kbl <- kableExtra::kable_styling(sub_fam_count_kbl,
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE, font_size = 12)

sub_fam_count_kbl
Family New files By
Acanthizidae 20 Daniela
Accipitridae 4 Daniela
Acrocephalidae 12 Daniela
Alaudidae 3 Daniela
Alcidae 1 Daniela
Fringillidae 9 Daniela
Thraupidae 1 Daniela

3 Descriptive stats

Code
# Reads every Raven selection table, flags problematic selection files,
# counts annotated files per family, checks the selections against the
# sound files and saves all resulting objects in a single RDS file.

# prev_status <-
# readRDS('./data/processed/annotation_status_results.RDS')
# names(prev_status) <- paste0('prev_', names(prev_status))
# saveRDS(prev_status,
# './data/processed/previous_annotation_status_results.RDS')

# import all selection tables found (recursively) under annotation_path;
# sound file names are taken from the selection file names
anns <- imp_raven(path = annotation_path, all.data = TRUE, warbler.format = TRUE,
    name.from.file = TRUE, ext.case = "lower", pb = TRUE, recursive = TRUE)

# grep('.Table.1 (1)', anns$sound.files, fixed = TRUE, value =
# T)

# the folder that holds a selection file is used as its family
anns$family <- dirname(anns$selec.file)

anns$selec.file <- basename(anns$selec.file)

# species = file name up to the first "-", with "_" turned into spaces
anns$species <- gsub("_", " ", vapply(strsplit(anns$selec.file, "-"),
    "[", character(1), 1))

# files listed in the "Rraven" option are the ones labelled "Empty file"
# below. NOTE(review): presumably set by imp_raven() - confirm
empty_files <- unlist(getOption("Rraven"))

ann_files <- data.frame(file = unique(c(anns$selec.file, empty_files)))

ann_files$species <- gsub("_", " ", vapply(strsplit(ann_files$file,
    "-"), "[", character(1), 1))

# family of the first annotation row with the same species
ann_files$family <- anns$family[match(ann_files$species, anns$species)]

# one row per sound file / selection file combination
undup_anns <- anns[!duplicated(paste(anns$sound.files, anns$selec.file)),
    ]
rownames(undup_anns) <- seq_len(nrow(undup_anns))

# get those in which the names inside the text file and in the
# txt file name don't match. Names are compared literally (fixed = TRUE)
# so that characters such as "(", ")" or "." in a file name are not
# read as a regular expression and reported as a false mismatch
name_matches <- vapply(seq_len(nrow(undup_anns)), function(i) {
    grepl(sub("\\.wav$", "", undup_anns$sound.files[i]), undup_anns$selec.file[i],
        fixed = TRUE)
}, logical(1))

file_name_no_match <- na.omit(undup_anns$selec.file[!name_matches])

# get those in which the name doesnt match the expected suffix, plus
# empty files and files with mismatching names
weird_files <- ann_files[!grepl("Table.1.selections.txt", ann_files$file,
    fixed = TRUE) | ann_files$file %in% c(empty_files, file_name_no_match),
    ]

# default label first; more specific labels overwrite it, and
# "Empty file" wins when a file falls in more than one category
weird_files$problem <- rep("file name", nrow(weird_files))
weird_files$problem[weird_files$file %in% file_name_no_match] <- "Species name in txt file name and Begin column dont match"
weird_files$problem[weird_files$file %in% empty_files] <- "Empty file"

# keep only those with 'good' names
ann_files <- ann_files[!ann_files$file %in% weird_files$file, ]

# counts per family: rows in the recordings sheet vs annotated files
fam_count <- aggregate(species ~ family, data = rec_data, length)
names(fam_count)[2] <- "total"

fam_count$annotated <- vapply(fam_count$family, function(x) sum(ann_files$family ==
    x, na.rm = TRUE), integer(1))

fam_count$prop.annotated <- round(fam_count$annotated/fam_count$total,
    2)

# Annotators assigned to each family, collapsed as "name1/name2". The
# lookup is done once per distinct family and then expanded, so it also
# works on empty input and does not scan the sheet once per row.
annotators_by_family <- function(families) {
    families <- as.character(families)
    fams <- unique(families)
    per_family <- vapply(fams, function(f) paste(unique(rec_data$assigned_to[rec_data$family ==
        f]), collapse = "/"), character(1), USE.NAMES = FALSE)
    per_family[match(families, fams)]
}

fam_count$assinged.to <- annotators_by_family(fam_count$family)

fam_count$assinged.to[fam_count$assinged.to == "NA"] <- "not assigned"

weird_files$assinged.to <- annotators_by_family(weird_files$family)


# check_sound_files()
anns <- anns[, c("sound.files", "selec", "start", "end", "bottom.freq",
    "top.freq", "selec.file", "family", "species", "element", "song")]

anns <- anns[anns$selec.file %in% ann_files$file, ]

# anns$family <- sapply(anns$species, function(x)
# rec_data$family[rec_data$species == x][1])

# full column name: `anns$f` only worked through partial matching
anns$assinged.to <- annotators_by_family(anns$family)

# are all annotated species listed in the recordings sheet?
all(ann_files$species %in% rec_data$species)

cs <- check_sels(anns, parallel = 20, pb = TRUE, fix.selec = TRUE,
    path = path_sound_files)


file_info <- info_sound_files(parallel = 20, path = path_sound_files,
    skip.error = TRUE)

file_info$species <- gsub("_", " ", vapply(strsplit(file_info$sound.files,
    "-"), "[", character(1), 1))

# family of the first sheet row with the same species
file_info$family <- rec_data$family[match(file_info$species, rec_data$species)]

# number of selections per type of failed check
as.data.frame(table(cs$check.res[cs$check.res != "OK"]))

unique(cs$sound.files[cs$check.res == "sound file not found"])

# exp_raven(cs, path = './data/processed', sound.file.path =
# '/media/m/Seagate Portable
# Drive/bird_song_recordings/consolidated_files',file.name =
# 'combined_annotations_22-03-2023')

saveRDS(list(cs = cs, anns = anns, rec_data = rec_data, ann_files = ann_files,
    weird_files = weird_files, fam_count = fam_count, file_info = file_info),
    "./data/processed/annotation_status_results.RDS")
Code
# Make the objects saved in annotation_status_results.RDS (cs, anns,
# rec_data, ann_files, weird_files, fam_count, file_info) available by
# name by attaching the list to the search path.
# NOTE(review): an object with the same name already in the workspace
# (e.g. rec_data read from the sheet above) masks the attached copy, and
# re-running this line attaches the list again.
attach(readRDS("./data/processed/annotation_status_results.RDS"))
  • 7336 species from 225 families already annotated (73% of all available species; 352.4 recording hours; 221211 annotations)

  • 112 families with 90% of all available species annotated (45% of all families)

  • 2753 species have not been annotated (27.28%; 91 recording hours; 21% of the total recording hours)

4 Species per family

All recordings in these families have been annotated (74 families, 385 recordings, 24.39 recording hours)

Code
# Table of the families whose annotated proportion (already rounded to
# two decimals in fam_count) equals 1.
sub_fam_count <- fam_count[fam_count$prop.annotated == 1, ]

# numeric proportions, kept aside because the column is replaced by
# html cells below
prop_analzyed <- sub_fam_count$prop.annotated

# the same proportions rendered in each of the three colour schemes
prop_green <- kableExtra::cell_spec(prop_analzyed, "html", color = "white",
    background = "green", bold = TRUE, font_size = 12, new_tab = TRUE)
prop_yellow <- kableExtra::cell_spec(prop_analzyed, "html", color = "black",
    background = "yellow", bold = TRUE, font_size = 12, new_tab = TRUE)
prop_red <- kableExtra::cell_spec(prop_analzyed, "html", color = "white",
    background = "red", bold = TRUE, font_size = 12, new_tab = TRUE)

# green above 0.9, yellow above 0.5, red otherwise
sub_fam_count$prop.annotated <- ifelse(prop_analzyed > 0.9, prop_green,
    ifelse(prop_analzyed > 0.5, prop_yellow, prop_red))

# highlight in red the families nobody is assigned to
annotator <- sub_fam_count$assinged.to
annotator_alert <- kableExtra::cell_spec(annotator, "html", color = "white",
    background = "red", bold = TRUE, font_size = 12, new_tab = TRUE)
annotator_plain <- kableExtra::cell_spec(annotator, "html", new_tab = TRUE)

sub_fam_count$assinged.to <- ifelse(annotator == "not assigned", annotator_alert,
    annotator_plain)

# links to the per-family folders with recordings and spectrograms
recordings_url <- "https://6f33fa7f78ea46e2aaca-my.sharepoint.com/personal/marcelo_araya_ucr_ac_cr/_layouts/15/onedrive.aspx?ga=1&id=%2Fpersonal%2Fmarcelo%5Faraya%5Fucr%5Fac%5Fcr%2FDocuments%2Fbird%5Fsong%5Fevolution%2Fbird%5Fsong%5Frecordings%2F"
spectrograms_url <- "https://6f33fa7f78ea46e2aaca-my.sharepoint.com/personal/marcelo_araya_ucr_ac_cr/_layouts/15/onedrive.aspx?ga=1&id=%2Fpersonal%2Fmarcelo%5Faraya%5Fucr%5Fac%5Fcr%2FDocuments%2Fbird%5Fsong%5Fevolution%2Fannotated%5Fspectrograms%2F"

sub_fam_count$recordings <- kableExtra::cell_spec("link", "html",
    link = paste0(recordings_url, sub_fam_count$family), new_tab = TRUE)

# spectrogram link only for families with some annotated file
spectrogram_cells <- kableExtra::cell_spec("link", "html", link = paste0(spectrograms_url,
    sub_fam_count$family), new_tab = TRUE)

sub_fam_count$spectrograms <- ifelse(prop_analzyed > 0, spectrogram_cells,
    "")

# html table with bootstrap styling
sub_fam_count_kbl <- kableExtra::kable_styling(kableExtra::kbl(sub_fam_count,
    row.names = FALSE, escape = FALSE, format = "html"), bootstrap_options = c("striped",
    "hover", "condensed", "responsive"), full_width = FALSE, font_size = 12)

sub_fam_count_kbl
family total annotated prop.annotated assinged.to recordings spectrograms
Acanthisittidae 2 2 1 Daniela link link
Aegithalidae 10 10 1 Daniela link link
Aegithinidae 4 4 1 Daniela link link
Aegothelidae 6 6 1 Daniela link link
Anhimidae 3 3 1 Daniela link link
Anhingidae 4 4 1 Daniela link link
Anseranatidae 1 1 1 Daniela link link
Aramidae 1 1 1 Daniela link link
Atrichornithidae 2 2 1 Daniela link link
Balaenicipitidae 1 1 1 Daniela link link
Bombycillidae 3 3 1 Daniela link link
Brachypteraciidae 6 6 1 Daniela link link
Bucorvidae 2 2 1 Daniela link link
Buphagidae 2 2 1 Daniela link link
Capitonidae 15 15 1 Daniela link link
Cariamidae 2 2 1 Daniela link link
Cathartidae 7 7 1 Daniela link link
Chaetopidae 2 2 1 Daniela link link
Chionidae 1 1 1 Daniela link link
Chloropseidae 10 10 1 Daniela link link
Cinclidae 5 5 1 Daniela link link
Corcoracidae 2 2 1 Daniela link link
Dasyornithidae 3 3 1 Daniela link link
Donacobiidae 1 1 1 Daniela link link
Dromadidae 1 1 1 Daniela link link
Elachuridae 1 1 1 Daniela link link
Eurypygidae 1 1 1 Daniela link link
Formicariidae 12 12 1 Daniela link link
Gaviidae 5 5 1 Carlos link link
Grallariidae 67 67 1 Carlos link link
Heliornithidae 2 2 1 Carlos link link
Hydrobatidae 15 15 1 Carlos link link
Ibidorhynchidae 1 1 1 Carlos link link
Leptosomidae 1 1 1 Paula link link
Melanopareiidae 4 4 1 Daniela link link
Mesitornithidae 3 3 1 Paula link link
Mimidae 34 34 1 Paula link link
Mitrospingidae 4 4 1 Paula link link
Modulatricidae 3 3 1 Paula link link
Mohoidae 1 1 1 Paula link link
Mohouidae 3 3 1 Paula link link
Nesospingidae 1 1 1 Paula link link
Nicatoridae 3 3 1 Paula link link
Notiomystidae 1 1 1 Paula link link
Opisthocomidae 1 1 1 Paula link link
Pandionidae 1 1 1 Paula link link
Pardalotidae 4 4 1 Paula link link
Pedionomidae 1 1 1 Paula link link
Peucedramidae 1 1 1 Paula link link
Phaethontidae 3 3 1 Paula link link
Pnoepygidae 5 5 1 Paula link link
Polioptilidae 21 21 1 Paula link link
Promeropidae 2 2 1 Paula link link
Psophiidae 3 3 1 Paula link link
Psophodidae 4 4 1 Paula link link
Ptiliogonatidae 4 4 1 Paula link link
Recurvirostridae 9 9 1 Paula link link
Rheidae 2 2 1 Paula link link
Rhodinocichlidae 1 1 1 Paula link link
Sagittariidae 1 1 1 Paula link link
Sapayoidae 1 1 1 Paula link link
Scopidae 1 1 1 Paula link link
Semnornithidae 2 2 1 Paula link link
Spindalidae 4 4 1 Paula link link
Steatornithidae 1 1 1 Paula link link
Stercorariidae 7 7 1 Paula link link
Strigopidae 3 3 1 Paula link link
Teretistridae 2 2 1 Paula link link
Tichodromidae 1 1 1 Paula link link
Tityridae 33 33 1 Paula link link
Todidae 5 5 1 Paula link link
Upupidae 2 2 1 Paula link link
Urocynchramidae 1 1 1 Paula link link
Zeledoniidae 1 1 1 Paula link link

90%-99% of recordings in these families have been annotated (37 families, 2068 recordings, 103.67 recording hours)

Code
# Table of the families with an annotated proportion (already rounded to
# two decimals in fam_count) from 0.9 up to, but not including, 1.
sub_fam_count <- fam_count[fam_count$prop.annotated >= 0.9 & fam_count$prop.annotated <
    1, ]

# numeric proportions, kept aside because the column is replaced by
# html cells below
prop_analzyed <- sub_fam_count$prop.annotated

# the same proportions rendered in each of the three colour schemes
prop_green <- kableExtra::cell_spec(prop_analzyed, "html", color = "white",
    background = "green", bold = TRUE, font_size = 12, new_tab = TRUE)
prop_yellow <- kableExtra::cell_spec(prop_analzyed, "html", color = "black",
    background = "yellow", bold = TRUE, font_size = 12, new_tab = TRUE)
prop_red <- kableExtra::cell_spec(prop_analzyed, "html", color = "white",
    background = "red", bold = TRUE, font_size = 12, new_tab = TRUE)

# green from 0.9 upwards, so the colour agrees with the `>= 0.9` cut
# that selects the rows of this table (families at exactly 0.9 used to
# be shown in yellow); yellow above 0.5, red otherwise
sub_fam_count$prop.annotated <- ifelse(prop_analzyed >= 0.9, prop_green,
    ifelse(prop_analzyed > 0.5, prop_yellow, prop_red))

# highlight in red the families nobody is assigned to
annotator <- sub_fam_count$assinged.to
annotator_alert <- kableExtra::cell_spec(annotator, "html", color = "white",
    background = "red", bold = TRUE, font_size = 12, new_tab = TRUE)
annotator_plain <- kableExtra::cell_spec(annotator, "html", new_tab = TRUE)

sub_fam_count$assinged.to <- ifelse(annotator == "not assigned", annotator_alert,
    annotator_plain)

# links to the per-family folders with recordings and spectrograms
recordings_url <- "https://6f33fa7f78ea46e2aaca-my.sharepoint.com/personal/marcelo_araya_ucr_ac_cr/_layouts/15/onedrive.aspx?ga=1&id=%2Fpersonal%2Fmarcelo%5Faraya%5Fucr%5Fac%5Fcr%2FDocuments%2Fbird%5Fsong%5Fevolution%2Fbird%5Fsong%5Frecordings%2F"
spectrograms_url <- "https://6f33fa7f78ea46e2aaca-my.sharepoint.com/personal/marcelo_araya_ucr_ac_cr/_layouts/15/onedrive.aspx?ga=1&id=%2Fpersonal%2Fmarcelo%5Faraya%5Fucr%5Fac%5Fcr%2FDocuments%2Fbird%5Fsong%5Fevolution%2Fannotated%5Fspectrograms%2F"

sub_fam_count$recordings <- kableExtra::cell_spec("link", "html",
    link = paste0(recordings_url, sub_fam_count$family), new_tab = TRUE)

# spectrogram link only for families with some annotated file
spectrogram_cells <- kableExtra::cell_spec("link", "html", link = paste0(spectrograms_url,
    sub_fam_count$family), new_tab = TRUE)

sub_fam_count$spectrograms <- ifelse(prop_analzyed > 0, spectrogram_cells,
    "")

# the empty `digits = ` argument was dropped: kbl() used its default
# anyway
sub_fam_count_kbl <- kableExtra::kbl(sub_fam_count, row.names = FALSE,
    escape = FALSE, format = "html")

# shade fully annotated families (none can be present given the
# `< 1` filter above; kept so all family tables are built the same way)
sub_fam_count_kbl <- kableExtra::row_spec(kable_input = sub_fam_count_kbl,
    row = which(prop_analzyed == 1), background = grDevices::adjustcolor("#6DCD59FF",
        alpha.f = 0.3))

sub_fam_count_kbl <- kableExtra::kable_styling(sub_fam_count_kbl,
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE, font_size = 12)

sub_fam_count_kbl
family total annotated prop.annotated assinged.to recordings spectrograms
Acanthizidae 64 59 0.92 Daniela link link
Accipitridae 224 215 0.96 Daniela link link
Alcidae 23 21 0.91 Daniela link link
Ardeidae 56 53 0.95 Daniela link link
Artamidae 21 20 0.95 Daniela link link
Bernieridae 11 10 0.91 Daniela link link
Bucerotidae 59 56 0.95 Daniela link link
Burhinidae 10 9 0.9 Daniela link link
Cacatuidae 24 23 0.96 Daniela link link
Cardinalidae 49 48 0.98 Daniela link link
Certhiidae 10 9 0.9 Daniela link link
Charadriidae 65 63 0.97 Daniela link link
Cinclosomatidae 12 11 0.92 Daniela link link
Coraciidae 13 12 0.92 Daniela link link
Corvidae 127 123 0.97 Daniela link link
Cuculidae 144 132 0.92 Daniela link link
Diomedeidae 13 12 0.92 Daniela link link
Falconidae 66 60 0.91 Daniela link link
Furnariidae 309 291 0.94 Daniela link link
Galbulidae 18 17 0.94 Carlos link link
Gruidae 15 14 0.93 Carlos/Paula link link
Haematopodidae 11 10 0.91 Carlos/Paula link link
Icteridae 105 95 0.9 Daniela/Carlos link link
Laridae 97 87 0.9 Daniela link link
Meropidae 29 27 0.93 Daniela/Paula link link
Momotidae 14 13 0.93 Paula link link
Musophagidae 22 20 0.91 Paula link link
Odontophoridae 33 31 0.94 Paula link link
Parulidae 113 106 0.94 Paula link link
Passerellidae 130 126 0.97 Paula link link
Podicipedidae 20 19 0.95 Paula link link
Prunellidae 12 11 0.92 Paula link link
Ramphastidae 36 34 0.94 Paula link link
Rhinocryptidae 66 64 0.97 Paula link link
Sylviidae 69 64 0.93 Paula link link
Tinamidae 46 43 0.93 Paula link link
Vireonidae 62 59 0.95 Paula link link

1%-90% of recordings in these families have been annotated (113 families, 4882 recordings, 224.33 recording hours)

Code
# Table of the families with an annotated proportion (already rounded to
# two decimals in fam_count) of at least 1e-04 and below 0.9.
sub_fam_count <- fam_count[fam_count$prop.annotated >= 1e-04 & fam_count$prop.annotated <
    0.9, ]

# numeric proportions, kept aside because the column is replaced by
# html cells below
prop_analzyed <- sub_fam_count$prop.annotated

# the same proportions rendered in each of the three colour schemes
prop_green <- kableExtra::cell_spec(prop_analzyed, "html", color = "white",
    background = "green", bold = TRUE, font_size = 12, new_tab = TRUE)
prop_yellow <- kableExtra::cell_spec(prop_analzyed, "html", color = "black",
    background = "yellow", bold = TRUE, font_size = 12, new_tab = TRUE)
prop_red <- kableExtra::cell_spec(prop_analzyed, "html", color = "white",
    background = "red", bold = TRUE, font_size = 12, new_tab = TRUE)

# green above 0.9, yellow above 0.5, red otherwise
sub_fam_count$prop.annotated <- ifelse(prop_analzyed > 0.9, prop_green,
    ifelse(prop_analzyed > 0.5, prop_yellow, prop_red))

# highlight in red the families nobody is assigned to
annotator <- sub_fam_count$assinged.to
annotator_alert <- kableExtra::cell_spec(annotator, "html", color = "white",
    background = "red", bold = TRUE, font_size = 12, new_tab = TRUE)
annotator_plain <- kableExtra::cell_spec(annotator, "html", new_tab = TRUE)

sub_fam_count$assinged.to <- ifelse(annotator == "not assigned", annotator_alert,
    annotator_plain)

# links to the per-family folders with recordings and spectrograms
recordings_url <- "https://6f33fa7f78ea46e2aaca-my.sharepoint.com/personal/marcelo_araya_ucr_ac_cr/_layouts/15/onedrive.aspx?ga=1&id=%2Fpersonal%2Fmarcelo%5Faraya%5Fucr%5Fac%5Fcr%2FDocuments%2Fbird%5Fsong%5Fevolution%2Fbird%5Fsong%5Frecordings%2F"
spectrograms_url <- "https://6f33fa7f78ea46e2aaca-my.sharepoint.com/personal/marcelo_araya_ucr_ac_cr/_layouts/15/onedrive.aspx?ga=1&id=%2Fpersonal%2Fmarcelo%5Faraya%5Fucr%5Fac%5Fcr%2FDocuments%2Fbird%5Fsong%5Fevolution%2Fannotated%5Fspectrograms%2F"

sub_fam_count$recordings <- kableExtra::cell_spec("link", "html",
    link = paste0(recordings_url, sub_fam_count$family), new_tab = TRUE)

# spectrogram link only for families with some annotated file
spectrogram_cells <- kableExtra::cell_spec("link", "html", link = paste0(spectrograms_url,
    sub_fam_count$family), new_tab = TRUE)

sub_fam_count$spectrograms <- ifelse(prop_analzyed > 0, spectrogram_cells,
    "")

sub_fam_count_kbl <- kableExtra::kbl(sub_fam_count, row.names = FALSE,
    escape = FALSE, format = "html")

# shade the rows of fully annotated families, if any
fully_annotated_rows <- which(prop_analzyed == 1)
row_shade <- grDevices::adjustcolor("#6DCD59FF", alpha.f = 0.3)

sub_fam_count_kbl <- kableExtra::row_spec(kable_input = sub_fam_count_kbl,
    row = fully_annotated_rows, background = row_shade)

sub_fam_count_kbl <- kableExtra::kable_styling(sub_fam_count_kbl,
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE, font_size = 12)

sub_fam_count_kbl
family total annotated prop.annotated assinged.to recordings spectrograms
Acrocephalidae 48 33 0.69 Daniela link link
Alaudidae 89 66 0.74 Daniela link link
Alcedinidae 99 88 0.89 Daniela link link
Anatidae 159 136 0.86 Daniela link link
Apodidae 90 48 0.53 Daniela link link
Apterygidae 5 3 0.6 Daniela link link
Bucconidae 36 31 0.86 Daniela link link
Calcariidae 6 5 0.83 Daniela link link
Callaeidae 3 2 0.67 Daniela link link
Calyptomenidae 6 5 0.83 Daniela link link
Calyptophilidae 2 1 0.5 Daniela link link
Campephagidae 77 67 0.87 Daniela link link
Casuariidae 4 3 0.75 Daniela link link
Ciconiidae 15 12 0.8 Daniela link link
Cisticolidae 152 133 0.88 Daniela link link
Climacteridae 7 6 0.86 Daniela link link
Coliidae 6 3 0.5 Daniela link link
Columbidae 292 250 0.86 Daniela link link
Conopophagidae 12 4 0.33 Daniela link link
Cotingidae 62 52 0.84 Daniela link link
Cracidae 56 45 0.8 Daniela link link
Dicaeidae 41 25 0.61 Daniela link link
Dicruridae 28 13 0.46 Daniela link link
Emberizidae 43 31 0.72 Daniela link link
Estrildidae 108 67 0.62 Daniela link link
Eurylaimidae 8 1 0.12 Daniela link link
Falcunculidae 3 1 0.33 Daniela link link
Fregatidae 4 2 0.5 Daniela link link
Fringillidae 200 170 0.85 Daniela link link
Glareolidae 14 11 0.79 Paula/Carlos link link
Hemiprocnidae 4 1 0.25 Paula/Carlos link link
Hirundinidae 80 68 0.85 Carlos/Paula link link
Hyliotidae 4 3 0.75 Carlos/Paula link link
Indicatoridae 16 10 0.62 Daniela/Carlos/Paula link link
Irenidae 2 1 0.5 Daniela link link
Jacanidae 7 5 0.71 Daniela link link
Laniidae 32 25 0.78 Daniela link link
Leiothrichidae 140 84 0.6 Daniela link link
Locustellidae 59 32 0.54 Daniela link link
Lybiidae 38 30 0.79 Daniela/Paula link link
Machaerirhynchidae 2 1 0.5 Daniela link link
Macrosphenidae 19 15 0.79 Daniela/Paula link link
Malaconotidae 51 43 0.84 Daniela/Paula link link
Maluridae 30 11 0.37 Daniela/Paula link link
Megalaimidae 34 12 0.35 Daniela link link
Megapodiidae 15 3 0.2 Daniela/Paula link link
Meliphagidae 151 119 0.79 Daniela/Paula link link
Menuridae 2 1 0.5 Daniela link link
Monarchidae 85 25 0.29 Paula link link
Motacillidae 68 59 0.87 Paula link link
Muscicapidae 329 225 0.68 Paula link link
Nectariniidae 129 86 0.67 Paula link link
Neosittidae 3 2 0.67 Paula link link
Numididae 6 4 0.67 Paula link link
Nyctibiidae 8 7 0.88 Paula link link
Oceanitidae 5 4 0.8 Paula link link
Oreoicidae 3 1 0.33 Paula link link
Oriolidae 31 15 0.48 Paula link link
Orthonychidae 3 2 0.67 Paula link link
Otididae 20 15 0.75 Paula link link
Oxyruncidae 7 6 0.86 Paula link link
Pachycephalidae 53 16 0.3 Paula link link
Paradisaeidae 39 4 0.1 Paula link link
Paridae 63 55 0.87 Paula link link
Passeridae 39 33 0.85 Paula link link
Pelecanidae 6 5 0.83 Paula link link
Pellorneidae 59 25 0.42 Paula link link
Petroicidae 46 24 0.52 Paula link link
Phaenicophilidae 4 3 0.75 Paula link link
Phalacrocoracidae 28 19 0.68 Paula link link
Phasianidae 167 107 0.64 Paula link link
Philepittidae 4 3 0.75 Paula link link
Phoenicopteridae 7 5 0.71 Paula link link
Phoeniculidae 9 8 0.89 Paula link link
Phylloscopidae 80 60 0.75 Paula link link
Picidae 224 180 0.8 Paula link link
Pipridae 55 46 0.84 Paula link link
Pittidae 43 11 0.26 Paula link link
Platysteiridae 31 18 0.58 Paula link link
Ploceidae 101 75 0.74 Paula link link
Podargidae 16 5 0.31 Paula link link
Pomatostomidae 5 4 0.8 Paula link link
Procellariidae 69 48 0.7 Paula link link
Psittaculidae 151 61 0.4 Paula link link
Pteroclidae 16 14 0.88 Paula link link
Ptilonorhynchidae 21 10 0.48 Paula link link
Pycnonotidae 141 81 0.57 Paula link link
Rallidae 117 90 0.77 Paula link link
Regulidae 6 5 0.83 Paula link link
Remizidae 11 8 0.73 Paula link link
Rhipiduridae 47 10 0.21 Paula link link
Sarothruridae 9 7 0.78 Paula link link
Scolopacidae 89 78 0.88 Paula link link
Scotocercidae 34 24 0.71 Paula link link
Sittidae 28 22 0.79 Paula link link
Spheniscidae 17 14 0.82 Paula link link
Stenostiridae 9 8 0.89 Paula link link
Strigidae 218 143 0.66 Paula link link
Sturnidae 104 63 0.61 Paula link link
Sulidae 9 8 0.89 Paula link link
Thamnophilidae 237 205 0.86 Paula link link
Thinocoridae 4 3 0.75 Daniela link link
Thraupidae 376 333 0.89 Daniela/Paula link link
Threskiornithidae 33 29 0.88 Paula link link
Timaliidae 57 31 0.54 Paula link link
Trogonidae 43 33 0.77 Paula link link
Turdidae 164 136 0.83 Paula link link
Turnicidae 12 9 0.75 Paula link link
Tyrannidae 453 398 0.88 Paula link link
Tytonidae 11 8 0.73 Paula link link
Vangidae 38 27 0.71 Paula link link
Viduidae 14 8 0.57 Paula link link
Zosteropidae 116 33 0.28 Paula link link

0 recordings in these families have been annotated (24 families)

Code
# Table of the families whose annotated proportion (already rounded to
# two decimals in fam_count) equals 0.
sub_fam_count <- fam_count[fam_count$prop.annotated == 0, ]

# numeric proportions, kept aside because the column is replaced by
# html cells below
prop_analzyed <- sub_fam_count$prop.annotated

# the same proportions rendered in each of the three colour schemes
prop_green <- kableExtra::cell_spec(prop_analzyed, "html", color = "white",
    background = "green", bold = TRUE, font_size = 12, new_tab = TRUE)
prop_yellow <- kableExtra::cell_spec(prop_analzyed, "html", color = "black",
    background = "yellow", bold = TRUE, font_size = 12, new_tab = TRUE)
prop_red <- kableExtra::cell_spec(prop_analzyed, "html", color = "white",
    background = "red", bold = TRUE, font_size = 12, new_tab = TRUE)

# green above 0.9, yellow above 0.5, red otherwise
sub_fam_count$prop.annotated <- ifelse(prop_analzyed > 0.9, prop_green,
    ifelse(prop_analzyed > 0.5, prop_yellow, prop_red))

# highlight in red the families nobody is assigned to
annotator <- sub_fam_count$assinged.to
annotator_alert <- kableExtra::cell_spec(annotator, "html", color = "white",
    background = "red", bold = TRUE, font_size = 12, new_tab = TRUE)
annotator_plain <- kableExtra::cell_spec(annotator, "html", new_tab = TRUE)

sub_fam_count$assinged.to <- ifelse(annotator == "not assigned", annotator_alert,
    annotator_plain)

# links to the per-family folders with recordings and spectrograms
recordings_url <- "https://6f33fa7f78ea46e2aaca-my.sharepoint.com/personal/marcelo_araya_ucr_ac_cr/_layouts/15/onedrive.aspx?ga=1&id=%2Fpersonal%2Fmarcelo%5Faraya%5Fucr%5Fac%5Fcr%2FDocuments%2Fbird%5Fsong%5Fevolution%2Fbird%5Fsong%5Frecordings%2F"
spectrograms_url <- "https://6f33fa7f78ea46e2aaca-my.sharepoint.com/personal/marcelo_araya_ucr_ac_cr/_layouts/15/onedrive.aspx?ga=1&id=%2Fpersonal%2Fmarcelo%5Faraya%5Fucr%5Fac%5Fcr%2FDocuments%2Fbird%5Fsong%5Fevolution%2Fannotated%5Fspectrograms%2F"

sub_fam_count$recordings <- kableExtra::cell_spec("link", "html",
    link = paste0(recordings_url, sub_fam_count$family), new_tab = TRUE)

# spectrogram link only for families with some annotated file (so the
# column is blank for every row of this table)
spectrogram_cells <- kableExtra::cell_spec("link", "html", link = paste0(spectrograms_url,
    sub_fam_count$family), new_tab = TRUE)

sub_fam_count$spectrograms <- ifelse(prop_analzyed > 0, spectrogram_cells,
    "")

sub_fam_count_kbl <- kableExtra::kbl(sub_fam_count, row.names = FALSE,
    escape = FALSE, format = "html")

# shade the rows of fully annotated families, if any
fully_annotated_rows <- which(prop_analzyed == 1)
row_shade <- grDevices::adjustcolor("#6DCD59FF", alpha.f = 0.3)

sub_fam_count_kbl <- kableExtra::row_spec(kable_input = sub_fam_count_kbl,
    row = fully_annotated_rows, background = row_shade)

sub_fam_count_kbl <- kableExtra::kable_styling(sub_fam_count_kbl,
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE, font_size = 12)

sub_fam_count_kbl
family total annotated prop.annotated assinged.to recordings spectrograms
Caprimulgidae 87 0 0 MARCELO GETS ANNOTATIONS link
Cnemophilidae 2 0 0 Daniela link
Dulidae 1 0 0 Daniela link
Eulacestomatidae 1 0 0 Daniela link
Eupetidae 1 0 0 Daniela link
Hylocitreidae 1 0 0 Paula link
Hypocoliidae 1 0 0 Paula link
Icteriidae 1 0 0 Daniela link
Ifritidae 1 0 0 Daniela link
Melampittidae 2 0 0 Daniela link
Melanocharitidae 7 0 0 Daniela link
Panuridae 1 0 0 Paula link
Paramythiidae 2 0 0 Paula link
Picathartidae 1 0 0 Paula link
Pityriasidae 1 0 0 Paula link
Platylophidae 1 0 0 Paula link
Pluvianidae 1 0 0 Paula link
Psittacidae 167 0 0 MARCELO GETS ANNOTATIONS link
Rhagologidae 1 0 0 Paula link
Rhynochetidae 1 0 0 Paula link
Rostratulidae 2 0 0 Paula link
Struthionidae 1 0 0 Paula link
Trochilidae 347 0 0 MARCELO GETS ANNOTATIONS link
Troglodytidae 86 0 0 MARCELO GETS ANNOTATIONS link

5 Double-checking annotations

  • 1 weirdly named file(s) and 0 empty file(s):
Code
# print the selection files flagged as problematic (unexpected file
# name, empty file, or mismatch between the sound file name and the
# selection file name), with the annotator(s) assigned to their family
weird_files
file species family problem assinged.to
284 Acrocephalus_caffer-ML191889321(1).Table.1.selections.txt Acrocephalus caffer Acrocephalidae Species name in txt file name and Begin column dont match Daniela
Code
# selections for which the check result is "OK"
passed_check <- cs$check.res == "OK"
ok_anns <- cs[passed_check, ]
Code
# re run manually
#
# Builds the list of problematic annotations (failed checks, unusually
# long or broad selections, missing 'song'/'element' labels and
# overlapping songs), copies the matching spectrograms to the shared
# Google Drive folder and writes the list to
# ./data/processed/problematic_annotations.csv

# back up the previous report before it is overwritten at the end
prev_prob <- read.csv("./data/processed/problematic_annotations.csv")
write.csv(prev_prob, "./data/processed/prev_problematic_annotations.csv",
    row.names = FALSE)

# selections that did not pass the check
problematic_sels <- cs[cs$check.res != "OK", ]

# weird duration or frequency range. Labels are built with
# rep(..., nrow()) so that a category with no rows does not stop the
# chunk (assigning a single value to a zero-row data frame is an error)
long_anns <- cs[cs$end - cs$start > 10 & !cs$family %in% c("Tinamidae",
    "Accipitridae", "Falconidae"), ]
long_anns$check.res <- rep("longer than 10 s", nrow(long_anns))

broad_anns <- cs[cs$top.freq - cs$bottom.freq > 10, ]
broad_anns$check.res <- rep("frequency range wider than 10 kHz", nrow(broad_anns))

song_na <- cs[is.na(cs$song), ]
song_na$check.res <- rep("NAs in 'song'", nrow(song_na))

elem_na <- cs[is.na(cs$element), ]
elem_na$check.res <- rep("NAs in 'element'", nrow(elem_na))

problematic_sels <- rbind(problematic_sels, long_anns, broad_anns,
    song_na, elem_na)

problematic_sels <- problematic_sels[, c("sound.files", "family",
    "assinged.to", "selec", "check.res")]

problematic_sels$spectrograms <- rep("", nrow(problematic_sels))
names(problematic_sels) <- c("sound.file", "family", "assinged.to",
    "selection", "problem", "spectrograms")

problematic_sels <- problematic_sels[order(problematic_sels$family,
    problematic_sels$sound.file, problematic_sels$selection), ]

# url-encoded sound file names. NOTE(review): `sf` is not used in the
# part of the file reviewed here - confirm it is needed
sf <- gsub("\\.wav", "", problematic_sels$sound.file)
sf <- gsub("_", "%5F", sf)
sf <- gsub("-", "%2D", sf)

problematic_sels <- problematic_sels[order(problematic_sels$sound.file,
    problematic_sels$selection), ]


# find overlapping songs
song_anns <- song_analysis(X = anns, parallel = 14)
song_anns <- check_sels(song_anns, parallel = 14, fix.selec = TRUE,
    path = path_sound_files)

# song_anns <- song_anns[song_anns$check.res == 'OK',]


song_anns <- song_anns[song_anns$end - song_anns$start > 0, ]

ovlp_songs <- overlapping_sels(song_anns[song_anns$check.res == "OK" &
    !is.na(song_anns$song), ], parallel = 14)

ovlp_songs <- ovlp_songs[!is.na(ovlp_songs$ovlp.sels), ]

# family and annotator of the first annotation of each sound file
first_ann <- match(ovlp_songs$sound.files, anns$sound.files)

ovlp_songs$family <- anns$family[first_ann]
ovlp_songs$problem <- rep("overlapping songs", nrow(ovlp_songs))
ovlp_songs$assinged.to <- anns$assinged.to[first_ann]

# one row per sound file with overlapping songs
ovlp_song_probs <- aggregate(song ~ sound.files + family + assinged.to +
    problem, data = ovlp_songs, unique)

# selections that belong to the overlapping songs of each sound file.
# Songs are looked up by sound file name: the rows of the aggregated
# table do not line up with the rows of `ovlp_songs`, so indexing
# `ovlp_songs` by row number would pick songs from the wrong file
ovlp_song_probs$selections <- vapply(as.character(ovlp_song_probs$sound.files),
    function(f) {
        songs <- unique(ovlp_songs$song[ovlp_songs$sound.files == f])
        paste(na.omit(anns$selec[anns$sound.files == f & anns$song %in%
            songs]), collapse = ", ")
    }, character(1), USE.NAMES = FALSE)

# selections per sound file and problem, collapsed into one string so
# they are written the same way as for the overlapping songs
agg_prob_sels <- aggregate(selection ~ sound.file + family + assinged.to +
    problem, data = problematic_sels, FUN = function(x) paste(unique(x),
    collapse = ", "))

# number of problematic rows of each sound file (all problems pooled)
agg_prob_sels$sel.count <- vapply(as.character(agg_prob_sels$sound.file),
    function(x) sum(problematic_sels$sound.file == x), integer(1),
    USE.NAMES = FALSE)

ovlp_song_probs$sel.count <- vapply(as.character(ovlp_song_probs$sound.files),
    function(x) sum(ovlp_songs$sound.files == x), integer(1), USE.NAMES = FALSE)

# give both tables the same column names before binding them
names(agg_prob_sels)[names(agg_prob_sels) == "selection"] <- "selections"

ovlp_song_probs$song <- NULL

names(ovlp_song_probs)[names(ovlp_song_probs) == "sound.files"] <- "sound.file"

agg_prob_sels <- rbind(agg_prob_sels, ovlp_song_probs)

agg_prob_sels <- data.frame(lapply(agg_prob_sels, as.character), stringsAsFactors = FALSE)

# empty column for annotators to report the fix
agg_prob_sels$fixed <- rep("", nrow(agg_prob_sels))


# Copy problematic to folder in google drive

#### MAKE SURE SPECTROGRAMS EXISTS FOR ALL ANNOTATED FILES (if
#### not run spectrogram creator chunk below) search the jpeg
#### image files that match the species name and copy those
#### files to a new folder
# NOTE(review): `ann_spec_path` is not defined in the part of the file
# reviewed here, and pblapply_wrblr_int() is an unexported warbleR
# function reached with `:::`
out <- warbleR:::pblapply_wrblr_int(unique(agg_prob_sels$sound.file),
    function(x) {
        jpegs <- list.files(path = ann_spec_path, pattern = gsub(".wav",
            "", x), recursive = TRUE, full.names = TRUE)

        file.copy(from = jpegs, file.path("/home/m/Insync/marceloa27@gmail.com/Google Drive/bird_song_evolution/annotated_spectrograms_problematic/",
            basename(jpegs)), overwrite = TRUE)

    })

problm_image_drive <- googledrive::drive_ls(path = "bird_song_evolution/annotated_spectrograms_problematic/")

problm_image_drive



# image file page 1 agg_prob_sels$jpeg_p1 <- gsub('.wav',
# '--p1.jpeg',agg_prob_sels$sound.file)

# google drive link: id of the first image whose name contains the
# sound file name followed by "--p" (compared literally, so special
# characters in file names are not read as a regular expression)
agg_prob_sels$gd_id <- sapply(agg_prob_sels$sound.file, function(x) {

    as.vector(problm_image_drive$id[grep(sub("\\.wav$", "--p", x), problm_image_drive$name,
        fixed = TRUE)])[1]
})
agg_prob_sels$gd_link <- ifelse(!is.na(agg_prob_sels$gd_id), paste0("https://drive.google.com/file/d/",
    agg_prob_sels$gd_id, "/view?usp=drive_link"), NA)

write.csv(agg_prob_sels, "./data/processed/problematic_annotations.csv",
    row.names = FALSE)
Code
# reload the list of problematic annotations written by the chunk above
agg_prob_sels <- read.csv("./data/processed/problematic_annotations.csv")

# leave out the problems whose description contains "not found" or
# "read"
rows_to_keep <- grep("not found|read", agg_prob_sels$problem, invert = TRUE)
agg_prob_sels <- agg_prob_sels[rows_to_keep, ]

# show missing values of 'fixed' as blank cells
fixed_missing <- is.na(agg_prob_sels$fixed)
agg_prob_sels$fixed <- ifelse(fixed_missing, "", agg_prob_sels$fixed)

# link to the spectrogram in google drive, when there is one
has_drive_link <- !is.na(agg_prob_sels$gd_link)
drive_cells <- kableExtra::cell_spec("link", "html", link = agg_prob_sels$gd_link,
    new_tab = TRUE)
agg_prob_sels$spectrograms <- ifelse(has_drive_link, drive_cells, "")

# link that searches the recordings folder for the species, i.e. the
# part of the sound file name before the first "-"
species_part <- sapply(strsplit(agg_prob_sels$sound.file, "-"), "[",
    1)
search_url <- paste0("https://6f33fa7f78ea46e2aaca-my.sharepoint.com/personal/marcelo_araya_ucr_ac_cr/_layouts/15/onedrive.aspx?q=",
    gsub("_", "%5F", species_part), "&view=7&id=%2Fpersonal%2Fmarcelo%5Faraya%5Fucr%5Fac%5Fcr%2FDocuments&searchScope=folder")

agg_prob_sels$recording <- kableExtra::cell_spec("link", "html", link = search_url,
    new_tab = TRUE)

The following tables show selections (‘selections’ column) within sound files (‘sound.file’ column) that are problematic (described in the ‘problem’ column). Please check them and fix accordingly. If a Raven selection table is fixed, please upload it to Google Drive, but make sure the old copy is removed first. After fixing annotations, report it in this data sheet.

  • 18 files
Code
# sort rows by family, then sound file, then problem
row_order <- order(agg_prob_sels$family, agg_prob_sels$sound.file, agg_prob_sels$problem)
agg_prob_sels <- agg_prob_sels[row_order, ]

# leave out the frequency range problems
agg_prob_sels <- agg_prob_sels[agg_prob_sels$problem != "frequency range wider than 10 kHz", ]

# rows of the selected annotators ('assinged.to' is kept spelled as
# it is used throughout this file) and the columns shown in the table
annotator_rows <- agg_prob_sels$assinged.to %in% c("Daniela", "Daniela/Carlos",
    "Daniela/Paula")
table_cols <- c("sound.file", "family", "problem", "selections", "spectrograms",
    "recording")

# html table; escape = FALSE keeps the link cells as html
problematic_sels_kbl <- kableExtra::kable_styling(
    kableExtra::kbl(agg_prob_sels[annotator_rows, table_cols], row.names = FALSE,
        escape = FALSE, format = "html", digits = 3),
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE, font_size = 12)

problematic_sels_kbl
sound.file family problem selections spectrograms recording
Haliastur_sphenurus-ML201204.wav Accipitridae NAs in 'song' 53 link link
Spilornis_rufipectus-ML32927.wav Accipitridae overlapping songs 2 link link
Alcedo_hercules-ML203909731.wav Alcedinidae overlapping songs 1, 1 link link
Aythya_collaris-ML130832.wav Anatidae overlapping songs 2, 2 link link
Melanitta_nigra-ML240574911.wav Anatidae NAs in 'song' 3 link link
Apteryx_haastii-ML810.wav Apodidae overlapping songs 3, 3 link link
Prinia_cooki-ML555978711.wav Cisticolidae overlapping songs 1, 2, 3, 5 link link
Ptilinopus_insularis-ML203895681.wav Columbidae overlapping songs 4, 6, 7, 8, 9, 10, 11, 12, 13 link link
Euphonia_jamaica-ML164921.wav Fringillidae NAs in 'element' c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151) link
Euphonia_jamaica-ML164921.wav Fringillidae NAs in 'song' c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151) link
Serinus_pusillus-ML541242.wav Fringillidae NAs in 'song' c(24, 25) link
Spinus_psaltria-ML22869.wav Fringillidae NAs in 'song' 416 link link
Philydor_fuscipenne-ML39940441.wav Furnariidae overlapping songs 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 36, 56 link link
  • 1 files
Code
# rows of the selected annotators ('assinged.to' is kept spelled as
# it is used throughout this file) and the columns shown in the table
annotator_rows <- agg_prob_sels$assinged.to %in% c("Paula", "Carlos/Paula")
table_cols <- c("sound.file", "family", "problem", "selections", "spectrograms",
    "recording")

# html table; escape = FALSE keeps the link cells as html
problematic_sels_kbl <- kableExtra::kable_styling(
    kableExtra::kbl(agg_prob_sels[annotator_rows, table_cols], row.names = FALSE,
        escape = FALSE, format = "html", digits = 3),
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE, font_size = 12)

problematic_sels_kbl
sound.file family problem selections spectrograms recording
Delichon_dasypus-ML146378371.wav Hirundinidae NAs in 'song' c(85, 86, 87, 88, 89, 90, 91, 92, 93, 94) link link
Delichon_dasypus-ML146378371.wav Hirundinidae overlapping songs 1, 3, 4, 5, 6, 7, 8 link link
  • 0 files
Code
# rows of the selected annotator ('assinged.to' is kept spelled as it
# is used throughout this file) and the columns shown in the table
annotator_rows <- agg_prob_sels$assinged.to %in% c("Carlos")
table_cols <- c("sound.file", "family", "problem", "selections", "spectrograms",
    "recording")

# html table; escape = FALSE keeps the link cells as html
problematic_sels_kbl <- kableExtra::kable_styling(
    kableExtra::kbl(agg_prob_sels[annotator_rows, table_cols], row.names = FALSE,
        escape = FALSE, format = "html", digits = 3),
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE, font_size = 12)

problematic_sels_kbl
sound.file family problem selections spectrograms recording

6 Create annotated spectrograms

Code
# look for the sound files in the consolidated folder
warbleR_options(wav.path = path_sound_files)

# load local copies of the warbleR source files
source("~/Dropbox/R_package_testing/warbleR/R/full_spectrograms.R")
source("~/Dropbox/R_package_testing/warbleR/R/internal_functions.R")


# folder in which the annotated spectrograms are saved
ann_spec_path <- "./data/processed/annotated_spectrograms"

# create one folder per family. dir.exists() only accepts actual
# directories and recursive = TRUE also creates 'ann_spec_path' itself
# when it is missing
for (i in unique(rec_data$family)) {
    family_dir <- file.path(ann_spec_path, i)

    if (!dir.exists(family_dir)) {
        dir.create(family_dir, recursive = TRUE)
    }
}

# sound files whose selection table was added or changed since the
# previous snapshot. fixed = TRUE makes the dots in the suffix literal
# instead of regular expression wildcards
new_and_changed_files <- gsub(".Table.1.selections.txt", ".wav", basename(c(changes$added,
    changes$changed)), fixed = TRUE)

# also include the sound files listed in 'prev_prob'
new_and_changed_files <- unique(c(new_and_changed_files, prev_prob$sound.file))


# Re-create the annotated spectrograms of each sound file in
# 'new_and_changed_files'
out <- warbleR:::pblapply_wrblr_int(new_and_changed_files, pbar = TRUE,
    cl = 1, function(x) {
        # annotations of the current sound file
        sub_anns <- cs[cs$sound.files == x, ]

        # frequency limits of the plot, starting from the range covered
        # by the annotations
        frq_range <- range(c(sub_anns$bottom.freq, sub_anns$top.freq))

        # lower limit: 1 below the lowest annotation but never below 0
        frq_range[1] <- frq_range[1] - 1
        if (frq_range[1] < 0)
            frq_range[1] <- 0

        # upper limit: add a third of the range, keeping it between 6
        # and 22.05 (presumably kHz - confirm)
        frq_range[2] <- frq_range[2] + ((frq_range[2] - frq_range[1])/3)
        if (frq_range[2] < 6)
            frq_range[2] <- 6

        if (frq_range[2] > 22.05)
            frq_range[2] <- 22.05

        # label each selection as '<selec>-<element>'
        sub_anns$selec <- paste(sub_anns$selec, sub_anns$element,
            sep = "-")

        # spectrograms are saved in the folder of the family
        family_dir <- file.path(ann_spec_path, sub_anns$family[1])

        print(file.path(family_dir, x))

        # remove the spectrogram pages already saved for this sound
        # file. list.files() returns bare file names unless full.names
        # = TRUE, so without it unlink() would look in the working
        # directory and the old pages would never be deleted. Pages are
        # looked up elsewhere in this file as '<sound file without
        # .wav>--p<page>'; comparing that literal prefix keeps the
        # pages of other recordings (e.g. 'ML8101' vs 'ML810') untouched
        old_pages <- list.files(path = family_dir, full.names = TRUE)
        old_pages <- old_pages[startsWith(basename(old_pages), sub("\\.wav$",
            "--p", x))]
        unlink(old_pages)

        # best effort: a failure in one sound file does not stop the
        # loop
        try(full_spectrograms(X = sub_anns, flim = frq_range, sxrow = 4,
            rows = 10, ovlp = 50, collevels = seq(-100, 0, 5), parallel = 1,
            overwrite = TRUE, dest.path = family_dir, song = "song",
            fast.spec = TRUE, horizontal = TRUE, pb = FALSE, only.annotated = TRUE),
            silent = TRUE)

    })

# Number of annotated sound files without a spectrogram. The check
# needs a complete file name: '<name>--p' alone (no page number or
# extension) never exists, which made every sound file count as
# missing. The first page ('--p1.jpeg') is used as the indicator
first_ann_row <- !duplicated(cs$sound.files)

sum(!file.exists(file.path(ann_spec_path, cs$family[first_ann_row],
    sub("\\.wav$", "--p1.jpeg", cs$sound.files[first_ann_row]))))

7 Add new recordings

Code
# metadata of the candidate recordings (two spreadsheets)
new_data_no_cuts <- readxl::read_excel("./data/raw/Marcelo Files - no Production cut.xlsx")

new_data_cuts <- readxl::read_excel("./data/raw/Marcelo Files - Production cuts.xlsx")

# keep only the species that are not in 'rec_data' yet
cuts_already_in <- new_data_cuts$`Scientific Name` %in% rec_data$species
new_data_cuts <- new_data_cuts[!cuts_already_in, ]

nrow(new_data_cuts)

no_cuts_already_in <- new_data_no_cuts$`Parent Species` %in% rec_data$species
new_data_no_cuts <- new_data_no_cuts[!no_cuts_already_in, ]

nrow(new_data_no_cuts)

head(new_data_no_cuts)

# tag each table with the spreadsheet it came from
new_data_no_cuts$source <- "not publication cut"

new_data_cuts$source <- "publication cut"

# Clements checklist, keeping only the rows of category 'species'
clm <- read.csv("./data/raw/NEW_Clements-Checklist-v2022-October-2022.csv")
clm.sp <- clm[clm$category == "species", ]

# First word of each name (NA when a name is NA or empty)
first_word <- function(x) {
    vapply(strsplit(x, " ", fixed = TRUE), function(parts) parts[1],
        character(1))
}

# Family of each genus: takes the family of the first checklist row
# with the same genus and drops whatever follows ' (' in it. NA when
# the genus is not in the checklist. The previous per-row lookup had a
# 'length(fam) == 0' branch that could never run (indexing with [1]
# always returns one element) and was duplicated for both tables
family_from_genus <- function(genus, checklist) {
    fam <- checklist$family[match(genus, checklist$genus)]

    sub(" \\(.*$", "", fam)
}

clm.sp$genus <- first_word(clm.sp$scientific.name)

new_data_cuts$genus <- first_word(new_data_cuts$`Scientific Name`)

new_data_cuts$family <- family_from_genus(new_data_cuts$genus, clm.sp)

new_data_no_cuts$genus <- first_word(new_data_no_cuts$`Parent Species`)

new_data_no_cuts$family <- family_from_genus(new_data_no_cuts$genus,
    clm.sp)


# Give both tables the same columns so they can be stacked. Columns
# are created in the same order as before within each table

# base urls shared by both tables
ml_asset_url <- "https://macaulaylibrary.org/asset/"
ebird_species_url <- "https://ebird.org/species/"

## table of publication cuts
cuts_sci_name <- new_data_cuts[["Scientific Name"]]
new_data_cuts$Scientific.Name <- cuts_sci_name
new_data_cuts$Orginal.Scientific.Name <- cuts_sci_name
new_data_cuts[["Scientific Name"]] <- NULL

new_data_cuts$file_url <- paste0(ml_asset_url, new_data_cuts[["ML Catalog Number"]])
new_data_cuts$species_ebird_url <- paste0(ebird_species_url, new_data_cuts$SpeciesCode)
new_data_cuts[["Common Name"]] <- new_data_cuts[["English Name"]]
new_data_cuts$Behaviors <- NA

## table without publication cuts
new_data_no_cuts$Orginal.Scientific.Name <- new_data_no_cuts[["Parent Species"]]
new_data_no_cuts[["Scientific Name"]] <- NULL
new_data_no_cuts$Scientific.Name <- new_data_no_cuts[["Parent Species"]]

new_data_no_cuts$file_url <- paste0(ml_asset_url, new_data_no_cuts[["ML Catalog Number"]])
new_data_no_cuts$species_ebird_url <- paste0(ebird_species_url, new_data_no_cuts[["eBird Species Code"]])

# compare the column names of both tables
names(new_data_no_cuts)
names(new_data_cuts)

# recordist as 'FirstName LastName'
new_data_cuts$Recordist <- paste(new_data_cuts$FirstName, new_data_cuts$LastName)

# columns filled with NA in the table of publication cuts
for (na_col in c("Taxon Category", "Country State County", "Loc ID",
    "Locality", "Day", "Month", "Year")) {
    new_data_cuts[[na_col]] <- NA
}

# stack the columns found in both tables
common_colums <- intersect(names(new_data_no_cuts), names(new_data_cuts))

second_batch <- rbind(new_data_no_cuts[, common_colums], new_data_cuts[, common_colums])

# sound file name: '<Scientific_name>-ML<catalog number>.wav'
species_label <- gsub(" ", "_", second_batch$Scientific.Name)
second_batch$sound.files <- paste0(species_label, "-ML", second_batch[["ML Catalog Number"]],
    ".wav")


# ohun::feature_acoustic_data(path = '~/Downloads/combined/')
# warbleR::info_sound_files(path = '~/Downloads/combined/')


# this code is not organized (is a mess!)
# downloaded files
fls <- list.files(path = "~/Downloads/combined/")

# number of files by their last 4 characters (i.e. the extension)
table(substr(fls, nchar(fls) - 3, nchar(fls)))

# file names without the audio extension, compared below against the
# catalog numbers
fl_ids <- gsub(".m4a|.mp3|.wav", "", fls)

# keep the metadata of the recordings that were downloaded
was_downloaded <- second_batch$`ML Catalog Number` %in% fl_ids
second_batch <- second_batch[was_downloaded, ]

nrow(second_batch)

# downloaded files that have metadata
sub_fls <- fls[fl_ids %in% second_batch$`ML Catalog Number`]


table(substr(sub_fls, nchar(sub_fls) - 3, nchar(sub_fls)))


# recordings in the metadata without a downloaded file
sum(!second_batch$`ML Catalog Number` %in% fl_ids)

# convert the mp3 files to wav, saving them in the same folder
mp3_2_wav(samp.rate = 44.1, bit.depth = 16, path = "~/Downloads/combined/",
    overwrite = TRUE, dest.path = "~/Downloads/combined/")

# delete mp3s


# convert the m4a files to wav with ffmpeg. 'fls' holds bare file
# names, so the folder is prepended to both input and output;
# otherwise ffmpeg looks for them in the working directory. Paths are
# expanded and quoted so names with spaces or shell characters are
# passed intact
combined_dir <- path.expand("~/Downloads/combined")

for (i in grep("m4a$", fls, value = TRUE)) {
    m4a_file <- file.path(combined_dir, i)
    wav_file <- file.path(combined_dir, sub("m4a$", "wav", i))

    system2("ffmpeg", args = c("-i", shQuote(m4a_file), shQuote(wav_file)))
}

# save the metadata with all its columns
write.csv(second_batch, file = "./data/raw/sound_files_and_extended_metadata_second_batch.csv",
    row.names = FALSE)

# save a second copy with only the main columns
main_cols <- c("family", "Scientific.Name", "species_ebird_url", "ML Catalog Number",
    "sound.files", "Common Name", "Orginal.Scientific.Name", "Behaviors",
    "file_url")
second_batch <- second_batch[, main_cols]

write.csv(second_batch, file = "./data/raw/sound_files_metadata_second_batch.csv",
    row.names = FALSE)

fix_wavs(samp.rate = 44.1, bit.depth = 16, path = "~/Downloads/combined/")


# rename the converted files from '<catalog number>.wav' to the name
# in 'sound.files'
converted_dir <- "~/Downloads/combined/converted_sound_files"
catalog_names <- file.path(converted_dir, paste0(second_batch$`ML Catalog Number`,
    ".wav"))
final_names <- file.path(converted_dir, second_batch$sound.files)

fr <- file.rename(from = catalog_names, to = final_names)

# check that every file was renamed
nrow(second_batch)
all(fr)


# copy the converted sound files to the folder of their family
by_family_dir <- "/run/user/1000/gvfs/smb-share:server=cinnas.local,share=neurobiología/marcelo_araya/bird_song_evolution/by_family"

for (i in na.omit(unique(second_batch$family))) {
    print(i)

    # create the folder of the family if it is missing
    family_dir <- file.path(by_family_dir, i)
    if (!dir.exists(family_dir)) {
        dir.create(family_dir)
    }

    # sound files of the family
    family_fls <- na.omit(second_batch$sound.files[second_batch$family == i])

    frm <- file.path("~/Downloads/combined/converted_sound_files", family_fls)
    tu <- file.path(family_dir, family_fls)

    fr <- file.copy(frm, to = tu)
}

Takeaways

  • Making good progress

 


 

Session information

R version 4.5.0 (2025-04-11)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 22.04.5 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0  LAPACK version 3.10.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       

time zone: America/Costa_Rica
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] ohun_1.0.2          googledrive_2.1.1   Rraven_1.0.14      
 [4] warbleR_1.1.35      NatureSounds_1.0.5  seewave_2.2.3      
 [7] tuneR_1.4.7         googlesheets4_1.1.1 viridis_0.6.5      
[10] viridisLite_0.4.2   rprojroot_2.0.4     formatR_1.14       
[13] knitr_1.50         

loaded via a namespace (and not attached):
 [1] gtable_0.3.6        rjson_0.2.23        xfun_0.52          
 [4] ggplot2_3.5.2       htmlwidgets_1.5.4   remotes_2.5.0      
 [7] gargle_1.5.2        vctrs_0.6.5         tools_4.5.0        
[10] bitops_1.0-9        generics_0.1.4      curl_6.2.2         
[13] parallel_4.5.0      tibble_3.2.1        proxy_0.4-27       
[16] pkgconfig_2.0.3     KernSmooth_2.23-26  checkmate_2.3.2    
[19] RColorBrewer_1.1-3  lifecycle_1.0.4     compiler_4.5.0     
[22] farver_2.1.2        stringr_1.5.1       brio_1.1.5         
[25] sketchy_1.0.5       class_7.3-23        htmltools_0.5.8.1  
[28] RCurl_1.98-1.17     yaml_2.3.10         pillar_1.10.2      
[31] crayon_1.5.3        MASS_7.3-65         classInt_0.4-11    
[34] tidyselect_1.2.1    packrat_0.9.2       digest_0.6.37      
[37] stringi_1.8.7       sf_1.0-20           dplyr_1.1.4        
[40] purrr_1.0.4         fastmap_1.2.0       grid_4.5.0         
[43] cli_3.6.5           magrittr_2.0.3      e1071_1.7-16       
[46] scales_1.4.0        backports_1.5.0     rmarkdown_2.29     
[49] httr_1.4.7          signal_1.8-1        igraph_2.1.4       
[52] gridExtra_2.3       cellranger_1.1.0    kableExtra_1.4.0   
[55] pbapply_1.7-2       evaluate_1.0.3      dtw_1.23-1         
[58] fftw_1.0-9          testthat_3.2.3      rlang_1.1.6        
[61] Rcpp_1.0.14         DBI_1.2.3           glue_1.8.0         
[64] xaringanExtra_0.8.0 xml2_1.3.8          svglite_2.1.3      
[67] rstudioapi_0.17.1   jsonlite_2.0.0      R6_2.6.1           
[70] systemfonts_1.2.3   units_0.8-7         fs_1.6.6