Classifying USV subtypes

Rat vocalization and alcohol

Author
Published

November 17, 2023

Source code and data found at https://github.com/maRce10/rat_vocalization_alcohol

 

1 Purpose

  • Classify subtypes based on structure
  • Determine multi-element subtypes based on composing single subtypes

 

Load packages

Code
# knitr is require for creating html/pdf/word reports formatR is
# used for soft-wrapping code

# install/ load packages
sketchy::load_packages(packages = c("knitr", "formatR", "viridis",
    "warbleR", github = "maRce10/Rraven", github = "maRce10/warbleR",
    github = "maRce10/ohun", "caret", "randomForest", "DiagrammeR"))

warbleR_options(wav.path = "/media/m/Expansion/audios_cin_alcohol_2023")

Read data

Code
anns <- imp_raven("./data/processed/annotations", warbler.format = TRUE,
    all.data = TRUE)

anns$subc <- anns$subtipos_completo <- NULL

anns <- anns[!is.na(anns$subtipos), ]

anns$subtipos[anns$subtipos %in% c(5, 44)] <- 4

anns <- anns[order(anns$sound.files, anns$start), ]

nrow(anns)
[1] 17147
Code
anns <- anns[anns$start < anns$end, ]

anns$subtype.label <- NA

anns$subtype.label[anns$subtipos == 22] <- "22 kHz"
anns$subtype.label[anns$subtipos == 1] <- "flat"
anns$subtype.label[anns$subtipos == 4] <- "trill"
anns$subtype.label[anns$subtipos == 6] <- "complex"

2 Describe annotations (single element subtypes)

  • Proportion of calls per subtype
  • This is the annotation data without any modification
Code
count_types <- round(table(anns$subtype.label)/nrow(anns), 2)

count_types <- as.data.frame(count_types)

count_types$Count <- as.vector(table(anns$subtype.label))
names(count_types) <- c("Subtype", "Proportion", "Count")

count_types
Subtype Proportion Count
22 kHz 0.00 13
complex 0.14 2328
flat 0.04 681
trill 0.82 14123

3 Data analysis

flowchart LR
  A[Annotations] --> C(Measure\nsingle-note\nsubtype\nstructure)
  B[recordings] --> C
  C --> D(Build\ncomposed\nsubtypes)
  C --> E(Classify\nsimple\nsubtypes) 
  D --> F(Classify\ncomposed\nsubtypes)
  F --> G(Evaluate classification)
  E --> G
  
style A fill:#44015466
style B fill:#3E4A894D
style C fill:#26828E4D
style D fill:#31688E4D
style E fill:#6DCD594D
style G fill:#FDE7254D

4 Measure acoustic structure

Acoustic structure measured as spectrographic/spectrum/envelope features as well as statistical descriptors of Mel frequency cepstral coefficients

Code
cs <- check_sels(anns, parallel = 1)

sp <- spectro_analysis(anns, parallel = 5, ovlp = 70)
mfccs <- mfcc_stats(anns, parallel = 5, ovlp = 70)

anns_sp <- merge(anns, sp)

anns_sp <- merge(anns_sp, mfccs)
colnames(anns_sp)

write.csv(anns_sp, "./data/processed/annotations_and_spectrogram_and_mel_features.csv",
    row.names = FALSE)

5 Multi-element composed subtypes

  • Define as those in which single elements are within a 5 ms range
Code
anns <- read.csv("./data/processed/annotations_and_spectrogram_and_mel_features.csv")

anns$true.end <- anns$end

anns$end <- anns$end + 0.005

anns <- overlapping_sels(anns)

anns$end <- anns$true.end

anns$true.end <- NULL

# add song label to elements
anns$composed.subtype <- anns$ovlp.sels

for (i in seq_len(nrow(anns))) if (is.na(anns$composed.subtype[i])) anns$composed.subtype[i] <- max(anns$composed.subtype,
    na.rm = TRUE) + 1

write.csv(anns, "./data/processed/annotations_groups_spectrogram_and_mel_features.csv",
    row.names = FALSE)

5.1 Rules to determine composed subtypes

  • If any of the composing subtypes is complex then composed complex
  • If it contains 3 or more single subtypes then composed complex
  • If ‘flat-trill’ and ‘trill-flat’ then step flat trill
  • If it has at least two adjacent trills (‘trill-trill’) and the frequency change is equal or higher than 5 kHz then step trill
  • If it has at least two adjacent flats (‘flat-flat’) and the frequency change is equal or higher than 5 kHz then step flat

 

Code
anns <- read.csv("./data/processed/annotations_groups_spectrogram_and_mel_features.csv")


composed_anns_list <- lapply(unique(na.omit(anns$ovlp.sels)), function(x) {

    data.frame(sound.files = anns$sound.files[anns$ovlp.sels == x &
        !is.na(anns$ovlp.sels)][1], selec = x, start = min(anns$start[anns$ovlp.sels ==
        x & !is.na(anns$ovlp.sels)]), end = max(anns$end[anns$ovlp.sels ==
        x & !is.na(anns$ovlp.sels)]), bottom.freq = min(anns$bottom.freq[anns$ovlp.sels ==
        x & !is.na(anns$ovlp.sels)]), top.freq = max(anns$top.freq[anns$ovlp.sels ==
        x & !is.na(anns$ovlp.sels)]), subtypes = paste(anns$subtipos[anns$ovlp.sels ==
        x & !is.na(anns$ovlp.sels)], collapse = "-"), composed.subtype = x,
        count.subtypes = sum(anns$ovlp.sels == x & !is.na(anns$ovlp.sels)))
})

composed_anns <- do.call(rbind, composed_anns_list)

unique(composed_anns$subtypes)


# subtypes <- c(1 = 'flat', 4 = 'trill', 6 = 'complejas', 22 =
# '22 kHz')

# add labels
composed_anns$composed.subtype.label <- NA

# of it contain complex subtype then is complex
composed_anns$composed.subtype.label <- ifelse(grepl("6", composed_anns$subtypes),
    "composed.complex", composed_anns$composed.subtype.label)

# of it contain 3 or more elements it is complex
composed_anns$composed.subtype.label <- ifelse(composed_anns$count.subtypes >=
    3, "composed.complex", composed_anns$composed.subtype.label)

# 1-4 and 4-1 is a 3 (trill flat steps)
composed_anns$composed.subtype.label <- ifelse(grepl("^1-4$|^4-1$",
    composed_anns$subtypes), "step.flat.trill", composed_anns$composed.subtype.label)

# if it has at least two adjacent 4 ('4-4' two trills) and the
# frequency change is equal or higher than 5 kHz it is a step
composed_anns$composed.subtype.label <- sapply(seq_len(nrow(composed_anns)),
    function(x) {

        if (sum(grepl("4-4", composed_anns$subtypes[x])) & composed_anns$count.subtypes[x] ==
            2) {

            peakfs <- anns$meanpeakf[anns$composed.subtype == composed_anns$composed.subtype[x]]

            diff_freq <- abs(peakfs[1] - peakfs[2])

            out <- if (diff_freq >= 5)
                "step.trill" else "composed.trill"
        } else out <- composed_anns$composed.subtype.label[x]

        return(out)
    })

# if it has at least two adjacent 1 ('1-1' two flats) and the
# frequency change is equal or higher than 5 kHz it is a step
composed_anns$composed.subtype.label <- sapply(seq_len(nrow(composed_anns)),
    function(x) {

        if (sum(grepl("1-1", composed_anns$subtypes[x])) & composed_anns$count.subtypes[x] ==
            2) {

            peakfs <- anns$meanpeakf[anns$composed.subtype == composed_anns$composed.subtype[x]]

            diff_freq <- abs(peakfs[1] - peakfs[2])

            out <- if (diff_freq >= 5)
                "step.flat" else "composed.flat"
        } else out <- composed_anns$composed.subtype.label[x]

        return(out)
    })

unique(composed_anns$subtypes[composed_anns$count.subtypes == 2 &
    is.na(composed_anns$composed.subtype.label)])

round(table(composed_anns$composed.subtype.label)/nrow(composed_anns),
    2)

composed_anns$subtype.label <- ifelse(grepl("step", composed_anns$composed.subtype.label),
    "step", composed_anns$composed.subtype.label)

composed_anns$subtype.label <- ifelse(grepl("complex", composed_anns$composed.subtype.label),
    "complex", composed_anns$composed.subtype.label)


composed_anns$subtype.label <- ifelse(grepl("composed.trill", composed_anns$composed.subtype.label),
    "trill", composed_anns$subtype.label)

write.csv(composed_anns, "./data/processed/annotations_composed_subtypes_5ms.csv",
    row.names = FALSE)

anns$type <- "single"
anns$composed.subtype.label <- anns$subtype.label

composed_anns$type <- "composed"

cols <- intersect(colnames(anns), colnames(composed_anns))

combined_anns <- rbind(anns[is.na(anns$ovlp.sels), cols], composed_anns[,
    cols])

write.csv(combined_anns, "./data/processed/annotations_simple_and_composed_subtypes_5ms.csv",
    row.names = FALSE)

6 Summary

Proportion of calls per subtypes for composed (multi-element) subtypes

Code
composed_anns <- read.csv("./data/processed/annotations_composed_subtypes_5ms.csv")

count_types <- round(table(composed_anns$composed.subtype.label)/nrow(composed_anns),
    2)

count_types <- as.data.frame(count_types)

count_types$Count <- as.vector(table(composed_anns$composed.subtype.label))
names(count_types) <- c("Subtype", "Proportion", "Count")

count_types
Subtype Proportion Count
composed.complex 0.35 1156
composed.trill 0.01 28
step.flat 0.00 13
step.flat.trill 0.04 120
step.trill 0.60 1951

Combination of single element calls on each composed subtype

Code
subtypes <- c(`1` = "flat", `4` = "trill", `6` = "complex", `22` = "22 kHz")

composed_anns$single.type.combs <- composed_anns$subtypes

for (i in seq_along(subtypes)) composed_anns$single.type.combs <- gsub(names(subtypes)[i],
    subtypes[i], composed_anns$single.type.combs)

count_types <- round(table(composed_anns$single.type.combs)/nrow(composed_anns),
    2)

count_types <- as.data.frame(count_types)

count_types$Count <- as.vector(table(composed_anns$single.type.combs))

count_types$comp <- sapply(count_types$Var1, function(x) composed_anns$composed.subtype.label[composed_anns$single.type.combs ==
    x][1])

names(count_types) <- c("Single subtype combination", "Proportion",
    "Count", "Composed subtype")

count_types <- count_types[order(count_types$`Composed subtype`),
    ]

count_types[, c(4, 1:3)]
Composed subtype Single subtype combination Proportion Count
1 composed.complex complex-complex 0.01 23
2 composed.complex complex-complex-complex 0.00 3
3 composed.complex complex-flat 0.00 8
4 composed.complex complex-trill 0.03 91
5 composed.complex complex-trill-complex 0.00 1
6 composed.complex complex-trill-trill 0.00 8
7 composed.complex flat-complex 0.00 6
9 composed.complex flat-flat-trill 0.00 3
10 composed.complex flat-flat-trill-flat 0.00 1
11 composed.complex flat-flat-trill-trill 0.00 1
13 composed.complex flat-trill-flat 0.00 4
14 composed.complex flat-trill-flat-flat 0.00 1
15 composed.complex flat-trill-flat-trill 0.00 1
16 composed.complex flat-trill-trill 0.00 4
17 composed.complex flat-trill-trill-trill 0.00 4
18 composed.complex trill-complex 0.23 744
19 composed.complex trill-complex-complex 0.00 4
20 composed.complex trill-complex-trill 0.00 16
21 composed.complex trill-complex-trill-complex 0.00 1
22 composed.complex trill-complex-trill-trill 0.00 4
24 composed.complex trill-flat-complex 0.00 2
25 composed.complex trill-flat-flat 0.00 3
26 composed.complex trill-flat-flat-trill 0.00 5
27 composed.complex trill-flat-trill 0.01 17
28 composed.complex trill-flat-trill-complex 0.00 2
29 composed.complex trill-flat-trill-trill 0.00 9
30 composed.complex trill-flat-trill-trill-complex 0.00 1
32 composed.complex trill-trill-complex 0.01 29
33 composed.complex trill-trill-complex-complex 0.00 1
34 composed.complex trill-trill-complex-trill 0.00 1
35 composed.complex trill-trill-flat 0.00 4
36 composed.complex trill-trill-flat-trill 0.00 5
37 composed.complex trill-trill-trill 0.04 134
38 composed.complex trill-trill-trill-complex-trill 0.00 1
39 composed.complex trill-trill-trill-trill 0.00 13
40 composed.complex trill-trill-trill-trill-trill-trill 0.00 1
8 step.flat flat-flat 0.00 13
12 step.flat.trill flat-trill 0.02 56
23 step.flat.trill trill-flat 0.02 64
31 step.trill trill-trill 0.61 1979

Proportion of calls per subtypes for both single and composed (multi-element) subtypes

Code
combined_anns <- read.csv("./data/processed/annotations_simple_and_composed_subtypes_5ms.csv")

count_types <- round(table(combined_anns$subtype.label)/nrow(combined_anns),
    2)

count_types <- as.data.frame(count_types)

count_types$Count <- as.vector(table(combined_anns$subtype.label))
names(count_types) <- c("Subtype", "Proportion", "Count")

count_types$type <- sapply(count_types$Subtype, function(x) unique(combined_anns$type[combined_anns$subtype.label ==
    x]))

count_types
Subtype Proportion Count type
22 kHz 0.00 13 single
complex 0.18 2502 single , composed
flat 0.03 433 single
step.flat 0.00 13 composed
step.flat.trill 0.01 120 composed
step.trill 0.14 1951 composed
trill 0.63 8505 single , composed

Proportion of calls per subtypes for both single and composed by type (single vs composed)

Code
combined_anns <- read.csv("./data/processed/annotations_simple_and_composed_subtypes_5ms.csv")

count_types <- round(table(combined_anns$composed.subtype.label)/nrow(combined_anns),
    2)

count_types <- as.data.frame(count_types)

count_types$Count <- as.vector(table(combined_anns$composed.subtype.label))
names(count_types) <- c("Subtype", "Proportion", "Count")

count_types$type <- sapply(count_types$Subtype, function(x) unique(combined_anns$type[combined_anns$composed.subtype.label ==
    x]))

count_types
Subtype Proportion Count type
22 kHz 0.00 13 single
complex 0.10 1346 single
composed.complex 0.09 1156 composed
composed.trill 0.00 28 composed
flat 0.03 433 single
step.flat 0.00 13 composed
step.flat.trill 0.01 120 composed
step.trill 0.14 1951 composed
trill 0.63 8477 single

7 Create spectrograms

Code
types <- unique(combined_anns$composed.subtype.label)

for (i in types[7:1]) {
    print(i)
    X <- combined_anns[combined_anns$composed.subtype.label == i,
        ]

    # subset
    set.seed(123)
    if (nrow(X) > 64)
        X <- X[sample(seq_len(nrow(X)), 64), ]

    nrw <- ncl <- 2
    n <- nrow(X)
    if (n > 4) {
        nrw <- ncl <- ceiling(sqrt(n))
        if (((nrw - 1) * ncl) >= n)
            nrw <- nrw - 1
    }

    # print catalog
    catalog(X = X, nrow = nrw, ncol = ncl, same.time.scale = T, mar = 0.001,
        res = 100, pb = FALSE, spec.mar = 0.001, max.group.cols = 5,
        title = i, ovlp = 90, wl = 512, width = 15, height = 9, hatching = 0,
        cex = 1.3, fast.spec = FALSE, pal = viridis, img.prefix = i,
        rm.axes = TRUE, flim = c(min(X$bottom.freq), max(X$top.freq)) +
            c(-1, 1), collevels = seq(-120, 0, 5), box = FALSE, lab.mar = 1e-05)

    move_images(from = .Options$warbleR$path, to = "./scripts", overwrite = TRUE,
        cut = TRUE, pb = FALSE)
}

7.1 Single element subtypes

22 kHz

Flats

Trills Complex

7.2 Composed subtypes

Step flat Step trill

Step flat

Composed complex

8 Random forest classification

Models trained for 100 iterations on 75% of the data and tested on the remaining 25%.

8.1 On single element subtypes

Code
elm_anns <- read.csv("./data/processed/annotations_groups_spectrogram_and_mel_features.csv")

elm_anns <- elm_anns[elm_anns$subtype.label != "22", ]


# remove collinear
target_features <- names(elm_anns)[!names(elm_anns) %in%  c("sound.files", "selec", "View", "Channel", "start", "end", "bottom.freq", "top.freq", "Delta.Time..s.", "Begin.File", "subtipos", "selec.file", "true.end", "ovlp.sels", "composed.subtype", "subtype.label", names(elm_anns)[sapply(elm_anns, anyNA)])]

cormat <- cor(elm_anns[, target_features], use = "pairwise.complete.obs")
hc <- findCorrelation(cormat, cutoff= 0.9) # putt any value as a "cutoff" 
hc <- sort(hc)

target_features <- target_features[hc]

# Create data subsets
partition <- createDataPartition(
    y = elm_anns$subtype.label,
    times = 1,
    p = 0.75,
    list = FALSE
)



elm_anns$subtype.label <- as.factor(elm_anns$subtype.label)

trainset <- elm_anns[partition, c(target_features, "subtype.label")]
testset <- elm_anns[-partition, c(target_features, "subtype.label")]

trcontrol <-
    trainControl(
        method = "repeatedcv",
        number = 100,
        repeats = 100,
        savePredictions = TRUE,
        sampling = "down",
        classProbs = TRUE,
        returnResamp = "all"
    )

pred_model <- train(
    subtype.label ~ .,
    data = trainset,
    method = "rf",
    trControl = trcontrol,
    metric = "Accuracy",
    preProcess = "scale"
)

ggplot(pred_model) + theme_bw()

# save confusion matrix
conf_mat <-
    confusionMatrix(predict(pred_model, testset), testset$subtype.label)

conf_df <- as.data.frame(conf_mat$table)

conf_df$total <-
    sapply(conf_df$Reference, function(x)
        sum(testset$subtype.label ==
                x))

conf_df$proportion <- conf_df$Freq / conf_df$total

# fit model on complete data set
best_rf_model <- pred_model$finalModel

# all_rf_model <- randomForest(
#   subtype.label ~ .,
#   data = elm_anns[, c(target_features, "subtype.label")],  # Your entire dataset
#   proximity = TRUE,  # Include proximity matrix
#   ntree = best_rf_model$ntree,  # Number of trees
#   mtry = best_rf_model$mtry,    # Number of variables tried for splitting at each node
#   nodesize = best_rf_model$nodesize,  # Minimum size of terminal nodes
#   maxnodes = best_rf_model$maxnodes  # Maximum number of terminal nodes
# )

rf_model_results <-
    list(
        pred_model_bb = pred_model,
        conf_mat_bb = conf_mat,
        confusion_df_bb = conf_df,
        testset_bb = testset
        #all_rf_model = all_rf_model
    )


saveRDS(
    rf_model_results,
    "./data/processed/random_forest_model_results.RDS"
)

8.1.1 Checking performance on test data

Code
rf_model_results <- readRDS("./data/processed/random_forest_model_results.RDS")


# print confusion matrix results
rf_model_results$conf_mat_bb
Confusion Matrix and Statistics

          Reference
Prediction complex flat trill
   complex     550    0   357
   flat          2  138   604
   trill        30   32  2569

Overall Statistics
                                        
               Accuracy : 0.761         
                 95% CI : (0.748, 0.773)
    No Information Rate : 0.824         
    P-Value [Acc > NIR] : 1             
                                        
                  Kappa : 0.477         
                                        
 Mcnemar's Test P-Value : <2e-16        

Statistics by Class:

                     Class: complex Class: flat Class: trill
Sensitivity                   0.945      0.8118        0.728
Specificity                   0.904      0.8526        0.918
Pos Pred Value                0.606      0.1855        0.976
Neg Pred Value                0.991      0.9910        0.418
Prevalence                    0.136      0.0397        0.824
Detection Rate                0.128      0.0322        0.600
Detection Prevalence          0.212      0.1738        0.614
Balanced Accuracy             0.924      0.8322        0.823
Code
confusion_df <- rf_model_results$confusion_df_bb

ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
    geom_tile() + coord_equal() + scale_fill_distiller(palette = "Greens",
    direction = 1) + geom_text(aes(label = round(proportion, 2)),
    color = "black", size = 3) + theme_classic() + labs(x = "Observed",
    y = "Predicted") + theme(axis.text.x = element_text(color = "black",
    size = 11, angle = 30, vjust = 0.8, hjust = 0.8))

8.2 On composed (multi-element) subtypes

Summarize structure at the multi-element subtype level

Code
elm_anns <- read.csv("./data/processed/annotations_groups_spectrogram_and_mel_features.csv")

mean_colms <- c("duration", "meanfreq", "sd", "freq.median", "freq.Q25",
    "freq.Q75", "freq.IQR", "time.median", "time.Q25", "time.Q75",
    "time.IQR", "skew", "kurt", "sp.ent", "time.ent", "entropy", "sfm",
    "meandom", "mindom", "maxdom", "dfrange", "modindx", "startdom",
    "enddom", "dfslope", "meanpeakf", "min.cc1", "min.cc2", "min.cc3",
    "min.cc4", "min.cc5", "min.cc6", "min.cc7", "min.cc8", "min.cc9",
    "min.cc10", "min.cc11", "min.cc12", "min.cc13", "min.cc14", "min.cc15",
    "min.cc16", "min.cc17", "min.cc18", "min.cc19", "min.cc20", "min.cc21",
    "min.cc22", "min.cc23", "min.cc24", "min.cc25", "max.cc1", "max.cc2",
    "max.cc3", "max.cc4", "max.cc5", "max.cc6", "max.cc7", "max.cc8",
    "max.cc9", "max.cc10", "max.cc11", "max.cc12", "max.cc13", "max.cc14",
    "max.cc15", "max.cc16", "max.cc17", "max.cc18", "max.cc19", "max.cc20",
    "max.cc21", "max.cc22", "max.cc23", "max.cc24", "max.cc25", "median.cc1",
    "median.cc2", "median.cc3", "median.cc4", "median.cc5", "median.cc6",
    "median.cc7", "median.cc8", "median.cc9", "median.cc10", "median.cc11",
    "median.cc12", "median.cc13", "median.cc14", "median.cc15", "median.cc16",
    "median.cc17", "median.cc18", "median.cc19", "median.cc20", "median.cc21",
    "median.cc22", "median.cc23", "median.cc24", "median.cc25", "mean.cc1",
    "mean.cc2", "mean.cc3", "mean.cc4", "mean.cc5", "mean.cc6", "mean.cc7",
    "mean.cc8", "mean.cc9", "mean.cc10", "mean.cc11", "mean.cc12",
    "mean.cc13", "mean.cc14", "mean.cc15", "mean.cc16", "mean.cc17",
    "mean.cc18", "mean.cc19", "mean.cc20", "mean.cc21", "mean.cc22",
    "mean.cc23", "mean.cc24", "mean.cc25", "var.cc1", "var.cc2", "var.cc3",
    "var.cc4", "var.cc5", "var.cc6", "var.cc7", "var.cc8", "var.cc9",
    "var.cc10", "var.cc11", "var.cc12", "var.cc13", "var.cc14", "var.cc15",
    "var.cc16", "var.cc17", "var.cc18", "var.cc19", "var.cc20", "var.cc21",
    "var.cc22", "var.cc23", "var.cc24", "var.cc25", "skew.cc1", "skew.cc2",
    "skew.cc3", "skew.cc4", "skew.cc5", "skew.cc6", "skew.cc7", "skew.cc8",
    "skew.cc9", "skew.cc10", "skew.cc11", "skew.cc12", "skew.cc13",
    "skew.cc14", "skew.cc15", "skew.cc16", "skew.cc17", "skew.cc18",
    "skew.cc19", "skew.cc20", "skew.cc21", "skew.cc22", "skew.cc23",
    "skew.cc24", "skew.cc25", "kurt.cc1", "kurt.cc2", "kurt.cc3",
    "kurt.cc4", "kurt.cc5", "kurt.cc6", "kurt.cc7", "kurt.cc8", "kurt.cc9",
    "kurt.cc10", "kurt.cc11", "kurt.cc12", "kurt.cc13", "kurt.cc14",
    "kurt.cc15", "kurt.cc16", "kurt.cc17", "kurt.cc18", "kurt.cc19",
    "kurt.cc20", "kurt.cc21", "kurt.cc22", "kurt.cc23", "kurt.cc24",
    "kurt.cc25", "mean.d1.cc", "var.d1.cc", "mean.d2.cc", "var.d2.cc")

composed_param <- song_param(X = elm_anns, song_colm = "composed.subtype",
    mean_colm = mean_colms)

combined_anns <- read.csv("./data/processed/annotations_simple_and_composed_subtypes_5ms.csv")


composed_param$composed.subtype.label <- sapply(composed_param$composed.subtype,
    function(x) unique(combined_anns$composed.subtype.label[combined_anns$composed.subtype ==
        x]))


write.csv(composed_param, "./data/processed/accoustic_features_simple_and_composed_subtypes.csv",
    row.names = FALSE)

8.2.1 Train model

Code
composed_param <- read.csv("./data/processed/accoustic_features_simple_and_composed_subtypes.csv")

# keep only multi-element
composed_param <- composed_param[composed_param$num.elms > 1, ]

composed_param$song.rate[is.infinite(composed_param$song.rate)] <- mean(composed_param$song.rate[!is.infinite(composed_param$song.rate)])

# remove collinear
target_features <- names(composed_param)[!names(composed_param) %in%  c("sound.files", "selec", "View", "Channel", "start", "end", "bottom.freq", "top.freq", "Delta.Time..s.", "Begin.File", "subtipos", "selec.file", "true.end", "ovlp.sels", "composed.subtype", "subtype.label", "composed.subtype.label", names(composed_param)[sapply(composed_param, anyNA)])]

cormat <- cor(composed_param[, target_features], use = "pairwise.complete.obs")
hc <- findCorrelation(cormat, cutoff= 0.9) # putt any value as a "cutoff" 
hc <- sort(hc)

target_features <- target_features[hc]

# Create data subsets
partition <- createDataPartition(
    y = composed_param$composed.subtype.label,
    times = 1,
    p = 0.75,
    list = FALSE
)



composed_param$composed_param$composed.subtype.label <- as.factor(composed_param$composed.subtype.label)

trainset <- composed_param[partition, c(target_features, "composed.subtype.label")]
testset <- composed_param[-partition, c(target_features, "composed.subtype.label")]

trcontrol <-
    trainControl(
        method = "repeatedcv",
        number = 100,
        repeats = 100,
        savePredictions = TRUE,
        sampling = "down",
        classProbs = TRUE,
        returnResamp = "all"
    )

pred_model <- train(
    composed.subtype.label ~ .,
    data = trainset,
    method = "rf",
    trControl = trcontrol,
    metric = "Accuracy",
    preProcess = "scale"
)

# ggplot(pred_model) + theme_bw()

# save confusion matrix
conf_mat <-
    confusionMatrix(predict(pred_model, testset), as.factor(testset$composed.subtype.label))

conf_df <- as.data.frame(conf_mat$table)

conf_df$total <-
    sapply(conf_df$Reference, function(x)
        sum(testset$composed.subtype.label ==
                x))

conf_df$proportion <- conf_df$Freq / conf_df$total

# fit model on complete data set
best_rf_model <- pred_model$finalModel

# all_rf_model <- randomForest(
#   subtype.label ~ .,
#   data = elm_anns[, c(target_features, "subtype.label")],  # Your entire dataset
#   proximity = TRUE,  # Include proximity matrix
#   ntree = best_rf_model$ntree,  # Number of trees
#   mtry = best_rf_model$mtry,    # Number of variables tried for splitting at each node
#   nodesize = best_rf_model$nodesize,  # Minimum size of terminal nodes
#   maxnodes = best_rf_model$maxnodes  # Maximum number of terminal nodes
# )

rf_model_results <-
    list(
        pred_model_bb = pred_model,
        conf_mat_bb = conf_mat,
        confusion_df_bb = conf_df,
        testset_bb = testset
        #all_rf_model = all_rf_model
    )

saveRDS(
    rf_model_results,
    "./data/processed/random_forest_model_results_composed_subtypes.RDS"
)

8.2.2 Checking performance on test data

Code
rf_model_results <- readRDS("./data/processed/random_forest_model_results_composed_subtypes.RDS")


# print confusion matrix results
rf_model_results$conf_mat_bb
Confusion Matrix and Statistics

                  Reference
Prediction         composed.complex composed.trill step.flat step.flat.trill
  composed.complex              140              1         0               1
  composed.trill                 25              2         0               7
  step.flat                       4              0         2               5
  step.flat.trill                22              2         1              12
  step.trill                     98              2         0               5
                  Reference
Prediction         step.trill
  composed.complex         62
  composed.trill           90
  step.flat                14
  step.flat.trill          50
  step.trill              271

Overall Statistics
                                        
               Accuracy : 0.523         
                 95% CI : (0.488, 0.558)
    No Information Rate : 0.597         
    P-Value [Acc > NIR] : 1             
                                        
                  Kappa : 0.245         
                                        
 Mcnemar's Test P-Value : NA            

Statistics by Class:

                     Class: composed.complex Class: composed.trill
Sensitivity                            0.484               0.28571
Specificity                            0.879               0.84920
Pos Pred Value                         0.686               0.01613
Neg Pred Value                         0.757               0.99277
Prevalence                             0.354               0.00858
Detection Rate                         0.172               0.00245
Detection Prevalence                   0.250               0.15196
Balanced Accuracy                      0.681               0.56746
                     Class: step.flat Class: step.flat.trill Class: step.trill
Sensitivity                   0.66667                 0.4000             0.556
Specificity                   0.97171                 0.9046             0.681
Pos Pred Value                0.08000                 0.1379             0.721
Neg Pred Value                0.99874                 0.9753             0.509
Prevalence                    0.00368                 0.0368             0.597
Detection Rate                0.00245                 0.0147             0.332
Detection Prevalence          0.03064                 0.1066             0.461
Balanced Accuracy             0.81919                 0.6523             0.619
Code
confusion_df <- rf_model_results$confusion_df_bb

ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
    geom_tile() + coord_equal() + scale_fill_distiller(palette = "Greens",
    direction = 1) + geom_text(aes(label = round(proportion, 2)),
    color = "black", size = 3) + theme_classic() + labs(x = "Observed",
    y = "Predicted") + theme(axis.text.x = element_text(color = "black",
    size = 11, angle = 30, vjust = 0.8, hjust = 0.8))

8.3 On single and composed (multi-element) subtypes

8.3.1 Train model

Code
composed_param <- read.csv("./data/processed/accoustic_features_simple_and_composed_subtypes.csv")

composed_param <- composed_param[composed_param$composed.subtype.label != "22 kHz", ]

composed_param$song.rate[is.infinite(composed_param$song.rate)] <- mean(composed_param$song.rate[!is.infinite(composed_param$song.rate)])

# remove collinear
target_features <- names(composed_param)[!names(composed_param) %in%  c("sound.files", "selec", "View", "Channel", "start", "end", "bottom.freq", "top.freq", "Delta.Time..s.", "Begin.File", "subtipos", "selec.file", "true.end", "ovlp.sels", "composed.subtype", "subtype.label", "composed.subtype.label" , "song.rate", names(composed_param)[sapply(composed_param, anyNA)])]

cormat <- cor(composed_param[, target_features], use = "pairwise.complete.obs")
hc <- findCorrelation(cormat, cutoff= 0.9) # putt any value as a "cutoff" 
hc <- sort(hc)

target_features <- target_features[hc]

# Create data subsets
partition <- createDataPartition(
    y = composed_param$composed.subtype.label,
    times = 1,
    p = 0.75,
    list = FALSE
)



composed_param$composed_param$composed.subtype.label <- as.factor(composed_param$composed.subtype.label)

trainset <- composed_param[partition, c(target_features, "composed.subtype.label")]
testset <- composed_param[-partition, c(target_features, "composed.subtype.label")]

trcontrol <-
    trainControl(
        method = "repeatedcv",
        number = 100,
        repeats = 100,
        savePredictions = TRUE,
        sampling = "down",
        classProbs = TRUE,
        returnResamp = "all"
    )

pred_model <- train(
    composed.subtype.label ~ .,
    data = trainset,
    method = "rf",
    trControl = trcontrol,
    metric = "Accuracy",
    preProcess = "scale"
)

ggplot(pred_model) + theme_bw()

# save confusion matrix
conf_mat <-
    confusionMatrix(predict(pred_model, testset), as.factor(testset$composed.subtype.label))

conf_df <- as.data.frame(conf_mat$table)

conf_df$total <-
    sapply(conf_df$Reference, function(x)
        sum(testset$composed.subtype.label ==
                x))

conf_df$proportion <- conf_df$Freq / conf_df$total

# fit model on complete data set
best_rf_model <- pred_model$finalModel

# all_rf_model <- randomForest(
#   subtype.label ~ .,
#   data = elm_anns[, c(target_features, "subtype.label")],  # Your entire dataset
#   proximity = TRUE,  # Include proximity matrix
#   ntree = best_rf_model$ntree,  # Number of trees
#   mtry = best_rf_model$mtry,    # Number of variables tried for splitting at each node
#   nodesize = best_rf_model$nodesize,  # Minimum size of terminal nodes
#   maxnodes = best_rf_model$maxnodes  # Maximum number of terminal nodes
# )

rf_model_results <-
    list(
        pred_model_bb = pred_model,
        conf_mat_bb = conf_mat,
        confusion_df_bb = conf_df,
        testset_bb = testset
        #all_rf_model = all_rf_model
    )

saveRDS(
    rf_model_results,
    "./data/processed/random_forest_model_results_single_and_composed_subtypes.RDS"
)

8.3.2 Checking performance on test data

Code
rf_model_results <- readRDS("./data/processed/random_forest_model_results_single_and_composed_subtypes.RDS")

# print confusion matrix results
rf_model_results$conf_mat_bb
Confusion Matrix and Statistics

                  Reference
Prediction         complex composed.complex composed.trill flat step.flat
  complex              293               63              0    0         0
  composed.complex      16              121              2    0         0
  composed.trill         3               18              4   10         1
  flat                   0                0              0   46         0
  step.flat             16               10              1   39         2
  step.flat.trill        0               14              0    3         0
  step.trill             0               57              0    0         0
  trill                  8                6              0   10         0
                  Reference
Prediction         step.flat.trill step.trill trill
  complex                        0          5   120
  composed.complex               1        107   197
  composed.trill                 4         67   389
  flat                           0          3   385
  step.flat                     11         33   251
  step.flat.trill                7         49    97
  step.trill                     7        215    99
  trill                          0          8   581

Overall Statistics
                                        
               Accuracy : 0.376         
                 95% CI : (0.359, 0.392)
    No Information Rate : 0.627         
    P-Value [Acc > NIR] : 1             
                                        
                  Kappa : 0.256         
                                        
 Mcnemar's Test P-Value : NA            

Statistics by Class:

                     Class: complex Class: composed.complex
Sensitivity                  0.8720                  0.4187
Specificity                  0.9382                  0.8955
Pos Pred Value               0.6091                  0.2725
Neg Pred Value               0.9852                  0.9428
Prevalence                   0.0994                  0.0855
Detection Rate               0.0867                  0.0358
Detection Prevalence         0.1423                  0.1314
Balanced Accuracy            0.9051                  0.6571
                     Class: composed.trill Class: flat Class: step.flat
Sensitivity                        0.57143      0.4259         0.666667
Specificity                        0.85409      0.8814         0.893069
Pos Pred Value                     0.00806      0.1060         0.005510
Neg Pred Value                     0.99896      0.9789         0.999668
Prevalence                         0.00207      0.0320         0.000888
Detection Rate                     0.00118      0.0136         0.000592
Detection Prevalence               0.14679      0.1284         0.107428
Balanced Accuracy                  0.71276      0.6537         0.779868
                     Class: step.flat.trill Class: step.trill Class: trill
Sensitivity                         0.23333            0.4415        0.274
Specificity                         0.95133            0.9436        0.975
Pos Pred Value                      0.04118            0.5688        0.948
Neg Pred Value                      0.99283            0.9094        0.444
Prevalence                          0.00888            0.1441        0.627
Detection Rate                      0.00207            0.0636        0.172
Detection Prevalence                0.05031            0.1119        0.181
Balanced Accuracy                   0.59233            0.6926        0.624
Code
confusion_df <- rf_model_results$confusion_df_bb

ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
    geom_tile() + coord_equal() + scale_fill_distiller(palette = "Greens",
    direction = 1) + geom_text(aes(label = round(proportion, 2)),
    color = "black", size = 3) + theme_classic() + labs(x = "Observed",
    y = "Predicted") + theme(axis.text.x = element_text(color = "black",
    size = 11, angle = 30, vjust = 0.8, hjust = 0.8))

Takeaways

  • Decent classification of single element subtypes (accuracy: 0.76)
  • Poor classification of composed subtypes (accuracy: 0.52 and 0.37 when including single-element subtypes)

 


 

9 To-do list

  • Try other classification methods (VGGish, BirdNet, google model)
  • arreglar subtipos vacíos (NAs) y con etiquetas 5 y 44
  • buscar ejemplos específicos de subtipos raros para aumentar n de entrenamiento
  • buscar grabaciones completas (o cortes) con subtipos raros para n de prueba

Session information

R version 4.3.1 (2023-06-16)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.2 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3 
LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=pt_BR.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=pt_BR.UTF-8    
 [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=pt_BR.UTF-8   
 [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       

time zone: America/Costa_Rica
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] DiagrammeR_1.0.10    randomForest_4.7-1.1 caret_6.0-94        
 [4] lattice_0.22-5       ggplot2_3.4.4        ohun_1.0.0          
 [7] Rraven_1.0.14        warbleR_1.1.29       NatureSounds_1.0.4  
[10] seewave_2.2.3        tuneR_1.4.5          viridis_0.6.4       
[13] viridisLite_0.4.2    formatR_1.14         knitr_1.45          

loaded via a namespace (and not attached):
 [1] DBI_1.1.3            bitops_1.0-7         pROC_1.18.5         
 [4] pbapply_1.7-2        gridExtra_2.3        remotes_2.4.2.1     
 [7] testthat_3.2.0       rlang_1.1.2          magrittr_2.0.3      
[10] e1071_1.7-13         compiler_4.3.1       vctrs_0.6.4         
[13] reshape2_1.4.4       stringr_1.5.0        pkgconfig_2.0.3     
[16] crayon_1.5.2         fastmap_1.1.1        backports_1.4.1     
[19] labeling_0.4.3       utf8_1.2.4           rmarkdown_2.25      
[22] prodlim_2023.08.28   purrr_1.0.2          xfun_0.41           
[25] jsonlite_1.8.7       recipes_1.0.8        parallel_4.3.1      
[28] R6_2.5.1             RColorBrewer_1.1-3   stringi_1.7.12      
[31] parallelly_1.36.0    rpart_4.1.21         brio_1.1.3          
[34] lubridate_1.9.3      Rcpp_1.0.11          iterators_1.0.14    
[37] future.apply_1.11.0  Matrix_1.6-2         splines_4.3.1       
[40] nnet_7.3-19          igraph_1.5.1         timechange_0.2.0    
[43] tidyselect_1.2.0     rstudioapi_0.15.0    yaml_2.3.7          
[46] timeDate_4022.108    codetools_0.2-19     dtw_1.23-1          
[49] listenv_0.9.0        tibble_3.2.1         plyr_1.8.9          
[52] withr_2.5.2          evaluate_0.23        signal_0.7-7        
[55] future_1.33.0        survival_3.5-7       sf_1.0-14           
[58] sketchy_1.0.2        units_0.8-4          proxy_0.4-27        
[61] pillar_1.9.0         packrat_0.9.2        KernSmooth_2.23-22  
[64] stats4_4.3.1         checkmate_2.3.0      foreach_1.5.2       
[67] generics_0.1.3       RCurl_1.98-1.13      munsell_0.5.0       
[70] scales_1.2.1         globals_0.16.2       class_7.3-22        
[73] glue_1.6.2           tools_4.3.1          xaringanExtra_0.7.0 
[76] data.table_1.14.8    ModelMetrics_1.2.2.2 gower_1.0.1         
[79] visNetwork_2.1.2     grid_4.3.1           ipred_0.9-14        
[82] colorspace_2.1-0     nlme_3.1-163         cli_3.6.1           
[85] fansi_1.0.5          lava_1.7.3           dplyr_1.1.3         
[88] gtable_0.3.4         fftw_1.0-7           digest_0.6.33       
[91] classInt_0.4-10      farver_2.1.1         rjson_0.2.21        
[94] htmlwidgets_1.6.2    htmltools_0.5.7      lifecycle_1.0.4     
[97] hardhat_1.3.0        MASS_7.3-60