Number of sound files per model species and the “mimetic” bellbird:
<- read.csv("./data/processed/acoustic_parameters_model_and_mimetic.csv")
sp_param
$org.sound.files <- sapply(strsplit(sp_param$sound.files,
sp_param".wav", fixed = TRUE), "[", 1)
<- sp_param #[grep('mimetic', sp_param$species.vocalization, invert = TRUE), ]
model_sounds <- model_sounds[grep("babbling", model_sounds$species.vocalization,
model_sounds invert = TRUE), ]
$species.vocalization[grep("mimetic", model_sounds$species.vocalization)] <- "Mimetic"
model_sounds
$species.vocalization <- gsub("-model", "", model_sounds$species.vocalization)
model_sounds
aggregate(org.sound.files ~ species.vocalization, model_sounds, function(x) length(unique(x)))
species.vocalization | org.sound.files |
---|---|
Mimetic | 193 |
P.montezuma | 23 |
P.torquatus | 11 |
P.tricarunculatus-Monteverde | 32 |
P.tricarunculatus-Nicaragua | 15 |
P.tricarunculatus-Panama | 5 |
P.tricarunculatus-Talamanca | 7 |
R.sulfuratus | 26 |
Number of annotated vocalizations (after removing SNR < 2 dB):
aggregate(org.sound.files ~ species.vocalization, model_sounds, length)
species.vocalization | org.sound.files |
---|---|
Mimetic | 1483 |
P.montezuma | 74 |
P.torquatus | 64 |
P.tricarunculatus-Monteverde | 287 |
P.tricarunculatus-Nicaragua | 225 |
P.tricarunculatus-Panama | 55 |
P.tricarunculatus-Talamanca | 132 |
R.sulfuratus | 1272 |
<- read.csv("./data/processed/turicreate_results_test_many_dialects.csv")
vgg
$category <- factor(vgg$category)
vgg$pred <- factor(vgg$pred)
vgg
<- confusionMatrix(data = vgg$pred, reference = vgg$category)
conf_mat
<- as.data.frame(conf_mat$table)
conf_df
$total <- sapply(conf_df$Reference, function(x) sum(vgg$category ==
conf_df
x))
$proportion <- conf_df$Freq/conf_df$total
conf_df
<- conf_df[complete.cases(conf_df), ]
conf_df
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
round(conf_mat$overall, 3)
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.968 0.946 0.949 0.981 0.606
## AccuracyPValue McnemarPValue
## 0.000 NaN
<- read.csv("./data/processed/turicreate_results_mimetic_many_dialects.csv")
vgg
$category <- factor(vgg$category, levels = c(unique(vgg$category),
vgg"P.tricarunculatus-Monteverde", "P.tricarunculatus-Nicaragua",
"P.tricarunculatus-Panama"))
$pred <- factor(vgg$pred)
vgg
<- confusionMatrix(data = vgg$pred, reference = vgg$category)
conf_mat
<- as.data.frame(conf_mat$table)
conf_df
$total <- sapply(conf_df$Reference, function(x) sum(vgg$category ==
conf_df
x))
$proportion <- conf_df$Freq/conf_df$total
conf_df
<- conf_df[complete.cases(conf_df), ]
conf_df
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
round(conf_mat$overall, 3)
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.741 0.501 0.718 0.763 0.620
## AccuracyPValue McnemarPValue
## 0.000 NaN
<- read.csv("./data/processed/turicreate_results_test_no_dialects.csv")
vgg
$category <- factor(vgg$categorynd)
vgg$pred <- factor(vgg$pred)
vgg
<- confusionMatrix(data = vgg$pred, reference = vgg$category)
conf_mat
<- as.data.frame(conf_mat$table)
conf_df
$total <- sapply(conf_df$Reference, function(x) sum(vgg$category ==
conf_df
x))
$proportion <- conf_df$Freq/conf_df$total
conf_df
<- conf_df[complete.cases(conf_df), ]
conf_df
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
round(conf_mat$overall, 3)
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.992 0.985 0.981 0.998 0.606
## AccuracyPValue McnemarPValue
## 0.000 NaN
<- read.csv("./data/processed/turicreate_results_mimetic_no_dialects.csv")
vgg
# vgg$category <- factor(vgg$category, levels =
# c(unique(vgg$category), 'P.tricarunculatus-Monteverde',
# 'P.tricarunculatus-Nicaragua', 'P.tricarunculatus-Panama'))
$category <- factor(vgg$categorynd)
vgg$pred <- factor(vgg$prednd)
vgg
<- confusionMatrix(data = vgg$pred, reference = vgg$category)
conf_mat
<- as.data.frame(conf_mat$table)
conf_df
$total <- sapply(conf_df$Reference, function(x) sum(vgg$category ==
conf_df
x))
$proportion <- conf_df$Freq/conf_df$total
conf_df
<- conf_df[complete.cases(conf_df), ]
conf_df
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
round(conf_mat$overall, 3)
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.820 0.671 0.799 0.839 0.620
## AccuracyPValue McnemarPValue
## 0.000 0.000
<- read.table("./data/raw/Recording_dates_.csv", head = TRUE,
rec_info sep = ";")
# rec_info$date <- gsub('ago', 'aug', rec_info$date)
# rec_info$date <- gsub('abr', 'apr', rec_info$date)
# unique(rec_info$date)
$date <- as.Date(rec_info$date, format = "%d-%b-%y")
rec_info
$sound.files <- paste0(sapply(strsplit(as.character(vgg$filename),
vgg".wav", fixed = TRUE), "[", 1), ".wav")
$date <- sapply(1:nrow(vgg), function(x) as.character(rec_info$date[rec_info$sound.files ==
vgg$sound.files[x]][1]))
vgg
$date <- as.Date(vgg$date)
vgg
<- aggregate(sound.files ~ date + prednd, vgg, length)
agg_pred
ggplot(data = agg_pred, aes(x = date, y = sound.files, fill = prednd,
group = prednd)) + geom_bar(stat = "identity", position = "dodge",
width = 2) + scale_fill_viridis_d(begin = 0.1, end = 0.9) + labs(x = "Date",
y = "Vocalization count", fill = "Predicted species") + theme_classic()
<- read.table("./data/processed/turicreate_proabilities_test_and_train_no_dialects.csv",
probs head = FALSE, sep = " ")
<- read.csv("./data/processed/turicreate_predictions_test_and_train_no_dialects.csv")
preds
$V1 <- gsub("\\[|\\]", "", probs$V1)
probs
<- as.numeric(c(unlist(strsplit(probs$V1, " "))))
probs
<- matrix(probs, ncol = 4, byrow = TRUE)
prob_mat
<- cbind(preds, prob_mat)
model_pred_prob
$X1 <- NULL
model_pred_prob
<- prcomp(model_pred_prob[, c("1", "2", "3", "4")])
pca
<- cbind(preds, pca$x[, 1:2])
pred_pca
<- read.table("./data/processed/turicreate_probabilities_mimetic_no_dialects.csv",
mimetic_probs head = FALSE, sep = " ")
$V1 <- gsub("\\[|\\]", "", mimetic_probs$V1)
mimetic_probs
<- as.numeric(c(unlist(strsplit(mimetic_probs$V1, " "))))
mimetic_probs <- matrix(mimetic_probs, ncol = 4, byrow = TRUE)
mimetic_probs_mat
<- read.csv("./data/processed/turicreate_results_mimetic_no_dialects.csv")
mimetic_preds
<- cbind(mimetic_preds, mimetic_probs_mat)
mimetic_pred_prob
<- predict(pca, newdata = mimetic_pred_prob)
pca_mimetic
<- cbind(mimetic_preds, pca_mimetic[, 1:2])
mimetic_pred_pca
<- rbind(mimetic_pred_pca, pred_pca)
all_pca
$categorynd[all_pca$foldnd == "mimetic"] <- "Mimetic bird"
all_pca
$categorynd <- factor(all_pca$categorynd, levels = c("P.torquatus",
all_pca"R.sulfuratus", "P.tricarunculatus", "P.montezuma", "Mimetic bird"))
ggplot(all_pca[all_pca$foldnd != "mimetic", ], aes(x = PC1, y = PC2,
color = categorynd, shape = categorynd)) + geom_jitter(size = 4,
width = 0.04, height = 0.04) + scale_shape_manual(values = c(25:22)) +
scale_color_viridis_d(begin = 0.2, end = 0.8, alpha = 0.3) + theme_classic() +
ggtitle("Only model species vocalizations")
ggplot(all_pca[all_pca$foldnd == "test", ], aes(x = PC1, y = PC2,
color = categorynd, shape = categorynd)) + geom_jitter(size = 4,
width = 0.04, height = 0.04) + scale_shape_manual(values = c(25:22)) +
scale_color_viridis_d(begin = 0.2, end = 0.8, alpha = 0.5) + theme_classic() +
ggtitle("Model species test data set")
ggplot(all_pca[all_pca$foldnd == "test", ], aes(x = PC1, y = PC2,
color = categorynd, shape = categorynd)) + geom_jitter(size = 4,
width = 0.04, height = 0.04) + scale_shape_manual(values = c(25:22)) +
scale_color_viridis_d(begin = 0.2, end = 0.8, alpha = 0.5) + geom_point(data = all_pca[all_pca$foldnd ==
"mimetic", ], mapping = aes(x = PC1, y = PC2), color = "black",
shape = 21) + theme_classic() + ggtitle("Model species test and mimetic bellbird")
ggplot(all_pca[all_pca$foldnd == "test", ], aes(x = PC1, y = PC2,
shape = categorynd)) + geom_jitter(size = 4, width = 0.04, height = 0.04,
color = "gray", alpha = 0.4) + scale_shape_manual(values = c(25:22)) +
geom_point(data = all_pca[all_pca$foldnd == "mimetic", ], mapping = aes(x = PC1,
y = PC2), color = "black", shape = 21) + theme_classic() +
ggtitle("Model species test and mimetic bellbird")
ggplot(all_pca[all_pca$foldnd == "mimetic", ], aes(x = PC1, y = PC2,
color = categorynd, shape = categorynd, size = categorynd)) +
geom_jitter(width = 0.02, height = 0.02) + scale_shape_manual(values = 21) +
scale_color_manual(values = 4) + scale_size_manual(values = 2) +
# scale_fill_manual(values =c(viridis(4, begin = 0.2, end =
# 0.8, alpha = 0.3), 'transparent')) +
theme_classic() + ggtitle("Only mimetic bellbird")
Session information
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
##
## locale:
## [1] LC_CTYPE=pt_BR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=es_CR.UTF-8 LC_COLLATE=pt_BR.UTF-8
## [5] LC_MONETARY=es_CR.UTF-8 LC_MESSAGES=pt_BR.UTF-8
## [7] LC_PAPER=es_CR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] pbapply_1.5-0 caret_6.0-93 lattice_0.20-44 formatR_1.12
## [5] ggplot2_3.3.6 viridis_0.6.2 viridisLite_0.4.1 Rraven_1.0.13
## [9] remotes_2.4.2 knitr_1.40 kableExtra_1.3.4 klippy_0.0.0.9500
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-152 lubridate_1.8.0 webshot_0.5.3
## [4] RColorBrewer_1.1-3 httr_1.4.4 tools_4.1.0
## [7] bslib_0.4.0 utf8_1.2.2 R6_2.5.1
## [10] rpart_4.1-15 DBI_1.1.1 colorspace_2.0-3
## [13] nnet_7.3-16 withr_2.5.0 tidyselect_1.1.2
## [16] gridExtra_2.3 compiler_4.1.0 cli_3.4.0
## [19] rvest_1.0.3 xml2_1.3.3 labeling_0.4.2
## [22] sass_0.4.2 scales_1.2.1 proxy_0.4-27
## [25] systemfonts_1.0.4 stringr_1.4.1 digest_0.6.29
## [28] rmarkdown_2.16 svglite_2.1.0 pkgconfig_2.0.3
## [31] htmltools_0.5.3 parallelly_1.32.1 fastmap_1.1.0
## [34] highr_0.9 rlang_1.0.5 rstudioapi_0.14
## [37] jquerylib_0.1.4 generics_0.1.3 farver_2.1.1
## [40] jsonlite_1.8.0 dplyr_1.0.10 ModelMetrics_1.2.2.2
## [43] magrittr_2.0.3 Matrix_1.3-4 Rcpp_1.0.9
## [46] munsell_0.5.0 fansi_1.0.3 lifecycle_1.0.2
## [49] stringi_1.7.8 pROC_1.18.0 yaml_2.3.5
## [52] MASS_7.3-54 plyr_1.8.7 recipes_1.0.1
## [55] grid_4.1.0 parallel_4.1.0 listenv_0.8.0
## [58] splines_4.1.0 pillar_1.8.1 future.apply_1.9.1
## [61] reshape2_1.4.4 codetools_0.2-18 stats4_4.1.0
## [64] glue_1.6.2 evaluate_0.16 data.table_1.14.2
## [67] vctrs_0.4.1 foreach_1.5.2 gtable_0.3.1
## [70] purrr_0.3.4 future_1.28.0 assertthat_0.2.1
## [73] cachem_1.0.6 xfun_0.32 gower_1.0.0
## [76] prodlim_2019.11.13 e1071_1.7-11 class_7.3-19
## [79] survival_3.2-11 timeDate_4021.104 tibble_3.1.8
## [82] iterators_1.0.14 hardhat_1.2.0 lava_1.6.10
## [85] globals_0.16.1 ipred_0.9-13