Number of sound files per model species and the “mimetic” bellbird:
<- read.csv("./data/processed/acoustic_parameters_model_and_mimetic.csv")
sp_param
$org.sound.files <- sapply(strsplit(sp_param$sound.files,
sp_param".wav", fixed = TRUE), "[", 1)
<- sp_param #[grep('mimetic', sp_param$species.vocalization, invert = TRUE), ]
model_sounds <- model_sounds[grep("babbling", model_sounds$species.vocalization,
model_sounds invert = TRUE), ]
$species.vocalization[grep("mimetic", model_sounds$species.vocalization)] <- "Mimetic"
model_sounds
$species.vocalization <- gsub("-model", "", model_sounds$species.vocalization)
model_sounds
aggregate(org.sound.files ~ species.vocalization, model_sounds, function(x) length(unique(x)))
species.vocalization | org.sound.files |
---|---|
Mimetic | 193 |
P.montezuma | 23 |
P.torquatus | 11 |
P.tricarunculatus-Monteverde | 32 |
P.tricarunculatus-Nicaragua | 15 |
P.tricarunculatus-Panama | 5 |
P.tricarunculatus-Talamanca | 7 |
R.sulfuratus | 26 |
Number of annotated vocalizations (after removing SNR < 2 dB):
aggregate(org.sound.files ~ species.vocalization, model_sounds, length)
species.vocalization | org.sound.files |
---|---|
Mimetic | 1483 |
P.montezuma | 74 |
P.torquatus | 64 |
P.tricarunculatus-Monteverde | 287 |
P.tricarunculatus-Nicaragua | 225 |
P.tricarunculatus-Panama | 55 |
P.tricarunculatus-Talamanca | 132 |
R.sulfuratus | 1272 |
warbleR_options(wav.path = "./data/raw/recordings/consolidated_files")
<- read.csv("./data/processed/pooled_annotations.csv")
sels
<- sig2noise(sels, mar = 0.05, parallel = 10)
sels_snr
sum(sels_snr$SNR >= 2)/nrow(sels_snr)
# keep only those that are mimetic or are models with SNR >= 2
<- sels[sels_snr$SNR >= 2 & !grepl("mimetic", sels$species.vocalization) |
high_snr_sels grepl("mimetic", sels$species.vocalization), ]
<- selection_table(high_snr_sels, extended = TRUE, confirm.extended = FALSE,
high_snr_est mar = 0.05, parallel = 10)
table(attr(high_snr_est, "check.res")$sample.rate)
<- resample_est_waves(high_snr_est, samp.rate = 44.1,
high_snr_est bit.depth = 16, parallel = 10)
saveRDS(high_snr_est, "./data/processed/extended_selection_table_high_snr_models_and_allmimetic_sounds.RDS")
<- readRDS("./data/processed/extended_selection_table_high_snr_models_and_allmimetic_sounds.RDS")
high_snr_est
<- spectro_analysis(high_snr_est, parallel = 14)
sp_param
$species.vocalization <- high_snr_est$species.vocalization
sp_param
write.csv(sp_param, "./data/processed/acoustic_parameters_model_and_mimetic.csv",
row.names = FALSE)
<- mfcc_stats(high_snr_est, parallel = 4)
mel_param
$species.vocalization <- high_snr_est$species.vocalization
mel_param
write.csv(mel_param, "./data/processed/mfcc_stats_model_and_mimetic.csv",
row.names = FALSE)
<- sp_param[grep("mimetic", sp_param$species.vocalization,
model_sp_param invert = TRUE), ]
# only bellbird dialects
<- model_sp_param[grep("P.tricarunculatus-", model_sp_param$species.vocalization),
model_sp_param
]
# remove babbling due to low sample size (16)
<- model_sp_param[model_sp_param$species.vocalization !=
model_sp_param "P.tricarunculatus-babbling", ]
$species.vocalization <- as.factor(make.names(model_sp_param$species.vocalization,
model_sp_paramunique = FALSE, allow_ = TRUE))
# Create data subsets
<- createDataPartition(y = model_sp_param$species.vocalization,
partition times = 1, p = 0.75, list = FALSE)
<- model_sp_param[partition, -c(1, 2)]
trainset <- model_sp_param[-partition, -c(1, 2)]
testset
<- trainControl(method = "repeatedcv", number = 100, savePredictions = TRUE,
trcontrol classProbs = TRUE, returnResamp = "all", repeats = 100)
<- train(species.vocalization ~ ., data = trainset, method = "rf",
pred_model trControl = trcontrol, metric = "Accuracy", preProcess = "BoxCox")
# save confusion matrix
<- confusionMatrix(predict(pred_model, testset), testset$species.vocalization)
conf_mat
<- as.data.frame(conf_mat$table)
conf_df
$total <- sapply(conf_df$Reference, function(x) sum(testset$species.vocalization ==
conf_df
x))
$proportion <- conf_df$Freq/conf_df$total
conf_df
saveRDS(list(pred_model_bb = pred_model, conf_mat_bb = conf_mat, confusion_df_bb = conf_df,
testset_bb = testset), "./data/processed/random_forest_model_results_only_bellbird_dialects.RDS")
<- read.csv("./data/processed/mfcc_stats_model_and_mimetic.csv")
mel_param
<- mel_param[grep("mimetic", mel_param$species.vocalization,
model_mel_param invert = TRUE), ]
# only bellbird dialects
<- model_mel_param[grep("P.tricarunculatus-", model_mel_param$species.vocalization),
model_mel_param
]
# remove babbling due to low sample size (16)
<- model_mel_param[model_mel_param$species.vocalization !=
model_mel_param "P.tricarunculatus-babbling", ]
$species.vocalization <- as.factor(make.names(model_mel_param$species.vocalization,
model_mel_paramunique = FALSE, allow_ = TRUE))
# Create data subsets
set.seed(123)
<- createDataPartition(y = model_mel_param$species.vocalization,
partition times = 1, p = 0.75, list = FALSE)
<- model_mel_param[partition, -c(1, 2)]
trainset <- model_mel_param[-partition, -c(1, 2)]
testset
<- trainControl(method = "repeatedcv", number = 30, savePredictions = TRUE,
trcontrol classProbs = TRUE, returnResamp = "all", repeats = 30)
<- train(species.vocalization ~ ., data = trainset, method = "rf",
pred_model trControl = trcontrol, metric = "Accuracy", preProcess = "BoxCox")
# save confusion matrix
<- confusionMatrix(predict(pred_model, testset), testset$species.vocalization)
conf_mat
<- as.data.frame(conf_mat$table)
conf_df
$total <- sapply(conf_df$Reference, function(x) sum(testset$species.vocalization ==
conf_df
x))
$proportion <- conf_df$Freq/conf_df$total
conf_df
saveRDS(list(pred_model_bb = pred_model, conf_mat_bb = conf_mat, confusion_df_bb = conf_df,
testset_bb = testset), "./data/processed/random_forest_model_results_only_bellbird_dialects_MFCCs.RDS")
<- readRDS("./data/processed/random_forest_model_results_only_bellbird_dialects.RDS")
rf_model_results_bb
<- rf_model_results_bb$confusion_df_bb
confusion_df $Prediction <- gsub(".model|P.tricarunculatus.", "", confusion_df$Prediction)
confusion_df$Reference <- gsub(".model|P.tricarunculatus.", "", confusion_df$Reference)
confusion_df
ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
# print confusion matrix results
$conf_mat_bb$overall rf_model_results_bb
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.670520e-01 8.071442e-01 8.072182e-01 9.138199e-01 4.104046e-01
## AccuracyPValue McnemarPValue
## 1.415534e-35 1.207237e-02
<- readRDS("./data/processed/random_forest_model_results_only_bellbird_dialects_MFCCs.RDS")
rf_model_results_bb_mfcc
<- rf_model_results_bb_mfcc$confusion_df_bb
confusion_df $Prediction <- gsub(".model|P.tricarunculatus.", "", confusion_df$Prediction)
confusion_df$Reference <- gsub(".model|P.tricarunculatus.", "", confusion_df$Reference)
confusion_df
ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
# print confusion matrix results
$conf_mat_bb$overall rf_model_results_bb_mfcc
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.479769e-01 9.232703e-01 9.035434e-01 9.759389e-01 4.104046e-01
## AccuracyPValue McnemarPValue
## 1.021618e-51 NaN
<- sp_param[grep("mimetic_P.tricarunculatus-adult",
mimetic_no_calls $species.vocalization), ]
sp_param
$species.vocalization <- as.factor(make.names(mimetic_no_calls$species.vocalization,
mimetic_no_callsunique = FALSE, allow_ = TRUE))
$pred.class <- as.character(predict(rf_model_results_bb$pred_model_bb,
mimetic_no_calls
mimetic_no_calls))
table(mimetic_no_calls$pred.class)
##
## P.tricarunculatus.Monteverde P.tricarunculatus.Nicaragua
## 28 127
## P.tricarunculatus.Panama P.tricarunculatus.Talamanca
## 9 86
<- read.csv("./data/processed/mfcc_stats_model_and_mimetic.csv")
mel_param
<- mel_param[grep("mimetic_P.tricarunculatus-adult",
mimetic_no_calls_mel $species.vocalization), ]
mel_param
$species.vocalization <- as.factor(make.names(mimetic_no_calls_mel$species.vocalization,
mimetic_no_calls_melunique = FALSE, allow_ = TRUE))
$pred.class <- as.character(predict(rf_model_results_bb_mfcc$pred_model_bb,
mimetic_no_calls_mel
mimetic_no_calls_mel))
table(mimetic_no_calls_mel$pred.class)
##
## P.tricarunculatus.Monteverde P.tricarunculatus.Nicaragua
## 28 138
## P.tricarunculatus.Talamanca
## 84
<- sp_param[grep("mimetic", sp_param$species.vocalization,
model_sp_param invert = TRUE), ]
# remove babbling due to low sample size (16)
<- model_sp_param[model_sp_param$species.vocalization !=
model_sp_param "P.tricarunculatus-babbling", ]
$species.vocalization <- as.factor(make.names(model_sp_param$species.vocalization,
model_sp_paramunique = FALSE, allow_ = TRUE))
# Create data subsets
<- createDataPartition(y = model_sp_param$species.vocalization,
partition times = 1, p = 0.75, list = FALSE)
<- model_sp_param[partition, -c(1, 2)]
trainset <- model_sp_param[-partition, -c(1, 2)]
testset
<- trainControl(method = "repeatedcv", number = 30, savePredictions = TRUE,
trcontrol classProbs = TRUE, returnResamp = "all", repeats = 100)
<- train(species.vocalization ~ ., data = trainset, method = "rf",
pred_model trControl = trcontrol, metric = "Accuracy", preProcess = "BoxCox")
# save confusion matrix
<- confusionMatrix(predict(pred_model, testset), testset$species.vocalization)
conf_mat
<- as.data.frame(conf_mat$table)
conf_df
$total <- sapply(conf_df$Reference, function(x) sum(testset$species.vocalization ==
conf_df
x))
$proportion <- conf_df$Freq/conf_df$total
conf_df
saveRDS(list(pred_model = pred_model, conf_mat = conf_mat, confusion_df = conf_df,
testset = testset), "./data/processed/random_forest_model_results_with_dialects.RDS")
<- mel_param[grep("mimetic", mel_param$species.vocalization,
model_mel_param invert = TRUE), ]
# remove babbling due to low sample size (16)
<- model_mel_param[model_mel_param$species.vocalization !=
model_mel_param "P.tricarunculatus-babbling", ]
$species.vocalization <- as.factor(make.names(model_mel_param$species.vocalization,
model_mel_paramunique = FALSE, allow_ = TRUE))
# Create data subsets
set.seed(123)
<- createDataPartition(y = model_mel_param$species.vocalization,
partition times = 1, p = 0.75, list = FALSE)
<- model_mel_param[partition, -c(1, 2)]
trainset <- model_mel_param[-partition, -c(1, 2)]
testset
<- trainControl(method = "repeatedcv", number = 30, savePredictions = TRUE,
trcontrol classProbs = TRUE, returnResamp = "all", repeats = 30)
<- train(species.vocalization ~ ., data = trainset, method = "rf",
pred_model trControl = trcontrol, metric = "Accuracy", preProcess = "BoxCox")
# save confusion matrix
<- confusionMatrix(predict(pred_model, testset), testset$species.vocalization)
conf_mat
<- as.data.frame(conf_mat$table)
conf_df
$total <- sapply(conf_df$Reference, function(x) sum(testset$species.vocalization ==
conf_df
x))
$proportion <- conf_df$Freq/conf_df$total
conf_df
saveRDS(list(pred_model = pred_model, conf_mat = conf_mat, confusion_df = conf_df,
testset = testset), "./data/processed/random_forest_model_results_with_dialects_MFCCs.RDS")
<- readRDS("./data/processed/random_forest_model_results_with_dialects.RDS")
rf_model_results
<- rf_model_results$confusion_df
confusion_df $Prediction <- gsub(".model", "", confusion_df$Prediction)
confusion_df$Reference <- gsub(".model", "", confusion_df$Reference)
confusion_df
ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
# print confusion matrix results
$conf_mat$overall rf_model_results
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.276190e-01 8.756010e-01 9.020027e-01 9.482729e-01 6.057143e-01
## AccuracyPValue McnemarPValue
## 5.152081e-64 NaN
<- readRDS("./data/processed/random_forest_model_results_with_dialects_MFCCs.RDS")
rf_model_results_mfcc
<- rf_model_results_mfcc$confusion_df
confusion_df $Prediction <- gsub(".model", "", confusion_df$Prediction)
confusion_df$Reference <- gsub(".model", "", confusion_df$Reference)
confusion_df
ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
# print confusion matrix results
$conf_mat$overall rf_model_results_mfcc
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.466667e-01 9.089744e-01 9.238382e-01 9.642715e-01 6.057143e-01
## AccuracyPValue McnemarPValue
## 7.428844e-74 NaN
<- sp_param[grep("mimetic", sp_param$species.vocalization),
mimetic_sp_param
]
<- mimetic_sp_param[!mimetic_sp_param$species.vocalization %in%
mimetic_no_calls c("mimetic_P.tricarunculatus-babbling", "mimetic_P.tricarunculatus-aggresive",
"mimetic_P.tricarunculatus-whistle"), ]
$species.vocalization <- as.factor(make.names(mimetic_no_calls$species.vocalization,
mimetic_no_callsunique = FALSE, allow_ = TRUE))
$pred.class <- as.character(predict(rf_model_results$pred_model,
mimetic_no_calls
mimetic_no_calls))
$species.vocalization <- as.character(mimetic_no_calls$species.vocalization)
mimetic_no_calls
$pred.class[grep("montezuma", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("montezuma",
mimetic_no_calls$species.vocalization)] <- "P.montezuma"
mimetic_no_calls
$pred.class[grep("torquatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("torquatus",
mimetic_no_calls$species.vocalization)] <- "P.torquatus"
mimetic_no_calls
$pred.class[grep("sulfuratus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("sulfuratus",
mimetic_no_calls$species.vocalization)] <- "R.sulfuratus"
mimetic_no_calls
$species.vocalization[grep("tricarunculatus", mimetic_no_calls$species.vocalization)] <- "P.tricarunculatus.Talamanca"
mimetic_no_calls
<- confusionMatrix(data = factor(mimetic_no_calls$pred.class),
conf_mat reference = factor(mimetic_no_calls$species.vocalization, levels = unique(mimetic_no_calls$pred.class)))
<- as.data.frame(conf_mat$table)
conf_df
$total <- sapply(conf_df$Reference, function(x) sum(mimetic_no_calls$species.vocalization ==
conf_df
x))
$proportion <- conf_df$Freq/conf_df$total
conf_df
<- conf_df[conf_df$Reference %in% unique(mimetic_no_calls$species.vocalization),
conf_df
]
$Prediction <- factor(conf_df$Prediction, levels = c("R.sulfuratus",
conf_df"P.torquatus", "P.montezuma", "P.tricarunculatus.Talamanca", "P.tricarunculatus.Nicaragua",
"P.tricarunculatus.Panama", "P.tricarunculatus.Monteverde"))
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
$overall conf_mat
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.958894e-01 6.093952e-01 7.738987e-01 8.166407e-01 6.520198e-01
## AccuracyPValue McnemarPValue
## 1.537619e-32 NaN
<- mel_param[grep("mimetic", mel_param$species.vocalization),
mimetic_mel_param
]
<- mimetic_mel_param[!mimetic_mel_param$species.vocalization %in%
mimetic_no_calls c("mimetic_P.tricarunculatus-babbling", "mimetic_P.tricarunculatus-aggresive",
"mimetic_P.tricarunculatus-whistle"), ]
$species.vocalization <- as.factor(make.names(mimetic_no_calls$species.vocalization,
mimetic_no_callsunique = FALSE, allow_ = TRUE))
$pred.class <- as.character(predict(rf_model_results_mfcc$pred_model,
mimetic_no_calls
mimetic_no_calls))
$species.vocalization <- as.character(mimetic_no_calls$species.vocalization)
mimetic_no_calls
$pred.class[grep("montezuma", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("montezuma",
mimetic_no_calls$species.vocalization)] <- "P.montezuma"
mimetic_no_calls
$pred.class[grep("torquatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("torquatus",
mimetic_no_calls$species.vocalization)] <- "P.torquatus"
mimetic_no_calls
$pred.class[grep("sulfuratus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("sulfuratus",
mimetic_no_calls$species.vocalization)] <- "R.sulfuratus"
mimetic_no_calls
$species.vocalization[grep("tricarunculatus", mimetic_no_calls$species.vocalization)] <- "P.tricarunculatus.Talamanca"
mimetic_no_calls
<- confusionMatrix(data = factor(mimetic_no_calls$pred.class),
conf_mat reference = factor(mimetic_no_calls$species.vocalization, levels = unique(mimetic_no_calls$pred.class)))
<- as.data.frame(conf_mat$table)
conf_df
$total <- sapply(conf_df$Reference, function(x) sum(mimetic_no_calls$species.vocalization ==
conf_df
x))
$proportion <- conf_df$Freq/conf_df$total
conf_df
<- conf_df[conf_df$Reference %in% unique(mimetic_no_calls$species.vocalization),
conf_df
]
$Prediction <- factor(conf_df$Prediction, levels = c("R.sulfuratus",
conf_df"P.torquatus", "P.montezuma", "P.tricarunculatus.Talamanca", "P.tricarunculatus.Nicaragua",
"P.tricarunculatus.Panama", "P.tricarunculatus.Monteverde"))
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
$overall conf_mat
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.491141e-01 5.401192e-01 7.256352e-01 7.715510e-01 6.520198e-01
## AccuracyPValue McnemarPValue
## 2.426438e-15 NaN
<- sp_param[grep("mimetic", sp_param$species.vocalization),
mimetic_sp_param
]
<- mimetic_sp_param[!mimetic_sp_param$species.vocalization %in%
mimetic_no_calls c("mimetic_P.tricarunculatus-babbling", "mimetic_P.tricarunculatus-aggresive",
"mimetic_P.tricarunculatus-whistle"), ]
$species.vocalization <- as.factor(make.names(mimetic_no_calls$species.vocalization,
mimetic_no_callsunique = FALSE, allow_ = TRUE))
$pred.class <- as.character(predict(rf_model_results$pred_model,
mimetic_no_calls
mimetic_no_calls))
$species.vocalization <- as.character(mimetic_no_calls$species.vocalization)
mimetic_no_calls
$pred.class[grep("tricarun", mimetic_no_calls$pred.class)] <- "P.tricarunculatus"
mimetic_no_calls
$pred.class[grep("montezuma", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("montezuma",
mimetic_no_calls$species.vocalization)] <- "P.montezuma"
mimetic_no_calls
$pred.class[grep("torquatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("torquatus",
mimetic_no_calls$species.vocalization)] <- "P.torquatus"
mimetic_no_calls
$pred.class[grep("sulfuratus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("sulfuratus",
mimetic_no_calls$species.vocalization)] <- "R.sulfuratus"
mimetic_no_calls
$pred.class[grep("tricarunculatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("tricarunculatus",
mimetic_no_calls$species.vocalization)] <- "P.tricarunculatus"
mimetic_no_calls
<- confusionMatrix(data = factor(mimetic_no_calls$pred.class),
conf_mat reference = factor(mimetic_no_calls$species.vocalization))
<- as.data.frame(conf_mat$table)
conf_df
$total <- sapply(conf_df$Reference, function(x) sum(mimetic_no_calls$species.vocalization ==
conf_df
x))
$proportion <- conf_df$Freq/conf_df$total
conf_df
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
$overall conf_mat
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.419561e-01 6.884951e-01 8.218597e-01 8.606167e-01 6.520198e-01
## AccuracyPValue McnemarPValue
## 1.543505e-57 8.687272e-05
<- mel_param[grep("mimetic", mel_param$species.vocalization),
mimetic_mel_param
]
<- mimetic_mel_param[!mimetic_mel_param$species.vocalization %in%
mimetic_no_calls c("mimetic_P.tricarunculatus-babbling", "mimetic_P.tricarunculatus-aggresive",
"mimetic_P.tricarunculatus-whistle"), ]
$species.vocalization <- as.factor(make.names(mimetic_no_calls$species.vocalization,
mimetic_no_callsunique = FALSE, allow_ = TRUE))
$pred.class <- as.character(predict(rf_model_results_mfcc$pred_model,
mimetic_no_calls
mimetic_no_calls))
$species.vocalization <- as.character(mimetic_no_calls$species.vocalization)
mimetic_no_calls
$pred.class[grep("tricarun", mimetic_no_calls$pred.class)] <- "P.tricarunculatus"
mimetic_no_calls
$pred.class[grep("montezuma", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("montezuma",
mimetic_no_calls$species.vocalization)] <- "P.montezuma"
mimetic_no_calls
$pred.class[grep("torquatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("torquatus",
mimetic_no_calls$species.vocalization)] <- "P.torquatus"
mimetic_no_calls
$pred.class[grep("sulfuratus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("sulfuratus",
mimetic_no_calls$species.vocalization)] <- "R.sulfuratus"
mimetic_no_calls
$pred.class[grep("tricarunculatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("tricarunculatus",
mimetic_no_calls$species.vocalization)] <- "P.tricarunculatus"
mimetic_no_calls
<- confusionMatrix(data = factor(mimetic_no_calls$pred.class),
conf_mat reference = factor(mimetic_no_calls$species.vocalization))
<- as.data.frame(conf_mat$table)
conf_df
$total <- sapply(conf_df$Reference, function(x) sum(mimetic_no_calls$species.vocalization ==
conf_df
x))
$proportion <- conf_df$Freq/conf_df$total
conf_df
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
$overall conf_mat
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.944720e-01 6.109651e-01 7.724303e-01 8.152803e-01 6.520198e-01
## AccuracyPValue McnemarPValue
## 6.637231e-32 2.611333e-08
<- sp_param[grep("mimetic", sp_param$species.vocalization,
model_sp_param invert = TRUE), ]
# remove babbling due to low sample size (16)
<- model_sp_param[model_sp_param$species.vocalization !=
model_sp_param "P.tricarunculatus-babbling", ]
# collapse bellbird dialects
$species.vocalization[grep("P.tricarunculatus", model_sp_param$species.vocalization)] <- "P.tricarunculatus"
model_sp_param
$species.vocalization <- as.factor(make.names(model_sp_param$species.vocalization,
model_sp_paramunique = FALSE, allow_ = TRUE))
# Create data subsets
<- createDataPartition(y = model_sp_param$species.vocalization,
partition times = 1, p = 0.75, list = FALSE)
<- model_sp_param[partition, -c(1, 2)]
trainset <- model_sp_param[-partition, -c(1, 2)]
testset
<- trainControl(method = "repeatedcv", number = 100, savePredictions = TRUE,
trcontrol classProbs = TRUE, returnResamp = "all", repeats = 100)
<- train(species.vocalization ~ ., data = trainset, method = "rf",
pred_model trControl = trcontrol, metric = "Accuracy", preProcess = "BoxCox")
# save confusion matrix
<- confusionMatrix(predict(pred_model, testset), testset$species.vocalization)
conf_mat
<- as.data.frame(conf_mat$table)
conf_df
$total <- sapply(conf_df$Reference, function(x) sum(testset$species.vocalization ==
conf_df
x))
$proportion <- conf_df$Freq/conf_df$total
conf_df
# nd = no dialects
saveRDS(list(pred_model_nd = pred_model, conf_mat_nd = conf_mat, confusion_df_nd = conf_df,
testset_nd = testset), "./data/processed/random_forest_model_results_collapsing_dialects.RDS")
<- mel_param[grep("mimetic", mel_param$species.vocalization,
model_mel_param invert = TRUE), ]
# remove babbling due to low sample size (16)
<- model_mel_param[model_mel_param$species.vocalization !=
model_mel_param "P.tricarunculatus-babbling", ]
# collapse bellbird dialects
$species.vocalization[grep("P.tricarunculatus", model_mel_param$species.vocalization)] <- "P.tricarunculatus"
model_mel_param
$species.vocalization <- as.factor(make.names(model_mel_param$species.vocalization,
model_mel_paramunique = FALSE, allow_ = TRUE))
# Create data subsets
<- createDataPartition(y = model_mel_param$species.vocalization,
partition times = 1, p = 0.75, list = FALSE)
<- model_mel_param[partition, -c(1, 2)]
trainset <- model_mel_param[-partition, -c(1, 2)]
testset
<- trainControl(method = "repeatedcv", number = 30, savePredictions = TRUE,
trcontrol classProbs = TRUE, returnResamp = "all", repeats = 30)
<- train(species.vocalization ~ ., data = trainset, method = "rf",
pred_model trControl = trcontrol, metric = "Accuracy", preProcess = "BoxCox")
# save confusion matrix
<- confusionMatrix(predict(pred_model, testset), testset$species.vocalization)
conf_mat
<- as.data.frame(conf_mat$table)
conf_df
$total <- sapply(conf_df$Reference, function(x) sum(testset$species.vocalization ==
conf_df
x))
$proportion <- conf_df$Freq/conf_df$total
conf_df
# nd = no dialects
saveRDS(list(pred_model_nd = pred_model, conf_mat_nd = conf_mat, confusion_df_nd = conf_df,
testset_nd = testset), "./data/processed/random_forest_model_results_collapsing_dialects_MFCCs.RDS")
<- readRDS("./data/processed/random_forest_model_results_collapsing_dialects.RDS")
rf_model_results_nd
<- rf_model_results_nd$confusion_df_nd
confusion_df $Prediction <- gsub(".model", "", confusion_df$Prediction)
confusion_df$Reference <- gsub(".model", "", confusion_df$Reference)
confusion_df
ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
# print confusion matrix results
$conf_mat_nd$overall rf_model_results_nd
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.676190e-01 9.371952e-01 9.486601e-01 9.810260e-01 6.057143e-01
## AccuracyPValue McnemarPValue
## 1.323050e-86 NaN
<- readRDS("./data/processed/random_forest_model_results_collapsing_dialects_MFCCs.RDS")
rf_model_results_nd_mfccs
<- rf_model_results_nd_mfccs$confusion_df_nd
confusion_df $Prediction <- gsub(".model", "", confusion_df$Prediction)
confusion_df$Reference <- gsub(".model", "", confusion_df$Reference)
confusion_df
ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
# print confusion matrix results
$conf_mat_nd$overall rf_model_results_nd_mfccs
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.771863e-01 9.561112e-01 9.604884e-01 9.881576e-01 6.045627e-01
## AccuracyPValue McnemarPValue
## 5.729264e-94 NaN
<- sp_param[grep("mimetic", sp_param$species.vocalization),
mimetic_sp_param
]
<- mimetic_sp_param[!mimetic_sp_param$species.vocalization %in%
mimetic_no_calls c("mimetic_P.tricarunculatus-babbling", "mimetic_P.tricarunculatus-aggresive",
"mimetic_P.tricarunculatus-whistle"), ]
$species.vocalization <- as.factor(make.names(mimetic_no_calls$species.vocalization,
mimetic_no_callsunique = FALSE, allow_ = TRUE))
$pred.class <- as.character(predict(rf_model_results_nd$pred_model_nd,
mimetic_no_calls
mimetic_no_calls))
$species.vocalization <- as.character(mimetic_no_calls$species.vocalization)
mimetic_no_calls
$pred.class[grep("tricarun", mimetic_no_calls$pred.class)] <- "P.tricarunculatus"
mimetic_no_calls
$pred.class[grep("montezuma", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("montezuma",
mimetic_no_calls$species.vocalization)] <- "P.montezuma"
mimetic_no_calls
$pred.class[grep("torquatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("torquatus",
mimetic_no_calls$species.vocalization)] <- "P.torquatus"
mimetic_no_calls
$pred.class[grep("sulfuratus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("sulfuratus",
mimetic_no_calls$species.vocalization)] <- "R.sulfuratus"
mimetic_no_calls
$pred.class[grep("tricarunculatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("tricarunculatus",
mimetic_no_calls$species.vocalization)] <- "P.tricarunculatus"
mimetic_no_calls
<- confusionMatrix(data = factor(mimetic_no_calls$pred.class),
conf_mat reference = factor(mimetic_no_calls$species.vocalization))
<- as.data.frame(conf_mat$table)
conf_df
$total <- sapply(conf_df$Reference, function(x) sum(mimetic_no_calls$species.vocalization ==
conf_df
x))
$proportion <- conf_df$Freq/conf_df$total
conf_df
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
$overall conf_mat
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.284904e-01 6.664378e-01 8.077892e-01 8.478136e-01 6.520198e-01
## AccuracyPValue McnemarPValue
## 2.588161e-49 2.695079e-05
<- mel_param[grep("mimetic", mel_param$species.vocalization),
mimetic_mel_param
]
<- mimetic_mel_param[!mimetic_mel_param$species.vocalization %in%
mimetic_no_calls c("mimetic_P.tricarunculatus-babbling", "mimetic_P.tricarunculatus-aggresive",
"mimetic_P.tricarunculatus-whistle"), ]
$species.vocalization <- as.factor(make.names(mimetic_no_calls$species.vocalization,
mimetic_no_callsunique = FALSE, allow_ = TRUE))
$pred.class <- as.character(predict(rf_model_results_nd_mfccs$pred_model_nd,
mimetic_no_calls
mimetic_no_calls))
$species.vocalization <- as.character(mimetic_no_calls$species.vocalization)
mimetic_no_calls
$pred.class[grep("tricarun", mimetic_no_calls$pred.class)] <- "P.tricarunculatus"
mimetic_no_calls
$pred.class[grep("montezuma", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("montezuma",
mimetic_no_calls$species.vocalization)] <- "P.montezuma"
mimetic_no_calls
$pred.class[grep("torquatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("torquatus",
mimetic_no_calls$species.vocalization)] <- "P.torquatus"
mimetic_no_calls
$pred.class[grep("sulfuratus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("sulfuratus",
mimetic_no_calls$species.vocalization)] <- "R.sulfuratus"
mimetic_no_calls
$pred.class[grep("tricarunculatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("tricarunculatus",
mimetic_no_calls$species.vocalization)] <- "P.tricarunculatus"
mimetic_no_calls
<- confusionMatrix(data = factor(mimetic_no_calls$pred.class),
conf_mat reference = factor(mimetic_no_calls$species.vocalization))
<- as.data.frame(conf_mat$table)
conf_df
$total <- sapply(conf_df$Reference, function(x) sum(mimetic_no_calls$species.vocalization ==
conf_df
x))
$proportion <- conf_df$Freq/conf_df$total
conf_df
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
$overall conf_mat
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 6.832034e-01 4.563042e-01 6.582118e-01 7.074298e-01 6.520198e-01
## AccuracyPValue McnemarPValue
## 7.220395e-03 1.666545e-21
<- sp_param[grep("mimetic", sp_param$species.vocalization,
model_sp_param invert = TRUE), ]
# only talamanca
<- model_sp_param[grep("Nicaragua|Monteverde|Panama",
model_sp_param $species.vocalization, invert = TRUE), ]
model_sp_param
# remove babbling due to low sample size (16)
<- model_sp_param[model_sp_param$species.vocalization !=
model_sp_param "P.tricarunculatus-babbling", ]
$species.vocalization <- as.factor(make.names(model_sp_param$species.vocalization,
model_sp_paramunique = FALSE, allow_ = TRUE))
# Create data subsets
<- createDataPartition(y = model_sp_param$species.vocalization,
partition times = 1, p = 0.75, list = FALSE)
<- model_sp_param[partition, -c(1, 2)]
trainset <- model_sp_param[-partition, -c(1, 2)]
testset
<- trainControl(method = "repeatedcv", number = 100, savePredictions = TRUE,
trcontrol classProbs = TRUE, returnResamp = "all", repeats = 100)
<- train(species.vocalization ~ ., data = trainset, method = "rf",
pred_model trControl = trcontrol, metric = "Accuracy", preProcess = "BoxCox")
# save confusion matrix
<- confusionMatrix(predict(pred_model, testset), testset$species.vocalization)
conf_mat
<- as.data.frame(conf_mat$table)
conf_df
$total <- sapply(conf_df$Reference, function(x) sum(testset$species.vocalization ==
conf_df
x))
$proportion <- conf_df$Freq/conf_df$total
conf_df
saveRDS(list(pred_model_talmn = pred_model, conf_mat_talmn = conf_mat,
confusion_df_talmn = conf_df, testset_talmn = testset), "./data/processed/random_forest_model_results_talamanca_dialect.RDS")
<- readRDS("./data/processed/random_forest_model_results_talamanca_dialect.RDS")
rf_model_results_talmn
<- rf_model_results_talmn$confusion_df_talmn
confusion_df $Prediction <- gsub(".model", "", confusion_df$Prediction)
confusion_df$Reference <- gsub(".model", "", confusion_df$Reference)
confusion_df
ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
# print confusion matrix results
$conf_mat_talmn$overall rf_model_results_talmn
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.948052e-01 9.828363e-01 9.813615e-01 9.993703e-01 8.259740e-01
## AccuracyPValue McnemarPValue
## 3.621196e-29 NaN
<- sp_param[grep("mimetic", sp_param$species.vocalization),
mimetic_sp_param
]
<- mimetic_sp_param[!mimetic_sp_param$species.vocalization %in%
mimetic_no_calls c("mimetic_P.tricarunculatus-babbling", "mimetic_P.tricarunculatus-aggresive",
"mimetic_P.tricarunculatus-whistle"), ]
$species.vocalization <- as.factor(make.names(mimetic_no_calls$species.vocalization,
mimetic_no_callsunique = FALSE, allow_ = TRUE))
$pred.class <- as.character(predict(rf_model_results_talmn$pred_model_talmn,
mimetic_no_calls
mimetic_no_calls))
$species.vocalization <- as.character(mimetic_no_calls$species.vocalization)
mimetic_no_calls
$pred.class[grep("tricarun", mimetic_no_calls$pred.class)] <- "P.tricarunculatus"
mimetic_no_calls
$pred.class[grep("montezuma", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("montezuma",
mimetic_no_calls$species.vocalization)] <- "P.montezuma"
mimetic_no_calls
$pred.class[grep("torquatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("torquatus",
mimetic_no_calls$species.vocalization)] <- "P.torquatus"
mimetic_no_calls
$pred.class[grep("sulfuratus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("sulfuratus",
mimetic_no_calls$species.vocalization)] <- "R.sulfuratus"
mimetic_no_calls
$pred.class[grep("tricarunculatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("tricarunculatus",
mimetic_no_calls$species.vocalization)] <- "P.tricarunculatus"
mimetic_no_calls
<- confusionMatrix(data = factor(mimetic_no_calls$pred.class),
conf_mat reference = factor(mimetic_no_calls$species.vocalization))
<- as.data.frame(conf_mat$table)
conf_df
$total <- sapply(conf_df$Reference, function(x) sum(mimetic_no_calls$species.vocalization ==
conf_df
x))
$proportion <- conf_df$Freq/conf_df$total
conf_df
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
$overall conf_mat
Test data
<- read.csv("./data/processed/turicreate_results_test_many_dialects.csv")
vgg
# vgg$category <- factor(vgg$category, levels =
# c(unique(vgg$category), 'P.tricarunculatus-Monteverde',
# 'P.tricarunculatus-Nicaragua', 'P.tricarunculatus-Panama'))
$category <- factor(vgg$category)
vgg$pred <- factor(vgg$pred)
vgg
<- confusionMatrix(data = vgg$pred, reference = vgg$category)
conf_mat
<- as.data.frame(conf_mat$table)
conf_df
$total <- sapply(conf_df$Reference, function(x) sum(vgg$category ==
conf_df
x))
$proportion <- conf_df$Freq/conf_df$total
conf_df
<- conf_df[complete.cases(conf_df), ]
conf_df
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
$overall conf_mat
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.676190e-01 9.456068e-01 9.486601e-01 9.810260e-01 6.057143e-01
## AccuracyPValue McnemarPValue
## 1.323050e-86 NaN
Mimetic bellbird
<- read.csv("./data/processed/turicreate_results_mimetic_many_dialects.csv")
vgg
$category <- factor(vgg$category, levels = c(unique(vgg$category),
vgg"P.tricarunculatus-Monteverde", "P.tricarunculatus-Nicaragua",
"P.tricarunculatus-Panama"))
$pred <- factor(vgg$pred)
vgg
<- confusionMatrix(data = vgg$pred, reference = vgg$category)
conf_mat
<- as.data.frame(conf_mat$table)
conf_df
$total <- sapply(conf_df$Reference, function(x) sum(vgg$category ==
conf_df
x))
$proportion <- conf_df$Freq/conf_df$total
conf_df
<- conf_df[complete.cases(conf_df), ]
conf_df
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
$overall conf_mat
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.410654e-01 5.005928e-01 7.179701e-01 7.632020e-01 6.203641e-01
## AccuracyPValue McnemarPValue
## 4.971336e-23 NaN
Test data
<- read.csv("./data/processed/turicreate_results_test_no_dialects.csv")
vgg
$category <- factor(vgg$categorynd)
vgg$pred <- factor(vgg$pred)
vgg
<- confusionMatrix(data = vgg$pred, reference = vgg$category)
conf_mat
<- as.data.frame(conf_mat$table)
conf_df
$total <- sapply(conf_df$Reference, function(x) sum(vgg$category ==
conf_df
x))
$proportion <- conf_df$Freq/conf_df$total
conf_df
<- conf_df[complete.cases(conf_df), ]
conf_df
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
$overall conf_mat
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.923810e-01 9.854012e-01 9.806079e-01 9.979203e-01 6.057143e-01
## AccuracyPValue McnemarPValue
## 2.788403e-106 NaN
Mimetic bellbird
<- read.csv("./data/processed/turicreate_results_mimetic_no_dialects.csv")
vgg
# vgg$category <- factor(vgg$category, levels =
# c(unique(vgg$category), 'P.tricarunculatus-Monteverde',
# 'P.tricarunculatus-Nicaragua', 'P.tricarunculatus-Panama'))
$category <- factor(vgg$categorynd)
vgg$pred <- factor(vgg$prednd)
vgg
<- confusionMatrix(data = vgg$pred, reference = vgg$category)
conf_mat
<- as.data.frame(conf_mat$table)
conf_df
$total <- sapply(conf_df$Reference, function(x) sum(vgg$category ==
conf_df
x))
$proportion <- conf_df$Freq/conf_df$total
conf_df
<- conf_df[complete.cases(conf_df), ]
conf_df
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
$overall conf_mat
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.199595e-01 6.705305e-01 7.994401e-01 8.392031e-01 6.203641e-01
## AccuracyPValue McnemarPValue
## 6.558722e-63 8.511534e-10
Session information
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
##
## locale:
## [1] LC_CTYPE=pt_BR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=es_CR.UTF-8 LC_COLLATE=pt_BR.UTF-8
## [5] LC_MONETARY=es_CR.UTF-8 LC_MESSAGES=pt_BR.UTF-8
## [7] LC_PAPER=es_CR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] randomForest_4.7-1.1 caret_6.0-93 lattice_0.20-44
## [4] soundgen_2.5.2 shinyBS_0.61.1 formatR_1.12
## [7] ggplot2_3.3.6 viridis_0.6.2 viridisLite_0.4.1
## [10] Rraven_1.0.13 warbleR_1.1.28 NatureSounds_1.0.4
## [13] seewave_2.2.0 tuneR_1.4.0 remotes_2.4.2
## [16] knitr_1.40 kableExtra_1.3.4 klippy_0.0.0.9500
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-3 rjson_0.2.21 ellipsis_0.3.2
## [4] class_7.3-19 rstudioapi_0.14 proxy_0.4-27
## [7] farver_2.1.1 listenv_0.8.0 prodlim_2019.11.13
## [10] fansi_1.0.3 lubridate_1.8.0 xml2_1.3.3
## [13] codetools_0.2-18 splines_4.1.0 cachem_1.0.6
## [16] jsonlite_1.8.0 pROC_1.18.0 shiny_1.7.2
## [19] compiler_4.1.0 httr_1.4.4 assertthat_0.2.1
## [22] Matrix_1.3-4 fastmap_1.1.0 cli_3.4.0
## [25] later_1.3.0 htmltools_0.5.3 tools_4.1.0
## [28] gtable_0.3.1 glue_1.6.2 reshape2_1.4.4
## [31] dplyr_1.0.10 Rcpp_1.0.9 jquerylib_0.1.4
## [34] vctrs_0.4.1 svglite_2.1.0 nlme_3.1-152
## [37] iterators_1.0.14 timeDate_4021.104 gower_1.0.0
## [40] xfun_0.32 stringr_1.4.1 globals_0.16.1
## [43] rvest_1.0.3 mime_0.12 lifecycle_1.0.2
## [46] future_1.28.0 MASS_7.3-54 scales_1.2.1
## [49] ipred_0.9-13 promises_1.2.0.1 parallel_4.1.0
## [52] RColorBrewer_1.1-3 yaml_2.3.5 pbapply_1.5-0
## [55] gridExtra_2.3 sass_0.4.2 rpart_4.1-15
## [58] stringi_1.7.8 highr_0.9 foreach_1.5.2
## [61] e1071_1.7-11 hardhat_1.2.0 lava_1.6.10
## [64] rlang_1.0.5 pkgconfig_2.0.3 systemfonts_1.0.4
## [67] dtw_1.22-3 bitops_1.0-7 evaluate_0.16
## [70] purrr_0.3.4 labeling_0.4.2 recipes_1.0.1
## [73] tidyselect_1.1.2 parallelly_1.32.1 plyr_1.8.7
## [76] magrittr_2.0.3 R6_2.5.1 fftw_1.0-7
## [79] generics_0.1.3 DBI_1.1.1 pillar_1.8.1
## [82] withr_2.5.0 survival_3.2-11 RCurl_1.98-1.8
## [85] nnet_7.3-16 tibble_3.1.8 future.apply_1.9.1
## [88] utf8_1.2.2 rmarkdown_2.16 grid_4.1.0
## [91] data.table_1.14.2 ModelMetrics_1.2.2.2 digest_0.6.29
## [94] webshot_0.5.3 xtable_1.8-4 httpuv_1.6.6
## [97] stats4_4.1.0 signal_0.7-7 munsell_0.5.0
## [100] bslib_0.4.0