Number of sound files per model species and the “mimetic” bellbird:
sp_param <- read.csv("./data/processed/acoustic_parameters_model_and_mimetic.csv")
sp_param$org.sound.files <- sapply(strsplit(sp_param$sound.files,
".wav", fixed = TRUE), "[", 1)
model_sounds <- sp_param #[grep('mimetic', sp_param$species.vocalization, invert = TRUE), ]
model_sounds <- model_sounds[grep("babbling", model_sounds$species.vocalization,
invert = TRUE), ]
model_sounds$species.vocalization[grep("mimetic", model_sounds$species.vocalization)] <- "Mimetic"
model_sounds$species.vocalization <- gsub("-model", "", model_sounds$species.vocalization)
aggregate(org.sound.files ~ species.vocalization, model_sounds, function(x) length(unique(x)))| species.vocalization | org.sound.files |
|---|---|
| Mimetic | 193 |
| P.montezuma | 23 |
| P.torquatus | 11 |
| P.tricarunculatus-Monteverde | 32 |
| P.tricarunculatus-Nicaragua | 15 |
| P.tricarunculatus-Panama | 5 |
| P.tricarunculatus-Talamanca | 7 |
| R.sulfuratus | 26 |
Number of annotated vocalizations (after removing SNR < 2 dB):
aggregate(org.sound.files ~ species.vocalization, model_sounds, length)| species.vocalization | org.sound.files |
|---|---|
| Mimetic | 1483 |
| P.montezuma | 74 |
| P.torquatus | 64 |
| P.tricarunculatus-Monteverde | 287 |
| P.tricarunculatus-Nicaragua | 225 |
| P.tricarunculatus-Panama | 55 |
| P.tricarunculatus-Talamanca | 132 |
| R.sulfuratus | 1272 |
warbleR_options(wav.path = "./data/raw/recordings/consolidated_files")
sels <- read.csv("./data/processed/pooled_annotations.csv")
sels_snr <- sig2noise(sels, mar = 0.05, parallel = 10)
sum(sels_snr$SNR >= 2)/nrow(sels_snr)
# keep only those that are mimetic or are models with SNR >= 2
high_snr_sels <- sels[sels_snr$SNR >= 2 & !grepl("mimetic", sels$species.vocalization) |
grepl("mimetic", sels$species.vocalization), ]
high_snr_est <- selection_table(high_snr_sels, extended = TRUE, confirm.extended = FALSE,
mar = 0.05, parallel = 10)
table(attr(high_snr_est, "check.res")$sample.rate)
high_snr_est <- resample_est_waves(high_snr_est, samp.rate = 44.1,
bit.depth = 16, parallel = 10)
saveRDS(high_snr_est, "./data/processed/extended_selection_table_high_snr_models_and_allmimetic_sounds.RDS")high_snr_est <- readRDS("./data/processed/extended_selection_table_high_snr_models_and_allmimetic_sounds.RDS")
sp_param <- spectro_analysis(high_snr_est, parallel = 14)
sp_param$species.vocalization <- high_snr_est$species.vocalization
write.csv(sp_param, "./data/processed/acoustic_parameters_model_and_mimetic.csv",
row.names = FALSE)
mel_param <- mfcc_stats(high_snr_est, parallel = 4)
mel_param$species.vocalization <- high_snr_est$species.vocalization
write.csv(mel_param, "./data/processed/mfcc_stats_model_and_mimetic.csv",
row.names = FALSE)model_sp_param <- sp_param[grep("mimetic", sp_param$species.vocalization,
invert = TRUE), ]
# only bellbird dialects
model_sp_param <- model_sp_param[grep("P.tricarunculatus-", model_sp_param$species.vocalization),
]
# remove babbling due to low sample size (16)
model_sp_param <- model_sp_param[model_sp_param$species.vocalization !=
"P.tricarunculatus-babbling", ]
model_sp_param$species.vocalization <- as.factor(make.names(model_sp_param$species.vocalization,
unique = FALSE, allow_ = TRUE))
# Create data subsets
partition <- createDataPartition(y = model_sp_param$species.vocalization,
times = 1, p = 0.75, list = FALSE)
trainset <- model_sp_param[partition, -c(1, 2)]
testset <- model_sp_param[-partition, -c(1, 2)]
trcontrol <- trainControl(method = "repeatedcv", number = 100, savePredictions = TRUE,
classProbs = TRUE, returnResamp = "all", repeats = 100)
pred_model <- train(species.vocalization ~ ., data = trainset, method = "rf",
trControl = trcontrol, metric = "Accuracy", preProcess = "BoxCox")
# save confusion matrix
conf_mat <- confusionMatrix(predict(pred_model, testset), testset$species.vocalization)
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x) sum(testset$species.vocalization ==
x))
conf_df$proportion <- conf_df$Freq/conf_df$total
saveRDS(list(pred_model_bb = pred_model, conf_mat_bb = conf_mat, confusion_df_bb = conf_df,
testset_bb = testset), "./data/processed/random_forest_model_results_only_bellbird_dialects.RDS")mel_param <- read.csv("./data/processed/mfcc_stats_model_and_mimetic.csv")
model_mel_param <- mel_param[grep("mimetic", mel_param$species.vocalization,
invert = TRUE), ]
# only bellbird dialects
model_mel_param <- model_mel_param[grep("P.tricarunculatus-", model_mel_param$species.vocalization),
]
# remove babbling due to low sample size (16)
model_mel_param <- model_mel_param[model_mel_param$species.vocalization !=
"P.tricarunculatus-babbling", ]
model_mel_param$species.vocalization <- as.factor(make.names(model_mel_param$species.vocalization,
unique = FALSE, allow_ = TRUE))
# Create data subsets
set.seed(123)
partition <- createDataPartition(y = model_mel_param$species.vocalization,
times = 1, p = 0.75, list = FALSE)
trainset <- model_mel_param[partition, -c(1, 2)]
testset <- model_mel_param[-partition, -c(1, 2)]
trcontrol <- trainControl(method = "repeatedcv", number = 30, savePredictions = TRUE,
classProbs = TRUE, returnResamp = "all", repeats = 30)
pred_model <- train(species.vocalization ~ ., data = trainset, method = "rf",
trControl = trcontrol, metric = "Accuracy", preProcess = "BoxCox")
# save confusion matrix
conf_mat <- confusionMatrix(predict(pred_model, testset), testset$species.vocalization)
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x) sum(testset$species.vocalization ==
x))
conf_df$proportion <- conf_df$Freq/conf_df$total
saveRDS(list(pred_model_bb = pred_model, conf_mat_bb = conf_mat, confusion_df_bb = conf_df,
testset_bb = testset), "./data/processed/random_forest_model_results_only_bellbird_dialects_MFCCs.RDS")rf_model_results_bb <- readRDS("./data/processed/random_forest_model_results_only_bellbird_dialects.RDS")
confusion_df <- rf_model_results_bb$confusion_df_bb
confusion_df$Prediction <- gsub(".model|P.tricarunculatus.", "", confusion_df$Prediction)
confusion_df$Reference <- gsub(".model|P.tricarunculatus.", "", confusion_df$Reference)
ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))# print confusion matrix results
rf_model_results_bb$conf_mat_bb$overall## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.670520e-01 8.071442e-01 8.072182e-01 9.138199e-01 4.104046e-01
## AccuracyPValue McnemarPValue
## 1.415534e-35 1.207237e-02
rf_model_results_bb_mfcc <- readRDS("./data/processed/random_forest_model_results_only_bellbird_dialects_MFCCs.RDS")
confusion_df <- rf_model_results_bb_mfcc$confusion_df_bb
confusion_df$Prediction <- gsub(".model|P.tricarunculatus.", "", confusion_df$Prediction)
confusion_df$Reference <- gsub(".model|P.tricarunculatus.", "", confusion_df$Reference)
ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))# print confusion matrix results
rf_model_results_bb_mfcc$conf_mat_bb$overall## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.479769e-01 9.232703e-01 9.035434e-01 9.759389e-01 4.104046e-01
## AccuracyPValue McnemarPValue
## 1.021618e-51 NaN
mimetic_no_calls <- sp_param[grep("mimetic_P.tricarunculatus-adult",
sp_param$species.vocalization), ]
mimetic_no_calls$species.vocalization <- as.factor(make.names(mimetic_no_calls$species.vocalization,
unique = FALSE, allow_ = TRUE))
mimetic_no_calls$pred.class <- as.character(predict(rf_model_results_bb$pred_model_bb,
mimetic_no_calls))
table(mimetic_no_calls$pred.class)##
## P.tricarunculatus.Monteverde P.tricarunculatus.Nicaragua
## 28 127
## P.tricarunculatus.Panama P.tricarunculatus.Talamanca
## 9 86
mel_param <- read.csv("./data/processed/mfcc_stats_model_and_mimetic.csv")
mimetic_no_calls_mel <- mel_param[grep("mimetic_P.tricarunculatus-adult",
mel_param$species.vocalization), ]
mimetic_no_calls_mel$species.vocalization <- as.factor(make.names(mimetic_no_calls_mel$species.vocalization,
unique = FALSE, allow_ = TRUE))
mimetic_no_calls_mel$pred.class <- as.character(predict(rf_model_results_bb_mfcc$pred_model_bb,
mimetic_no_calls_mel))
table(mimetic_no_calls_mel$pred.class)##
## P.tricarunculatus.Monteverde P.tricarunculatus.Nicaragua
## 28 138
## P.tricarunculatus.Talamanca
## 84
model_sp_param <- sp_param[grep("mimetic", sp_param$species.vocalization,
invert = TRUE), ]
# remove babbling due to low sample size (16)
model_sp_param <- model_sp_param[model_sp_param$species.vocalization !=
"P.tricarunculatus-babbling", ]
model_sp_param$species.vocalization <- as.factor(make.names(model_sp_param$species.vocalization,
unique = FALSE, allow_ = TRUE))
# Create data subsets
partition <- createDataPartition(y = model_sp_param$species.vocalization,
times = 1, p = 0.75, list = FALSE)
trainset <- model_sp_param[partition, -c(1, 2)]
testset <- model_sp_param[-partition, -c(1, 2)]
trcontrol <- trainControl(method = "repeatedcv", number = 30, savePredictions = TRUE,
classProbs = TRUE, returnResamp = "all", repeats = 100)
pred_model <- train(species.vocalization ~ ., data = trainset, method = "rf",
trControl = trcontrol, metric = "Accuracy", preProcess = "BoxCox")
# save confusion matrix
conf_mat <- confusionMatrix(predict(pred_model, testset), testset$species.vocalization)
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x) sum(testset$species.vocalization ==
x))
conf_df$proportion <- conf_df$Freq/conf_df$total
saveRDS(list(pred_model = pred_model, conf_mat = conf_mat, confusion_df = conf_df,
testset = testset), "./data/processed/random_forest_model_results_with_dialects.RDS")model_mel_param <- mel_param[grep("mimetic", mel_param$species.vocalization,
invert = TRUE), ]
# remove babbling due to low sample size (16)
model_mel_param <- model_mel_param[model_mel_param$species.vocalization !=
"P.tricarunculatus-babbling", ]
model_mel_param$species.vocalization <- as.factor(make.names(model_mel_param$species.vocalization,
unique = FALSE, allow_ = TRUE))
# Create data subsets
set.seed(123)
partition <- createDataPartition(y = model_mel_param$species.vocalization,
times = 1, p = 0.75, list = FALSE)
trainset <- model_mel_param[partition, -c(1, 2)]
testset <- model_mel_param[-partition, -c(1, 2)]
trcontrol <- trainControl(method = "repeatedcv", number = 30, savePredictions = TRUE,
classProbs = TRUE, returnResamp = "all", repeats = 30)
pred_model <- train(species.vocalization ~ ., data = trainset, method = "rf",
trControl = trcontrol, metric = "Accuracy", preProcess = "BoxCox")
# save confusion matrix
conf_mat <- confusionMatrix(predict(pred_model, testset), testset$species.vocalization)
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x) sum(testset$species.vocalization ==
x))
conf_df$proportion <- conf_df$Freq/conf_df$total
saveRDS(list(pred_model = pred_model, conf_mat = conf_mat, confusion_df = conf_df,
testset = testset), "./data/processed/random_forest_model_results_with_dialects_MFCCs.RDS")rf_model_results <- readRDS("./data/processed/random_forest_model_results_with_dialects.RDS")
confusion_df <- rf_model_results$confusion_df
confusion_df$Prediction <- gsub(".model", "", confusion_df$Prediction)
confusion_df$Reference <- gsub(".model", "", confusion_df$Reference)
ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))# print confusion matrix results
rf_model_results$conf_mat$overall## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.276190e-01 8.756010e-01 9.020027e-01 9.482729e-01 6.057143e-01
## AccuracyPValue McnemarPValue
## 5.152081e-64 NaN
rf_model_results_mfcc <- readRDS("./data/processed/random_forest_model_results_with_dialects_MFCCs.RDS")
confusion_df <- rf_model_results_mfcc$confusion_df
confusion_df$Prediction <- gsub(".model", "", confusion_df$Prediction)
confusion_df$Reference <- gsub(".model", "", confusion_df$Reference)
ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))# print confusion matrix results
rf_model_results_mfcc$conf_mat$overall## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.466667e-01 9.089744e-01 9.238382e-01 9.642715e-01 6.057143e-01
## AccuracyPValue McnemarPValue
## 7.428844e-74 NaN
mimetic_sp_param <- sp_param[grep("mimetic", sp_param$species.vocalization),
]
mimetic_no_calls <- mimetic_sp_param[!mimetic_sp_param$species.vocalization %in%
c("mimetic_P.tricarunculatus-babbling", "mimetic_P.tricarunculatus-aggresive",
"mimetic_P.tricarunculatus-whistle"), ]
mimetic_no_calls$species.vocalization <- as.factor(make.names(mimetic_no_calls$species.vocalization,
unique = FALSE, allow_ = TRUE))
mimetic_no_calls$pred.class <- as.character(predict(rf_model_results$pred_model,
mimetic_no_calls))
mimetic_no_calls$species.vocalization <- as.character(mimetic_no_calls$species.vocalization)
mimetic_no_calls$pred.class[grep("montezuma", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("montezuma",
mimetic_no_calls$species.vocalization)] <- "P.montezuma"
mimetic_no_calls$pred.class[grep("torquatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("torquatus",
mimetic_no_calls$species.vocalization)] <- "P.torquatus"
mimetic_no_calls$pred.class[grep("sulfuratus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("sulfuratus",
mimetic_no_calls$species.vocalization)] <- "R.sulfuratus"
mimetic_no_calls$species.vocalization[grep("tricarunculatus", mimetic_no_calls$species.vocalization)] <- "P.tricarunculatus.Talamanca"
conf_mat <- confusionMatrix(data = factor(mimetic_no_calls$pred.class),
reference = factor(mimetic_no_calls$species.vocalization, levels = unique(mimetic_no_calls$pred.class)))
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x) sum(mimetic_no_calls$species.vocalization ==
x))
conf_df$proportion <- conf_df$Freq/conf_df$total
conf_df <- conf_df[conf_df$Reference %in% unique(mimetic_no_calls$species.vocalization),
]
conf_df$Prediction <- factor(conf_df$Prediction, levels = c("R.sulfuratus",
"P.torquatus", "P.montezuma", "P.tricarunculatus.Talamanca", "P.tricarunculatus.Nicaragua",
"P.tricarunculatus.Panama", "P.tricarunculatus.Monteverde"))
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))conf_mat$overall## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.958894e-01 6.093952e-01 7.738987e-01 8.166407e-01 6.520198e-01
## AccuracyPValue McnemarPValue
## 1.537619e-32 NaN
mimetic_mel_param <- mel_param[grep("mimetic", mel_param$species.vocalization),
]
mimetic_no_calls <- mimetic_mel_param[!mimetic_mel_param$species.vocalization %in%
c("mimetic_P.tricarunculatus-babbling", "mimetic_P.tricarunculatus-aggresive",
"mimetic_P.tricarunculatus-whistle"), ]
mimetic_no_calls$species.vocalization <- as.factor(make.names(mimetic_no_calls$species.vocalization,
unique = FALSE, allow_ = TRUE))
mimetic_no_calls$pred.class <- as.character(predict(rf_model_results_mfcc$pred_model,
mimetic_no_calls))
mimetic_no_calls$species.vocalization <- as.character(mimetic_no_calls$species.vocalization)
mimetic_no_calls$pred.class[grep("montezuma", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("montezuma",
mimetic_no_calls$species.vocalization)] <- "P.montezuma"
mimetic_no_calls$pred.class[grep("torquatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("torquatus",
mimetic_no_calls$species.vocalization)] <- "P.torquatus"
mimetic_no_calls$pred.class[grep("sulfuratus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("sulfuratus",
mimetic_no_calls$species.vocalization)] <- "R.sulfuratus"
mimetic_no_calls$species.vocalization[grep("tricarunculatus", mimetic_no_calls$species.vocalization)] <- "P.tricarunculatus.Talamanca"
conf_mat <- confusionMatrix(data = factor(mimetic_no_calls$pred.class),
reference = factor(mimetic_no_calls$species.vocalization, levels = unique(mimetic_no_calls$pred.class)))
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x) sum(mimetic_no_calls$species.vocalization ==
x))
conf_df$proportion <- conf_df$Freq/conf_df$total
conf_df <- conf_df[conf_df$Reference %in% unique(mimetic_no_calls$species.vocalization),
]
conf_df$Prediction <- factor(conf_df$Prediction, levels = c("R.sulfuratus",
"P.torquatus", "P.montezuma", "P.tricarunculatus.Talamanca", "P.tricarunculatus.Nicaragua",
"P.tricarunculatus.Panama", "P.tricarunculatus.Monteverde"))
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))conf_mat$overall## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.491141e-01 5.401192e-01 7.256352e-01 7.715510e-01 6.520198e-01
## AccuracyPValue McnemarPValue
## 2.426438e-15 NaN
mimetic_sp_param <- sp_param[grep("mimetic", sp_param$species.vocalization),
]
mimetic_no_calls <- mimetic_sp_param[!mimetic_sp_param$species.vocalization %in%
c("mimetic_P.tricarunculatus-babbling", "mimetic_P.tricarunculatus-aggresive",
"mimetic_P.tricarunculatus-whistle"), ]
mimetic_no_calls$species.vocalization <- as.factor(make.names(mimetic_no_calls$species.vocalization,
unique = FALSE, allow_ = TRUE))
mimetic_no_calls$pred.class <- as.character(predict(rf_model_results$pred_model,
mimetic_no_calls))
mimetic_no_calls$species.vocalization <- as.character(mimetic_no_calls$species.vocalization)
mimetic_no_calls$pred.class[grep("tricarun", mimetic_no_calls$pred.class)] <- "P.tricarunculatus"
mimetic_no_calls$pred.class[grep("montezuma", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("montezuma",
mimetic_no_calls$species.vocalization)] <- "P.montezuma"
mimetic_no_calls$pred.class[grep("torquatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("torquatus",
mimetic_no_calls$species.vocalization)] <- "P.torquatus"
mimetic_no_calls$pred.class[grep("sulfuratus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("sulfuratus",
mimetic_no_calls$species.vocalization)] <- "R.sulfuratus"
mimetic_no_calls$pred.class[grep("tricarunculatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("tricarunculatus",
mimetic_no_calls$species.vocalization)] <- "P.tricarunculatus"
conf_mat <- confusionMatrix(data = factor(mimetic_no_calls$pred.class),
reference = factor(mimetic_no_calls$species.vocalization))
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x) sum(mimetic_no_calls$species.vocalization ==
x))
conf_df$proportion <- conf_df$Freq/conf_df$total
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))conf_mat$overall## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.419561e-01 6.884951e-01 8.218597e-01 8.606167e-01 6.520198e-01
## AccuracyPValue McnemarPValue
## 1.543505e-57 8.687272e-05
mimetic_mel_param <- mel_param[grep("mimetic", mel_param$species.vocalization),
]
mimetic_no_calls <- mimetic_mel_param[!mimetic_mel_param$species.vocalization %in%
c("mimetic_P.tricarunculatus-babbling", "mimetic_P.tricarunculatus-aggresive",
"mimetic_P.tricarunculatus-whistle"), ]
mimetic_no_calls$species.vocalization <- as.factor(make.names(mimetic_no_calls$species.vocalization,
unique = FALSE, allow_ = TRUE))
mimetic_no_calls$pred.class <- as.character(predict(rf_model_results_mfcc$pred_model,
mimetic_no_calls))
mimetic_no_calls$species.vocalization <- as.character(mimetic_no_calls$species.vocalization)
mimetic_no_calls$pred.class[grep("tricarun", mimetic_no_calls$pred.class)] <- "P.tricarunculatus"
mimetic_no_calls$pred.class[grep("montezuma", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("montezuma",
mimetic_no_calls$species.vocalization)] <- "P.montezuma"
mimetic_no_calls$pred.class[grep("torquatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("torquatus",
mimetic_no_calls$species.vocalization)] <- "P.torquatus"
mimetic_no_calls$pred.class[grep("sulfuratus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("sulfuratus",
mimetic_no_calls$species.vocalization)] <- "R.sulfuratus"
mimetic_no_calls$pred.class[grep("tricarunculatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("tricarunculatus",
mimetic_no_calls$species.vocalization)] <- "P.tricarunculatus"
conf_mat <- confusionMatrix(data = factor(mimetic_no_calls$pred.class),
reference = factor(mimetic_no_calls$species.vocalization))
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x) sum(mimetic_no_calls$species.vocalization ==
x))
conf_df$proportion <- conf_df$Freq/conf_df$total
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))conf_mat$overall## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.944720e-01 6.109651e-01 7.724303e-01 8.152803e-01 6.520198e-01
## AccuracyPValue McnemarPValue
## 6.637231e-32 2.611333e-08
model_sp_param <- sp_param[grep("mimetic", sp_param$species.vocalization,
invert = TRUE), ]
# remove babbling due to low sample size (16)
model_sp_param <- model_sp_param[model_sp_param$species.vocalization !=
"P.tricarunculatus-babbling", ]
# collapse bellbird dialects
model_sp_param$species.vocalization[grep("P.tricarunculatus", model_sp_param$species.vocalization)] <- "P.tricarunculatus"
model_sp_param$species.vocalization <- as.factor(make.names(model_sp_param$species.vocalization,
unique = FALSE, allow_ = TRUE))
# Create data subsets
partition <- createDataPartition(y = model_sp_param$species.vocalization,
times = 1, p = 0.75, list = FALSE)
trainset <- model_sp_param[partition, -c(1, 2)]
testset <- model_sp_param[-partition, -c(1, 2)]
trcontrol <- trainControl(method = "repeatedcv", number = 100, savePredictions = TRUE,
classProbs = TRUE, returnResamp = "all", repeats = 100)
pred_model <- train(species.vocalization ~ ., data = trainset, method = "rf",
trControl = trcontrol, metric = "Accuracy", preProcess = "BoxCox")
# save confusion matrix
conf_mat <- confusionMatrix(predict(pred_model, testset), testset$species.vocalization)
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x) sum(testset$species.vocalization ==
x))
conf_df$proportion <- conf_df$Freq/conf_df$total
# nd = no dialects
saveRDS(list(pred_model_nd = pred_model, conf_mat_nd = conf_mat, confusion_df_nd = conf_df,
testset_nd = testset), "./data/processed/random_forest_model_results_collapsing_dialects.RDS")model_mel_param <- mel_param[grep("mimetic", mel_param$species.vocalization,
invert = TRUE), ]
# remove babbling due to low sample size (16)
model_mel_param <- model_mel_param[model_mel_param$species.vocalization !=
"P.tricarunculatus-babbling", ]
# collapse bellbird dialects
model_mel_param$species.vocalization[grep("P.tricarunculatus", model_mel_param$species.vocalization)] <- "P.tricarunculatus"
model_mel_param$species.vocalization <- as.factor(make.names(model_mel_param$species.vocalization,
unique = FALSE, allow_ = TRUE))
# Create data subsets
partition <- createDataPartition(y = model_mel_param$species.vocalization,
times = 1, p = 0.75, list = FALSE)
trainset <- model_mel_param[partition, -c(1, 2)]
testset <- model_mel_param[-partition, -c(1, 2)]
trcontrol <- trainControl(method = "repeatedcv", number = 30, savePredictions = TRUE,
classProbs = TRUE, returnResamp = "all", repeats = 30)
pred_model <- train(species.vocalization ~ ., data = trainset, method = "rf",
trControl = trcontrol, metric = "Accuracy", preProcess = "BoxCox")
# save confusion matrix
conf_mat <- confusionMatrix(predict(pred_model, testset), testset$species.vocalization)
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x) sum(testset$species.vocalization ==
x))
conf_df$proportion <- conf_df$Freq/conf_df$total
# nd = no dialects
saveRDS(list(pred_model_nd = pred_model, conf_mat_nd = conf_mat, confusion_df_nd = conf_df,
testset_nd = testset), "./data/processed/random_forest_model_results_collapsing_dialects_MFCCs.RDS")rf_model_results_nd <- readRDS("./data/processed/random_forest_model_results_collapsing_dialects.RDS")
confusion_df <- rf_model_results_nd$confusion_df_nd
confusion_df$Prediction <- gsub(".model", "", confusion_df$Prediction)
confusion_df$Reference <- gsub(".model", "", confusion_df$Reference)
ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))# print confusion matrix results
rf_model_results_nd$conf_mat_nd$overall## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.676190e-01 9.371952e-01 9.486601e-01 9.810260e-01 6.057143e-01
## AccuracyPValue McnemarPValue
## 1.323050e-86 NaN
rf_model_results_nd_mfccs <- readRDS("./data/processed/random_forest_model_results_collapsing_dialects_MFCCs.RDS")
confusion_df <- rf_model_results_nd_mfccs$confusion_df_nd
confusion_df$Prediction <- gsub(".model", "", confusion_df$Prediction)
confusion_df$Reference <- gsub(".model", "", confusion_df$Reference)
ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))# print confusion matrix results
rf_model_results_nd_mfccs$conf_mat_nd$overall## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.771863e-01 9.561112e-01 9.604884e-01 9.881576e-01 6.045627e-01
## AccuracyPValue McnemarPValue
## 5.729264e-94 NaN
mimetic_sp_param <- sp_param[grep("mimetic", sp_param$species.vocalization),
]
mimetic_no_calls <- mimetic_sp_param[!mimetic_sp_param$species.vocalization %in%
c("mimetic_P.tricarunculatus-babbling", "mimetic_P.tricarunculatus-aggresive",
"mimetic_P.tricarunculatus-whistle"), ]
mimetic_no_calls$species.vocalization <- as.factor(make.names(mimetic_no_calls$species.vocalization,
unique = FALSE, allow_ = TRUE))
mimetic_no_calls$pred.class <- as.character(predict(rf_model_results_nd$pred_model_nd,
mimetic_no_calls))
mimetic_no_calls$species.vocalization <- as.character(mimetic_no_calls$species.vocalization)
mimetic_no_calls$pred.class[grep("tricarun", mimetic_no_calls$pred.class)] <- "P.tricarunculatus"
mimetic_no_calls$pred.class[grep("montezuma", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("montezuma",
mimetic_no_calls$species.vocalization)] <- "P.montezuma"
mimetic_no_calls$pred.class[grep("torquatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("torquatus",
mimetic_no_calls$species.vocalization)] <- "P.torquatus"
mimetic_no_calls$pred.class[grep("sulfuratus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("sulfuratus",
mimetic_no_calls$species.vocalization)] <- "R.sulfuratus"
mimetic_no_calls$pred.class[grep("tricarunculatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("tricarunculatus",
mimetic_no_calls$species.vocalization)] <- "P.tricarunculatus"
conf_mat <- confusionMatrix(data = factor(mimetic_no_calls$pred.class),
reference = factor(mimetic_no_calls$species.vocalization))
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x) sum(mimetic_no_calls$species.vocalization ==
x))
conf_df$proportion <- conf_df$Freq/conf_df$total
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))conf_mat$overall## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.284904e-01 6.664378e-01 8.077892e-01 8.478136e-01 6.520198e-01
## AccuracyPValue McnemarPValue
## 2.588161e-49 2.695079e-05
mimetic_mel_param <- mel_param[grep("mimetic", mel_param$species.vocalization),
]
mimetic_no_calls <- mimetic_mel_param[!mimetic_mel_param$species.vocalization %in%
c("mimetic_P.tricarunculatus-babbling", "mimetic_P.tricarunculatus-aggresive",
"mimetic_P.tricarunculatus-whistle"), ]
mimetic_no_calls$species.vocalization <- as.factor(make.names(mimetic_no_calls$species.vocalization,
unique = FALSE, allow_ = TRUE))
mimetic_no_calls$pred.class <- as.character(predict(rf_model_results_nd_mfccs$pred_model_nd,
mimetic_no_calls))
mimetic_no_calls$species.vocalization <- as.character(mimetic_no_calls$species.vocalization)
mimetic_no_calls$pred.class[grep("tricarun", mimetic_no_calls$pred.class)] <- "P.tricarunculatus"
mimetic_no_calls$pred.class[grep("montezuma", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("montezuma",
mimetic_no_calls$species.vocalization)] <- "P.montezuma"
mimetic_no_calls$pred.class[grep("torquatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("torquatus",
mimetic_no_calls$species.vocalization)] <- "P.torquatus"
mimetic_no_calls$pred.class[grep("sulfuratus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("sulfuratus",
mimetic_no_calls$species.vocalization)] <- "R.sulfuratus"
mimetic_no_calls$pred.class[grep("tricarunculatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("tricarunculatus",
mimetic_no_calls$species.vocalization)] <- "P.tricarunculatus"
conf_mat <- confusionMatrix(data = factor(mimetic_no_calls$pred.class),
reference = factor(mimetic_no_calls$species.vocalization))
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x) sum(mimetic_no_calls$species.vocalization ==
x))
conf_df$proportion <- conf_df$Freq/conf_df$total
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))conf_mat$overall## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 6.832034e-01 4.563042e-01 6.582118e-01 7.074298e-01 6.520198e-01
## AccuracyPValue McnemarPValue
## 7.220395e-03 1.666545e-21
model_sp_param <- sp_param[grep("mimetic", sp_param$species.vocalization,
invert = TRUE), ]
# only talamanca
model_sp_param <- model_sp_param[grep("Nicaragua|Monteverde|Panama",
model_sp_param$species.vocalization, invert = TRUE), ]
# remove babbling due to low sample size (16)
model_sp_param <- model_sp_param[model_sp_param$species.vocalization !=
"P.tricarunculatus-babbling", ]
model_sp_param$species.vocalization <- as.factor(make.names(model_sp_param$species.vocalization,
unique = FALSE, allow_ = TRUE))
# Create data subsets
partition <- createDataPartition(y = model_sp_param$species.vocalization,
times = 1, p = 0.75, list = FALSE)
trainset <- model_sp_param[partition, -c(1, 2)]
testset <- model_sp_param[-partition, -c(1, 2)]
trcontrol <- trainControl(method = "repeatedcv", number = 100, savePredictions = TRUE,
classProbs = TRUE, returnResamp = "all", repeats = 100)
pred_model <- train(species.vocalization ~ ., data = trainset, method = "rf",
trControl = trcontrol, metric = "Accuracy", preProcess = "BoxCox")
# save confusion matrix
conf_mat <- confusionMatrix(predict(pred_model, testset), testset$species.vocalization)
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x) sum(testset$species.vocalization ==
x))
conf_df$proportion <- conf_df$Freq/conf_df$total
saveRDS(list(pred_model_talmn = pred_model, conf_mat_talmn = conf_mat,
confusion_df_talmn = conf_df, testset_talmn = testset), "./data/processed/random_forest_model_results_talamanca_dialect.RDS")rf_model_results_talmn <- readRDS("./data/processed/random_forest_model_results_talamanca_dialect.RDS")
confusion_df <- rf_model_results_talmn$confusion_df_talmn
confusion_df$Prediction <- gsub(".model", "", confusion_df$Prediction)
confusion_df$Reference <- gsub(".model", "", confusion_df$Reference)
ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))# print confusion matrix results
rf_model_results_talmn$conf_mat_talmn$overall## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.948052e-01 9.828363e-01 9.813615e-01 9.993703e-01 8.259740e-01
## AccuracyPValue McnemarPValue
## 3.621196e-29 NaN
mimetic_sp_param <- sp_param[grep("mimetic", sp_param$species.vocalization),
]
mimetic_no_calls <- mimetic_sp_param[!mimetic_sp_param$species.vocalization %in%
c("mimetic_P.tricarunculatus-babbling", "mimetic_P.tricarunculatus-aggresive",
"mimetic_P.tricarunculatus-whistle"), ]
mimetic_no_calls$species.vocalization <- as.factor(make.names(mimetic_no_calls$species.vocalization,
unique = FALSE, allow_ = TRUE))
mimetic_no_calls$pred.class <- as.character(predict(rf_model_results_talmn$pred_model_talmn,
mimetic_no_calls))
mimetic_no_calls$species.vocalization <- as.character(mimetic_no_calls$species.vocalization)
mimetic_no_calls$pred.class[grep("tricarun", mimetic_no_calls$pred.class)] <- "P.tricarunculatus"
mimetic_no_calls$pred.class[grep("montezuma", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("montezuma",
mimetic_no_calls$species.vocalization)] <- "P.montezuma"
mimetic_no_calls$pred.class[grep("torquatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("torquatus",
mimetic_no_calls$species.vocalization)] <- "P.torquatus"
mimetic_no_calls$pred.class[grep("sulfuratus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("sulfuratus",
mimetic_no_calls$species.vocalization)] <- "R.sulfuratus"
mimetic_no_calls$pred.class[grep("tricarunculatus", mimetic_no_calls$pred.class)] <- mimetic_no_calls$species.vocalization[grep("tricarunculatus",
mimetic_no_calls$species.vocalization)] <- "P.tricarunculatus"
conf_mat <- confusionMatrix(data = factor(mimetic_no_calls$pred.class),
reference = factor(mimetic_no_calls$species.vocalization))
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x) sum(mimetic_no_calls$species.vocalization ==
x))
conf_df$proportion <- conf_df$Freq/conf_df$total
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
conf_mat$overallTest data
vgg <- read.csv("./data/processed/turicreate_results_test_many_dialects.csv")
# vgg$category <- factor(vgg$category, levels =
# c(unique(vgg$category), 'P.tricarunculatus-Monteverde',
# 'P.tricarunculatus-Nicaragua', 'P.tricarunculatus-Panama'))
vgg$category <- factor(vgg$category)
vgg$pred <- factor(vgg$pred)
conf_mat <- confusionMatrix(data = vgg$pred, reference = vgg$category)
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x) sum(vgg$category ==
x))
conf_df$proportion <- conf_df$Freq/conf_df$total
conf_df <- conf_df[complete.cases(conf_df), ]
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))conf_mat$overall## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.676190e-01 9.456068e-01 9.486601e-01 9.810260e-01 6.057143e-01
## AccuracyPValue McnemarPValue
## 1.323050e-86 NaN
Mimetic bellbird
vgg <- read.csv("./data/processed/turicreate_results_mimetic_many_dialects.csv")
vgg$category <- factor(vgg$category, levels = c(unique(vgg$category),
"P.tricarunculatus-Monteverde", "P.tricarunculatus-Nicaragua",
"P.tricarunculatus-Panama"))
vgg$pred <- factor(vgg$pred)
conf_mat <- confusionMatrix(data = vgg$pred, reference = vgg$category)
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x) sum(vgg$category ==
x))
conf_df$proportion <- conf_df$Freq/conf_df$total
conf_df <- conf_df[complete.cases(conf_df), ]
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))conf_mat$overall## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.410654e-01 5.005928e-01 7.179701e-01 7.632020e-01 6.203641e-01
## AccuracyPValue McnemarPValue
## 4.971336e-23 NaN
Test data
vgg <- read.csv("./data/processed/turicreate_results_test_no_dialects.csv")
vgg$category <- factor(vgg$categorynd)
vgg$pred <- factor(vgg$pred)
conf_mat <- confusionMatrix(data = vgg$pred, reference = vgg$category)
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x) sum(vgg$category ==
x))
conf_df$proportion <- conf_df$Freq/conf_df$total
conf_df <- conf_df[complete.cases(conf_df), ]
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))conf_mat$overall## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.923810e-01 9.854012e-01 9.806079e-01 9.979203e-01 6.057143e-01
## AccuracyPValue McnemarPValue
## 2.788403e-106 NaN
Mimetic bellbird
vgg <- read.csv("./data/processed/turicreate_results_mimetic_no_dialects.csv")
# vgg$category <- factor(vgg$category, levels =
# c(unique(vgg$category), 'P.tricarunculatus-Monteverde',
# 'P.tricarunculatus-Nicaragua', 'P.tricarunculatus-Panama'))
vgg$category <- factor(vgg$categorynd)
vgg$pred <- factor(vgg$prednd)
conf_mat <- confusionMatrix(data = vgg$pred, reference = vgg$category)
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x) sum(vgg$category ==
x))
conf_df$proportion <- conf_df$Freq/conf_df$total
conf_df <- conf_df[complete.cases(conf_df), ]
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
direction = 1) + geom_text(aes(label = round(proportion, 2)),
color = "black") + labs(x = "Manually labeled category", y = "Predicted category") +
theme_classic() + theme(axis.text.x = element_text(color = "black",
size = 11, angle = 30, vjust = 0.8, hjust = 0.8))conf_mat$overall## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.199595e-01 6.705305e-01 7.994401e-01 8.392031e-01 6.203641e-01
## AccuracyPValue McnemarPValue
## 6.558722e-63 8.511534e-10
Session information
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
##
## locale:
## [1] LC_CTYPE=pt_BR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=es_CR.UTF-8 LC_COLLATE=pt_BR.UTF-8
## [5] LC_MONETARY=es_CR.UTF-8 LC_MESSAGES=pt_BR.UTF-8
## [7] LC_PAPER=es_CR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] randomForest_4.7-1.1 caret_6.0-93 lattice_0.20-44
## [4] soundgen_2.5.2 shinyBS_0.61.1 formatR_1.12
## [7] ggplot2_3.3.6 viridis_0.6.2 viridisLite_0.4.1
## [10] Rraven_1.0.13 warbleR_1.1.28 NatureSounds_1.0.4
## [13] seewave_2.2.0 tuneR_1.4.0 remotes_2.4.2
## [16] knitr_1.40 kableExtra_1.3.4 klippy_0.0.0.9500
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-3 rjson_0.2.21 ellipsis_0.3.2
## [4] class_7.3-19 rstudioapi_0.14 proxy_0.4-27
## [7] farver_2.1.1 listenv_0.8.0 prodlim_2019.11.13
## [10] fansi_1.0.3 lubridate_1.8.0 xml2_1.3.3
## [13] codetools_0.2-18 splines_4.1.0 cachem_1.0.6
## [16] jsonlite_1.8.0 pROC_1.18.0 shiny_1.7.2
## [19] compiler_4.1.0 httr_1.4.4 assertthat_0.2.1
## [22] Matrix_1.3-4 fastmap_1.1.0 cli_3.4.0
## [25] later_1.3.0 htmltools_0.5.3 tools_4.1.0
## [28] gtable_0.3.1 glue_1.6.2 reshape2_1.4.4
## [31] dplyr_1.0.10 Rcpp_1.0.9 jquerylib_0.1.4
## [34] vctrs_0.4.1 svglite_2.1.0 nlme_3.1-152
## [37] iterators_1.0.14 timeDate_4021.104 gower_1.0.0
## [40] xfun_0.32 stringr_1.4.1 globals_0.16.1
## [43] rvest_1.0.3 mime_0.12 lifecycle_1.0.2
## [46] future_1.28.0 MASS_7.3-54 scales_1.2.1
## [49] ipred_0.9-13 promises_1.2.0.1 parallel_4.1.0
## [52] RColorBrewer_1.1-3 yaml_2.3.5 pbapply_1.5-0
## [55] gridExtra_2.3 sass_0.4.2 rpart_4.1-15
## [58] stringi_1.7.8 highr_0.9 foreach_1.5.2
## [61] e1071_1.7-11 hardhat_1.2.0 lava_1.6.10
## [64] rlang_1.0.5 pkgconfig_2.0.3 systemfonts_1.0.4
## [67] dtw_1.22-3 bitops_1.0-7 evaluate_0.16
## [70] purrr_0.3.4 labeling_0.4.2 recipes_1.0.1
## [73] tidyselect_1.1.2 parallelly_1.32.1 plyr_1.8.7
## [76] magrittr_2.0.3 R6_2.5.1 fftw_1.0-7
## [79] generics_0.1.3 DBI_1.1.1 pillar_1.8.1
## [82] withr_2.5.0 survival_3.2-11 RCurl_1.98-1.8
## [85] nnet_7.3-16 tibble_3.1.8 future.apply_1.9.1
## [88] utf8_1.2.2 rmarkdown_2.16 grid_4.1.0
## [91] data.table_1.14.2 ModelMetrics_1.2.2.2 digest_0.6.29
## [94] webshot_0.5.3 xtable_1.8-4 httpuv_1.6.6
## [97] stats4_4.1.0 signal_0.7-7 munsell_0.5.0
## [100] bslib_0.4.0