# Best tool per abundance metric, restricted to "counts" and "coverage".
# "Best" means the highest F1_Score within each metric; exactly one row is
# kept per metric even when several rows share the top score.
best_counts_cov <- complete_performance %>%
  filter(metric %in% c("counts", "coverage")) %>%
  group_by(metric) %>%
  slice_max(F1_Score, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  # Keep only the columns shown in the summary table.
  select(
    metric,
    tool,
    F1_Score,
    auc_pr,
    Precision,
    Recall
  ) %>%
  # Highest-scoring metric first.
  arrange(desc(F1_Score))
# Render the summary as a compact, styled table (values rounded to 3 digits).
best_counts_cov %>%
  kable(
    caption = "Best Performing Method for Counts and Coverage",
    digits = 3
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE
  )
# Metric comparison box plots ----

# Consistent display order of the abundance metrics on the categorical axis.
# Any `metric` value not listed here becomes NA when converted to a factor.
metric_order <- c("counts", "tpm", "breadth", "coverage", "rpkm", "relative")

#' Box plot with overlaid points of one performance score per abundance metric
#'
#' Reads the globals `metric_order` (factor levels for `metric`) and
#' `metric_colors` (manual fill palette).
#'
#' @param data Data frame with a `metric` column and the column named by
#'   `score_col`.
#' @param score_col Name (string) of the score column to plot.
#' @param title Plot title.
#' @param score_label Axis label for the score.
#' @return A ggplot object.
plot_metric_distribution <- function(data, score_col, title, score_label) {
  ggplot(
    data,
    aes(
      x = factor(metric, levels = metric_order),
      y = .data[[score_col]],
      fill = metric
    )
  ) +
    # Outliers are hidden on the box plot because every observation is already
    # drawn by the jitter layer below.
    geom_boxplot(alpha = 0.7, outlier.shape = NA) +
    # height = 0: spread points only across the categorical axis. The default
    # height would add random noise to the scores themselves and could push
    # values near 0 or 1 outside the scale limits, where they get dropped.
    geom_jitter(width = 0.2, height = 0, alpha = 0.6, size = 2) +
    scale_fill_manual(values = metric_colors) +
    scale_y_continuous(limits = c(0, 1)) +
    labs(title = title, x = "Abundance Metric", y = score_label) +
    theme_minimal() +
    # NOTE(review): with coord_flip() the rotated axis.text.x presumably ends
    # up on the score axis rather than the metric labels -- confirm intent.
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      legend.position = "none"
    ) +
    coord_flip()
}

p1 <- plot_metric_distribution(
  complete_performance,
  "F1_Score",
  title = "A) F1-Score Distribution by Metric",
  score_label = "F1-Score"
)

# AUC-PR is not available for every row; drop the missing values up front.
p2 <- plot_metric_distribution(
  complete_performance %>% filter(!is.na(auc_pr)),
  "auc_pr",
  title = "B) AUC-PR Distribution by Metric",
  score_label = "AUC-PR"
)

p3 <- plot_metric_distribution(
  complete_performance,
  "Precision",
  title = "C) Precision Distribution by Metric",
  score_label = "Precision"
)

p4 <- plot_metric_distribution(
  complete_performance,
  "Recall",
  title = "D) Recall Distribution by Metric",
  score_label = "Recall"
)

# Arrange the four panels in a 2 x 2 grid under a shared heading.
gridExtra::grid.arrange(
  p1, p2, p3, p4,
  ncol = 2,
  top = "Performance Metrics Distribution Across Abundance Metrics"
)