# Plotting subset of df_106: a `cluster` factor plus the four variables that
# are compared across clusters.
# NOTE(review): assumes km4$cluster holds one label per row of df_106, in the
# same row order -- confirm km4 was fitted on exactly these rows.
df_box <- df_106 %>%
  mutate(cluster = factor(km4$cluster)) %>%
  select(cluster, KPH, AGL, abs_angle, VerticalRate)
# Long-format version of df_box for faceted plotting: one row per
# (observation, variable) pair, with the measurement in `value`.
# NOTE(review): the VerticalRate filter removes whole rows, so observations
# with VerticalRate outside [-5, 5] (or NA) are excluded from every variable,
# not only from the VerticalRate values -- confirm this is intended.
cluster_box <- df_box %>%
  select(cluster, KPH, AGL, abs_angle, VerticalRate) %>%
  filter(VerticalRate >= -5 & VerticalRate <= 5) %>%
  tidyr::pivot_longer(
    cols = c(KPH, AGL, abs_angle, VerticalRate),
    names_to = "variable",
    values_to = "value"
  ) %>%
  mutate(
    # Explicit levels pin the ordering of `variable` instead of leaving it
    # to alphabetical sorting.
    variable = factor(
      variable,
      levels = c("KPH", "AGL", "abs_angle", "VerticalRate")
    )
  )
# Colour assigned to each cluster label; the names are the labels "1" to "4".
cluster_cols <- setNames(
  c("gold", "forestgreen", "dodgerblue3", "darkorange"),
  as.character(1:4)
)
# Data for a horizontal reference line at 0 that belongs to the VerticalRate
# facet only (one row, so no other facet value is present).
# `variable` is a factor with the same levels, in the same order, as
# cluster_box$variable. If it were a plain character column, ggplot2 would
# coerce the combined facet variable to character when building the layout
# and order the panels alphabetically, discarding the intended
# KPH / AGL / abs_angle / VerticalRate order. Keep these levels in sync with
# the ones used for cluster_box$variable.
vr_line <- data.frame(
  variable = factor(
    "VerticalRate",
    levels = c("KPH", "AGL", "abs_angle", "VerticalRate")
  ),
  yint = 0
)
# Boxplots of `value` by cluster, one facet per entry of `variable`, each
# facet with its own y scale. Left unassigned so the plot prints when the
# script runs at top level.
ggplot(
  data = cluster_box,
  mapping = aes(x = cluster, y = value, fill = cluster)
) +
  geom_boxplot(alpha = 0.8, outlier.size = 0.7) +
  # Dashed line at yint, drawn from vr_line's own data; inherit.aes = FALSE
  # keeps the plot-level x/y/fill mappings (absent from vr_line) out of
  # this layer.
  geom_hline(
    mapping = aes(yintercept = yint),
    data = vr_line,
    inherit.aes = FALSE,
    linetype = "dashed"
  ) +
  scale_fill_manual(values = cluster_cols) +
  facet_wrap(~ variable, ncol = 2, scales = "free_y") +
  labs(
    title = "Boxplots of key variables across clusters (untransformed values)",
    x = "Cluster",
    y = NULL
  ) +
  theme_classic() +
  # Fill maps to the same variable as the x axis, so the legend is hidden.
  theme(
    strip.text = element_text(face = "bold"),
    legend.position = "none"
  )