# Long-format copy of six movement columns of eagle_mov: one row per
# (observation, variable) pair, so each variable can get its own facet.
dist_df <- pivot_longer(
  select(eagle_mov, KPH, Sn, AGL, abs_angle, VerticalRate, absVR),
  cols = everything(),
  names_to = "variable",
  values_to = "value"
)

# 2x3 grid of distribution plots (six variables, three facet columns);
# scales = "free" gives every panel its own x and y range.
dist_2x3 <- ggplot(data = dist_df, mapping = aes(x = value)) +
  geom_histogram(bins = 40, color = "white") +
  facet_wrap(~ variable, scales = "free", ncol = 3) +
  labs(x = "Value", y = "Count") +
  theme_bw(base_size = 14)

# Elbow + silhouette over k = 2..7
# Seed the RNG so the random k-means starts and the silhouette subsamples
# drawn in the loop are reproducible.
set.seed(123)
k_vals <- 2:7

# One slot per candidate k: total within-cluster sum of squares and mean
# silhouette width.
wss <- numeric(length(k_vals))
mean_sil <- numeric(length(k_vals))

# Silhouettes are computed on at most 2000 rows
# (presumably to keep dist() affordable -- confirm).
n_sub <- min(2000, nrow(X_scaled))

for (i in seq_along(k_vals)) {
  k <- k_vals[i]
  km <- kmeans(X_scaled, centers = k, nstart = 10)
  wss[i] <- km[["tot.withinss"]]

  # A fresh random subsample is drawn for every k; the distance matrix and
  # the silhouette widths use only those rows and their cluster labels.
  idx <- sample.int(nrow(X_scaled), size = n_sub)
  d <- dist(X_scaled[idx, ])
  sil <- silhouette(km[["cluster"]][idx], d)
  mean_sil[i] <- mean(sil[, "sil_width"])
}
# Silhouette Width Plot: mean silhouette width for each candidate K.
sil_df <- data.frame(K = k_vals, mean_sil = mean_sil)

sil_plot <- ggplot(data = sil_df, mapping = aes(x = K, y = mean_sil)) +
  geom_point(size = 2) +
  geom_line() +
  labs(
    title = "Average silhouette vs K",
    x = "Number of clusters (K)",
    y = "Mean silhouette width"
  ) +
  theme_bw(base_size = 14)
# Elbow Plot: total within-cluster sum of squares for each candidate K.
elbow_df <- data.frame(K = k_vals, WSS = wss)

elbow_plot <- ggplot(data = elbow_df, mapping = aes(x = K, y = WSS)) +
  geom_point(size = 2) +
  geom_line() +
  labs(
    title = "Elbow plot",
    x = "Number of clusters (K)",
    y = "Total within-cluster SS"
  ) +
  theme_bw(base_size = 14)

# Elbow and silhouette panels combined into one figure.
# NOTE(review): adding two complete ggplot objects with `+` presumably relies
# on patchwork being attached earlier in the file -- confirm.
combined_elbow_sil <- elbow_plot + sil_plot

# Re-seed so the K = 4 k-means fit that follows is reproducible. This must be
# its own statement, separate from the plot composition above.
set.seed(123)
# K = 4 k-means fit on the scaled movement matrix (10 random starts).
km4 <- kmeans(X_scaled, centers = 4, nstart = 10)

# Attach cluster labels to data (as a factor) in both data frames.
eagle_mov[["cluster"]] <- factor(km4[["cluster"]])
X_df[["cluster"]] <- eagle_mov[["cluster"]]

# Name each numeric cluster label with a behavior.
# NOTE(review): k-means label numbers are arbitrary, so this mapping is only
# valid for the exact fit produced under the seed above. The annotations on
# the labeled biplot colour "Perching" orange (the colour used for cluster 2)
# and "Ascending" steelblue (cluster 1), which disagrees with the mapping
# below -- confirm which is correct.
eagle_mov[["behavior"]] <- recode(
  eagle_mov[["cluster"]],
  "1" = "Perching",
  "2" = "Ascending",
  "3" = "Flapping",
  "4" = "Gliding"
)
# PCA on the scaled movement variables
pca4 <- prcomp(X_scaled)

# First two principal-component scores, each sign-flipped, paired with the
# K = 4 cluster label of every observation.
pca_df4 <- data.frame(
  PC1 = -pca4$x[, 1],
  PC2 = -pca4$x[, 2],
  cluster = eagle_mov$cluster
)

# Scatter of the observations in PC1/PC2 space, coloured by K = 4 cluster.
k4_plot <- ggplot(
  data = pca_df4,
  mapping = aes(x = PC1, y = PC2, color = cluster)
) +
  geom_point(alpha = 0.35, size = 1) +
  scale_color_manual(
    values = c(
      "1" = "steelblue",
      "2" = "orange",
      "3" = "forestgreen",
      "4" = "purple"
    )
  ) +
  labs(
    title = "PCA of Movement Variables (K = 4, scaled)",
    x = "PC1",
    y = "PC2",
    color = "Cluster"
  ) +
  theme_bw()

# k-means at k=2
# NOTE(review): no seed is set immediately before this fit, so the result
# depends on the RNG state left by the preceding code.
km2 <- kmeans(X_scaled, centers = 2, nstart = 10)
eagle_mov[["cluster2"]] <- factor(km2[["cluster"]])
X_df[["cluster2"]] <- eagle_mov[["cluster2"]]

# Boxplots of variables by k = 2 clusters: reshape the six *_sc columns of
# X_df to long format, keeping cluster2 as the identifier column.
eagle_long_k2 <- pivot_longer(
  select(X_df, cluster2, KPH_sc, Sn_sc, AGL_sc, Angle_sc, Vr_sc, Vr_abs_sc),
  cols = -cluster2,
  names_to = "variable",
  values_to = "value"
)

# One facet per variable, each with its own y scale.
Box_2 <- ggplot(
  data = eagle_long_k2,
  mapping = aes(x = cluster2, y = value, fill = cluster2)
) +
  geom_boxplot(outlier.size = 0.5) +
  facet_wrap(~ variable, scales = "free_y") +
  scale_fill_manual(values = c("1" = "orange", "2" = "steelblue")) +
  labs(
    title = "K = 2 clusters: scaled movement variables",
    x = "Cluster",
    y = "Standardized value"
  ) +
  theme_bw()

# PCA for k = 2 clusters
# PCA of the scaled movement matrix for the K = 2 view.
pca2 <- prcomp(X_scaled)

# Sign-flipped scores on the first two components plus the K = 2 labels.
pca_df2 <- data.frame(
  PC1 = -pca2$x[, 1],
  PC2 = -pca2$x[, 2],
  cluster2 = eagle_mov$cluster2
)

PCA_k2 <- ggplot(
  data = pca_df2,
  mapping = aes(x = PC1, y = PC2, color = cluster2)
) +
  geom_point(alpha = 0.3, size = 1) +
  scale_color_manual(values = c("1" = "orange", "2" = "steelblue")) +
  labs(
    title = "PCA of Movement Variables (K = 2, scaled)",
    x = "PC1",
    y = "PC2",
    color = "Cluster"
  ) +
  theme_bw()

# Scatter in scaled space for KPH vs AGL
# Points are the KPH_sc and AGL_sc columns of X_df, coloured by the K = 2
# cluster labels with the same palette as the K = 2 PCA plot.
KPH_v_AGL <- ggplot(
  data = X_df,
  mapping = aes(x = KPH_sc, y = AGL_sc, color = cluster2)
) +
  geom_point(alpha = 0.3, size = 1) +
  scale_color_manual(values = c("1" = "orange", "2" = "steelblue")) +
  labs(
    title = "KPH vs AGL (scaled) colored by K = 2 clusters",
    x = "Speed (KPH)",
    y = "Height Above Ground",
    color = "Cluster"
  ) +
  theme_bw()

# K = 2 PCA scatter and the KPH-vs-AGL scatter combined into one figure.
# NOTE(review): adding two complete ggplot objects with `+` presumably relies
# on patchwork being attached earlier in the file -- confirm.
combined_k2pca_KPH <- PCA_k2 + KPH_v_AGL

# PCA of the scaled movement matrix, shared by the K = 2..7 comparison below.
# This must be its own statement, separate from the plot composition above.
pca_all <- prcomp(X_scaled)
# Sign-flipped scores on the first two principal components; these vectors
# are reused for every value of k below.
PC1 <- -pca_all$x[, 1]
PC2 <- -pca_all$x[, 2]

# Update k_vals
k_vals <- 2:7

# For each k, fit k-means on the scaled matrix and pair the shared PC scores
# with that fit's cluster labels; `k` is stored as a factor for faceting.
pca_k_list <- lapply(k_vals, function(k) {
  fit <- kmeans(X_scaled, centers = k, nstart = 10)
  data.frame(
    PC1 = PC1,
    PC2 = PC2,
    k = factor(k),
    cluster = factor(fit[["cluster"]])
  )
})

# Stack the per-k data frames into one long data frame.
pca_k_df <- do.call(rbind, pca_k_list)
# Palette for cluster labels 1-7. Labels 1-4 use the same colours as the
# K = 4 plots; the K = 2 plots assign orange/steelblue the other way round.
cluster_colors <- c(
  "1" = "steelblue",
  "2" = "orange",
  "3" = "forestgreen",
  "4" = "purple",
  "5" = "brown",
  "6" = "pink",
  "7" = "black"
)

# Create 2x3 grid for k=2-7 for easy comparison (one facet per k, three
# columns). Both axes are limited to [-5.2, 5.2].
k3x2_plot <- ggplot(
  data = pca_k_df,
  mapping = aes(x = PC1, y = PC2, color = cluster)
) +
  geom_point(alpha = 0.35, size = 0.8) +
  xlim(-5.2, 5.2) +
  ylim(-5.2, 5.2) +
  facet_wrap(~ k, ncol = 3) +
  scale_color_manual(values = cluster_colors) +
  labs(
    title = "PCA of Movement Variables for K = 2–7",
    x = "PC1",
    y = "PC2",
    color = "Cluster"
  ) +
  theme_bw()

# PCA scores
# Sign-flipped PC1/PC2 scores from pca4 with the K = 4 cluster labels.
scores4 <- data.frame(
  PC1 = -pca4$x[, 1],
  PC2 = -pca4$x[, 2],
  cluster = eagle_mov$cluster
)

# PCA loadings: one row per input variable, with the same sign flip as the
# scores so arrows and points share an orientation.
loadings4 <- data.frame(
  var = rownames(pca4$rotation),
  PC1 = -pca4$rotation[, 1],
  PC2 = -pca4$rotation[, 2]
)

# Label: swap variable names for display labels.
# NOTE(review): the keys below use a `_tr` suffix, while the X_df columns used
# elsewhere in this file end in `_sc`. If the columns of X_scaled are named
# `*_sc`, none of these keys match and the names pass through unchanged --
# confirm against colnames(X_scaled).
loadings4$var <- recode(
  loadings4$var,
  KPH_tr = "Speed (KPH)",
  Sn_tr = "Sn",
  AGL_tr = "AGL",
  Angle_tr = "Angle",
  Vr = "Vertical rate",
  Vr_tr = "sqrt(|VR|)"
)

# range of scores
range_PC1 <- range(scores4$PC1)
range_PC2 <- range(scores4$PC2)

# range of raw loadings
range_L1 <- range(loadings4$PC1)
range_L2 <- range(loadings4$PC2)

# Common multiplier for the loading arrows: the smaller of the two
# score-span / loading-span ratios, shrunk to 40%.
arrow_mult <- 0.4 * min(
  diff(range_PC1) / diff(range_L1),
  diff(range_PC2) / diff(range_L2)
)

loadings4$PC1_arrow <- arrow_mult * loadings4$PC1
loadings4$PC2_arrow <- arrow_mult * loadings4$PC2
# PCA Biplot of k=4: observation scores coloured by cluster, overlaid with
# one black arrow per variable drawn from the origin to its scaled loading.
pca4_biplot <- ggplot(
  data = scores4,
  mapping = aes(x = PC1, y = PC2, color = cluster)
) +
  geom_point(alpha = 0.35, size = 1) +
  geom_segment(
    data = loadings4,
    mapping = aes(x = 0, y = 0, xend = PC1_arrow, yend = PC2_arrow),
    arrow = arrow(length = unit(0.25, "cm")),
    color = "black"
  ) +
  # Labels sit 10% beyond each arrow tip so they do not overlap the arrowhead.
  geom_text(
    data = loadings4,
    mapping = aes(x = PC1_arrow * 1.1, y = PC2_arrow * 1.1, label = var),
    color = "black",
    size = 3,
    fontface = "bold"
  ) +
  xlim(-5.2, 5.2) +
  ylim(-5.2, 5.2) +
  scale_color_manual(
    values = c(
      "1" = "steelblue",
      "2" = "orange",
      "3" = "forestgreen",
      "4" = "purple"
    )
  ) +
  labs(
    title = "PCA of Movement Variables (K = 4) with Variable Vectors",
    x = "PC1",
    y = "PC2",
    color = "Cluster"
  ) +
  theme_bw()
# Create labeled PCA Biplot: the K = 4 biplot plus a row of behavior names
# along the bottom edge (y = -5.2), with the visible y range extended down
# to -5.5.
# Color code behavior labels for clarity.
# NOTE(review): "Perching" is drawn in orange (the colour of cluster 2) and
# "Ascending" in steelblue (cluster 1), whereas the recode that builds
# eagle_mov$behavior maps cluster 1 to "Perching" and cluster 2 to
# "Ascending" -- confirm which pairing is intended.
pca4_biplot_labeled <- pca4_biplot +
  annotate(
    "text",
    x = -3.0, y = -5.2, label = "Perching",
    color = "orange", size = 4, fontface = "bold"
  ) +
  annotate(
    "text",
    x = -1.0, y = -5.2, label = "Flapping",
    color = "forestgreen", size = 4, fontface = "bold"
  ) +
  annotate(
    "text",
    x = 1.0, y = -5.2, label = "Ascending",
    color = "steelblue", size = 4, fontface = "bold"
  ) +
  annotate(
    "text",
    x = 3.0, y = -5.2, label = "Gliding",
    color = "purple", size = 4, fontface = "bold"
  ) +
  coord_cartesian(ylim = c(-5.5, 5.2))

# Create 2x2 box plot grid for k=4 movement variables
# Four-colour palette keyed by K = 4 cluster label. This reassigns
# cluster_colors, replacing any palette defined under that name earlier.
cluster_colors <- c(
  "1" = "steelblue",
  "2" = "orange",
  "3" = "forestgreen",
  "4" = "purple"
)

# Long-format copy of four unscaled movement columns of eagle_mov, keeping
# the cluster label as the identifier column.
box_df <- pivot_longer(
  select(eagle_mov, cluster, AGL, VerticalRate, KPH, abs_angle),
  cols = -cluster,
  names_to = "variable",
  values_to = "value"
)

# 2x2 boxplot grid: one facet per variable, each with its own y scale.
Box_2x2 <- ggplot(
  data = box_df,
  mapping = aes(x = cluster, y = value, fill = cluster)
) +
  geom_boxplot(outlier.size = 0.4) +
  facet_wrap(~ variable, scales = "free_y", ncol = 2) +
  scale_fill_manual(values = cluster_colors) +
  labs(
    title = "Distributions of Movement Variables by Cluster (K = 4)",
    x = "Cluster",
    y = "Value",
    fill = "Cluster"
  ) +
  theme_bw(base_size = 14)