Motivation/Objectives:
Settings/Context:
Study Design:
Can we use the movement variables KPH, Sn, AGL, |Angle|, Vertical rate, and |VR|, to distinguish in-flight from perching points?
Of the in-flight points:
Perching Characteristics:
In-Flight Characteristics:
Insights:
Associated Variables:
Indicating:
Associated Variables:
Indicating:
Associated Variables:
Indicating:
Insights (Segment 25156):
Ascending points can see when AGL increases and KPH is slow.
Flapping points can be seen when AGL is relativity constant and KPH is moderate.
Gliding points can be seen when AGL is decreasing and KPH is high.
| Cluster | Flight Mode | KPH | Above Ground Level | Vertical Rate |
|---|---|---|---|---|
| Purple | Perched | Very Low or None | Ground Level | Constant |
| Green | Ascending | Low | Moderate | Increasing |
| Blue | Gliding | High | Moderate | Decreasing |
| Red | Flapping | Moderate | Low | Constant |
#Reading in packages and data set
library(cluster)
library(factoextra)
library(tidyverse)
library(ggplot2)
library(patchwork)
library(ggrepel)
options(width=10000)
load('eagle_data.Rdata')
#-------------------------------------------------------------------------#
#Managing Data
#Selecting Bald eagle 111
eagle111 <- eagle_data %>%
filter(Animal_ID == 111)
# Select movement variables
eagle111_vars <- eagle111 %>%
select(KPH, Sn, AGL, VerticalRate, abs_angle, absVR)
#Take sqrt of the skewed variables
eagle111_sqrt <- eagle111_vars %>%
mutate(across(c(AGL, Sn, absVR, abs_angle), sqrt))
# Changing Variable Names
eagle111_sqrt <- eagle111_sqrt %>%
rename(
"|Angle|" = abs_angle,
"|VR|" = absVR
)
# Scale
eagle111_scaled <- scale(eagle111_sqrt)
#-------------------------------------------------------------------------#
#Creating Histograms to identify skewed variables
par(mfrow = c(3, 2))
#Histogram to see which variables are skewed.
for(i in 1:6){
hist(eagle111_vars[,i],
main = paste(colnames(eagle111_scaled)[i]),
xlab = "Value",
col = "skyblue",
border = "white")
}
# Reset layout
par(mfrow = c(1,1))
#-------------------------------------------------------------------------#
#Average Silhouette Width and WSS
#Finding WSS of Different K Clusters to determine K for K-Means
set.seed(234)
k2 <- kmeans(eagle111_scaled, centers = 2, nstart = 10, iter.max = 30)
set.seed(234)
k3 <- kmeans(eagle111_scaled, centers = 3, nstart = 10, iter.max = 30)
set.seed(234)
k4 <- kmeans(eagle111_scaled, centers = 4, nstart = 10, iter.max = 30)
set.seed(234)
k5 <- kmeans(eagle111_scaled, centers = 5, nstart = 10, iter.max = 30)
set.seed(234)
k6 <- kmeans(eagle111_scaled, centers = 6, nstart = 10, iter.max = 30)
set.seed(234)
k7 <- kmeans(eagle111_scaled, centers = 7, nstart = 10, iter.max = 30)
#Adding WWS to a data frame to plot
wss_df = data.frame(k = 2:7,
wss = c(k2$tot.withinss,
k3$tot.withinss,
k4$tot.withinss,
k5$tot.withinss,
k6$tot.withinss,
k7$tot.withinss))
#Bootstrapping The Silhouette width to save memory
B <- 200
avg.sils <- matrix(NA, B, 6)
set.seed(992021)
for(b in 1:B){
# sample 3000 observations with replacement
ids <- sample(seq_len(nrow(eagle111_scaled)), 3000, replace = TRUE)
# compute distance matrix once per bootstrap
distmat <- dist(eagle111_scaled[ids, ])
# cluster memberships for this subsample
clusters <- list(k2$cluster[ids],
k3$cluster[ids],
k4$cluster[ids],
k5$cluster[ids],
k6$cluster[ids],
k7$cluster[ids])
# compute silhouette for k = 2:7
for(i in 1:6) {
sil <- silhouette(clusters[[i]], distmat)
avg.sils[b, i] <- mean(sil[, 3])
}
}
#-------------------------------------------------------------------------#
#Plotting WSS and Solhouette
# WWS Plot
wss_plot <- ggplot(wss_df, aes(x = k, y = wss)) +
geom_line(color = "steelblue", size = 1) +
geom_point(color = "steelblue", size = 2) +
labs(title = "WSS vs k",
x = "Number of Clusters k",
y = "Within-cluster Sum of Squares") +
theme_classic()
# Avg Silhouette Width Plot
# Convert avg.sils to long format for ggplot
sil_df <- as.data.frame(avg.sils)
colnames(sil_df) <- paste0("k", 2:7)
sil_df$bootstrap <- 1:nrow(sil_df)
sil_long <- sil_df %>%
pivot_longer(cols = starts_with("k"),
names_to = "k",
names_prefix = "k",
values_to = "silhouette") %>%
mutate(k = as.numeric(k))
# Plot all bootstrap lines
asw_plot <- ggplot(sil_long, aes(x = k, y = silhouette, group = bootstrap)) +
geom_line(color = "#00000040") +
geom_point(color = "#00000040", size = 1) +
labs(title = "Bootstrap Silhouette",
x = "Number of Clusters k",
y = "Average Silhouette Width") +
theme_classic()
wss_plot / asw_plot
#-------------------------------------------------------------------------#
#Creating PCA Biplot of k=2
# K-means: 2 clusters
set.seed(234)
eagle111_km <- kmeans(eagle111_scaled, centers = 2, iter.max = 30, nstart = 10)
# PCA
eagle111_pca <- prcomp(eagle111_scaled)
#Adding variables to data set
eagle111_subset_k2 <- eagle111 %>%
mutate(k2 = eagle111_km$cluster) %>%
mutate(pc1 = eagle111_pca$x[, 1],
pc2 = eagle111_pca$x[, 2])
loadings111 <- as.data.frame(eagle111_pca$rotation[, 1:2])
loadings111$var <- rownames(loadings111)
#Scaling Vectors in PCA
arrow_scale <- 3
loadings111 <- loadings111 %>%
mutate(PC1 = PC1 * arrow_scale,
PC2 = PC2 * arrow_scale)
#Adding Color for clusters
cluster_colors <- c("1" = "dodgerblue2", "2" = "red2")
#Plotting K2 Clusters with PCA
eagle111_plot <- ggplot(eagle111_subset_k2) +
geom_point(aes(x = pc1, y = pc2, col = factor(k2)),
shape='.', alpha = 0.6) +
geom_segment(data = loadings111,
aes(x = 0, y = 0, xend = PC1, yend = PC2),
arrow = arrow(length = unit(0.25, "cm")),
color = "black") +
geom_text_repel(data = loadings111,
aes(x = PC1, y = PC2, label = var),
color = "black", size = 4) +
scale_color_manual(values = cluster_colors) +
xlim(c(-5,5)) + ylim(c(-6,6)) +
guides(color = "none") +
xlab("PC1") + ylab("PC2") +
ggtitle("Eagle 111") +
theme_classic()
eagle111_plot
#-------------------------------------------------------------------------#
#Creating PCA Biplot of k=4
#Looking at K=4 for the different patterns of in-flight points
# K-means: 4 clusters
set.seed(234)
eagle111_km4 <- kmeans(eagle111_scaled, centers = 4, iter.max = 30, nstart = 10)
#Adding variables to data set
eagle111_subset_k4 <- eagle111 %>%
mutate(k4 = eagle111_km4$cluster) %>%
mutate(pc1 = eagle111_pca$x[, 1],
pc2 = eagle111_pca$x[, 2])
cluster_colors_k4_111 <- c(
"1" = "#f8766d",
"2" = "#7fb007",
"3" = "#1cc6cb",
"4" = "#c97fff"
)
eagle111_subset_k4 <- eagle111_subset_k4 %>%
mutate(k4 = as.factor(k4))
#Plotting K4 Clusters with PCA
eagle111_k4_plot <- ggplot(eagle111_subset_k4) +
geom_point(aes(x = pc1, y = pc2, col = factor(k4)),
shape='.', alpha = 0.6) +
geom_segment(data = loadings111,
aes(x = 0, y = 0, xend = PC1, yend = PC2),
arrow = arrow(length = unit(0.25, "cm")),
color = "black") +
geom_text_repel(data = loadings111,
aes(x = PC1, y = PC2, label = var),
color = "black", size = 4) +
xlim(c(-5,5)) + ylim(c(-6,6)) +
scale_color_manual(values = cluster_colors_k4_111)+
guides(color = "none") +
xlab("PC1") + ylab("PC2") +
ggtitle("Eagle 111 with k = 4") +
theme_classic()
eagle111_k4_plot
#-------------------------------------------------------------------------#
#Creating Box Plots for 3 of the Movement variables
# Box plots
kph_box <- ggplot(eagle111_subset_k4) +
geom_boxplot(aes(x = k4, y = KPH, fill = k4), outlier.shape = NA) +
scale_y_continuous(limits = c(0, 100)) +
labs(fill = "Cluster", x = "Cluster") +
theme_classic()
angle_box <- ggplot(eagle111_subset_k4) +
geom_boxplot(aes(x = k4, y = AGL, fill = k4),outlier.shape = NA) +
scale_y_continuous(limits = c(0, 1000)) +
labs(fill = "Cluster", x = "Cluster") +
theme_classic()
VR_box <- ggplot(eagle111_subset_k4) +
geom_boxplot(aes(x = k4, y = VerticalRate, fill = k4),outlier.shape = NA) +
scale_y_continuous(limits = c(-5, 5)) +
labs(fill = "Cluster", x = "Cluster") +
theme_classic()
#-------------------------------------------------------------------------#
#Examining The Green Cluster
#Making sure the cluster is a factor
eagle111_subset_k4 <- eagle111_subset_k4 %>%
mutate(k4 = as.factor(k4))
eagle111_k4_plot_green <- ggplot(eagle111_subset_k4, aes(x = pc1, y = pc2)) +
geom_point(aes(col = k4), shape='.', alpha = 0.6) +
xlim(c(-4,6)) + ylim(c(-2.5,5)) +
# Ellipse around cluster 2 only
stat_ellipse(data = subset(eagle111_subset_k4, k4 == 2),
aes(x = pc1, y = pc2),
level = 0.95, # 95% confidence ellipse
color = "green3", # color of the ellipse
size = 1, # line thickness
linetype = "solid") +
geom_segment(data = loadings111,
aes(x = 0, y = 0, xend = PC1, yend = PC2),
arrow = arrow(length = unit(0.25, "cm")),
color = "black") +
geom_text_repel(data = loadings111,
aes(x = PC1, y = PC2, label = var),
color = "black", size = 4) +
xlab("PC1") + ylab("PC2") +
ggtitle("Green Cluster ~ Ascending") +
theme_classic() +
guides(color = "none")
(eagle111_k4_plot_green) /
(kph_box + angle_box + VR_box) +
plot_layout(
heights = c(4, 1),
guides = "collect")
#-------------------------------------------------------------------------#
#Examining The Blue Cluster
eagle111_k4_plot_blue <- ggplot(eagle111_subset_k4, aes(x = pc1, y = pc2)) +
geom_point(aes(col = k4), shape='.', alpha = 0.6) +
xlim(c(-4,6)) + ylim(c(-2.5,5)) +
# Ellipse around cluster 2 only
stat_ellipse(data = subset(eagle111_subset_k4, k4 == 3),
aes(x = pc1, y = pc2),
level = 0.95, # 95% confidence ellipse
color = "#008080", # color of the ellipse
size = 1, # line thickness
linetype = "solid") +
geom_segment(data = loadings111,
aes(x = 0, y = 0, xend = PC1, yend = PC2),
arrow = arrow(length = unit(0.25, "cm")),
color = "black") +
geom_text_repel(data = loadings111,
aes(x = PC1, y = PC2, label = var),
color = "black", size = 4) +
xlab("PC1") + ylab("PC2") +
ggtitle("Blue Cluster ~ Gliding") +
theme_classic() +
guides(color = "none")
(eagle111_k4_plot_blue) /
(kph_box + angle_box + VR_box) +
plot_layout(
heights = c(4, 1),
guides = "collect")
#-------------------------------------------------------------------------#
#Examining The Red Cluster
eagle111_k4_plot_red <- ggplot(eagle111_subset_k4, aes(x = pc1, y = pc2)) +
geom_point(aes(col = k4), shape='.', alpha = 0.6) +
xlim(c(-4,6)) + ylim(c(-2.5,5)) +
# Ellipse around cluster 2 only
stat_ellipse(data = subset(eagle111_subset_k4, k4 == 1),
aes(x = pc1, y = pc2),
level = 0.95, # 95% confidence ellipse
color = "#FA6076", # color of the ellipse
size = 1, # line thickness
linetype = "solid") +
geom_segment(data = loadings111,
aes(x = 0, y = 0, xend = PC1, yend = PC2),
arrow = arrow(length = unit(0.25, "cm")),
color = "black") +
geom_text_repel(data = loadings111,
aes(x = PC1, y = PC2, label = var),
color = "black", size = 4) +
xlab("PC1") + ylab("PC2") +
ggtitle("Red Cluster ~ Flapping") +
theme_classic() +
guides(color = "none")
(eagle111_k4_plot_red) /
(kph_box + angle_box + VR_box) +
plot_layout(
heights = c(4, 1),
guides = "collect")
#-------------------------------------------------------------------------#
#Visual Examples of Flight Segment 25156
#Finding segments
segment_lengths <- eagle111_subset_k4 %>%
select(c(segment_id, segment_length)) %>%
group_by(segment_id) %>%
summarise(segment_length = mean(segment_length, na.rm = TRUE)) %>%
arrange(desc(segment_length))
#Selected Segment ID 25156
segment <- eagle111_subset_k4 %>%
filter(segment_id == 25156) %>%
filter(k4 != 4)
#Calculating seconds from the start
segment <- segment %>%
arrange(LocalTime) %>% # ensure data is in chronological order
mutate(elapsed_sec = as.numeric(difftime(LocalTime, first(LocalTime), units = "secs")),
elapsed_min = elapsed_sec / 60)
#Changing Cluster Name for Readability
segment <- segment %>%
mutate(k4 = case_when(
k4 == 1 ~ "Flapping",
k4 == 2 ~ "Ascending",
k4 == 3 ~ "Gliding",
k4 == 4 ~ "Perching",
))
cluster_colors <- c(
"Flapping" = "#f8766d",
"Ascending" = "#7fb007",
"Gliding" = "#1cc6cb",
"Perching" = "#c97fff"
)
time_height <- ggplot(segment, aes(x = elapsed_min, y = AGL)) +
geom_line(color = "grey") +
geom_point(aes(color = factor(k4))) +
scale_color_manual(values = cluster_colors) +
labs(x = "Elapsed Time (minutes)", y = "Above Ground Level (AGL)", color = "Cluster") +
theme_classic()
time_speed <- ggplot(segment, aes(x = elapsed_min, y = KPH)) +
geom_line(color = "grey") +
geom_point(aes(color = factor(k4))) +
scale_color_manual(values = cluster_colors) +
labs(x = "Elapsed Time (minutes)", y = "Speed (KPH)", color = "Cluster") +
theme_classic()
(time_height / time_speed) +
plot_layout(guides = "collect") &
theme(legend.position = "right")
#-------------------------------------------------------------------------#
#Examining Flight Segment 24998
#Selected Segment ID 24998
segment <- eagle111_subset_k4 %>%
filter(segment_id == 24998) %>%
filter(k4 != 4)
#Calculating seconds from the start
segment <- segment %>%
arrange(LocalTime) %>% # ensure data is in chronological order
mutate(elapsed_sec = as.numeric(difftime(LocalTime, first(LocalTime), units = "secs")),
elapsed_min = elapsed_sec / 60)
#Changing Cluster Name for Readability
segment <- segment %>%
mutate(k4 = case_when(
k4 == 1 ~ "Flapping",
k4 == 2 ~ "Ascending",
k4 == 3 ~ "Gliding",
k4 == 4 ~ "Perching",
))
cluster_colors <- c(
"Flapping" = "#f8766d",
"Ascending" = "#7fb007",
"Gliding" = "#1cc6cb",
"Perching" = "#c97fff"
)
time_height <- ggplot(segment, aes(x = elapsed_min, y = AGL)) +
geom_line(color = "grey") +
geom_point(aes(color = factor(k4))) +
scale_color_manual(values = cluster_colors) +
labs(x = "Elapsed Time (minutes)", y = "Above Ground Level (AGL)", color = "Cluster") +
theme_classic()
time_speed <- ggplot(segment, aes(x = elapsed_min, y = KPH)) +
geom_line(color = "grey") +
geom_point(aes(color = factor(k4))) +
scale_color_manual(values = cluster_colors) +
labs(x = "Elapsed Time (minutes)", y = "Speed (KPH)", color = "Cluster") +
theme_classic()
(time_height + time_speed) +
plot_layout(guides = "collect") &
theme(legend.position = "bottom")
#-------------------------------------------------------------------------#
#Examining other eagles for reproducibility
#Selecting Bald eagle 139
eagle139 <- eagle_data %>%
filter(Animal_ID == 139)
# Select movement variables
eagle139_vars <- eagle139 %>%
select(KPH, Sn, AGL, VerticalRate, abs_angle, absVR)
#Take sqrt of the skewed variables
eagle139_sqrt <- eagle139_vars %>%
mutate(across(c(AGL, Sn, absVR, abs_angle), sqrt))
#Changing Variable Names
eagle139_sqrt <- eagle139_sqrt %>%
rename(
"|Angle|" = abs_angle,
"|VR|" = absVR
)
# Scale
eagle139_scaled <- scale(eagle139_sqrt)
# K-means: 4 clusters
set.seed(234)
eagle139_km <- kmeans(eagle139_scaled, centers = 4, iter.max = 30, nstart = 10)
# PCA
eagle139_pca <- prcomp(eagle139_scaled)
#Adding variables to data set
eagle139_subset_k4 <- eagle139 %>%
mutate(k4 = eagle139_km$cluster) %>%
mutate(pc1 = eagle139_pca$x[, 1],
pc2 = eagle139_pca$x[, 2])
loadings139 <- as.data.frame(eagle139_pca$rotation[, 1:2])
loadings139$var <- rownames(loadings139)
#Scaling Vectors in PCA
arrow_scale <- 3
loadings139 <- loadings139 %>%
mutate(PC1 = PC1 * arrow_scale,
PC2 = PC2 * arrow_scale)
#Setting the colors
cluster_colors_k4_139 <- c(
"1" = "#f8766d",
"2" = "#7fb007",
"3" = "#1cc6cb",
"4" = "#c97fff"
)
eagle139_k4_plot <- ggplot(eagle139_subset_k4) +
geom_point(aes(x = pc1, y = pc2, col = factor(k4)),
shape='.', alpha = 0.6) +
geom_segment(data = loadings139,
aes(x = 0, y = 0, xend = PC1, yend = PC2),
arrow = arrow(length = unit(0.25, "cm")),
color = "black") +
geom_text_repel(data = loadings139,
aes(x = PC1, y = PC2, label = var),
color = "black", size = 4) +
xlim(c(-5,5)) + ylim(c(-6,6)) +
scale_color_manual(values = cluster_colors_k4_139)+
guides(color = "none") +
xlab("PC1") + ylab("PC2") +
ggtitle("Eagle 139 with k = 4") +
theme_classic()
#Looking for in-flight or perching points for eagle 150
#Selecting Bald eagle 150
eagle150 <- eagle_data %>%
filter(Animal_ID == 150)
# Select movement variables
eagle150_vars <- eagle150 %>%
select(KPH, Sn, AGL, VerticalRate, abs_angle, absVR)
#Take sqrt of the skewed variables
eagle150_sqrt <- eagle150_vars %>%
mutate(across(c(AGL, Sn, absVR, abs_angle), sqrt))
#Changing Variable Names
eagle150_sqrt <- eagle150_sqrt %>%
rename(
"|Angle|" = abs_angle,
"|VR|" = absVR
)
# Scale
eagle150_scaled <- scale(eagle150_sqrt)
# K-means: 4 clusters
set.seed(234)
eagle150_km <- kmeans(eagle150_scaled, centers = 4, iter.max = 30, nstart = 10)
# PCA
eagle150_pca <- prcomp(eagle150_scaled)
#Adding variables to data set
eagle150_subset_k4 <- eagle150 %>%
mutate(k4 = eagle150_km$cluster) %>%
mutate(pc1 = eagle150_pca$x[, 1],
pc2 = eagle150_pca$x[, 2])
loadings150 <- as.data.frame(eagle150_pca$rotation[, 1:2])
loadings150$var <- rownames(loadings150)
#Scaling Vectors in PCA
arrow_scale <- 3
loadings150 <- loadings150 %>%
mutate(PC1 = PC1 * arrow_scale,
PC2 = PC2 * arrow_scale)
cluster_colors_k4_150 <- c(
"1" = "#1cc6cb",
"2" = "#7fb007",
"3" = "#c97fff",
"4" = "#f8766d"
)
#Plotting K4 Clusters with PCA
eagle150_k4_plot <- ggplot(eagle150_subset_k4) +
geom_point(aes(x = pc1, y = pc2, col = factor(k4)),
shape='.', alpha = 0.6) +
geom_segment(data = loadings150,
aes(x = 0, y = 0, xend = PC1, yend = PC2),
arrow = arrow(length = unit(0.25, "cm")),
color = "black") +
geom_text_repel(data = loadings150,
aes(x = PC1, y = PC2, label = var),
color = "black", size = 4) +
xlim(c(-5,5)) + ylim(c(-6,6)) +
guides(color = "none") +
scale_color_manual(values = cluster_colors_k4_150)+
xlab("PC1") + ylab("PC2") +
ggtitle("Eagle 150 with k = 4") +
theme_classic()
eagle150_k4_plot + eagle139_k4_plot