Eagle Movement Behavior Analysis

JR Vojtech

Background

  • Studying flight behavior is one of the best ways for us to ensure we are helping with habitat conservation and migration route protection.
  • Movement variables allow us to understand the energy usage and habitat selection of bald eagles.

Motivations

  1. Can we identify the movement patterns bald eagles use when migrating and traveling?
  2. Are there distinct flight behaviors?
  3. Can we identify these flight behaviors?

Data

  • GPS data was collected from free-ranging bald eagles using high-resolution biologging devices.
  • 100 bald eagles were tagged from 2013 to 2019, 62 of which were captured in the nest, 30 using fish traps or net guns and 8 tagged when released from a facility.
  • The GPS records data at 3-10 second intervals while the bird is in motion; however, when the bird comes to a stop for more than 1 minute, it records at 15-minute intervals.

Movement Variables

  • KPH (instantaneous speed)
  • Sn (horizontal speed)
  • AGL (altitude above ground)
  • abs_angle (turning angle magnitude)
  • VerticalRate (Vr)
  • absVR (|Vr|)

Variable Distributions

Finding k

Comparing k Clusters

Classifying Perched Points

Classifying In-Flight Behaviors

Square Root Transform Variables

# Build the table used for the movement analysis: keep only rows in which all
# six movement variables are present, then add square-root transformed copies.
# Vr carries the signed vertical rate over unchanged; Vr_tr is the square root
# of its magnitude (absVR).
eagle_mov <- mutate(
  drop_na(eagle, KPH, Sn, AGL, abs_angle, VerticalRate, absVR),
  KPH_tr   = sqrt(KPH),
  Sn_tr    = sqrt(Sn),
  AGL_tr   = sqrt(AGL),
  Angle_tr = sqrt(abs_angle),
  Vr       = VerticalRate,
  Vr_tr    = sqrt(absVR)
)

Distributions of Movement Variables

# Stack the six raw movement variables into long format: one row per
# observation/variable pair, with the variable name in `variable` and its
# reading in `value`, so that a single faceted plot can show them all.
dist_df <- pivot_longer(
  select(eagle_mov, KPH, Sn, AGL, abs_angle, VerticalRate, absVR),
  cols      = everything(),
  names_to  = "variable",
  values_to = "value"
)

# 2x3 grid of distribution plots
# One 40-bin histogram per movement variable, laid out three facets per row.
# Both axes are freed per facet because the variables are on different scales.

dist_2x3 <- ggplot(data = dist_df, mapping = aes(x = value)) +
  geom_histogram(color = "white", bins = 40) +
  facet_wrap(~ variable, ncol = 3, scales = "free") +
  labs(x = "Value", y = "Count") +
  theme_bw(base_size = 14)

Create Elbow and Silhouette Plots

# Elbow + silhouette over k = 2..7
#
# For every candidate k, fit k-means on X_scaled and record
#   * wss[i]      - total within-cluster sum of squares (elbow criterion)
#   * mean_sil[i] - mean silhouette width on a random subsample of rows
#
# The silhouette needs a full pairwise distance matrix, so it is computed on
# at most 2000 rows. Neither the subsample nor its distance matrix depends on
# k, so both are built once before the loop: every k is then scored on the
# same points (so the widths are comparable across k) and dist() runs once
# instead of once per k.

set.seed(123)

k_vals    <- 2:7
wss       <- numeric(length(k_vals))
mean_sil  <- numeric(length(k_vals))

# Shared silhouette subsample and its pairwise distances.
# drop = FALSE keeps the subset two-dimensional whatever its size.
n_sub <- min(2000, nrow(X_scaled))
idx   <- sample.int(nrow(X_scaled), n_sub)
d     <- dist(X_scaled[idx, , drop = FALSE])

for (i in seq_along(k_vals)) {
  k  <- k_vals[i]
  km <- kmeans(X_scaled, centers = k, nstart = 10)

  wss[i] <- km$tot.withinss

  # Score this fit's labels for the subsampled rows only.
  sil <- silhouette(km$cluster[idx], d)

  mean_sil[i] <- mean(sil[, "sil_width"])
}

# Silhouette Width Plot
# Pair each candidate K with its mean silhouette width and draw the result as
# a connected point series.

sil_df <- data.frame(K = k_vals, mean_sil = mean_sil)

sil_plot <- ggplot(data = sil_df, mapping = aes(x = K, y = mean_sil)) +
  geom_point(size = 2) +
  geom_line() +
  labs(
    y     = "Mean silhouette width",
    x     = "Number of clusters (K)",
    title = "Average silhouette vs K"
  ) +
  theme_bw(base_size = 14)

# Elbow Plot
# Total within-cluster sum of squares against K, drawn as a connected point
# series.
elbow_df <- data.frame(K = k_vals, WSS = wss)

elbow_plot <- ggplot(data = elbow_df, mapping = aes(x = K, y = WSS)) +
  geom_point(size = 2) +
  geom_line() +
  labs(
    y     = "Total within-cluster SS",
    x     = "Number of clusters (K)",
    title = "Elbow plot"
  ) +
  theme_bw(base_size = 14)

# Combine the two diagnostics into one figure, elbow plot first.
combined_elbow_sil <- elbow_plot + sil_plot

Create k = 4 PCA Plot

# Fit the k = 4 solution on the scaled movement variables. The seed is fixed
# immediately before the fit because k-means uses random starts.
set.seed(123)
km4 <- kmeans(X_scaled, centers = 4, nstart = 10)

# Attach cluster labels to data (as a factor) on both tables.
# NOTE(review): assumes eagle_mov and X_df have the same rows, in the same
# order, as X_scaled - confirm where X_scaled is built.
eagle_mov[["cluster"]] <- factor(km4[["cluster"]])
X_df[["cluster"]]      <- eagle_mov[["cluster"]]

# Give each numbered cluster a behavior name.
# NOTE(review): this maps cluster 1 to "Perching" and cluster 2 to
# "Ascending", while the text annotations on the labelled k = 4 biplot colour
# "Perching" orange (the colour used for cluster 2) and "Ascending" steelblue
# (the colour used for cluster 1) - confirm which pairing is intended.
eagle_mov[["behavior"]] <- recode(
  eagle_mov[["cluster"]],
  "1" = "Perching",
  "2" = "Ascending",
  "3" = "Flapping",
  "4" = "Gliding"
)

# PCA on the scaled movement variables
pca4 <- prcomp(X_scaled)

# Scores on the first two components, with the sign of both axes flipped,
# alongside each observation's k = 4 cluster label.
pca_df4 <- data.frame(
  PC1     = -pca4[["x"]][, 1],
  PC2     = -pca4[["x"]][, 2],
  cluster = eagle_mov[["cluster"]]
)

# Scatter of the observations in PC1/PC2 space, coloured by cluster.
k4_plot <- ggplot(
  data    = pca_df4,
  mapping = aes(x = PC1, y = PC2, colour = cluster)
) +
  geom_point(size = 1, alpha = 0.35) +
  scale_colour_manual(
    values = c(
      "1" = "steelblue",
      "2" = "orange",
      "3" = "forestgreen",
      "4" = "purple"
    )
  ) +
  labs(
    colour = "Cluster",
    y      = "PC2",
    x      = "PC1",
    title  = "PCA of Movement Variables (K = 4, scaled)"
  ) +
  theme_bw()

Create Box Plots for k = 2

# k-means at k=2
# NOTE(review): no set.seed() call precedes this fit, so which group is
# numbered 1 and which 2 depends on the RNG state left by earlier code.

km2 <- kmeans(X_scaled, centers = 2, nstart = 10)

# Store the two-cluster assignment as a factor on both tables.
eagle_mov[["cluster2"]] <- factor(km2[["cluster"]])
X_df[["cluster2"]]      <- eagle_mov[["cluster2"]]

# Boxplots of variables by k = 2 clusters
# Reshape the six scaled columns to long format, keeping the cluster label as
# the identifier, then draw one facet per variable with its own y-axis.
eagle_long_k2 <- pivot_longer(
  select(X_df, cluster2, KPH_sc, Sn_sc, AGL_sc, Angle_sc, Vr_sc, Vr_abs_sc),
  cols      = -cluster2,
  names_to  = "variable",
  values_to = "value"
)

Box_2 <- ggplot(
  data    = eagle_long_k2,
  mapping = aes(x = cluster2, y = value, fill = cluster2)
) +
  geom_boxplot(outlier.size = 0.5) +
  facet_wrap(~ variable, scales = "free_y") +
  scale_fill_manual(values = c("1" = "orange", "2" = "steelblue")) +
  labs(
    y     = "Standardized value",
    x     = "Cluster",
    title = "K = 2 clusters: scaled movement variables"
  ) +
  theme_bw()

Create PCA Plot for k = 2

# PCA for k = 2 clusters
pca2 <- prcomp(X_scaled)

# First two component scores (both signs flipped) with the k = 2 labels.
pca_df2 <- data.frame(
  PC1      = -pca2[["x"]][, 1],
  PC2      = -pca2[["x"]][, 2],
  cluster2 = eagle_mov[["cluster2"]]
)

# Scatter of the observations in PC1/PC2 space, coloured by k = 2 cluster.
PCA_k2 <- ggplot(
  data    = pca_df2,
  mapping = aes(x = PC1, y = PC2, colour = cluster2)
) +
  geom_point(size = 1, alpha = 0.3) +
  scale_colour_manual(values = c("1" = "orange", "2" = "steelblue")) +
  labs(
    colour = "Cluster",
    y      = "PC2",
    x      = "PC1",
    title  = "PCA of Movement Variables (K = 2, scaled)"
  ) +
  theme_bw()

Plot KPH vs AGL Colored by k = 2 Clusters

# Scatter in scaled space for KPH vs AGL
# Both axes show the scaled columns (KPH_sc, AGL_sc); points are coloured by
# the k = 2 cluster label.
KPH_v_AGL <- ggplot(
  data    = X_df,
  mapping = aes(x = KPH_sc, y = AGL_sc, colour = cluster2)
) +
  geom_point(size = 1, alpha = 0.3) +
  scale_colour_manual(values = c("1" = "orange", "2" = "steelblue")) +
  labs(
    colour = "Cluster",
    y      = "Height Above Ground",
    x      = "Speed (KPH)",
    title  = "KPH vs AGL (scaled) colored by K = 2 clusters"
  ) +
  theme_bw()

# Combine the k = 2 PCA scatter and this panel into one figure, PCA first.
combined_k2pca_KPH <- PCA_k2 + KPH_v_AGL

Create 2x3 grid of PCA plots for k = 2-7

# Project the scaled movement variables onto their principal components once;
# the first two score columns (signs flipped) are shared by every panel.
pca_all <- prcomp(X_scaled)

PC1 <- -pca_all[["x"]][, 1]
PC2 <- -pca_all[["x"]][, 2]

# Update k_vals

k_vals <- 2:7

# Refit k-means for each k and pair its labels with the shared PC scores.
# NOTE(review): these fits are not seeded in this chunk, so the cluster
# numbering in the k = 2 and k = 4 panels is not guaranteed to match the
# numbering in km2 / km4.
pca_k_list <- lapply(k_vals, function(n_clusters) {
  fit <- kmeans(X_scaled, centers = n_clusters, nstart = 10)
  data.frame(
    PC1     = PC1,
    PC2     = PC2,
    k       = factor(n_clusters),
    cluster = factor(fit[["cluster"]])
  )
})

# Stack the per-k tables into one long table for faceting.
pca_k_df <- do.call(rbind, pca_k_list)

# Palette for up to seven clusters. Entries 1-4 repeat the colours used in the
# k = 4 plots.
# NOTE(review): the k = 2 plots map cluster 1 to orange and cluster 2 to
# steelblue, which is the reverse of entries 1 and 2 here.

cluster_colors <- c(
  "1" = "steelblue", "2" = "orange", "3" = "forestgreen", "4" = "purple",
  "5" = "brown",     "6" = "pink",   "7" = "black"
)

# Create 2x3 grid for k=2-7 for easy comparison
# One facet per k, three per row; both axes are limited to [-5.2, 5.2].

k3x2_plot <- ggplot(
  data    = pca_k_df,
  mapping = aes(x = PC1, y = PC2, colour = cluster)
) +
  geom_point(size = 0.8, alpha = 0.35) +
  lims(x = c(-5.2, 5.2), y = c(-5.2, 5.2)) +
  facet_wrap(~ k, ncol = 3) +
  scale_colour_manual(values = cluster_colors) +
  labs(
    colour = "Cluster",
    y      = "PC2",
    x      = "PC1",
    title  = "PCA of Movement Variables for K = 2–7"
  ) +
  theme_bw()

Create PCA Biplot for k = 4 with Loading Vectors From Origin

# PCA scores
# First two component scores from pca4 (both signs flipped) with the k = 4
# cluster label of each observation.

scores4 <- data.frame(
  PC1     = -pca4[["x"]][, 1],
  PC2     = -pca4[["x"]][, 2],
  cluster = eagle_mov[["cluster"]]
)

# PCA loadings
# One row per input variable; signs are flipped the same way as the scores so
# that arrows and points share an orientation.

loadings4 <- data.frame(
  var = rownames(pca4[["rotation"]]),
  PC1 = -pca4[["rotation"]][, 1],
  PC2 = -pca4[["rotation"]][, 2]
)

# Label
# Swap variable names for display labels.
# NOTE(review): the names on the left assume the rows of pca4$rotation (the
# column names of X_scaled) are KPH_tr, Sn_tr, ...; X_df's columns elsewhere
# carry a _sc suffix - confirm these keys actually match.

loadings4[["var"]] <- recode(
  loadings4[["var"]],
  KPH_tr   = "Speed (KPH)",
  Sn_tr    = "Sn",
  AGL_tr   = "AGL",
  Angle_tr = "Angle",
  Vr       = "Vertical rate",
  Vr_tr    = "sqrt(|VR|)"
)

# range of scores
range_PC1 <- range(scores4[["PC1"]])
range_PC2 <- range(scores4[["PC2"]])

# range of raw loadings
range_L1 <- range(loadings4[["PC1"]])
range_L2 <- range(loadings4[["PC2"]])

# Stretch factor for the arrows: the smaller of the two score-span to
# loading-span ratios, shrunk to 40%.
arrow_mult <- min(
  diff(range_PC1) / diff(range_L1),
  diff(range_PC2) / diff(range_L2)
) * 0.4

# Arrow end points in score units.
loadings4[["PC1_arrow"]] <- arrow_mult * loadings4[["PC1"]]
loadings4[["PC2_arrow"]] <- arrow_mult * loadings4[["PC2"]]

# PCA Biplot of k=4
# Cluster-coloured PC scores with the rescaled loading vectors drawn as black
# arrows from the origin; each arrow's label sits 10% beyond its tip.
# Both axes are limited to [-5.2, 5.2] through the scales.

pca4_biplot <- ggplot(
  data    = scores4,
  mapping = aes(x = PC1, y = PC2, colour = cluster)
) +
  geom_point(size = 1, alpha = 0.35) +
  geom_segment(
    mapping = aes(x = 0, y = 0, xend = PC1_arrow, yend = PC2_arrow),
    data    = loadings4,
    colour  = "black",
    arrow   = arrow(length = unit(0.25, "cm"))
  ) +
  geom_text(
    mapping  = aes(x = PC1_arrow * 1.1, y = PC2_arrow * 1.1, label = var),
    data     = loadings4,
    fontface = "bold",
    size     = 3,
    colour   = "black"
  ) +
  lims(x = c(-5.2, 5.2), y = c(-5.2, 5.2)) +
  scale_colour_manual(
    values = c(
      "1" = "steelblue",
      "2" = "orange",
      "3" = "forestgreen",
      "4" = "purple"
    )
  ) +
  labs(
    colour = "Cluster",
    y      = "PC2",
    x      = "PC1",
    title  = "PCA of Movement Variables (K = 4) with Variable Vectors"
  ) +
  theme_bw()

# Create labeled PCA Biplot
# Adds a row of four behavior names along y = -5.2 and extends the visible
# y-range down to -5.5 so that row has room below the data.
# NOTE(review): the label colours pair "Perching" with orange (cluster 2's
# colour) and "Ascending" with steelblue (cluster 1's colour), whereas the
# recode that builds eagle_mov$behavior maps cluster 1 to "Perching" and
# cluster 2 to "Ascending" - confirm which pairing is intended.
pca4_biplot_labeled <- pca4_biplot +
  coord_cartesian(ylim = c(-5.5, 5.2)) +
  # Color code behavior labels for clarity
  annotate(
    "text", label = "Perching", x = -3.0, y = -5.2,
    colour = "orange", fontface = "bold", size = 4
  ) +
  annotate(
    "text", label = "Flapping", x = -1.0, y = -5.2,
    colour = "forestgreen", fontface = "bold", size = 4
  ) +
  annotate(
    "text", label = "Ascending", x = 1.0, y = -5.2,
    colour = "steelblue", fontface = "bold", size = 4
  ) +
  annotate(
    "text", label = "Gliding", x = 3.0, y = -5.2,
    colour = "purple", fontface = "bold", size = 4
  )

Create 2x2 Grid of k = 4 Box Plots

# Create 2x2 box plot grid for k=4 movement variables

# Four-colour palette keyed by cluster number. This assignment replaces any
# earlier value stored under the name cluster_colors.
cluster_colors <- c(
  "1" = "steelblue", "2" = "orange", "3" = "forestgreen", "4" = "purple"
)

# Long table of four raw (untransformed) movement variables, keeping the
# k = 4 cluster label as the identifier column.
box_df <- pivot_longer(
  select(eagle_mov, cluster, AGL, VerticalRate, KPH, abs_angle),
  cols      = -cluster,
  names_to  = "variable",
  values_to = "value"
)


# 2x2 boxplot grid
# One facet per movement variable (two per row), each with its own y-axis;
# boxes are filled with the k = 4 cluster palette.
Box_2x2 <- ggplot(
  data    = box_df,
  mapping = aes(x = cluster, y = value, fill = cluster)
) +
  geom_boxplot(outlier.size = 0.4) +
  facet_wrap(~ variable, ncol = 2, scales = "free_y") +
  scale_fill_manual(values = cluster_colors) +
  labs(
    fill  = "Cluster",
    y     = "Value",
    x     = "Cluster",
    title = "Distributions of Movement Variables by Cluster (K = 4)"
  ) +
  theme_bw(base_size = 14)