Oscar Richmond - Eagle Analysis

Introduction

  • GPS movement data collected every few seconds
  • Key movement variables: speed, altitude, turning angle, vertical rate
  • Goal: uncover structure in movement behavior using PCA
  • PCA summarizes high-dimensional movement patterns into principal components

PCA Methods

  • Variables included in PCA:
    KPH, Sn, AGL, abs_angle, VerticalRate, absVR
  • Data standardized before PCA
  • Principal Components capture major axes of movement variation

Scree Plot

PCA Variable Plot

PCA Scores (PC1 vs PC2)

Transition to Clustering

  • PCA showed clear variation in movement based on speed, turning, and vertical motion.
  • The spread of points in PCA space suggests the presence of distinct movement patterns.
  • PCA explains structure, but clustering helps identify actual behavioral groups.
  • Next, I apply clustering methods to define and interpret these movement states.

Bootstrapped Silhouette Curve for K = 2–7

Bootstrap WSS Curve Plot

PCA Plot w/ Cluster Labels

How Movement Variables Differ Across Clusters

Appendix

Data Loading and Transformations

library(tidyverse)
library(readxl)
library(vegan)
library(cluster)
library(factoextra)
library(ggrepel)
library(purrr)

# Load base dataset
load("eagle_data.Rdata")

# Apply transformations recommended in movement ecology literature
# Square-root reduces right skew for positive variables
# VerticalRate (VR) left untransformed per original methodology

eagle_tr <- eagle_data %>%
  mutate(
    KPH_tr       = sqrt(KPH),
    Sn_tr        = sqrt(Sn),
    AGL_tr       = sqrt(AGL),
    abs_angle_tr = sqrt(abs_angle),
    VR_tr        = VerticalRate,     # leave untransformed
    absVR_tr     = sqrt(absVR)
  ) %>%
  # keep Animal_ID attached
  mutate(Animal_ID = Animal_ID) %>%
  # remove rows where any transformed movement variable is missing
  drop_na(KPH_tr, Sn_tr, AGL_tr, abs_angle_tr, VR_tr, absVR_tr)

# List of transformed variables used for PCA and clustering
vars_tr <- c("KPH_tr", "Sn_tr", "AGL_tr", "abs_angle_tr", "VR_tr", "absVR_tr")

Stratified Sampling for Clustering Code

points_per_eagle <- 5000
set.seed(123)

df_strat <- eagle_tr %>%
  group_by(Animal_ID) %>%              # keep equal representation per eagle
  sample_n(points_per_eagle, replace = TRUE) %>% 
  ungroup()

# Scaled matrix used for k-means clustering
X_strat <- scale(df_strat %>% select(all_of(vars_tr)))

PCA Computation

X_pca <- df_strat %>%
select(all_of(vars_tr)) %>%
scale()

pca_res <- prcomp(X_pca, center = FALSE, scale. = FALSE)
pca_scores <- as_tibble(pca_res$x)

Scree Plot Code

fviz_eig(pca_res) +
ggtitle("Scree Plot: Eagle Movement Variables PCA")

PCA Loading Code

load_df <- as.data.frame(pca_res$rotation[,1:2]) %>%
mutate(var = rownames(.),
PC1 = PC1 * 2.8,
PC2 = PC2 * 2.8)

ggplot(load_df, aes(PC1, PC2)) +
geom_segment(aes(x = 0, y = 0, xend = PC1, yend = PC2),
arrow = arrow(length = unit(0.25,"cm")),
linewidth = 1.1,
color = "darkblue") +
geom_text_repel(aes(label = var), size = 5)

Bootstrap Silhouette Code

K_vals <- 2:7
B <- 20
sil_boot <- tibble()

for (b in 1:B) {
df_b <- eagle_tr %>%
group_by(Animal_ID) %>%
sample_n(500, replace = TRUE) %>%
ungroup() %>%
select(all_of(vars_tr))

Xb <- scale(df_b)
db <- dist(Xb)

for (k in K_vals) {
km <- kmeans(Xb, centers = k, nstart = 20)
sil <- silhouette(km$cluster, db)
sil_boot <- bind_rows(
  sil_boot,
  tibble(bootstrap=b, K=k, silhouette=mean(sil[,3]))
)
}
}

Bootstrapped WSS Curve Code

wss_boot <- tibble()

for (b in 1:B) {
df_b <- eagle_tr %>%
group_by(Animal_ID) %>%
sample_n(500, replace = TRUE) %>%
ungroup()

Xb <- scale(df_b %>% select(all_of(vars_tr)))

for (k in K_vals) {
km <- kmeans(Xb, centers = k, nstart = 20)
wss_boot <- bind_rows(
wss_boot,
tibble(bootstrap=b, K=k, WSS=km$tot.withinss)
)
}
}

K-Means Clustering Code

set.seed(123)
km4 <- kmeans(X_strat, centers = 4, nstart = 25)

df_strat_k4 <- df_strat %>%
mutate(cluster = factor(km4$cluster))

BoxPlot Viz Code

df_long <- df_strat_k4 %>%
pivot_longer(cols = all_of(vars_tr),
names_to = "variable",
values_to = "value")

ggplot(df_long, aes(cluster, value, fill = cluster)) +
geom_boxplot(outlier.alpha = 0.3) +
facet_wrap(~variable, scales = "free_y")