Exam 2 — Jaden McCoy

Introduction

Ecologists in Iowa captured eagle movement data and wanted to use analytic tools to understand movement patterns.
Movements were tracked with a GPS directly attached to eagles.
The dataset captures how eagles travel across the landscape, move vertically, and change speed over time.
Our goal was to distinguish between perching and flight behaviors so that we could properly identify patterns in everyday bird flight.

Data Methods

Flight points were identified using a threshold of >2 KPH.
Movement variables used for analysis included speed (average and instantaneous), altitude, vertical rate, and turning angle.
A 50,000-point random sample was taken to examine overall movement structure.

Flight Behavior Clustering

GPS fixes were grouped into flight segments (KPH > 2), and segments with fewer than 5 GPS points were removed.
Segments were summarized and scaled. Both averages and standard deviations were used in the clustering and PCA plots.
The scaled segment features were clustered with k-means (k = 4) to identify four movement types, and the summarized segments were projected into a PCA behavior space for visualization.

Deeper Cluster Understanding

For each cluster, a representative flight segment was selected.
Segments were filtered to those within ±30% of the median length.
Within PCA space, the segments closest to the cluster’s centroid were chosen.
These representative segments were then visualized using movement paths and time series of speed, altitude, and vertical rate.

Perch Vs. Flight Points

Differing Flight Patterns

Flight Pattern Characteristics

Results Overview

It is possible to identify perch vs flight points.
A threshold of KPH > 2 easily distinguishes these points.

Flight patterns are possible to identify.
- Clusters 1 & 3, soaring vs searching behaviors
- Clusters 2 & 4, steady vs fast traveling behaviors.

Overall, we are able to identify bird movement patterns through GPS tracking. This leaves a lot of room to explore why these behaviors occur and how common they are.

Research Question 1 Appendix (Install Packages)

#| eval: false
#| echo: true

# One-time setup: install every package that the analysis chunks load.
install.packages(
  pkgs = c(
    "tidyverse", "factoextra", "cluster", "dbscan",
    "lubridate", "patchwork", "ggrepel"
  )
)

Load Packages

#| eval: false
#| echo: true

library(tidyverse)
library(factoextra)
library(cluster)
library(dbscan)
library(lubridate)
library(patchwork)
library(ggrepel)

Create random 50,000-point sample & classify flight vs perch

#| eval: false
#| echo: true

# Fix the RNG seed so the same 50,000 rows are drawn on every run.
set.seed(1)

# Draw 50,000 distinct row indices and keep those rows of the full data.
sample1 <- eagle_data[sample.int(nrow(eagle_data), size = 50000), ]

# Keep only the movement variables, then label each fix: KPH above 2 is
# "flight", anything else is "perch".
s1 <- sample1 %>%
  select(KPH, Sn, AGL, VerticalRate, abs_angle) %>%
  mutate(flight_status = ifelse(KPH > 2, "flight", "perch"))

PCA to visualize perch vs flight

#| eval: false
#| echo: true

# PCA on the standardized movement variables. The flight/perch label is
# dropped first because it is only used to color the points.
pca_sample1 <- prcomp(scale(s1 %>% select(-flight_status)))

# Plot the sampled fixes in PCA space, colored by flight status.
# Point transparency is passed as `alpha.ind`, the documented argument of
# fviz_pca_ind(), instead of relying on `alpha` partially matching it.
fviz_pca_ind(
  pca_sample1,
  geom = "point",
  habillage = s1$flight_status,
  palette = c("flight" = "#CC3D3D", "perch" = "#005A9C"),
  addEllipses = FALSE,
  alpha.ind = 0.4,
  title = "PCA – Sample 1 (50,000 rows)"
) + theme_minimal(base_size = 14)

Research Question 2 Appendix (Filter to Flight Only Points)

#| eval: false
#| echo: true

# Keep only the fixes classified as flight (KPH above 2).
flight_only <- filter(eagle_data, KPH > 2)

Compute segment-level movement summaries

#| eval: false
#| echo: true

# Per-segment movement summary: drop segments with fewer than five flight
# fixes, then reduce each remaining segment to one row holding the means,
# standard deviations, and point count of its movement variables.
flight_segments <- flight_only %>%
  group_by(segment_id) %>%
  filter(n() >= 5) %>%
  summarise(
    mean_KPH = mean(KPH),
    mean_AGL = mean(AGL),
    mean_VR = mean(VerticalRate),
    mean_abs_angle = mean(abs_angle),
    mean_Sn = mean(Sn),
    sd_KPH = sd(KPH),
    sd_VR = sd(VerticalRate),
    sd_abs_angle = sd(abs_angle),
    n_points = n(),
    .groups = "drop"
  )

PCA on segment features

#| eval: false
#| echo: true

# Standardize the nine segment-level features (each column centered and
# scaled by scale()) and run PCA on the resulting matrix.
seg_mat <- flight_segments %>%
  select(
    mean_KPH, mean_AGL, mean_VR, mean_abs_angle, mean_Sn,
    sd_KPH, sd_VR, sd_abs_angle, n_points
  ) %>%
  scale()

pca_seg <- prcomp(seg_mat)

K-means clustering (k = 4)

#| eval: false
#| echo: true

# Drop incomplete segments BEFORE clustering. Removing them afterwards would
# leave km4$cluster with a different length than the cleaned table (and
# kmeans() itself rejects missing values), so labels could not be attached.
flight_segments_clean <- flight_segments %>%
  drop_na()

# Standardized feature matrix built from the cleaned rows only, so row i of
# the matrix is row i of flight_segments_clean.
seg_mat_clean <- flight_segments_clean %>%
  select(mean_KPH, mean_AGL, mean_VR, mean_abs_angle,
         mean_Sn, sd_KPH, sd_VR, sd_abs_angle, n_points) %>%
  scale()

# Seed immediately before kmeans() so the 25 random starts are reproducible.
set.seed(1)

km4 <- kmeans(seg_mat_clean, centers = 4, nstart = 25)

# Attach each segment's cluster label as a factor.
flight_segments_clean <- flight_segments_clean %>%
  mutate(cluster = factor(km4$cluster))

# One fixed color per cluster label, reused by every plot below.
cluster_colors <- c("1"="#0072B2","2"="#E69F00","3"="#009E73","4"="#CC79A7")

Rename variables for PCA biplot arrows

#| eval: false
#| echo: true

# Readable labels for the biplot arrows, in the same order as the feature
# columns selected below.
colnames_for_pca <- c(
  "Mean Speed (KPH)",
  "Mean Altitude",
  "Mean Vertical Rate",
  "Mean Turning Angle",
  "Mean Average Speed",
  "Speed Variability",
  "Vertical Rate Variability",
  "Turning Angle Variability",
  "Segment Length"
)

# Build the feature table from flight_segments_clean (not flight_segments):
# the PCA scores are later paired row-by-row with the segment ids and cluster
# labels stored in flight_segments_clean, so both must cover the same rows.
seg_raw <- flight_segments_clean %>%
  select(mean_KPH, mean_AGL, mean_VR, mean_abs_angle,
         mean_Sn, sd_KPH, sd_VR, sd_abs_angle, n_points)

colnames(seg_raw) <- colnames_for_pca

# Re-standardize and refit the PCA so its loadings carry the readable names.
seg_mat <- scale(seg_raw)
pca_seg <- prcomp(seg_mat)

Select representative segments (closest to centroids)

#| eval: false
#| echo: true

# Median segment length (in points) across all clustered segments.
global_median_len <- median(flight_segments_clean$n_points)

# Segments whose length lies within 30% of that median, bounds included.
eligible_segments <- flight_segments_clean %>%
  filter(between(n_points, 0.7 * global_median_len, 1.3 * global_median_len))

# First two PCA scores per segment, tagged with segment id and cluster label.
segment_scores <- mutate(
  as.data.frame(pca_seg$x[, 1:2]),
  segment_id = flight_segments_clean$segment_id,
  cluster = flight_segments_clean$cluster
)

# Cluster centroids in (PC1, PC2), computed from the eligible segments only.
centroids <- segment_scores %>%
  filter(segment_id %in% eligible_segments$segment_id) %>%
  group_by(cluster) %>%
  summarise(across(c(PC1, PC2), mean))

# Pick one representative segment per cluster: the eligible segment whose
# (PC1, PC2) scores are closest to that cluster's centroid.
closest_segments <- segment_scores %>%
  filter(segment_id %in% eligible_segments$segment_id) %>%
  # After the join, *.x columns are the segment's scores and *.y columns are
  # the centroid of the segment's cluster.
  inner_join(centroids, by = "cluster") %>%
  # Euclidean distance is vectorized, so no rowwise() is needed.
  mutate(dist = sqrt((PC1.x - PC1.y)^2 + (PC2.x - PC2.y)^2)) %>%
  # Group by cluster so slice_min() keeps the nearest segment within each
  # cluster; under rowwise() every row was its own group and nothing was
  # filtered out. with_ties = FALSE guarantees exactly one row per cluster.
  group_by(cluster) %>%
  slice_min(dist, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  select(cluster, segment_id)

Extract representative segment data

#| eval: false
#| echo: true

# Attach each flight fix's cluster label by looking up its segment id.
flight_with_clusters <- left_join(
  flight_only,
  select(flight_segments_clean, segment_id, cluster),
  by = "segment_id"
)

# Keep only the fixes of the representative segments and add the time elapsed
# since the first fix of each segment.
# NOTE(review): `seconds` is really in seconds only if LocalTime is a
# date-time whose difference resolves to second units - confirm.
rep_data <- flight_with_clusters %>%
  semi_join(closest_segments, by = "segment_id") %>%
  group_by(cluster, segment_id) %>%
  mutate(seconds = as.numeric(LocalTime - min(LocalTime))) %>%
  ungroup()

Compute time-of-day behavior frequencies

#| eval: false
#| echo: true

# One timestamp per segment, taken from the fix at position floor(n / 2).
# The index is clamped to at least 1: for a single-point segment floor(1 / 2)
# is 0, and LocalTime[0] is zero-length, which gives summarize() no value.
# NOTE(review): this is the temporal midpoint only if rows within a segment
# are in time order - confirm against how flight_only is sorted.
segment_times <- flight_only %>%
  group_by(segment_id) %>%
  summarize(segment_time = LocalTime[max(1L, floor(n() / 2))])

# Add each segment's timestamp, its hour of day, and a time-of-day block.
# Blocks: 5-9 Morning, 10-14 Midday, 15-18 Afternoon, everything else Night.
flight_segments_clean <- flight_segments_clean %>%
  left_join(segment_times, by = "segment_id") %>%
  mutate(
    hour = hour(segment_time),
    time_block = factor(
      case_when(
        hour %in% 5:9 ~ "Morning",
        hour %in% 10:14 ~ "Midday",
        hour %in% 15:18 ~ "Afternoon",
        TRUE ~ "Night"
      ),
      levels = c("Morning", "Midday", "Afternoon", "Night")
    )
  )

# Number of segments in every observed (cluster, time block) combination.
behavior_by_block <- flight_segments_clean %>%
  count(cluster, time_block, name = "n")

PCA biplot of flight clusters

#| eval: false
#| echo: true

# Biplot of the segment PCA: points are segments colored by k-means cluster,
# black arrows are the renamed feature loadings.
fviz_pca_biplot(
  pca_seg,
  habillage = flight_segments_clean$cluster,
  palette = cluster_colors,
  geom = "point",
  repel = TRUE,
  col.var = "black",
  arrowsize = 1.0
) +
  theme_minimal(base_size = 14)

Time-of-day bar plot

#| eval: false
#| echo: true

# Side-by-side bars: segment counts per time block, one bar per cluster.
ggplot(
  data = behavior_by_block,
  mapping = aes(x = time_block, y = n, fill = factor(cluster))
) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = cluster_colors)

Representative segment time-series plots

#| eval: false
#| echo: true

# Time series for the representative segments, one line per cluster.

# Speed against elapsed time.
p_speed <- ggplot(
  data = rep_data,
  mapping = aes(x = seconds, y = KPH, colour = factor(cluster))
) +
  geom_line() +
  scale_color_manual(values = cluster_colors)

# Altitude (AGL) against elapsed time.
p_alt <- ggplot(
  data = rep_data,
  mapping = aes(x = seconds, y = AGL, colour = factor(cluster))
) +
  geom_line() +
  scale_color_manual(values = cluster_colors)

# Vertical rate against elapsed time.
p_vr <- ggplot(
  data = rep_data,
  mapping = aes(x = seconds, y = VerticalRate, colour = factor(cluster))
) +
  geom_line() +
  scale_color_manual(values = cluster_colors)

# Stack the three panels vertically with patchwork.
p_speed / p_alt / p_vr