Exam 2 — Jaden McCoy

Introduction

  • Ecologists in Iowa captured eagle movement data and wanted to use analytic tools to understand movement patterns.

  • Movements were tracked with a GPS directly attached to eagles.

  • The dataset captures how eagles travel across the landscape, move vertically, and change speed over time.

  • Our goal was to distinguish perching from flight behaviors so that we could then identify patterns in everyday eagle flight.

Data Methods

  • Flight points were identified using a threshold of >2 KPH.

  • Movement variables used for analysis included speed (average and instantaneous), altitude, vertical rate, and turning angle.

  • A 50,000-point random sample was taken to examine overall movement structure.

Flight Behavior Clustering

  • GPS fixes were grouped into flight segments (KPH > 2), and segments with fewer than 5 GPS points were removed.

  • Segments were summarized and scaled. Both averages and standard deviations were used in the clustering and PCA plots.

  • These features were placed into a PCA behavior space and clustered with k-means (k = 4) to identify four movement types, based on summarized segments.

Deeper Cluster Understanding

  • For each cluster, a representative flight segment was selected.

  • Segments were filtered to those within ±30% of the median length.

  • Within PCA space, the segments closest to the cluster’s centroid were chosen.

  • These representative segments were then visualized using movement paths and time series of speed, altitude, and vertical rate.

Perch Vs. Flight Points

Differing Flight Patterns

Flight Pattern Characteristics

Results Overview

  • It is possible to identify perch vs flight points.

  • A threshold of KPH > 2 easily distinguishes these points.

  • Flight patterns are possible to identify.

    • Clusters 1 & 3, soaring vs searching behaviors

    • Clusters 2 & 4, steady vs fast traveling behaviors.

  • Overall, we are able to identify bird behavior patterns through GPS tracking. This leaves a lot of room to explore why these behaviors occur and how common they are.

Research Question 1 Appendix (Install Packages)

#| eval: false
#| echo: true

# One-time setup: install every package that the analysis chunks load.
required_packages <- c(
  "tidyverse", "factoextra", "cluster", "dbscan",
  "lubridate", "patchwork", "ggrepel"
)

install.packages(required_packages)

Load Packages

#| eval: false
#| echo: true

library(tidyverse)
library(factoextra)
library(cluster)
library(dbscan)
library(lubridate)
library(patchwork)
library(ggrepel)

Create random 50,000-point sample & classify flight vs perch

#| eval: false
#| echo: true

# Fix the RNG seed so the same 50,000 rows are drawn on every run.
set.seed(1)

# Draw 50,000 row indices without replacement and keep those rows.
sample1 <- eagle_data[sample.int(nrow(eagle_data), size = 50000), ]

# Label each fix as "flight" (KPH > 2) or "perch", then keep only the
# movement variables plus the label.
s1 <- select(
  mutate(sample1, flight_status = ifelse(KPH > 2, "flight", "perch")),
  KPH, Sn, AGL, VerticalRate, abs_angle, flight_status
)

PCA to visualize perch vs flight

#| eval: false
#| echo: true

# PCA on the scaled movement variables; the flight/perch label is
# excluded from the PCA input and only used to colour the points below.
pca_sample1 <- s1 %>%
  select(-flight_status) %>%
  scale() %>%
  prcomp()

# Colours keyed by flight status.
status_palette <- c("flight" = "#CC3D3D", "perch" = "#005A9C")

# Individuals plot: one point per sampled fix, coloured by flight status.
fviz_pca_ind(
  pca_sample1,
  title = "PCA – Sample 1 (50,000 rows)",
  geom = "point",
  alpha = 0.4,
  addEllipses = FALSE,
  habillage = s1$flight_status,
  palette = status_palette
) +
  theme_minimal(base_size = 14)

Research Question 2 Appendix (Filter to Flight Only Points)

#| eval: false
#| echo: true

# Keep only the fixes classified as flight (speed above 2 KPH).
flight_only <- filter(eagle_data, KPH > 2)

Compute segment-level movement summaries

#| eval: false
#| echo: true

# Flight fixes belonging to segments with at least 5 points.
long_enough_fixes <- flight_only %>%
  group_by(segment_id) %>%
  filter(n() >= 5)

# One row per remaining segment: means and standard deviations of the
# movement variables, plus the number of fixes in the segment.
flight_segments <- long_enough_fixes %>%
  summarise(
    mean_KPH = mean(KPH),
    mean_AGL = mean(AGL),
    mean_VR = mean(VerticalRate),
    mean_abs_angle = mean(abs_angle),
    mean_Sn = mean(Sn),
    sd_KPH = sd(KPH),
    sd_VR = sd(VerticalRate),
    sd_abs_angle = sd(abs_angle),
    n_points = n(),
    .groups = "drop"
  )

PCA on segment features

#| eval: false
#| echo: true

# Segment-level features that feed the PCA.
segment_feature_cols <- c(
  "mean_KPH", "mean_AGL", "mean_VR", "mean_abs_angle",
  "mean_Sn", "sd_KPH", "sd_VR", "sd_abs_angle", "n_points"
)

# Centre and scale each feature so none dominates because of its units.
seg_mat <- flight_segments %>%
  select(all_of(segment_feature_cols)) %>%
  scale()

# Principal components of the scaled segment features.
pca_seg <- prcomp(seg_mat)

K-means clustering (k = 4)

#| eval: false
#| echo: true

# Fix the RNG seed so the k-means random starts are reproducible.
set.seed(1)

# Drop incomplete segments BEFORE clustering. kmeans() rejects NA input,
# and the cluster labels must line up row-for-row with the table they are
# attached to; dropping rows after clustering would break that alignment.
flight_segments_clean <- flight_segments %>%
  drop_na()

# Scaled feature matrix built from exactly the rows that will be labelled.
# When no rows are dropped this holds the same values as `seg_mat`.
cluster_features <- flight_segments_clean %>%
  select(mean_KPH, mean_AGL, mean_VR, mean_abs_angle,
         mean_Sn, sd_KPH, sd_VR, sd_abs_angle, n_points) %>%
  scale()

# k-means with 4 clusters; 25 random starts, keeping the best solution.
km4 <- kmeans(cluster_features, centers = 4, nstart = 25)

# Attach the cluster label to each (complete) segment.
flight_segments_clean <- flight_segments_clean %>%
  mutate(cluster = factor(km4$cluster))

# Fixed colour per cluster label, reused by every cluster plot.
cluster_colors <- c("1"="#0072B2","2"="#E69F00","3"="#009E73","4"="#CC79A7")

Rename variables for PCA biplot arrows

#| eval: false
#| echo: true

# Human-readable labels for the biplot arrows, listed in the same order
# as the feature columns selected below.
colnames_for_pca <- c(
  "Mean Speed (KPH)", "Mean Altitude", "Mean Vertical Rate",
  "Mean Turning Angle", "Mean Average Speed", "Speed Variability",
  "Vertical Rate Variability", "Turning Angle Variability",
  "Segment Length"
)

# Same segment features as before, relabelled with the display names.
seg_raw <- flight_segments %>%
  select(mean_KPH, mean_AGL, mean_VR, mean_abs_angle,
         mean_Sn, sd_KPH, sd_VR, sd_abs_angle, n_points) %>%
  setNames(colnames_for_pca)

# Re-run scaling and PCA so the loadings carry the display names.
seg_mat <- scale(seg_raw)
pca_seg <- prcomp(seg_mat)

Select representative segments (closest to centroids)

#| eval: false
#| echo: true

# Median number of fixes per segment across all clustered segments.
global_median_len <- median(flight_segments_clean$n_points)

# Candidate representatives: segments whose length is within +/-30% of
# the median length.
eligible_segments <- flight_segments_clean %>%
  filter(
    n_points >= 0.7 * global_median_len,
    n_points <= 1.3 * global_median_len
  )

# First two PC scores per segment, tagged with segment id and cluster.
# Relies on pca_seg rows being in the same order as flight_segments_clean.
segment_scores <- as.data.frame(pca_seg$x[, 1:2]) %>%
  mutate(
    segment_id = flight_segments_clean$segment_id,
    cluster = flight_segments_clean$cluster
  )

# PC scores restricted to the eligible segments.
eligible_scores <- segment_scores %>%
  filter(segment_id %in% eligible_segments$segment_id)

# Per-cluster centroid in (PC1, PC2), computed from eligible segments.
centroids <- eligible_scores %>%
  group_by(cluster) %>%
  summarize(PC1 = mean(PC1), PC2 = mean(PC2))

# For each cluster, keep the ONE eligible segment nearest its centroid.
# The distance is computed column-wise and the minimum is taken per
# cluster; under rowwise() every row is its own group, so slice_min()
# would keep every row instead of the closest one.
closest_segments <- eligible_scores %>%
  inner_join(centroids, by = "cluster", suffix = c(".x", ".y")) %>%
  mutate(dist = sqrt((PC1.x - PC1.y)^2 + (PC2.x - PC2.y)^2)) %>%
  group_by(cluster) %>%
  slice_min(dist, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  select(cluster, segment_id)

Extract representative segment data

#| eval: false
#| echo: true

# Attach each flight fix's segment-level cluster label. Fixes whose
# segment has no row in flight_segments_clean get cluster = NA.
flight_with_clusters <- flight_only %>%
  left_join(
    flight_segments_clean %>% select(segment_id, cluster),
    by = "segment_id"
  )

# Fixes of the representative segments, with elapsed time since the
# start of each segment.
rep_data <- flight_with_clusters %>%
  semi_join(closest_segments, by = "segment_id") %>%
  group_by(cluster, segment_id) %>%
  mutate(
    # Units are requested explicitly so `seconds` never depends on the
    # unit difftime would otherwise pick automatically.
    # Assumes LocalTime is a date-time (POSIXct) column - TODO confirm.
    seconds = as.numeric(difftime(LocalTime, min(LocalTime), units = "secs"))
  ) %>%
  ungroup()

Compute time-of-day behavior frequencies

#| eval: false
#| echo: true

# One timestamp per segment: the LocalTime of the segment's middle row.
# ceiling(n() / 2) is the middle row for odd lengths and row 1 for a
# single-fix segment; floor(n() / 2) would index position 0 for one-row
# segments, which returns an empty value instead of a timestamp.
# Assumes rows within a segment are in time order - TODO confirm.
segment_times <- flight_only %>%
  group_by(segment_id) %>%
  summarize(segment_time = LocalTime[ceiling(n() / 2)])

# Add each segment's timestamp, its hour of day, and a time-of-day block.
flight_segments_clean <- flight_segments_clean %>%
  left_join(segment_times, by = "segment_id") %>%
  mutate(
    hour = hour(segment_time),
    time_block = case_when(
      # Keep unknown times as NA; without this branch the catch-all
      # below would label segments with a missing timestamp "Night".
      is.na(hour) ~ NA_character_,
      hour >= 5  & hour < 10 ~ "Morning",
      hour >= 10 & hour < 15 ~ "Midday",
      hour >= 15 & hour < 19 ~ "Afternoon",
      # Remaining hours: 19 through 23 and 0 through 4.
      TRUE ~ "Night"
    ),
    # Fixed level order so plots run Morning -> Night.
    time_block = factor(time_block,
                        levels = c("Morning","Midday","Afternoon","Night"))
  )

# Number of segments in each cluster x time-of-day combination
# (count stored in column `n`).
behavior_by_block <- count(flight_segments_clean, cluster, time_block)

PCA biplot of flight clusters

#| eval: false
#| echo: true

# Biplot of the segment PCA: points are segments coloured by k-means
# cluster, arrows are the feature loadings drawn in black.
cluster_biplot <- fviz_pca_biplot(
  pca_seg,
  habillage = flight_segments_clean$cluster,
  palette = cluster_colors,
  geom = "point",
  repel = TRUE,
  col.var = "black",
  arrowsize = 1.0
)

cluster_biplot + theme_minimal(base_size = 14)

Time-of-day bar plot

#| eval: false
#| echo: true

# Dodged bars: segment count per time block, one bar per cluster.
time_of_day_bars <- ggplot(
  data = behavior_by_block,
  mapping = aes(x = time_block, y = n, fill = factor(cluster))
)

time_of_day_bars +
  geom_col(position = "dodge") +
  scale_fill_manual(values = cluster_colors)

Representative segment time-series plots

#| eval: false
#| echo: true

# Layers shared by all three panels: a line per cluster, coloured with
# the fixed cluster palette.
cluster_line_layers <- list(
  geom_line(),
  scale_color_manual(values = cluster_colors)
)

# Speed over elapsed seconds for the representative segments.
p_speed <- ggplot(
  rep_data,
  aes(x = seconds, y = KPH, color = factor(cluster))
) +
  cluster_line_layers

# AGL over elapsed seconds.
p_alt <- ggplot(
  rep_data,
  aes(x = seconds, y = AGL, color = factor(cluster))
) +
  cluster_line_layers

# Vertical rate over elapsed seconds.
p_vr <- ggplot(
  rep_data,
  aes(x = seconds, y = VerticalRate, color = factor(cluster))
) +
  cluster_line_layers

# Stack the three panels vertically (patchwork `/` operator).
p_speed / p_alt / p_vr