Understanding Eagle Flight Metrics

Trevor Michel

57 Eagles Were Caught Over 4 Years

Study Was Conducted To Understand Eagle Flight Patterns

My goals
- Apply knowledge of unstructured learning and replicate the study
- Find patterns in the data to define whether a eagle is perching or flying
- Display those flight Patterns

The Original Study: Bergen et al.

The Data Is Fairly Skewed

After Square Root Transformation

Select Four Clusters

Perching, Ascending, Gliding, & Flapping

Flying High Involves More Ascending & Gliding Flying Low Has More Flapping

Appendix

theres a lot here sorry :3
intentionally left blank

import

## imports
library(cluster)
library(dbscan)
library(factoextra)
library(tidyverse)
library(patchwork)
library(ggrepel)
library(ggplot2)

create functions

this is meant to be blank ## make all six density histograms and return a patchwork object

make_eagle_density_panel <- function(df) {
  # base plot reused for every panel
  base_hist <- ggplot(df) +
    geom_histogram(
      aes(y = after_stat(density)),
      bins = 40
    ) +
    theme_bw() +
    labs(x = NULL, y = "Density")

  # one panel per variable, adding only what changes
 p_kph <- base_hist +  aes(x = KPH) +  ggtitle(    paste("KPH, Wilks:", round(shapiro.test(df$KPH)$statistic, 2)))
 p_sn <- base_hist +  aes(x = Sn) +  ggtitle(    paste("SN, Wilks:", round(shapiro.test(df$Sn)$statistic, 2))) 
 p_agl <- base_hist +  aes(x = AGL) +  ggtitle(    paste("AGL, Wilks:", round(shapiro.test(df$AGL)$statistic, 2)))
 p_ang <- base_hist +  aes(x = abs_angle) +  ggtitle(    paste("abs_angle, Wilks:", round(shapiro.test(df$abs_angle)$statistic, 2)))
 p_vr <- base_hist +  aes(x = VerticalRate) +  ggtitle(    paste("VerticalRate, Wilks:", round(shapiro.test(df$VerticalRate)$statistic, 2)))
 p_avr <- base_hist +  aes(x = absVR) +  ggtitle(    paste("absVR, Wilks:", round(shapiro.test(df$absVR)$statistic, 2)))

  # stitch into 2x3 grid with patchwork
  (p_kph | p_sn | p_agl) /
    (p_ang | p_vr | p_avr)
}

make skree charts

plot_skree_results  <- function(df){

fviz_nbclust(df,
  FUNcluster = kmeans,
  method='wss')+
    
fviz_nbclust(df,
    FUNcluster =  kmeans,
    method = 'silhouette'
  )

  }

Create Kmeans Cluster

plot_kmeans_results <- function(df,centers,title,label,kmeans_clusters){

numeric_only  <-  df

eagle_pca <- prcomp(numeric_only, center=TRUE)

fviz_pca(eagle_pca, 
         habillage = factor(kmeans_clusters$cluster),
          palette = c(
            "4" = "#6C3BAA",  # Perching
            "2" = "#8ed100",  # Ascending
            "1" = "dodgerblue",  # Gliding
            "3" = "orange"   # Flapping
          ),
#         select.ind = list(name = NULL, ind = sample(nrow(wdi_pca$x), 20)),
         repel = TRUE
         ,label = label
         ,geom  = "point"
         , pointshape  = 16
        #, geom.var = c("arrow")
        ,col.var = "black"
        ,arrowsize  = .75                  # arrow thickness (make them big)
) + 
      ggtitle(title) +
      guides(shape='none') + 
      labs(color='Cluster',shape='.')+
      theme_classic() + 
      theme(legend.position = "none")
}
}

Data Management

this is meant to be blank ## Load Data

options(width=10000)
load('eagle_data.Rdata') 

eagle_data <- eagle_data %>%  mutate(date = as.Date(eagle_data$LocalTime))

randomly select a special eagle

set.seed(55600)
selected_eagle <-  eagle_data %>%  select(Animal_ID) %>% slice_sample(n = 1)

Randomly select a particular date

set.seed(121672)  # had  to use  a different seed as using the  default seed  did  not return a usefull amount of data for part 2 of this examination.
selected_date <- (eagle_data  
                  
  %>% filter(Animal_ID == selected_eagle$Animal_ID)
  %>% select(date)
  %>% group_by(date)
  %>% slice_sample(n=1)
  %>% ungroup()
  %>% slice_sample(n=1)
 )

properly sample.

#sample the data randomly, while ensuring the special bird is entirely kept on it's special date which will be used in part2 
set.seed(55600)
# 1. All rows you MUST keep  
must_keep <- eagle_data %>%
  filter(Animal_ID == selected_eagle$Animal_ID,
         date == selected_date$date)

# 2. Random sample from the rest of the data
sampled_rest <- eagle_data %>%
  # exclude the must-keep rows from the sampling pool    ## NO DUPLICATES!
  filter(!(Animal_ID == selected_eagle$Animal_ID & date == selected_date$date)) %>%
  group_by(Animal_ID) %>%
  slice_sample(n = 169000) %>% # after we bind_rows it will be just barely over 170k lines.
  ungroup()

# 3. Combine them to ensure the bird is represented fully on this date.
eagle_data_noscale <- bind_rows(must_keep, sampled_rest)

correct for skewness

eagle_corrected <- eagle_data_noscale %>%
  mutate(
    across(all_of(c("KPH", "Sn", "AGL", "abs_angle", "absVR")), ~ sqrt(.x)),
    VerticalRate = sign(VerticalRate) * sqrt(abs(VerticalRate))
  )

scale

eagle_scale <- as.data.frame(scale(eagle_corrected
                   %>% dplyr::select(KPH,Sn,AGL,abs_angle, VerticalRate, absVR)
))

cluster the data

kmeans_clusters <- kmeans(eagle_scale ,
                         centers = 4,
                         iter.max = 20,
                         nstart = 10
                         )

filter data to only one bird/day.

one_bird <- eagle_corrected %>%
  mutate(
    cluster = kmeans_clusters$cluster,
    hour    = as.numeric(format(LocalTime, "%H"))   # hour 0–23
  ) %>%
  filter(
    Animal_ID == selected_eagle$Animal_ID,
    date      == selected_date$date,
  ) %>%
  arrange(LocalTime)

set up data for elevation chart…

# Start from one_bird; rebuild plotting data for continuous time
one_bird_plot <- (one_bird %>%
  arrange(LocalTime) %>%
  mutate(
    # numeric hour (0–23)
    hour = as.numeric(format(LocalTime, "%H")),
    # map cluster codes to behaviour names (your mapping)
    behaviour = factor(
      cluster,
      levels = c(2, 3, 1, 4),   # 2=Ascending, 3=Flapping, 1=Gliding, 4=Perching
      labels = c("Ascending", "Flapping", "Gliding", "Perching")
    )
  ) %>% 
  filter(behaviour != "Perching") 
)

display the actual charts

this is meant to be blank ## Create density plot for data before square root transformation

set.seed(55600)

make_eagle_density_panel(eagle_data_noscale %>% slice_sample(n=5000) %>% select(KPH,Sn,AGL,abs_angle, VerticalRate, absVR)
 )

create density plot for data after square root transformation

set.seed(55600)

make_eagle_density_panel(eagle_corrected %>% slice_sample(n=5000))

create skree chart

set.seed(55600)

plot_skree_results(as.data.frame(eagle_scale) %>% slice_sample(n = 25000))                      ### change to 25k

create clust chart

plot_kmeans_results(eagle_scale
                    ,4,"","var",kmeans_clusters)

create the elevation chart.

# Plot ONLY data between 12:00 and 13:00, but with a continuous time axis
ggplot(
  data = subset(one_bird_plot, hour >= 12 & hour < 13),
  aes(x = LocalTime, y = AGL, group = 1)
) +
  geom_path(color = "grey70", linetype = "dashed") +
  geom_point(aes(color = behaviour), size = 1.3) +
  scale_color_manual(
    name   = NULL,
    values = c(
      "Ascending" = "#8ed100",
      "Flapping"  = "orange",
      "Gliding"   = "dodgerblue"
    )
  ) +
  # control where ticks go + how they are labeled
  scale_x_datetime(
    date_breaks = "15 min",         # tick every 15 minutes (change as you like)
    date_labels = "%H:%M"           # show hour:minute
  ) +
  labs(
    x = "Time of day",
    y = "Meters Above Ground"
  )  + 
  theme_classic() +
  theme(legend.position = "none")