Labeling Eagle Flight Charecteristics From Unstructered Data

Trevor Michel

57 Eagles Were Caught And Tracked Over 4 Years

  • A solar powered gps tracker was strapped externally to each bird

  • Over 2 million data points collected

Study Was Conducted To Understand Eagle Flight Patterns

  • My goals
    • Apply knowledge of unstructured learning and replicate the study
    • Find patterns in the data to define whether a eagle is perching or flying
    • Display those flight Patterns

The Original Study: Bergen et al.

The Data Is Fairly Skewed Which Would Create Clustering Issues

Skewness Was Repaired After Square Root Transformation

Four Clusters Chosen As The Point Of Diminishing Gains

Perching, Ascending, Gliding, & Flapping

Flying High Involves More Ascending & Gliding Flying Low Has More Flapping

Conclusion

  • Thank you for viewing my project

  • Please let me know if you have any questions

  • Continue to see the code used during this project

Variable overview

Table 1 from Bergen et al. # imports

library(cluster)
library(dbscan)
library(factoextra)
library(tidyverse)
library(patchwork)
library(ggrepel)
library(ggplot2)

make all six density histograms and return a patchwork object

  • defined function
make_eagle_density_panel <- function(df) {
  # base plot reused for every panel
  base_hist <- ggplot(df) +
    geom_histogram(
      aes(y = after_stat(density)),
      bins = 40
    ) +
    theme_bw() +
    labs(x = NULL, y = "Density")
  
  # one panel per variable, adding only what changes
 p_kph <- base_hist +  aes(x = KPH) +  ggtitle(    paste("Speed (KPH), Wilks:", round(shapiro.test(df$KPH)$statistic, 2)))
 p_sn <- base_hist +  aes(x = Sn) +  ggtitle(    paste("Distance Between Points, Wilks:", round(shapiro.test(df$Sn)$statistic, 2))) 
 p_agl <- base_hist +  aes(x = AGL) +  ggtitle(    paste("Above Ground Level, Wilks:", round(shapiro.test(df$AGL)$statistic, 2)))
 p_ang <- base_hist +  aes(x = abs_angle) +  ggtitle(    paste("Abs Angle, Wilks:", round(shapiro.test(df$abs_angle)$statistic, 2)))
 p_vr <- base_hist +  aes(x = VerticalRate) +  ggtitle(    paste("Vertical Rate, Wilks:", round(shapiro.test(df$VerticalRate)$statistic, 2)))
 p_avr <- base_hist +  aes(x = absVR) +  ggtitle(    paste("Abs Vertical Rate, Wilks:", round(shapiro.test(df$absVR)$statistic, 2)))

  # stitch into 2x3 grid with patchwork
  (p_kph | p_sn | p_agl) /
    (p_ang | p_vr | p_avr)
}

make skree charts

  • defined function
plot_skree_results  <- function(df){

fviz_nbclust(df,
  FUNcluster = kmeans,
  method='wss')+
    
fviz_nbclust(df,
    FUNcluster =  kmeans,
    method = 'silhouette'
  )

  }

Create Kmeans Cluster

  • defined function
plot_kmeans_results <- function(df,centers,title,label,kmeans_clusters){

custom_labels <- c(
    "AGL" = "Above Ground Level"
  , "absVR" = "Abs Vertical Rate"
  , "VerticalRate" = "Vertical Rate"
  , "abs_angle" = "Abs Angle"
  , "Sn" = "Distance Between Points"
  , "KPH" = "Speed (KPH)" )  
  
numeric_only <- df 
colnames(numeric_only) <- custom_labels[colnames(numeric_only)]

eagle_pca <- prcomp(numeric_only, center=TRUE)



fviz_pca(eagle_pca, 
         habillage = factor(kmeans_clusters$cluster),
          palette = c(
            "4" = "#6C3BAA",  # Perching
            "2" = "#8ed100",  # Ascending
            "1" = "dodgerblue",  # Gliding
            "3" = "orange"   # Flapping
          ),
#         select.ind = list(name = NULL, ind = sample(nrow(wdi_pca$x), 20)),
         repel = TRUE
         ,label = label
         ,geom  = "point"
         , pointshape  = 16
        #, geom.var = c("arrow")
        ,col.var = "black"
        ,arrowsize  = .75                  # arrow thickness (make them big)
) + 
      ggtitle(title) +
      guides(shape='none') + 
      labs(color='Cluster',shape='.')+
      theme_classic() + 
      theme(legend.position = "none")

}

Data Management

# Load Data
options(width=10000)
load('eagle_data.Rdata') 

eagle_data <- eagle_data %>%  mutate(date = as.Date(eagle_data$LocalTime))

randomly select a special eagle

set.seed(55600)
selected_eagle <-  eagle_data %>%  select(Animal_ID) %>% slice_sample(n = 1)

Randomly select the date

set.seed(121672)  # had  to use  a different seed as using the  default seed  did  not return a usefull amount of data for part 2 of this examination.
selected_date <- (eagle_data  
                  
  %>% filter(Animal_ID == selected_eagle$Animal_ID)
  %>% select(date)
  %>% group_by(date)
  %>% slice_sample(n=1)
  %>% ungroup()
  %>% slice_sample(n=1)
 )

properly sample.

#sample the data randomly, while ensuring the special bird is entirely kept on it's special date which will be used in part 2 
set.seed(55600)

# 1. All rows you MUST keep  
must_keep <- eagle_data %>%
  filter(Animal_ID == selected_eagle$Animal_ID,
         date == selected_date$date)

# 2. Random sample from the rest of the data
sampled_rest <- eagle_data %>%
  # exclude the must-keep rows from the sampling pool    ## NO DUPLICATES!
  filter(!(Animal_ID == selected_eagle$Animal_ID & date == selected_date$date)) %>%
  group_by(Animal_ID) %>%
  slice_sample(n = 169000) %>% # after we bind_rows it will be just barely over 170k lines.
  ungroup()

# 3. Combine them to ensure the bird is represented fully on this date.
eagle_data_noscale <- bind_rows(must_keep, sampled_rest)

correct for skewness

eagle_corrected <- eagle_data_noscale %>%
  mutate(
    across(all_of(c("KPH", "Sn", "AGL", "abs_angle", "absVR")), ~ sqrt(.x)),
    VerticalRate = sign(VerticalRate) * sqrt(abs(VerticalRate))
  )

scale

eagle_scale <- as.data.frame(scale(eagle_corrected
                   %>% dplyr::select(KPH,Sn,AGL,abs_angle, VerticalRate, absVR)
))

cluster the data

kmeans_clusters <- kmeans(eagle_scale ,
                         centers = 4,
                         iter.max = 20,
                         nstart = 10
                         )

filter data to only one bird/day.

one_bird <- eagle_corrected %>%
  mutate(
    cluster = kmeans_clusters$cluster,
    hour    = as.numeric(format(LocalTime, "%H"))   # hour 0–23
  ) %>%
  filter(
    Animal_ID == selected_eagle$Animal_ID,
    date      == selected_date$date,
  ) %>%
  arrange(LocalTime)

set up data for elevation chart

# Start from one_bird; rebuild plotting data for continuous time
one_bird_plot <- (one_bird %>%
  arrange(LocalTime) %>%
  mutate(
    # numeric hour (0–23)
    hour = as.numeric(format(LocalTime, "%H")),
    # map cluster codes to behaviour names (your mapping)
    behaviour = factor(
      cluster,
      levels = c(2, 3, 1, 4),   # 2=Ascending, 3=Flapping, 1=Gliding, 4=Perching
      labels = c("Ascending", "Flapping", "Gliding", "Perching")
    )
  ) %>% 
  filter(behaviour != "Perching") 
)

The Data Is Fairly Skewed Which Would Create Clustering Issues

set.seed(55600)

make_eagle_density_panel(eagle_data_noscale %>% slice_sample(n=5000) %>% select(KPH,Sn,AGL,abs_angle, VerticalRate, absVR)
 )

Skewness Was Repaired After Square Root Transformation

set.seed(55600)

make_eagle_density_panel(eagle_corrected %>% slice_sample(n=5000))

Four Clusters Chosen As The Point Of Diminishing Gains

set.seed(55600)

plot_skree_results(as.data.frame(eagle_scale) %>% slice_sample(n = 25000))                      ### change to 25k

Perching, Ascending, Gliding, & Flapping

plot_kmeans_results(eagle_scale
                    ,4,"","var",kmeans_clusters)

Flying High Involves More Ascending & Gliding Flying Low Has More Flapping

# Plot ONLY data between 13:00 and 14:00, but with a continuous time axis
ggplot(
  data = subset(one_bird_plot, hour >= 12 & hour < 13),
  aes(x = LocalTime, y = AGL, group = 1)
) +
  geom_path(color = "grey70", linetype = "dashed") +
  geom_point(aes(color = behaviour), size = 1.3) +
  scale_color_manual(
    name   = NULL,
    values = c(
      "Ascending" = "#8ed100",
      "Flapping"  = "orange",
      "Gliding"   = "dodgerblue"
    )
  ) +
  # control where ticks go + how they are labeled
  scale_x_datetime(
    date_breaks = "15 min",         # tick every 15 minutes (change as you like)
    date_labels = "%H:%M"           # show hour:minute
  ) +
  labs(
    x = "Time of day",
    y = "Meters Above Ground"
  )  + 
  theme_classic() +
  theme(legend.position = "none")

Conclusion

  • Thank you for viewing my project

  • Please let me know if you have any questions!