Analyzing Flight Behaviors of Bald Eagles

Background

Background

  • Bergen et al. 2021
  • 2,093,022 GPS points of 57 different eagles
  • GPS points broken into segments
  • Instantaneous speed (KPH), average speed (m/s), change in vertical velocity (m/s), turn angle (radians), etc.

Goals

  • Can we identify GPS points in which the eagles are perched and which points they are flying?
  • Of the in-flight points, can we identify certain flight behaviors and the characteristics of these behaviors?

Methods

Data Cleaning

  • Several variables had skewed distributions.
    • Square root transformation
  • KPH, Sn, AGL, Turn Angle and Vertical Rate

Example of Transformation

Clustering

  • Performed a cluster analysis of the GPS Points
    • K-means
  • Used WSS and Average Silhouette Width to compare 2-7 clusters
    • Bootstrapped Silhouette Widths
  • Examined clusters using PCA and other visualizations to identify behaviors

Choosing K

Results

PCA Biplot

  • PCA Biplot of Eagle 112’s Flight Points
  • Points in the red cluster has low speed variables along with a lower distance above ground level
  • Points in the blue cluster have a high vertical rate and turning angle
  • Points in the purple cluster have a high speed and a lower vertical rate
  • Points in the green cluster appear to have middling to lower levels of each variable

Variable Boxplots

Flight Behaviors Summarized

Cluster Speed Vertical Rate Turning Angle AGL Behavior
1 0 0 Highly Variant Near 0 Perching
2 Medium 0 Lower Near 0 Maintaining
3 Medium Positive High Positive Climbing
4 High Negative Near 0 Positive Gliding

Visualizing a Flight Segment

  • Visualization of Flight Segment 60036 by Eagle 140.
  • Green points: Eagle is maintaining is position
  • Blue Points: Eagle is ascending
  • Purple Points: Eagle is descending

Conclusion

Conclusion

  • By using K-means clustering, we are able to identify 4 distinct behaviors of eagles
  • Perching, maintaining, climbing, and gliding.
  • Average speed, instantaneous speed, vertical climb rate, turning angle, and distance from the ground are important variables for distinguishing these behaviors.

Questions

Appendix

Preparing Data

# Packages Used
library(tidyverse)  # For Data Cleaning and Visualizing
library(cluster)    # For Clustering the Data
library(ggrepel)    # For Lableing Variables on PCA Plots
library(patchwork)  # For Putting Plots Together

#Loading Data

load('eagle_data.Rdata')

(
  eagle_data
  %>% mutate(KPH = sqrt(KPH),
             Sn = sqrt(Sn),
             AGL = sqrt(AGL),
             abs_angle = sqrt(abs_angle),
             absVR = sqrt(absVR))
) -> eagle_data # Square Root Transforming the variables


eagle_scaled <- (
  eagle_data
  %>% select(KPH:absVR)             # Selecting the numeric variables for Clustering
  %>% scale(scale = T, center = T)  # Scaling the data to put all of the variables on the same scale
) 

eagle_pca <- prcomp(eagle_scaled)   # Performing PCA on the data.

Creating the Square Root Transformed Distributions

ggplot(data = eagle_data, aes(x = (KPH)**2)) +              # Squaring KPH since we square-rooted it earlier
  geom_density() +                                          # Creating the Density Curve
  xlab('KPH') + ylab('Density') +
  ggtitle('Density of KPH') +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, size = 18),  # Adjusting the position and size of the title and axis labels
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text = element_text(size = 13),
        # element_text(size = 16)
        )

ggplot(data = eagle_data, aes(x = (KPH))) +                 # The Data frame is storing square root of KPH as KPH
  geom_density() +    
  xlab('Square Root of KPH') + ylab('Density') +
  theme_classic() +
  ggtitle('Density of Square Root of KPH') +
  theme(plot.title = element_text(hjust = 0.5, size = 18),
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text = element_text(size = 13),
        legend.title = element_text(size = 18),
        legend.text = element_text(size = 14),
        # element_text(size = 16)
        )

Clustering and Calculating WSS

set.seed(6725)

kmean_2 <- kmeans(eagle_scaled, centers = 2, iter.max = 20, nstart = 10)  # K-means for 2 clusters

set.seed(8770)

kmean_3 <- kmeans(eagle_scaled, centers = 3, iter.max = 20, nstart = 10)  # K-means for 3 Clusters

set.seed(5255)

kmean_4 <- kmeans(eagle_scaled, centers = 4, iter.max = 20, nstart = 10)  # K-means for 4 Clusters

set.seed(9925)

kmean_5 <- kmeans(eagle_scaled, centers = 5, iter.max = 20, nstart = 10)  # K-means for 5 Clusters

set.seed(2770)

kmean_6 <- kmeans(eagle_scaled, centers = 6, iter.max = 20, nstart = 10)  # K-means for 6 Clusters

set.seed(5403)

kmean_7 <- kmeans(eagle_scaled, centers = 7, iter.max = 20, nstart = 10)  # K-means for 7 Clusters

# All seeds came from random.org. A random number was generated between 1 and 10000. This was done so that cluster definitions would not change each time I render this document

wss_df <- (
  data.frame(clusters = c(2, 3, 4, 5, 6, 7), 
             wss = c(kmean_2$tot.withinss, kmean_3$tot.withinss,
                     kmean_4$tot.withinss, kmean_5$tot.withinss,
                     kmean_6$tot.withinss, kmean_7$tot.withinss
                     ))
)     # This data frame is stroing the WSS from each clustering solution. This data frame will be used to create the WSS plot later

Bootstrapping the silhouette widths

n = 100     # Number of bootstrap Samples
kmeans_sil <- data.frame(trial = c(0), clusters = c(0), avg_sil_width = c(0))  # Creating an Empty Data Frame to store the silhouette widths

(
  eagle_data
  %>% mutate(cluster_2 = kmean_2$cluster,
             cluster_3 = kmean_3$cluster,
             cluster_4 = kmean_4$cluster,
             cluster_5 = kmean_5$cluster,
             cluster_6 = kmean_6$cluster,
             cluster_7 = kmean_7$cluster)
) -> eagle_bootstrap

# Each of the points was assigned to a cluster for each of the k-means solutions (k=2 - k = 7). This data frame above is assigning each point to its cluster for each solution. This is the data frame we will take bootstrap samples from.

for (i in 1:n) {
  bootstrap_sample <- (
    eagle_bootstrap
    %>% sample_n(size = 10000, replace = FALSE)   # Taking a bootstrap sample of size 10,000
  )
  
  dist <- (
    bootstrap_sample
    %>% select(KPH:absVR)     # Selecting just the numeric variables
    %>% scale(scale = T, center = T)    # Scaling the variables
    %>% dist()    # Computing a Distance matrix for the scaled data
  )
  
  # These 6 lines of code calculate the silhouette width for each of the points for each solution.
  sils_2 <- silhouette(bootstrap_sample$cluster_2, dist)  
  sils_3 <- silhouette(bootstrap_sample$cluster_3, dist)
  sils_4 <- silhouette(bootstrap_sample$cluster_4, dist)
  sils_5 <- silhouette(bootstrap_sample$cluster_5, dist)
  sils_6 <- silhouette(bootstrap_sample$cluster_6, dist)
  sils_7 <- silhouette(bootstrap_sample$cluster_7, dist)
  
  # This data frame below saves the trial number, the number of clusters, and the average silhouette width.
  
  df_of_rows <- data.frame(trial = c(i,i,i,i,i,i), clusters = c(2, 3, 4, 5, 6, 7),
                           avg_sil_width = c(mean(sils_2[,3]),
                                         mean(sils_3[,3]),
                                         mean(sils_4[,3]),
                                         mean(sils_5[,3]),
                                         mean(sils_6[,3]),
                                         mean(sils_7[,3]))
       )
  kmeans_sil <- rbind(kmeans_sil, df_of_rows) # Adds the trial to the data frame that stores the silhouette widths
}

kmeans_sil <- (
  kmeans_sil
  %>% filter(clusters != 0)   # Filters out the blank row that was used to setup the data frame.
)

sil_mean <- (
  kmeans_sil
  %>% group_by(clusters)
  %>% summarize(avg = mean(avg_sil_width))    # Computes the average silhouette width across all trials to graph the overall average silhouette width later.
)

Choosing K

ggplot(data = wss_df, aes(x = clusters, y = wss)) + 
  geom_point() +      # Adds a point of the WSS for each cluster
  geom_line() +       # Connects the points with a line
  xlab('Number of Clusters') + ylab('WSS') +
  ggtitle('WSS by Cluster') +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, size = 18),     # Adjusts the size and position of the text
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text = element_text(size = 13),
        )


ggplot() +
  geom_line(data = kmeans_sil, aes(x = clusters, y = avg_sil_width, group = trial), alpha = 0.5) +   # Creates one line for each bootstrap trial. This line is the average silhouette width for each cluster.
  geom_line(data = sil_mean, aes(x = clusters, y = avg), col = 'firebrick2', linewidth = 2) +     # Creates a big red line for the average silhouette width by cluster across all trials.
  xlab('Number of Clusters') + ylab('Average Bootstrapped Silhouette Width') +
  ggtitle('Average Bootstrapped Silhouette \nWidth by Cluster') +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, size = 18),   # Adjusts the size and position of the text
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text = element_text(size = 13),
        legend.title = element_text(size = 18),
        legend.text = element_text(size = 14),
        # element_text(size = 16)
        )

PCA Biplot

(
  eagle_data
  %>% mutate(cluster = as.factor(kmean_4$cluster))
  %>% cbind(eagle_pca$x[,1:2])
) -> eagles_points        # Adds the Cluster assignments and the PC1 and PC2 values for each point into a data frame.

(
  data.frame(eagle_pca$rotation[,1:2])
  %>% rownames_to_column(var = 'var')
) -> arrow_df       # Pulls the loadings for PC1 and PC2 and adds them to a data frame. The purpose is to create the arrows on the Biplot.


# PCA Biplot Code

ggplot() +
  geom_point(data = eagles_points %>% filter(Animal_ID == 112), aes(x = PC1, y = PC2, color = cluster), shape = '.') + # Adds each point to the plot. Each point is color coded by its cluster.
  geom_segment(data = arrow_df, aes(x = 0, y = 0, xend = PC1*5, yend = PC2*5),  # Adds the arrows of the variables to this plot. The arrows on their own were too small, so they were scaled to be visible.
               arrow = arrow(length = unit(0.2, "cm"))) +
  geom_text_repel(data = arrow_df, aes(x = PC1*5, y = PC2*5, label = var)) +   # Adds the labels of the variables to the plot.
  xlab('PC1') + ylab('PC2') +
  ggtitle('PCA Biplot of Eagle 112') +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, size = 18),    # Adjusts the size and position of the text. 
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text = element_text(size = 13),
        legend.position = "none"                       # Removes the legend
        # element_text(size = 16)
        )

Variable Boxplots

eagle_clusters <- (                               # Puts the numeric variables back on their regular scales. This is done for visualization purposes.
  eagle_data
  %>% mutate(KPH = KPH**2,
             Sn = Sn**2,
             AGL = AGL**2,
             abs_angle = abs_angle**2,
             absVR = absVR**2)
  %>% mutate(cluster = factor(kmean_4$cluster))
)

vr <- (
  ggplot(data = eagle_clusters, aes(x =cluster, y = VerticalRate, fill = cluster)) + 
  geom_boxplot(outlier.shape = NA) + # Remove the outliers from the plot
  xlab('Cluster') + ylab('Vertical Rate (m/s)') + ggtitle('Vertical rate by Cluster') +
  ylim(-5, 5) +                                     # Limits the y-axis to make the plots visible
  geom_hline(yintercept = 0, linetype = 'dashed') +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, size = 18),   # Adjusts the position and size of the text
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text = element_text(size = 13),
        legend.position = "none"
        # element_text(size = 16)
        )
)

angle <- (
  ggplot(data = eagle_clusters, aes(x =cluster, y = abs_angle, fill = cluster)) + 
  geom_boxplot(outlier.shape = NA) + # Remove the outliers from the plot
  xlab('Cluster') + ylab('Angle (Radians)') + ggtitle('Absolute Value of Turning \nAngle by Cluster') +
  geom_hline(yintercept = 0, linetype = 'dashed') +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, size = 18),   # Adjusts the position and size of the text
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text = element_text(size = 13),
        legend.position = "none"
        # element_text(size = 16)
        )
)

agl <- (
  ggplot(data = eagle_clusters, aes(x =cluster, y = AGL, fill = cluster)) + 
  geom_boxplot(outlier.shape = NA) +       # Remove the outliers from the plot
  xlab('Cluster') + ylab('Distance (Meters)') + ggtitle('Distance Above Ground \nLevel by Cluster') +
  geom_hline(yintercept = 0, linetype = 'dashed') +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, size = 18),  # Adjusts the position and size of the text
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text = element_text(size = 13),
        legend.position = "none"
        # element_text(size = 16)
        )
)

kph <- (
  ggplot(data = eagle_clusters, aes(x =cluster, y = KPH, fill = cluster)) + 
  geom_boxplot(outlier.shape = NA) +     # Remove the outliers from the plot
  xlab('Cluster') + ylab('KPH') + ggtitle('KPH by Cluster') +
  geom_hline(yintercept = 0, linetype = 'dashed') +
  theme_classic()+
  theme(plot.title = element_text(hjust = 0.5, size = 18),   # Adjusts the position and size of the text
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text = element_text(size = 13),
        legend.position = "none"
        # element_text(size = 16)
        )
)



# This code uses the patchwork package to put the plots together into one plot

kph + angle +
vr + agl +
plot_layout(ncol = 2)

Visualizing a Flight Segment

ggplot(data = eagle_clusters %>% filter(segment_id == 60036), aes(x = (
  as.numeric(LocalTime) - as.numeric((eagle_clusters %>%  filter(segment_id == 60036) %>%  summarize(min(as.numeric(LocalTime))))        # This code is setting the start of the segment to 0 seconds to make the numbers interpretable.
  ), y = AGL)) +
  geom_point(aes(color = cluster)) +
  xlab('Time (Seconds)') + ylab('Distance (Meters)') +
  ggtitle('Distance Above Ground Over Time') +
  theme_classic() +
  labs(color = "Cluster") +       # Setting the legend title to have a capital C.
  theme(plot.title = element_text(hjust = 0.5, size = 18),
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text = element_text(size = 13),
        legend.title = element_text(size = 18),
        legend.text = element_text(size = 14),
        # element_text(size = 16)
        )