Analyzing Flight Behaviors of Bald Eagles

Background

Bergen et al. 2021
2,093,022 GPS points of 57 different eagles

GPS points broken into segments

Instantaneous speed (KPH), average speed (m/s), change in vertical velocity (m/s), turn angle (radians), etc.

Goals

Can we identify GPS points in which the eagles are perched and which points they are flying?

Of the in-flight points, can we identify certain flight behaviors and the characteristics of these behaviors?

Methods

Data Cleaning

Several variables had skewed distributions.
- Square root transformation

KPH, Sn, AGL, Turn Angle and Vertical Rate

Example of Transformation

Clustering

Performed a cluster analysis of the GPS Points
- K-means

Used WSS and Average Silhouette Width to compare 2-7 clusters
- Bootstrapped Silhouette Widths

Examined clusters using PCA and other visualizations to identify behaviors

Choosing K

Results

PCA Biplot

PCA Biplot of Eagle 112’s Flight Points
Points in the red cluster has low speed variables along with a lower distance above ground level
Points in the blue cluster have a high vertical rate and turning angle
Points in the purple cluster have a high speed and a lower vertical rate
Points in the green cluster appear to have middling to lower levels of each variable

Variable Boxplots

Flight Behaviors Summarized

Cluster	Speed	Vertical Rate	Turning Angle	AGL	Behavior
1	0	0	Highly Variant	Near 0	Perching
2	Medium	0	Lower	Near 0	Maintaining
3	Medium	Positive	High	Positive	Climbing
4	High	Negative	Near 0	Positive	Gliding

Visualizing a Flight Segment

Visualization of Flight Segment 60036 by Eagle 140.
Green points: Eagle is maintaining is position
Blue Points: Eagle is ascending
Purple Points: Eagle is descending

Conclusion

By using K-means clustering, we are able to identify 4 distinct behaviors of eagles
Perching, maintaining, climbing, and gliding.
Average speed, instantaneous speed, vertical climb rate, turning angle, and distance from the ground are important variables for distinguishing these behaviors.

Questions

Appendix

Preparing Data

# Packages Used
library(tidyverse)  # For Data Cleaning and Visualizing
library(cluster)    # For Clustering the Data
library(ggrepel)    # For Lableing Variables on PCA Plots
library(patchwork)  # For Putting Plots Together

#Loading Data

load('eagle_data.Rdata')

(
  eagle_data
  %>% mutate(KPH = sqrt(KPH),
             Sn = sqrt(Sn),
             AGL = sqrt(AGL),
             abs_angle = sqrt(abs_angle),
             absVR = sqrt(absVR))
) -> eagle_data # Square Root Transforming the variables


eagle_scaled <- (
  eagle_data
  %>% select(KPH:absVR)             # Selecting the numeric variables for Clustering
  %>% scale(scale = T, center = T)  # Scaling the data to put all of the variables on the same scale
) 

eagle_pca <- prcomp(eagle_scaled)   # Performing PCA on the data.

Creating the Square Root Transformed Distributions

ggplot(data = eagle_data, aes(x = (KPH)**2)) +              # Squaring KPH since we square-rooted it earlier
  geom_density() +                                          # Creating the Density Curve
  xlab('KPH') + ylab('Density') +
  ggtitle('Density of KPH') +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, size = 18),  # Adjusting the position and size of the title and axis labels
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text = element_text(size = 13),
        # element_text(size = 16)
        )

ggplot(data = eagle_data, aes(x = (KPH))) +                 # The Data frame is storing square root of KPH as KPH
  geom_density() +    
  xlab('Square Root of KPH') + ylab('Density') +
  theme_classic() +
  ggtitle('Density of Square Root of KPH') +
  theme(plot.title = element_text(hjust = 0.5, size = 18),
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text = element_text(size = 13),
        legend.title = element_text(size = 18),
        legend.text = element_text(size = 14),
        # element_text(size = 16)
        )

Clustering and Calculating WSS

set.seed(6725)

kmean_2 <- kmeans(eagle_scaled, centers = 2, iter.max = 20, nstart = 10)  # K-means for 2 clusters

set.seed(8770)

kmean_3 <- kmeans(eagle_scaled, centers = 3, iter.max = 20, nstart = 10)  # K-means for 3 Clusters

set.seed(5255)

kmean_4 <- kmeans(eagle_scaled, centers = 4, iter.max = 20, nstart = 10)  # K-means for 4 Clusters

set.seed(9925)

kmean_5 <- kmeans(eagle_scaled, centers = 5, iter.max = 20, nstart = 10)  # K-means for 5 Clusters

set.seed(2770)

kmean_6 <- kmeans(eagle_scaled, centers = 6, iter.max = 20, nstart = 10)  # K-means for 6 Clusters

set.seed(5403)

kmean_7 <- kmeans(eagle_scaled, centers = 7, iter.max = 20, nstart = 10)  # K-means for 7 Clusters

# All seeds came from random.org. A random number was generated between 1 and 10000. This was done so that cluster definitions would not change each time I render this document

wss_df <- (
  data.frame(clusters = c(2, 3, 4, 5, 6, 7), 
             wss = c(kmean_2$tot.withinss, kmean_3$tot.withinss,
                     kmean_4$tot.withinss, kmean_5$tot.withinss,
                     kmean_6$tot.withinss, kmean_7$tot.withinss
                     ))
)     # This data frame is stroing the WSS from each clustering solution. This data frame will be used to create the WSS plot later

Bootstrapping the silhouette widths

n = 100     # Number of bootstrap Samples
kmeans_sil <- data.frame(trial = c(0), clusters = c(0), avg_sil_width = c(0))  # Creating an Empty Data Frame to store the silhouette widths

(
  eagle_data
  %>% mutate(cluster_2 = kmean_2$cluster,
             cluster_3 = kmean_3$cluster,
             cluster_4 = kmean_4$cluster,
             cluster_5 = kmean_5$cluster,
             cluster_6 = kmean_6$cluster,
             cluster_7 = kmean_7$cluster)
) -> eagle_bootstrap

# Each of the points was assigned to a cluster for each of the k-means solutions (k=2 - k = 7). This data frame above is assigning each point to its cluster for each solution. This is the data frame we will take bootstrap samples from.

for (i in 1:n) {
  bootstrap_sample <- (
    eagle_bootstrap
    %>% sample_n(size = 10000, replace = FALSE)   # Taking a bootstrap sample of size 10,000
  )
  
  dist <- (
    bootstrap_sample
    %>% select(KPH:absVR)     # Selecting just the numeric variables
    %>% scale(scale = T, center = T)    # Scaling the variables
    %>% dist()    # Computing a Distance matrix for the scaled data
  )
  
  # These 6 lines of code calculate the silhouette width for each of the points for each solution.
  sils_2 <- silhouette(bootstrap_sample$cluster_2, dist)  
  sils_3 <- silhouette(bootstrap_sample$cluster_3, dist)
  sils_4 <- silhouette(bootstrap_sample$cluster_4, dist)
  sils_5 <- silhouette(bootstrap_sample$cluster_5, dist)
  sils_6 <- silhouette(bootstrap_sample$cluster_6, dist)
  sils_7 <- silhouette(bootstrap_sample$cluster_7, dist)
  
  # This data frame below saves the trial number, the number of clusters, and the average silhouette width.
  
  df_of_rows <- data.frame(trial = c(i,i,i,i,i,i), clusters = c(2, 3, 4, 5, 6, 7),
                           avg_sil_width = c(mean(sils_2[,3]),
                                         mean(sils_3[,3]),
                                         mean(sils_4[,3]),
                                         mean(sils_5[,3]),
                                         mean(sils_6[,3]),
                                         mean(sils_7[,3]))
       )
  kmeans_sil <- rbind(kmeans_sil, df_of_rows) # Adds the trial to the data frame that stores the silhouette widths
}

kmeans_sil <- (
  kmeans_sil
  %>% filter(clusters != 0)   # Filters out the blank row that was used to setup the data frame.
)

sil_mean <- (
  kmeans_sil
  %>% group_by(clusters)
  %>% summarize(avg = mean(avg_sil_width))    # Computes the average silhouette width across all trials to graph the overall average silhouette width later.
)

Choosing K

ggplot(data = wss_df, aes(x = clusters, y = wss)) + 
  geom_point() +      # Adds a point of the WSS for each cluster
  geom_line() +       # Connects the points with a line
  xlab('Number of Clusters') + ylab('WSS') +
  ggtitle('WSS by Cluster') +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, size = 18),     # Adjusts the size and position of the text
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text = element_text(size = 13),
        )


ggplot() +
  geom_line(data = kmeans_sil, aes(x = clusters, y = avg_sil_width, group = trial), alpha = 0.5) +   # Creates one line for each bootstrap trial. This line is the average silhouette width for each cluster.
  geom_line(data = sil_mean, aes(x = clusters, y = avg), col = 'firebrick2', linewidth = 2) +     # Creates a big red line for the average silhouette width by cluster across all trials.
  xlab('Number of Clusters') + ylab('Average Bootstrapped Silhouette Width') +
  ggtitle('Average Bootstrapped Silhouette \nWidth by Cluster') +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, size = 18),   # Adjusts the size and position of the text
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text = element_text(size = 13),
        legend.title = element_text(size = 18),
        legend.text = element_text(size = 14),
        # element_text(size = 16)
        )

PCA Biplot

(
  eagle_data
  %>% mutate(cluster = as.factor(kmean_4$cluster))
  %>% cbind(eagle_pca$x[,1:2])
) -> eagles_points        # Adds the Cluster assignments and the PC1 and PC2 values for each point into a data frame.

(
  data.frame(eagle_pca$rotation[,1:2])
  %>% rownames_to_column(var = 'var')
) -> arrow_df       # Pulls the loadings for PC1 and PC2 and adds them to a data frame. The purpose is to create the arrows on the Biplot.


# PCA Biplot Code

ggplot() +
  geom_point(data = eagles_points %>% filter(Animal_ID == 112), aes(x = PC1, y = PC2, color = cluster), shape = '.') + # Adds each point to the plot. Each point is color coded by its cluster.
  geom_segment(data = arrow_df, aes(x = 0, y = 0, xend = PC1*5, yend = PC2*5),  # Adds the arrows of the variables to this plot. The arrows on their own were too small, so they were scaled to be visible.
               arrow = arrow(length = unit(0.2, "cm"))) +
  geom_text_repel(data = arrow_df, aes(x = PC1*5, y = PC2*5, label = var)) +   # Adds the labels of the variables to the plot.
  xlab('PC1') + ylab('PC2') +
  ggtitle('PCA Biplot of Eagle 112') +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, size = 18),    # Adjusts the size and position of the text. 
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text = element_text(size = 13),
        legend.position = "none"                       # Removes the legend
        # element_text(size = 16)
        )

Variable Boxplots

eagle_clusters <- (                               # Puts the numeric variables back on their regular scales. This is done for visualization purposes.
  eagle_data
  %>% mutate(KPH = KPH**2,
             Sn = Sn**2,
             AGL = AGL**2,
             abs_angle = abs_angle**2,
             absVR = absVR**2)
  %>% mutate(cluster = factor(kmean_4$cluster))
)

vr <- (
  ggplot(data = eagle_clusters, aes(x =cluster, y = VerticalRate, fill = cluster)) + 
  geom_boxplot(outlier.shape = NA) + # Remove the outliers from the plot
  xlab('Cluster') + ylab('Vertical Rate (m/s)') + ggtitle('Vertical rate by Cluster') +
  ylim(-5, 5) +                                     # Limits the y-axis to make the plots visible
  geom_hline(yintercept = 0, linetype = 'dashed') +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, size = 18),   # Adjusts the position and size of the text
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text = element_text(size = 13),
        legend.position = "none"
        # element_text(size = 16)
        )
)

angle <- (
  ggplot(data = eagle_clusters, aes(x =cluster, y = abs_angle, fill = cluster)) + 
  geom_boxplot(outlier.shape = NA) + # Remove the outliers from the plot
  xlab('Cluster') + ylab('Angle (Radians)') + ggtitle('Absolute Value of Turning \nAngle by Cluster') +
  geom_hline(yintercept = 0, linetype = 'dashed') +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, size = 18),   # Adjusts the position and size of the text
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text = element_text(size = 13),
        legend.position = "none"
        # element_text(size = 16)
        )
)

agl <- (
  ggplot(data = eagle_clusters, aes(x =cluster, y = AGL, fill = cluster)) + 
  geom_boxplot(outlier.shape = NA) +       # Remove the outliers from the plot
  xlab('Cluster') + ylab('Distance (Meters)') + ggtitle('Distance Above Ground \nLevel by Cluster') +
  geom_hline(yintercept = 0, linetype = 'dashed') +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, size = 18),  # Adjusts the position and size of the text
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text = element_text(size = 13),
        legend.position = "none"
        # element_text(size = 16)
        )
)

kph <- (
  ggplot(data = eagle_clusters, aes(x =cluster, y = KPH, fill = cluster)) + 
  geom_boxplot(outlier.shape = NA) +     # Remove the outliers from the plot
  xlab('Cluster') + ylab('KPH') + ggtitle('KPH by Cluster') +
  geom_hline(yintercept = 0, linetype = 'dashed') +
  theme_classic()+
  theme(plot.title = element_text(hjust = 0.5, size = 18),   # Adjusts the position and size of the text
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text = element_text(size = 13),
        legend.position = "none"
        # element_text(size = 16)
        )
)



# This code uses the patchwork package to put the plots together into one plot

kph + angle +
vr + agl +
plot_layout(ncol = 2)

Visualizing a Flight Segment

ggplot(data = eagle_clusters %>% filter(segment_id == 60036), aes(x = (
  as.numeric(LocalTime) - as.numeric((eagle_clusters %>%  filter(segment_id == 60036) %>%  summarize(min(as.numeric(LocalTime))))        # This code is setting the start of the segment to 0 seconds to make the numbers interpretable.
  ), y = AGL)) +
  geom_point(aes(color = cluster)) +
  xlab('Time (Seconds)') + ylab('Distance (Meters)') +
  ggtitle('Distance Above Ground Over Time') +
  theme_classic() +
  labs(color = "Cluster") +       # Setting the legend title to have a capital C.
  theme(plot.title = element_text(hjust = 0.5, size = 18),
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text = element_text(size = 13),
        legend.title = element_text(size = 18),
        legend.text = element_text(size = 14),
        # element_text(size = 16)
        )