Final Oral Exam

Background

Research questions

Can we use the movement variables KPH, Sn, AGL, |Angle|, Vertical rate, and |VR| to distinguish in-flight from perching points?
Of the in-flight points:

Are there distinct flight behaviors that can be identified?
What are the characteristics of these behaviors?
What are some visual examples of flight segments that demonstrate the different types of in-flight behaviors?

Methods

Used sampled data to answer the research questions

RQ1:

Prepared data for clustering
Used PCA and k-means with k = 2

Methods cont.

RQ2:

Determined the maximum k to test
Clustered data using k = 3
Elbow method used to select k
Output visualized using the fviz_cluster function

Results RQ1

Results RQ1 cont.

Results RQ2

Results RQ2 cont.

Results cont.

Conclusion

Yes, movement variables can distinguish in-flight from perching points

There are multiple active states that an eagle employs: flapping, soaring, and cruising

Characteristics such as speed and vertical rate distinguish the active states of the eagle

Appendix

library(tidyverse)
library(factoextra)
library(cluster)
library(dbscan)
library(ggplot2)
library(dplyr) 
# Use a very wide console so wide tables print without wrapping.
options(width = 10000)

# Restore the saved objects from disk; the code below expects this to
# define `eagle_data`.
load("eagle_data.Rdata")

# Preview the first rows, printed as a tibble.
head(as_tibble(eagle_data))

# Continue with a plain data frame.
eagle_data <- data.frame(eagle_data)

# Add square-root versions of the movement variables. Note that sqrt()
# returns NaN (with a warning) for any negative input.
eagle_data <- dplyr::mutate(
  eagle_data,
  KPH_sqrt = sqrt(KPH),
  Sn_sqrt = sqrt(Sn),
  AGL_sqrt = sqrt(AGL),
  Angle_abs_sqrt = sqrt(abs_angle),
  VR_abs_sqrt = sqrt(absVR)
)

# Columns fed to PCA / clustering: five square-root transformed variables
# plus the untransformed VerticalRate.
cluster_vars <- c(
  "KPH_sqrt",
  "Sn_sqrt",
  "AGL_sqrt",
  "Angle_abs_sqrt",
  "VerticalRate",
  "VR_abs_sqrt"
)

# Keep only rows where every clustering variable is present. scale() itself
# never drops rows, so without this step any NA/NaN (e.g. sqrt() of a
# negative value) would be carried into prcomp(), which cannot handle
# missing values.
complete_rows <- complete.cases(eagle_data[, cluster_vars])
eagle_data_clean <- eagle_data[complete_rows, ]
# Reset to automatic row names so downstream objects are labelled 1..n.
rownames(eagle_data_clean) <- NULL

# Standardize each clustering variable (mean 0, sd 1) on the complete rows;
# rows of `scaled_egl_data` line up one-to-one with `eagle_data_clean`.
scaled_egl <- scale(eagle_data_clean[, cluster_vars])
scaled_egl_data <- as.data.frame(scaled_egl)
head(scaled_egl_data)

# Packages for this step: stats supplies prcomp(), factoextra supplies
# fviz_eig().
library(stats)
library(factoextra)

# Principal component analysis of the standardized clustering variables.
# The input was already passed through scale(), so prcomp() is told not to
# rescale it again.
pca_result <- prcomp(scaled_egl_data, scale. = FALSE)

# Scree plot: percentage of variance explained by each component, used to
# decide how many PCs to retain.
scree_plot <- fviz_eig(
  pca_result,
  addlabels = TRUE,
  ylim = c(0, 70),
  title = "Variance Explained by Principal Components"
)
print(scree_plot)

# Load Libraries
library(dplyr)
library(stats)
library(factoextra)

# Transformation: square-root versions of the movement variables.
# sqrt() returns NaN for negative inputs; such rows are removed below.
eagle_data_temp <- eagle_data %>%
  mutate(
    KPH_sqrt = sqrt(KPH),
    Sn_sqrt = sqrt(Sn),
    AGL_sqrt = sqrt(AGL),
    Angle_abs_sqrt = sqrt(abs_angle),
    VR_abs_sqrt = sqrt(absVR)
  )

# Define Clustering Variables
cluster_vars <- c(
  "KPH_sqrt", "Sn_sqrt", "AGL_sqrt",
  "Angle_abs_sqrt", "VerticalRate", "VR_abs_sqrt"
)

# Flag the rows in which every clustering variable is non-missing
# (complete.cases() treats NaN as missing, like na.omit()).
complete_rows <- complete.cases(eagle_data_temp[, cluster_vars])

# Create the Numeric Matrix from the complete rows only.
# rownames.force = FALSE keeps the matrix without row names.
clustering_matrix <- eagle_data_temp[complete_rows, , drop = FALSE] %>%
  select(all_of(cluster_vars)) %>%
  as.matrix(rownames.force = FALSE)

# Store the original data rows that correspond, in order, to the rows of
# `clustering_matrix`. The same logical mask is reused because a matrix
# built from a data frame with automatic row names has NULL row names, so
# looking rows up via row.names(clustering_matrix) would select nothing.
eagle_sample_original_rows <- eagle_data_temp[complete_rows, , drop = FALSE]

# Draw a reproducible 10% random sample of the rows of the clustering matrix.
set.seed(42)
n_complete <- nrow(clustering_matrix)
sample_rows_index <- sample(n_complete, size = floor(n_complete * 0.10))
sample_matrix <- clustering_matrix[sample_rows_index, ]

# Standardize the sampled variables; centering and scaling are computed
# from the sample itself.
scaled_eagle_sample <- scale(sample_matrix)
scaled_eagle_sample_df <- as.data.frame(scaled_eagle_sample)

# Report how many observations go into clustering.
print(paste("Final Sample Size for Clustering:", nrow(scaled_eagle_sample_df)))




# PCA on the standardized sample (already scaled, so no rescaling here).
pca_result_sample <- prcomp(scaled_eagle_sample_df, scale. = FALSE)

# Keep the scores on the first two principal components for clustering.
pca_scores_for_clustering <- as.data.frame(
  pca_result_sample$x[, seq_len(2)]
)

# Two-cluster k-means on the PC scores; 25 random starts, fixed seed for
# reproducibility.
set.seed(42)
pca_clustering <- kmeans(pca_scores_for_clustering, centers = 2, nstart = 25)

# Plot the two clusters in PCA space with a convex hull around each.
fviz_cluster(
  object = pca_clustering,
  data = pca_scores_for_clustering,
  geom = "point",
  ellipse.type = "convex",
  palette = "jco",
  main = "K-Means Clusters (k=2) on Sampled PCA Scores"
)

# Label of the k = 2 cluster treated as "in-flight". k-means labels are
# arbitrary integers, so this value comes from inspecting the clusters and
# must be re-checked whenever the data, seed, or sample changes.
# It is defined exactly once so that every subset and message below refers
# to the same cluster as the downstream RQ 2 analysis.
in_flight_cluster_number <- 2
is_in_flight <- pca_clustering$cluster == in_flight_cluster_number

# Keep ONLY the PC scores of the in-flight points.
in_flight_pca_scores <- pca_scores_for_clustering[is_in_flight, ]
print(paste("Data ready for RQ 2 with", nrow(in_flight_pca_scores), "observations."))

in_flight_data <- in_flight_pca_scores

# Drop any incomplete rows and make sure the result is a data frame.
in_flight_pca_scores_clean <- in_flight_data %>%
  na.omit() %>%
  as.data.frame()

print(paste("Clean In-Flight Observations for WSS analysis:",
            nrow(in_flight_pca_scores_clean)))

library(dplyr)

# Work on a data frame copy of the in-flight PC scores.
in_flight_data_df <- as.data.frame(in_flight_pca_scores_clean)

# Cap the elbow-method input at 10,000 rows (or all rows if fewer).
sample_size_k_check <- min(10000, nrow(in_flight_data_df))

# Reproducible random subsample for the k search.
set.seed(42)
in_flight_sample_for_k <- sample_n(
  in_flight_data_df,
  size = sample_size_k_check
)

print(paste("Running WSS on a sample size of:", nrow(in_flight_sample_for_k), "observations."))

library(factoextra)

# Largest k to try: 10, or floor(sqrt(n)) when that is smaller.
n_for_k <- nrow(in_flight_sample_for_k)
k_max_value <- min(10, floor(sqrt(n_for_k)))

# Elbow plot: total within-cluster sum of squares for k = 1..k_max_value,
# running k-means with 25 random starts for each k.
fviz_nbclust(
  in_flight_sample_for_k,
  FUNcluster = kmeans,
  method = "wss",
  k.max = k_max_value,
  nstart = 25
)

# Number of flight-behavior clusters chosen from the elbow plot.
k_flight_optimal <- 3

# Final k-means on all in-flight PC scores (25 random starts, fixed seed).
set.seed(42)
final_flight_clustering <- kmeans(
  in_flight_pca_scores_clean,
  centers = k_flight_optimal,
  nstart = 25
)

# Pull the standardized clustering variables for the same in-flight rows.
in_flight_rows <- pca_clustering$cluster == in_flight_cluster_number
scaled_in_flight_data <- scaled_eagle_sample_df[in_flight_rows, ]
scaled_in_flight_data_clean <- na.omit(scaled_in_flight_data)

# Attach the flight-behavior label; this relies on the rows being in the
# same order as the input to the final k-means.
scaled_in_flight_data_clean[["Flight_Behavior"]] <- factor(
  final_flight_clustering$cluster
)

# Variables to summarise per cluster.
cluster_vars <- c(
  "KPH_sqrt", "Sn_sqrt", "AGL_sqrt",
  "Angle_abs_sqrt", "VerticalRate", "VR_abs_sqrt"
)

# Per-cluster mean of each standardized variable (values are z-scores, not
# original units).
by_behavior <- group_by(scaled_in_flight_data_clean, Flight_Behavior)
centroid_analysis <- summarise(by_behavior, across(all_of(cluster_vars), mean))

print("Flight Behavior Cluster Centroids (Scaled Z-Scores):")
print(centroid_analysis)

library(factoextra)

# 2D cluster plot of the k = 3 flight-behavior solution on the in-flight
# PC scores. ellipse.type = "convex" outlines each cluster with its convex
# hull.
flight_behavior_plot <- fviz_cluster(
  object = final_flight_clustering,
  data = in_flight_pca_scores_clean,
  geom = "point",
  ellipse.type = "convex",
  palette = "Set1",
  main = "Visualization of Three Distinct Flight Behaviors",
  xlab = "Principal Component 1 (PC1)",
  ylab = "Principal Component 2 (PC2)",
  legend.title = "Flight Cluster"
)

print(flight_behavior_plot)

library(rgl)
library(dplyr)

# Scores on the first three principal components for the whole scaled sample.
pca_3d_scores_all <- as.data.frame(pca_result_sample$x[, seq_len(3)])

# Restrict to the in-flight rows.
in_flight_rows <- pca_clustering$cluster == in_flight_cluster_number
pca_3d_scores_in_flight <- pca_3d_scores_all[in_flight_rows, ]

# Attach the k = 3 flight-behavior labels.
pca_3d_scores_in_flight$Flight_Behavior <- factor(
  final_flight_clustering$cluster
)

# One color per cluster; indexing by the factor uses its integer codes.
cluster_colors <- c("blue", "red", "green3")
point_colors <- cluster_colors[pca_3d_scores_in_flight$Flight_Behavior]

# Interactive 3D scatter plot of the in-flight points, colored by cluster.
with(
  pca_3d_scores_in_flight,
  rgl::plot3d(
    x = PC1,
    y = PC2,
    z = PC3,
    col = point_colors,
    size = 5,
    type = "s",
    xlab = "Principal Component 1 (PC1)",
    ylab = "Principal Component 2 (PC2)",
    zlab = "Principal Component 3 (PC3)",
    main = "3D Visualization of Three Flight Behaviors (PC1, PC2, PC3)"
  )
)

# Legend mapping colors to cluster numbers.
rgl::legend3d(
  "topright",
  legend = paste("Cluster", seq_len(3)),
  pch = 13,
  col = cluster_colors,
  cex = 1.0
)

library(ggplot2)
library(dplyr)

# Rescale PC3 to be non-negative: shift by its minimum, then divide by the
# root mean square of the shifted values (scale()'s behavior when a numeric
# `center` is supplied and `scale` is left at TRUE).
# scale() returns a one-column matrix, so it is flattened with as.numeric()
# to store an ordinary numeric column rather than a matrix column.
pca_3d_scores_in_flight <- pca_3d_scores_in_flight %>%
  mutate(PC3_Scaled = as.numeric(scale(PC3, center = min(PC3))))

# Static 2D plot of PC1 vs PC2, colored by flight cluster, with PC3 shown
# through point size and transparency as a depth cue.
static_3d_projection <- ggplot(
  pca_3d_scores_in_flight,
  aes(x = PC1, y = PC2, color = Flight_Behavior)
) +
  geom_point(
    aes(size = PC3_Scaled, alpha = PC3_Scaled),
    shape = 19
  ) +
  scale_size_continuous(range = c(1, 6)) +
  labs(
    # Explicit "\n" line breaks: a literal line break inside the string
    # would carry the source-code indentation into the rendered text.
    title = "2D Projection of Flight Behaviors\nwith PC3 Depth Cueing",
    subtitle = paste0(
      "PC3 magnitude mapped to point size and intensity\n",
      "(larger = higher PC3)"
    ),
    x = "Principal Component 1",
    y = "Principal Component 2",
    color = "Flight Cluster"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

print(static_3d_projection)