Using our key variables, can we distinguish in-flight from perching points?
Of the in-flight points:
Can flight behaviors be identified?
What are characteristics of these behaviors?
What are visual examples to demonstrate these behaviors?
Variable Distributions
KPH & Sn: large frequency at 0, suggesting perched points
AGL & |Angle|: larger values suggest sharp turning points or flying above trees
Perching vs. In-Flight
Perching (cluster 1) - low speed and distance between points
In-flight (cluster 2) - high speed, distance between points, and VR change
Choosing the Amount of Clusters
First, we need to see what the optimal number of clusters is to analyze variation.
Is 4 Clusters Optimal?
We can wee the Silhouette and Elbow method both suggest 4 clusters is optimal.
Identifying Distinct In-Flight Behaviors
Perched
Gliding
Ascending
Flapping
Characteristics of These Behaviors
Gliding = high AGL & KPH, low absolute turning angle
Ascending = high absolute turning angle
Flapping = low AGL and KPH
Visualizing Flight Behaviors
Sampling Method
Any questions?
Appendix
Variable Distributions
library(tidyverse)load("C:/Users/fx3734qp/Downloads/DSCI 415/Project2/project_2/eagle_data.Rdata")eagle_data <-as_tibble(eagle_data)# Keep only the focal variables (raw, no transforms)numeric_only <- eagle_data %>%select(Sn, KPH, AGL, abs_angle, VerticalRate, absVR) %>%drop_na() %>%filter_all(all_vars(is.finite(.)))par(mfrow =c(2, 3))# histogram plots based on frequency for each variable hist(numeric_only$Sn, breaks =50, main ="Sn",xlab ="Sn", col ="lightblue")hist(numeric_only$KPH, breaks =50, main ="KPH",xlab ="KPH", col ="lightgreen")hist(numeric_only$AGL, breaks =50, main ="AGL",xlab ="AGL", col ="pink")hist(numeric_only$abs_angle, breaks =50, main ="|Angle|",xlab ="|Angle|", col ="purple")hist(numeric_only$VerticalRate, breaks =20, main ="VR",xlab ="Vertical Rate", col ="orange")hist(numeric_only$absVR, breaks =20, main ="|VR|",xlab ="|Vertical Rate|", col ="red")par(mfrow =c(1, 1))
Appendix
K-means 2 cluster plot
library(tidyverse)library(ggplot2)load("C:/Users/fx3734qp/Downloads/DSCI 415/Project2/project_2/eagle_data.Rdata")eagle_data <-as_tibble(eagle_data)# transforming the data with square rootnumeric_only <- eagle_data %>%mutate(KPH_sq =sqrt(pmax(KPH, 0)),Sn_sq =sqrt(pmax(Sn, 0)),AGL_sq =sqrt(pmax(AGL, 0)),abs_angle_sq =sqrt(pmax(abs_angle, 0)),absVR_sq =sqrt(pmax(absVR, 0)),VerticalRate = VerticalRate ) %>%# filtering to only needed columnsselect(KPH_sq, Sn_sq, AGL_sq, abs_angle_sq, absVR_sq, VerticalRate) %>%drop_na() %>%filter_all(all_vars(is.finite(.)))# scale the data and samplenumeric_scaled <-scale(numeric_only)set.seed(42)numeric_scaled <- numeric_scaled[sample(1:nrow(numeric_scaled), 20000), ]# kmeans cluster using 2 centerskmeans_clusters <-kmeans(numeric_scaled, centers =2, iter.max =20, nstart =25)# calculates PCA on scaled datapca_res <-prcomp(numeric_scaled, scale. =FALSE) # merge PCA scares with actual datapca_scores <-as_tibble(pca_res$x[, 1:2]) %>%mutate(cluster =factor(kmeans_clusters$cluster))# add in PCA explained on the x and y axispca_var <-round(100* pca_res$sdev^2/sum(pca_res$sdev^2), 1)xlab <-paste0("PC1 (", pca_var[1], "%)")ylab <-paste0("PC2 (", pca_var[2], "%)")# create loadings for all 6 variablesloadings <-as_tibble(pca_res$rotation[, 1:2], rownames ="variable") %>%mutate(across(c(PC1, PC2), ~ .x *5)) %>%mutate(variable =recode(variable,KPH_sq ="KPH",Sn_sq ="Sn",AGL_sq ="AGL",abs_angle_sq ="|Angle|",absVR_sq ="|VR|",VerticalRate ="VR"))# assign cluster colorscluster_colors <-c("1"="pink", "2"="lightgreen")p <-ggplot(pca_scores, aes(x = PC1, y = PC2, color = cluster)) +geom_point(alpha =0.6) +geom_segment(data = loadings,aes(x =0, y =0, xend = PC1, yend = PC2),arrow =arrow(length =unit(0.2, "cm")),color ="black") +geom_text(data = loadings,aes(x = PC1, y = PC2, label = variable),color ="black", size =4, vjust =-0.5) +scale_color_manual(values = cluster_colors) +labs(title ="",x = xlab, y = ylab, color ="Cluster") +theme_classic(base_size =16)print(p)
Appendix
Clusters 2-7
library(tidyverse)library(ggplot2)library(gridExtra)# again, loading in the data, transforming the data, selecting the correct columns, scaling, and taking a sampleload("C:/Users/fx3734qp/Downloads/DSCI 415/Project2/project_2/eagle_data.Rdata")eagle_data <-as_tibble(eagle_data)numeric_only <- eagle_data %>%mutate(KPH_sq =sqrt(pmax(KPH, 0)),Sn_sq =sqrt(pmax(Sn, 0)),AGL_sq =sqrt(pmax(AGL, 0)),abs_angle_sq =sqrt(pmax(abs_angle, 0)),absVR_sq =sqrt(pmax(absVR, 0)),VerticalRate = VerticalRate ) %>%select(KPH_sq, Sn_sq, AGL_sq, abs_angle_sq, absVR_sq, VerticalRate) %>%drop_na() %>%filter_all(all_vars(is.finite(.)))numeric_scaled <-scale(numeric_only)set.seed(42)numeric_scaled <- numeric_scaled[sample(1:nrow(numeric_scaled), 2000), ]# calucalte PCA on the scaled datapca_res <-prcomp(numeric_scaled, scale. =FALSE) pca_res$x[,1] <--pca_res$x[,1]pca_res$rotation[,1] <--pca_res$rotation[,1]# creating a scatterplot with PCA's on x and y axispca_scores_base <-as_tibble(pca_res$x[, 1:2])# assigning cluster colorscluster_colors <-c("1"="pink", "2"="lightgreen", "3"="lightblue","4"="violet", "5"="orange", "6"="brown", "7"="gray")# making the plot functionmake_plot <-function(k) {set.seed(42)# need k amount of centers as each plot will be different km <-kmeans(numeric_scaled, centers = k, iter.max =20, nstart =25)# PCA scores on x and y axis pca_scores <- pca_scores_base %>%mutate(cluster =factor(km$cluster))ggplot(pca_scores, aes(x = PC1, y = PC2, color = cluster)) +geom_point(alpha =0.6) +scale_color_manual(values = cluster_colors) +labs(title =paste(k, "clusters"),x ="PC1", y ="PC2") +theme_classic(base_size =14) +theme(legend.position ="none") # no legend}# creates the plot 6 times with clusters 2-7 using list applyplots <-lapply(2:7, make_plot)grid.arrange(grobs = plots, ncol =3)
Appendix
Determining K
library(tidyverse)library(cluster)library(factoextra)library(patchwork) # taking a sample of the data to run smootherset.seed(1)eagle_sample <- eagle_data %>%sample_n(2000)# selecting only the movement variables and scalingmovement_vars <- eagle_sample %>%select(KPH, Sn, AGL, abs_angle, VerticalRate, absVR)movement_scaled <-scale(movement_vars)# agglomerative clustering using the ward methodhc_ward <-agnes(movement_scaled, method ="ward")# elbow plot with a k max of 10p1 <-fviz_nbclust(movement_scaled, FUN = hcut, method ="wss",hc_method ="ward", k.max =10) +labs(title ="Elbow Method (WSS)") # silhouette width plot with a k max of 10p2 <-fviz_nbclust(movement_scaled, FUN = hcut, method ="silhouette",hc_method ="ward", k.max =10) +labs(title ="Silhouette Method")p1 + p2
Appendix
4 clusters with loadings
library(tidyverse)library(ggplot2)# again, loading in the data, transforming the data, selecting the correct columns, scaling, and taking a sampleload("C:/Users/fx3734qp/Downloads/DSCI 415/Project2/project_2/eagle_data.Rdata")eagle_data <-as_tibble(eagle_data)numeric_only <- eagle_data %>%mutate(KPH_sq =sqrt(pmax(KPH, 0)),Sn_sq =sqrt(pmax(Sn, 0)),AGL_sq =sqrt(pmax(AGL, 0)),abs_angle_sq =sqrt(pmax(abs_angle, 0)),absVR_sq =sqrt(pmax(absVR, 0)),VerticalRate = VerticalRate ) %>%select(KPH_sq, Sn_sq, AGL_sq, abs_angle_sq, absVR_sq, VerticalRate) %>%drop_na() %>%filter_all(all_vars(is.finite(.)))numeric_scaled <-scale(numeric_only)set.seed(42)numeric_scaled <- numeric_scaled[sample(1:nrow(numeric_scaled), 20000), ]# using kmeans clustering create 4 clusterskmeans_clusters <-kmeans(numeric_scaled, centers =4, iter.max =20, nstart =25)# calucalte PCA on the scaled datapca_res <-prcomp(numeric_scaled, scale. =FALSE)# creating a scatterplot with PCA's on x and y axispca_scores <-as_tibble(pca_res$x[, 1:2]) %>%# changing cluster 1-4 names to their behaviormutate(cluster =factor(kmeans_clusters$cluster,levels =c(1,2,3,4),labels =c("Perched","Gliding","Ascending","Flapping")))# add in variance explained on the PC axispca_var <-round(100* pca_res$sdev^2/sum(pca_res$sdev^2), 1)xlab <-paste0("PC1 (", pca_var[1], "%)")ylab <-paste0("PC2 (", pca_var[2], "%)")# insert laodings into graphloadings <-as_tibble(pca_res$rotation[, 1:2], rownames ="variable") %>%mutate(across(c(PC1, PC2), ~ .x *5)) %>%mutate(variable =recode(variable,KPH_sq ="KPH",Sn_sq ="Sn",AGL_sq ="AGL",abs_angle_sq ="|Angle|",absVR_sq ="|VR|",VerticalRate ="VR"))cluster_colors <-c("Perched"="orange","Gliding"="green","Ascending"="blue","Flapping"="purple")p <-ggplot(pca_scores, aes(x = PC1, y = PC2, color = cluster)) +geom_point(alpha =0.6) +geom_segment(data = loadings,aes(x =0, y =0, xend = PC1, yend = PC2),arrow =arrow(length =unit(0.2, "cm")),color ="black") +geom_text(data = loadings,aes(x = PC1, y = PC2, label = variable),color ="black", size =4, vjust =-0.5) +scale_color_manual(values = cluster_colors) +labs(title ="",x = xlab, y = ylab, color =NULL) +theme_classic(base_size =16)print(p)
Appendix
Boxplot of in-flight variables
library(tidyverse)library(ggplot2)#again, load in the data and select only needed variabels and sampleload("C:/Users/fx3734qp/Downloads/DSCI 415/Project2/project_2/eagle_data.Rdata")eagle_data <-as_tibble(eagle_data)numeric_only <- eagle_data %>%select(KPH, Sn, AGL, abs_angle, absVR, VerticalRate) %>%drop_na() %>%filter_all(all_vars(is.finite(.)))set.seed(42)numeric_only <-sample_n(numeric_only, 50000)# kmeans cluster on 4 clusters in order to separate each behavior when plottedkm <-kmeans(numeric_only, centers =4, iter.max =20, nstart =25)# attach cluster labelsboxplot_data <- numeric_only %>%mutate(kmeans_clusters =factor(km$cluster))# remove cluster 1 (as it is not in-flight) and relabel remaining clusters boxplot_data <- boxplot_data %>%filter(kmeans_clusters !=1) %>%mutate(kmeans_clusters =droplevels(kmeans_clusters)) %>%mutate(kmeans_clusters =fct_recode(kmeans_clusters,"Gliding"="2","Ascending"="3","Flapping"="4"))cluster_colors <-c("Gliding"="green","Ascending"="orange","Flapping"="purple")# boxplot with 2 rows and 3 columnspar(mfrow =c(2,3)) # each individual boxplot with specified variable and its frequencyboxplot(KPH ~ kmeans_clusters, data = boxplot_data,main ="KPH", xlab ="", ylab ="KPH",col = cluster_colors, cex.axis =0.8)boxplot(Sn ~ kmeans_clusters, data = boxplot_data,main ="Sn", xlab ="", ylab ="Sn",col = cluster_colors, cex.axis =0.8)boxplot(AGL ~ kmeans_clusters, data = boxplot_data,main ="AGL", xlab ="", ylab ="AGL",col = cluster_colors, cex.axis =0.8)boxplot(abs_angle ~ kmeans_clusters, data = boxplot_data,main ="|Angle|", xlab ="", ylab ="|Angle|",col = cluster_colors, cex.axis =0.8)boxplot(absVR ~ kmeans_clusters, data = boxplot_data,main ="|VR|", xlab ="", ylab ="|VR|",col = cluster_colors, cex.axis =0.8)boxplot(VerticalRate ~ kmeans_clusters, data = boxplot_data,main ="VerticalRate", xlab ="", ylab ="VerticalRate",col = cluster_colors, cex.axis =0.8)# reset layoutpar(mfrow =c(1,1))
Appendix
Visualizing Flight Behaviors
library(dplyr)library(ggplot2)library(patchwork)# keeps only variables I want to analyzevars <-c("KPH", "Sn", "AGL", "VerticalRate")df <- eagle_data %>%drop_na(all_of(vars))# scale and cluster by k meansX <-scale(df %>%select(all_of(vars)))set.seed(123)km <-kmeans(X, centers =4)df$cluster <-factor(km$cluster)perching_cluster <- df %>%group_by(cluster) %>%summarise(mean_speed =mean(KPH, na.rm =TRUE)) %>%arrange(mean_speed) %>%slice(1) %>%pull(cluster)# filters to one birdbird_id <-107flight_segment <- df %>%# Filter the data to include only the tracking pointsfilter(Animal_ID == bird_id, cluster != perching_cluster) %>%# Group the remaining data by the distinct flight segmentsgroup_by(segment_id) %>%# Calculate the range of speed for each segment by subtracting the minimum KPH from the maximum KPHsummarise(kph_range =max(KPH) -min(KPH), .groups="drop") %>%# Sort the resulting segments in descending order basedarrange(desc(kph_range)) %>%# Select only the first row, which represents the segment with the highest speed rangeslice(1) %>%pull(segment_id)flight_segment bird_df <- df %>%filter(Animal_ID == bird_id) %>%filter(segment_id == flight_segment) %>%# Filter out the perching points again, leaving only the three flight clustersfilter(cluster != perching_cluster) %>%arrange(LocalTime) %>%# Create a new column 'seconds' which represents the cumulative time elapsed from the start of the selected flight segmentmutate(seconds =cumsum(TimeDiff))remaining_clusters <-sort(unique(bird_df$cluster))custom_colors <-setNames(c("orange", "purple", "green"),remaining_clusters)