Information on various flight characteristics was gathered from 57 eagles over 4 years in Iowa using GPS biologgers
Table 1 from Bergen et al.
The best K-means solution is the one with 4 clusters
Distinct Groups by our Variables in 4 Clusters
PC1 accounts for 45% of Variation
PC2 accounts for 21% of Variation
PC1 defined by Velocity Variables (KPH & Sn)
PC2 defined by Vertical Rates, AGL, Angle
PC1 accounts for 45% of Variation
PC2 accounts for 21% of Variation
Our Orange Cluster (1) is our “Perching” Cluster
Our Green Cluster (2) is our “Ascending” Cluster
Our Blue Cluster (3) is our “Flapping” Cluster
Our Purple Cluster (4) is our “Gliding” Cluster
We can see that our Ascending Points steeply rise up
We can see the high angle our Flapping Points have.
We can see how spaced out and declining our Gliding Points are
Again we see that our Ascending Points steeply rise up
Again we see how spaced out our Gliding Points are
# Narrow the dataset to the numeric flight variables (columns KPH through
# absVR) so they can be transformed and scaled.
eagle_numerics <- eagle_data %>%
  select(KPH:absVR)

# Correct skew with a square-root transform on AGL, Sn, absVR and abs_angle,
# then centre and scale every column. scale() returns a numeric matrix, so
# everything downstream of this step works on a matrix, not a data frame.
scaled_eagle <- eagle_numerics %>%
  mutate(across(c(AGL, Sn, absVR, abs_angle), sqrt)) %>%
  scale()

# For reproducibility
set.seed(123)

# Draw 10,000 row indices so the computer can run and store the clustering
# and PCA plots. seq_len() is used instead of 1:nrow() so an empty input does
# not silently sample from c(1, 0).
# NOTE(review): replace = TRUE means the same row can be drawn more than once.
# Kept as in the original so the random draws (and everything seeded after
# them) are unchanged -- confirm sampling with replacement is intended.
sample_indices <- sample(
  seq_len(nrow(scaled_eagle)),
  size = 10000,
  replace = TRUE
)

# Grab the sampled rows
eagle_subsampled <- scaled_eagle[sample_indices, ]

# Testing kmeans for possible clusters determined from WSS and silhouette
# width plot; this was repeated for 2:7
# k-means with 2 centres on the subsample (10 random starts, at most 30
# iterations per start).
kmeans2 <- kmeans(eagle_subsampled, centers = 2, nstart = 10, iter.max = 30)

# PCA on the same subsample; supplies the variable arrows for the k-means plots
eagle_pca <- prcomp(eagle_subsampled, center = TRUE)

# Combine the subsample, the cluster assignment (k2) and the first two
# principal-component scores into one table for plotting; this was repeated
# for 2:7
eagle_update2 <- bind_cols(
  eagle_subsampled,
  k2 = factor(kmeans2$cluster),
  eagle_pca$x[, 1:2]
)

# PCA plot with variable arrows, overlaid with the observations coloured by
# cluster, to compare clusters against the variable directions; this was
# repeated for 2:7.
# The individuals drawn by fviz_pca() are coloured white and the points are
# re-drawn from eagle_update2. Colour is mapped to the k2 column of that table
# (not to kmeans2$cluster directly) so the mapping always stays row-aligned
# with the plotted data.
p2 <- fviz_pca(
  eagle_pca,
  axes = c(1, 2),
  label = "var",
  col.ind = "white"
) +
  geom_point(
    aes(x = PC1, y = PC2, col = k2),
    data = eagle_update2,
    shape = ".",
    alpha = 1
  ) +
  guides(color = "none") +
  labs(title = "Plot of Kmeans 2")

# Patchwork to combine all plots together.
# p3 through p7 are not built in this section; they are expected to exist
# already (the p2 steps repeated for 3:7 centres).
p2 + p3 + p4 + p5 + p6 + p7

# PCA repeated for loading plots
# Re-fit the PCA, this time on the full scaled data set
eagle_pca <- prcomp(scaled_eagle, center = TRUE)

# Loadings (the rotation matrix) as a data frame, with the variable names
# moved from the row names into a "Variable" column
loadings_df <- rownames_to_column(
  data.frame(eagle_pca$rotation),
  "Variable"
)

# Shared scaffold for the loadings bar charts: dashed reference line at zero,
# classic theme, and x-axis labels tilted 45 degrees
base_plot <- ggplot(loadings_df) +
  theme_classic(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
  ) +
  labs(x = "Variable") +
  geom_hline(aes(yintercept = 0), linetype = 2)

# One bar chart per component, bars ordered from largest to smallest loading
l1 <- base_plot +
  geom_col(
    aes(
      x = reorder(Variable, PC1, decreasing = TRUE),
      y = PC1
    )
  )

l2 <- base_plot +
  geom_col(
    aes(
      x = reorder(Variable, PC2, decreasing = TRUE),
      y = PC2
    )
  )

# Show the PC1 and PC2 loading plots side by side under a centred title
l1 + l2 +
  plot_annotation(
    title = "Loadings plots for Eagle PCA",
    theme = theme(plot.title = element_text(hjust = 0.5))
  )

# Kmeans of 4 clusters for boxplots
# k-means with 4 centres on the full scaled data set (10 random starts, at
# most 30 iterations per start). `centers` is spelled out in full rather than
# relying on partial argument matching.
kmeans4eagle <- kmeans(scaled_eagle, centers = 4, nstart = 10, iter.max = 30)

# Add the cluster assignment (k4) to the untransformed numeric variables
eagle_numerics_cluster <- eagle_numerics %>%
  mutate(k4 = kmeans4eagle$cluster)

# PCA on the full scaled data set. Re-fitted here so that eagle_pca is
# guaranteed to have one row of scores per row of eagle_data, even if
# eagle_pca was last fitted on the subsample.
eagle_pca <- prcomp(scaled_eagle, center = TRUE)

# Add the cluster assignment and the first two PC scores to the full eagle
# data set
eagle_updatebig <- bind_cols(
  eagle_data,
  k4 = factor(kmeans4eagle$cluster),
  eagle_pca$x[, 1:2]
)

# Boxplot of AGL by cluster on the full data set, used to compare flight
# characteristics across clusters and help name the groups; repeated for all
# 4 boxplots.
# - Cluster is read from the k4 column of the plotted data so it stays
#   row-aligned with it.
# - coord_cartesian() zooms the y-axis to 0-1500 without discarding data.
#   ylim() would drop every observation outside that range *before* the
#   boxplot statistics are computed, which shifts the medians and quartiles.
bp1 <- ggplot(data = eagle_numerics_cluster) +
  geom_boxplot(
    aes(x = factor(k4), fill = factor(k4), y = AGL),
    outlier.shape = NA
  ) +
  coord_cartesian(ylim = c(0, 1500)) +
  xlab("Clusters") +
  labs(fill = "Clusters")

# Patchwork to combine plots.
# bp2 through bp4 are not built in this section; they are expected to exist
# already (the bp1 steps repeated for the other variables).
bp1 + bp2 + bp3 + bp4

# Creating segment group to help determine which segments to use for analyzing
# Keep only the segment identifier and segment length columns
eagle_segment <- select(eagle_updatebig, segment_id, segment_length)

# Count the rows recorded for each segment, to make segment sizes easy to
# compare
eagle_segment_grouped <- eagle_segment %>%
  group_by(segment_id) %>%
  summarise(n = n())

# Largest segments first; a segment with roughly 400 points was then picked
# by hand so the plotted points are easy to analyse. This was repeated for
# both segments.
eagle_segment_sorted_desc <- eagle_segment_grouped %>%
  arrange(desc(n))
# head(eagle_segment_sorted_desc, 1000)

# All rows of the chosen segment
filtered_segment1 <- filter(eagle_updatebig, segment_id == 72341)

# Plot the segment by longitude and latitude, colouring each point by its
# cluster and labelling the clusters with their flight characteristic; this
# was repeated for both flight segments
ggplot(filtered_segment1, aes(x = Longitude, y = Latitude)) +
  geom_point(aes(colour = factor(k4)), size = 0.5) +
  scale_colour_discrete(
    name = "Flight Characteristics",
    labels = c(
      "1" = "Perching",
      "2" = "Ascending",
      "3" = "Flapping",
      "4" = "Gliding"
    )
  )