# Libraries
library(ggplot2)
library(dplyr)
# Load data
tmp_env <- new.env()
load("eagle_data.Rdata", envir = tmp_env)
eagle <- tmp_env[[ls(tmp_env)[1]]]
# Preprocessing
vars <- c("KPH", "Sn", "AGL", "abs_angle", "VerticalRate", "absVR")
sqrt_vars <- c("KPH", "Sn", "AGL", "abs_angle", "absVR")
for (v in sqrt_vars) {
eagle[[v]] <- sqrt(pmax(eagle[[v]], 0))
}
feat <- eagle[, vars]
feat_scaled <- scale(feat)
# Q1: in-flight vs perch
set.seed(11)
km2 <- kmeans(feat_scaled, centers = 2, nstart = 25)
centroid_speeds <- tapply(feat_scaled[, "KPH"], km2$cluster, mean)
flight_cluster <- which.max(centroid_speeds)
eagle$movement_state <- ifelse(km2$cluster == flight_cluster, "in_flight", "perch")
# Elbow method (in-flight only)
inflight_idx <- which(eagle$movement_state == "in_flight")
X_f <- feat_scaled[inflight_idx, ]
set.seed(11)
sample_idx <- sample(seq_len(nrow(X_f)), min(100000, nrow(X_f)))
X_sample <- X_f[sample_idx, ]
wss <- numeric(10)
for (k in 1:10) {
km <- kmeans(X_sample, centers = k, nstart = 10)
wss[k] <- km$tot.withinss
}
# Q2: 4 behavior clusters
set.seed(11)
km_behaviors <- kmeans(X_f, centers = 4, nstart = 25)
eagle$flight_behavior <- NA_integer_
eagle$flight_behavior[inflight_idx] <- km_behaviors$cluster
# Behavior summary
behavior_summary <- aggregate(
feat[inflight_idx, ],
by = list(behavior = eagle$flight_behavior[inflight_idx]),
FUN = function(x) c(mean = mean(x), sd = sd(x))
)
# Visual examples
example_segments <- eagle %>%
filter(movement_state == "in_flight") %>%
group_by(flight_behavior) %>%
slice_head(n = 200) %>%
ungroup() %>%
mutate(LocalTime_ct = as.POSIXct(LocalTime))