# 02/06/26: Preliminary EDA of the blue whale integrated dataset (visual + acoustics) and OMP water mass covariates, prior to building an integrated SDM (sdmTMB).
library(tidyverse)
library(lubridate)
library(janitor)
library(skimr)
library(GGally)
library(corrplot)
library(patchwork)
library(sf)
library(rnaturalearth)
library(rnaturalearthdata)
library(viridis)
library(patchwork)
# Location of the integrated visual + acoustic density table with OMP
# covariates (absolute path on a mapped network drive).
data_path <- "N:/Michaela_working_disk_backup/CalCOFI/CalCOFI_integrated_model/data/integrated_datasets/Bm_visual_acoustic_density_with_OMP.csv"

# Read the CSV without the column-type message, then normalise the column
# names with janitor::clean_names() so later code can rely on cleaned names.
dat <- janitor::clean_names(
  readr::read_csv(data_path, show_col_types = FALSE)
)
# Visual (animals / 1000 km^2): histogram of the non-zero densities for
# method "V", with the x-axis zoomed to 0–300.
# NOTE: coord_cartesian() only zooms the view; the 300 bins are laid out over
# the full range of the filtered data, not just the displayed window.
p_V <- ggplot(
  data = dplyr::filter(dat, method == "V", density > 0),
  mapping = aes(x = density)
) +
  geom_histogram(bins = 300) +
  coord_cartesian(xlim = c(0, 300)) +
  theme_minimal() +
  labs(
    title = "Visual density (non-zero)",
    x = "Animals / 1000 km²",
    y = "Count"
  )
print(p_V)
# Acoustic (calls / hr / 1000 km^2): histograms of the non-zero densities for
# every method other than "V", one panel per method, x-axis zoomed to 0–5.
# NOTE: coord_cartesian() only zooms the view; the 60 bins are laid out over
# the full range of the filtered data, not just the displayed window.
p_A <- ggplot(
  data = dplyr::filter(dat, method != "V" & density > 0),
  mapping = aes(x = density)
) +
  geom_histogram(bins = 60) +
  facet_wrap(vars(method)) +
  coord_cartesian(xlim = c(0, 5)) +
  theme_minimal() +
  labs(
    title = "Acoustic call density (non-zero)",
    x = "Calls / hr / 1000 km²",
    y = "Count"
  )
print(p_A)
# OMP covariate columns: cleaned column names that start with "pew_",
# "enpcw_" or "psuw_".
omp_cols <- names(dat) %>%
  stringr::str_subset("^(pew|enpcw|psuw)_")

# Stop with an explicit message when nothing matched, rather than letting the
# reshaping / plotting steps further down fail with an obscure error.
if (length(omp_cols) == 0) {
  stop(
    "No OMP covariate columns (prefix pew_, enpcw_ or psuw_) found in `dat`.",
    call. = FALSE
  )
}

# One histogram panel per OMP covariate. The x-axis is shared across panels;
# the y-axis (counts) is free per panel.
p_omp_hist <- dat %>%
  select(all_of(omp_cols)) %>%
  pivot_longer(everything(), names_to = "covariate", values_to = "value") %>%
  ggplot(aes(value)) +
  geom_histogram(bins = 60) +
  facet_wrap(~ covariate, scales = "free_y") +
  labs(title = "OMP covariate distributions", x = "Contribution", y = "Count") +
  theme_minimal()

# Print explicitly: a bare ggplot expression is only auto-printed at the
# interactive top level and is not drawn when the script is run via source().
print(p_omp_hist)
# Complete-case table of the OMP covariates (rows with any NA removed). It is
# built once and shared by the correlation plot and the pairs plot.
omp_mat <- dat %>%
  select(all_of(omp_cols)) %>%
  tidyr::drop_na()

# Correlation matrix of the OMP covariates, drawn as the upper triangle of a
# colour-coded corrplot. Because NA rows were already dropped above,
# "pairwise.complete.obs" is equivalent to listwise deletion here.
cor_omp <- cor(omp_mat, use = "pairwise.complete.obs")
corrplot::corrplot(cor_omp, method = "color", type = "upper", tl.cex = 0.8)

# Pairs plot on a random subsample of at most 1500 rows to keep rendering
# tractable; the seed fixes the subsample between runs.
set.seed(1)
p_omp_pairs <- omp_mat %>%
  dplyr::slice_sample(n = min(nrow(omp_mat), 1500)) %>%
  GGally::ggpairs()

# Print explicitly so the figure is also drawn when the script is source()d.
print(p_omp_pairs)
# Covariates used in the density-vs-covariate plots: at most the first six
# OMP columns. head() returns an empty vector when `omp_cols` is empty,
# whereas 1:min(length(omp_cols), 6) would evaluate to c(1, 0) and index out
# an NA covariate name.
plot_covs <- head(omp_cols, 6)
# ------------------------------------------------------------------
# 1) VISUAL: non-zero densities vs each covariate, y zoomed to 0–500
# ------------------------------------------------------------------
# For every covariate in `plot_covs`, draw a scatter of non-zero visual
# density (method "V") against the covariate, skipping rows where the
# covariate is NA, and overlay a GAM smooth (k = 8, no confidence band).
for (cc in plot_covs) {
  pV <- ggplot(
    data = dplyr::filter(dat, method == "V", density > 0, !is.na(.data[[cc]])),
    mapping = aes(x = .data[[cc]], y = density)
  ) +
    geom_point(alpha = 0.20) +
    geom_smooth(method = "gam", formula = y ~ s(x, k = 8), se = FALSE) +
    # Zoom only: points above 500 still inform the smooth.
    coord_cartesian(ylim = c(0, 500)) +
    theme_minimal() +
    labs(
      title = paste("Visual density (non-zero) vs", cc),
      x = cc,
      y = "Animals / 1000 km^2"
    )
  # Plots built inside a loop are never auto-printed, so print explicitly.
  print(pV)
}
# ----------------------------------------------------------------------
# 2) ACOUSTICS: non-zero densities vs each covariate, y zoomed to 0–5,
#    one facet per non-visual method
# ----------------------------------------------------------------------
# For every covariate in `plot_covs`, draw a scatter of non-zero density for
# all methods other than "V" against the covariate, skipping rows where the
# covariate is NA. Points and GAM smooths (k = 8, no confidence band) are
# coloured by method; each method gets its own panel with a free x-axis.
for (cc in plot_covs) {
  pA <- ggplot(
    data = dplyr::filter(dat, method != "V", density > 0, !is.na(.data[[cc]])),
    mapping = aes(x = .data[[cc]], y = density, colour = method)
  ) +
    geom_point(alpha = 0.20) +
    geom_smooth(method = "gam", formula = y ~ s(x, k = 8), se = FALSE) +
    facet_wrap(vars(method), scales = "free_x") +
    # Zoom only: points above 5 still inform the smooth.
    coord_cartesian(ylim = c(0, 5)) +
    labs(
      title = paste("Acoustic density (non-zero) vs", cc),
      x = cc,
      y = "Calls / hr / 1000 km^2"
    ) +
    theme_minimal() +
    # The facet strips already name the method, so the colour legend is dropped.
    theme(legend.position = "none")
  # Plots built inside a loop are never auto-printed, so print explicitly.
  print(pA)
}