1 Overview

Preliminary exploratory data analysis (EDA) of the integrated blue whale dataset (visual + acoustic) and the OMP water-mass covariates, carried out before building an integrated species distribution model (SDM) with sdmTMB.


library(tidyverse)
library(lubridate)
library(janitor)
library(skimr)
library(GGally)
library(corrplot)
library(patchwork)
library(sf)
library(rnaturalearth)
library(rnaturalearthdata)
library(viridis)
library(patchwork)

2 1) Read data

# Absolute path to the integrated blue whale density table (visual + acoustic)
# with the OMP covariates attached.
data_path <- "N:/Michaela_working_disk_backup/CalCOFI/CalCOFI_integrated_model/data/integrated_datasets/Bm_visual_acoustic_density_with_OMP.csv"

# Read the CSV without the column-type message, then pass the result through
# janitor::clean_names() so the rest of the script can rely on cleaned column
# names (e.g. `density`, `method`).
dat <- janitor::clean_names(
  readr::read_csv(data_path, show_col_types = FALSE)
)

3 6) Distributions

# Visual densities (animals / 1000 km^2), non-zero records only.
# coord_cartesian() zooms the x-axis to 0–300 without discarding data, so the
# 300 bins are still computed over the full range of non-zero densities.
p_V <- ggplot(
  data = dplyr::filter(dat, density > 0, method == "V"),
  mapping = aes(x = density)
) +
  geom_histogram(bins = 300) +
  labs(
    title = "Visual density (non-zero)",
    x = "Animals / 1000 km²",
    y = "Count"
  ) +
  coord_cartesian(xlim = c(0, 300)) +
  theme_minimal()

print(p_V)

# Acoustic call densities (calls / hr / 1000 km^2), non-zero records only:
# every method other than "V", drawn as one panel per method.
# coord_cartesian() zooms the x-axis to 0–5 without discarding data, so the
# 60 bins are still computed over the full range of non-zero densities.
p_A <- ggplot(
  data = dplyr::filter(dat, density > 0, method != "V"),
  mapping = aes(x = density)
) +
  geom_histogram(bins = 60) +
  facet_wrap(~ method) +
  labs(
    title = "Acoustic call density (non-zero)",
    x = "Calls / hr / 1000 km²",
    y = "Count"
  ) +
  coord_cartesian(xlim = c(0, 5)) +
  theme_minimal()

print(p_A)

4 10) OMP covariates: composition + collinearity

# OMP covariates are identified by column-name prefix (pew_, enpcw_, psuw_)
# in the cleaned column names of `dat`.
omp_cols <- names(dat) %>%
  stringr::str_subset("^(pew|enpcw|psuw)_")

# Fail early with a clear message: every step below needs at least one column,
# and would otherwise stop later with a much less helpful error.
if (length(omp_cols) == 0) {
  stop(
    "No OMP covariate columns (pew_*, enpcw_*, psuw_*) found in `dat`.",
    call. = FALSE
  )
}

# Marginal distribution of each covariate, one panel per column with its own
# y-axis. Printed explicitly (like the other figures in this script) so the
# plot is also produced under source(), where top-level values are not
# auto-printed.
p_omp_hist <- dat %>%
  select(all_of(omp_cols)) %>%
  pivot_longer(everything(), names_to = "covariate", values_to = "value") %>%
  ggplot(aes(value)) +
  geom_histogram(bins = 60) +
  facet_wrap(~ covariate, scales = "free_y") +
  labs(title = "OMP covariate distributions", x = "Contribution", y = "Count") +
  theme_minimal()

print(p_omp_hist)

# Complete cases only; computed once and reused for both collinearity views.
omp_mat <- dat %>%
  select(all_of(omp_cols)) %>%
  tidyr::drop_na()

# Pairwise correlations between covariates. No `use =` argument is needed
# because drop_na() has already removed every row with a missing value.
cor_omp <- cor(omp_mat)
corrplot::corrplot(cor_omp, method = "color", type = "upper", tl.cex = 0.8)

# Scatterplot matrix on a random subset of at most 1500 rows; the fixed seed
# makes the subset reproducible between runs.
set.seed(1)
p_omp_pairs <- GGally::ggpairs(
  omp_mat %>% dplyr::sample_n(min(nrow(omp_mat), 1500))
)

print(p_omp_pairs)

5 11) Quick-look: density vs a few OMP covariates

# Covariates shown in the quick-look plots: at most the first six OMP columns.
# head() replaces `omp_cols[1:min(length(omp_cols), 6)]` because `1:0` counts
# down to c(1, 0), which would produce an NA covariate name when `omp_cols`
# is empty; head() returns an empty vector in that case.
plot_covs <- head(omp_cols, 6)

# ----------------------------
# 1) VISUAL: non-zeros, 0–500
# ----------------------------
# One scatterplot per covariate in `plot_covs`: non-zero visual densities
# against the covariate, overlaid with a GAM smooth (k = 8, no confidence
# band). Rows where the covariate is missing are excluded, and the y-axis is
# zoomed to 0–500 with coord_cartesian(), i.e. without dropping points.
for (cov_name in plot_covs) {
  pV <- ggplot(
    data = dplyr::filter(
      dat,
      method == "V", density > 0, !is.na(.data[[cov_name]])
    ),
    mapping = aes(x = .data[[cov_name]], y = density)
  ) +
    geom_point(alpha = 0.20) +
    geom_smooth(method = "gam", formula = y ~ s(x, k = 8), se = FALSE) +
    labs(
      title = paste("Visual density (non-zero) vs", cov_name),
      x = cov_name,
      y = "Animals / 1000 km^2"
    ) +
    coord_cartesian(ylim = c(0, 500)) +
    theme_minimal()

  print(pV)
}

# -----------------------------------------
# 2) ACOUSTICS: non-zeros, 0–5, facet by A*
# -----------------------------------------
# One figure per covariate in `plot_covs`: non-zero densities for every method
# other than "V", coloured and faceted by method, each panel with its own
# x-axis range. A GAM smooth (k = 8, no confidence band) is overlaid and the
# y-axis is zoomed to 0–5 with coord_cartesian(), i.e. without dropping points.
for (cov_name in plot_covs) {
  pA <- ggplot(
    data = dplyr::filter(
      dat,
      method != "V", density > 0, !is.na(.data[[cov_name]])
    ),
    mapping = aes(x = .data[[cov_name]], y = density, color = method)
  ) +
    geom_point(alpha = 0.20) +
    geom_smooth(method = "gam", formula = y ~ s(x, k = 8), se = FALSE) +
    facet_wrap(~ method, scales = "free_x") +
    labs(
      title = paste("Acoustic density (non-zero) vs", cov_name),
      x = cov_name,
      y = "Calls / hr / 1000 km^2"
    ) +
    coord_cartesian(ylim = c(0, 5)) +
    # theme() must follow theme_minimal(), which would otherwise reset the
    # legend position; the legend is hidden because the facets label methods.
    theme_minimal() +
    theme(legend.position = "none")

  print(pA)
}