1 Thesis hypotheses modelling

Date: 21 April 2025

Description: Fitting GAMM models to three hypotheses related to emperor penguin foraging and movement

Load libraries.

# Clear the workspace
rm(list = ls(all = TRUE))
gc()

# Load libraries
library(tidyverse) # for general reordering, restructuring
library(ggplot2) # for plotting
library(dplyr) # for general reordering, restructuring
library(MASS)  # for negative binomial
library(itsadug)

## Warning: package 'mgcv' was built under R version 4.4.1

## Warning: package 'plotfunctions' was built under R version 4.4.1

library(lme4)  # for mixed models

## Warning: package 'Matrix' was built under R version 4.4.1

library(glmmTMB) # for negative binomial + random effects

## Warning: package 'glmmTMB' was built under R version 4.4.1

## Warning in check_dep_version(dep_pkg = "TMB"): package version mismatch: 
## glmmTMB was built with TMB package version 1.9.15
## Current TMB package version is 1.9.17
## Please re-install glmmTMB from source or restore original 'TMB' package (see '?reinstalling' for more information)

library(emmeans) # for pairwise comparison

## Warning: package 'emmeans' was built under R version 4.4.1

library(ggeffects) # for generating model predictions

## Warning: package 'ggeffects' was built under R version 4.4.1

library(knitr) # for making tables

## Warning: package 'knitr' was built under R version 4.4.1

library(performance) # for zero-inflation test

## Warning: package 'performance' was built under R version 4.4.1

library(AER) # for overdispersion test

## Warning: package 'AER' was built under R version 4.4.1

## Warning: package 'car' was built under R version 4.4.1

## Warning: package 'sandwich' was built under R version 4.4.1

library(mgcv) # for negative binomial
library(influence.ME) # for Cook's distance test
library(gratia)

## Warning: package 'gratia' was built under R version 4.4.1

library(ggcorrplot) # for correlation plots
library(gamm4) # for generalized additive mixed models
library(showtext)

## Warning: package 'sysfonts' was built under R version 4.4.1

library(mgcv) # for generalized additive mixed models
library(ggeffects)
library(patchwork)

## Warning: package 'patchwork' was built under R version 4.4.1

Read in csv files (metadata with sex and mass data & results file from MATLAB).

# Read in results table from MATLAB
results <- read_csv('/Users/taylorazizeh/Documents/research/active/emperor penguins/results/MATLAB/final/prey_capture_results.csv')
#head(results)

# Read in morphometrics table (sex, mass)
morpho <- read_csv('/Users/taylorazizeh/Documents/research/active/emperor penguins/data/cleaned/metadata/metadata.csv')
#head(morpho)

# Remove the "x" from BirdID (leftover from MATLAB)
results <- results %>%
  mutate(BirdID = sub("^x", "", BirdID))  # Removes "x" at the start

# Add in sex and mass
results <- results %>%
  left_join(morpho %>% dplyr::select(BirdID, Sex, StartMass), by = "BirdID") %>%
  dplyr::mutate(Year = substr(BirdID, 1, 2))  # add Year

#head(results)

Filter out any non-foraging dives.

# Create a simplified dataframe: one row per dive
dive_summary <- results %>%
  filter(DiveType != "NA") %>%
  group_by(BirdID, DiveNumber, DiveType, DiveDuration, Sex, StartMass) %>%
  mutate(Year = substr(BirdID, 1, 2)) %>%
  summarise(DiveEvents = sum(TotalEvents, na.rm = TRUE))

# Check the output
#head(dive_summary)

1.1 Hypothesis 1: Pelagic dives will have a higher rate of prey capture attempts, compared to benthic dives, because benthic prey are predicted to be of a higher caloric content.

1.1.1 Explore your data (EDA) (Zuur. pg 12)

# Add Times New Roman support
font_add(family = "Times New Roman", regular = "Times New Roman.ttf")  # Use full path if needed
showtext_auto()

# Now use it in ggplot
ggplot(dive_summary, aes(x = BirdID, y = DiveEvents)) +
  geom_boxplot(fill = "gray80", color = "black", outlier.shape = 1, outlier.size = 1.5) +
  labs(
    x = "Individual Penguin ID",
    y = "Number of prey capture events per dive",
    title = "Variation in foraging intensity across individuals"
  ) +
  theme_minimal(base_size = 12, base_family = "Times New Roman") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5, face = "bold")
  )

# Create a histogram plot
# hist(dive_summary$DiveEvents,
#          xlab = "Frequency",
#          ylab = "Number of prey capture events (per dive)",
#          main = "Total prey capture events per dive")

# Create a box plot
boxplot(dive_summary$DiveEvents ~ factor(dive_summary$DiveType),
         xlab = "ID",
         ylab = "Number of prey capture events (per dive)",
         main = "Total prey capture events per dive",
         data = dive_summary)

# Check prey capture events by dive type
# ggplot(dive_summary, aes(x = DiveType, y = DiveEvents)) +
#   geom_boxplot(width = 0.1) +
#   labs(title = "Distribution of Prey Capture Events by Dive Type",
#        y = "Total Prey Capture Events",
#        x = "Dive Type") +
#   theme_minimal()

# Boxplot of dive duration by type
ggplot(dive_summary, aes(x = DiveType, y = DiveDuration)) +
  geom_boxplot(fill = "lightgreen", alpha = 0.6) +
  labs(title = "Dive Duration by Dive Type",
       y = "Dive Duration (s)",
       x = "Dive Type") +
  theme_minimal()

# Scatter plot
ggplot(dive_summary, aes(x = DiveDuration, y = DiveEvents, color = DiveType)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(title = "Prey Capture Events vs Dive Duration",
       x = "Dive Duration (s)",
       y = "Total Prey Capture Events") +
  theme_minimal()

dive_summary %>%
  group_by(DiveType) %>%
  summarise(
    mean_duration = mean(DiveDuration, na.rm = TRUE),
    median_duration = median(DiveDuration, na.rm = TRUE),
    minimum_duration = min(DiveDuration, na.rm = TRUE),
    maximum_duration = max(DiveDuration, na.rm = TRUE),
    n = n()
  ) %>%
  arrange(desc(mean_duration))

transit_dives <- dive_summary %>%
  filter(DiveType == "Transit")

# Histogram of dive durations for transit dives
ggplot(transit_dives, aes (x = DiveDuration)) +
  geom_histogram(binwidth = 10, fill = "steelblue", color = "black") +
  labs(
    title = "Distribution of transit dive durations",
    x = "Dive duration (seconds)",
    y = "Count"
  ) +
  theme_minimal()

Transit dives (n = 107) have a mean duration of 39.97 seconds, a median of 28.02 seconds, and four dives ranging from 95 - 493 seconds. I suspect that transit dives will have little to no foraging. I will do a check to see if transit dive have significantly less foraging and can therefore be excluded from analysis. I’m going to fit a simple GLM.

1.1.2 Evaluation of transit dives:

# Fit a poisson model for count data
transit_m1 <- glm(DiveEvents ~ DiveType + offset(log(DiveDuration)),
                  family = poisson,
                  data = dive_summary)

summary(transit_m1)

# Overdispersion check function
overdisp_fun <- function(model) {
  rdf <- df.residual(model)
  rp <- residuals(model, type = "pearson")
  Pearson.chisq <- sum(rp^2)
  ratio <- Pearson.chisq / rdf
  return(ratio)
}

overdisp_fun(transit_m1)

As we suspected, the model is overdispersed (8.94) so we will switch to a negative binomial distribution.

# Refit with neg binom
transit_m2 <- glm.nb(
  DiveEvents ~ DiveType + offset(log(DiveDuration)),
  data = dive_summary)

overdisp_fun(transit_m2)

## [1] 0.7992634

summary(transit_m2)

## 
## Call:
## glm.nb(formula = DiveEvents ~ DiveType + offset(log(DiveDuration)), 
##     data = dive_summary, init.theta = 0.2991927876, link = log)
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -3.31141    0.05851 -56.600   <2e-16 ***
## DiveTypeEpipelagic  -0.60496    0.05947 -10.172   <2e-16 ***
## DiveTypeMesopelagic  0.08957    0.06730   1.331    0.183    
## DiveTypeTransit     -2.30826    0.09716 -23.757   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(0.2992) family taken to be 1)
## 
##     Null deviance: 35114  on 39135  degrees of freedom
## Residual deviance: 33926  on 39132  degrees of freedom
## AIC: 168570
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  0.29919 
##           Std. Err.:  0.00315 
## 
##  2 x log-likelihood:  -168560.00700

anova(transit_m2, test = "Chisq")

## Warning in anova.negbin(transit_m2, test = "Chisq"): tests made without
## re-estimating 'theta'

## Analysis of Deviance Table
## 
## Model: Negative Binomial(0.2992), link: log
## 
## Response: DiveEvents
## 
## Terms added sequentially (first to last)
## 
## 
##          Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                     39135      35114              
## DiveType  3   1188.3     39132      33926 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Changing the distribution fixed the overdispersion (0.85).

# Pearson residuals vs fitted values
plot(fitted(transit_m2), residuals(transit_m2, type = "pearson"),
     xlab = "Fitted values", ylab = "Pearson residuals",
     main = "Residuals vs Fitted")
abline(h = 0, col = "red")

# Q-Q plots
qqnorm(residuals(transit_m2, type = "pearson"))
qqline(residuals(transit_m2, type = "pearson"), col = "red")

# Residuals
hist(residuals(transit_m2, type = "pearson"),
     main = "Histogram of Pearson Residuals",
     xlab = "Residuals")

# Cooks Distance
library(car)
influencePlot(transit_m2, id.method = "identify", main = "Influence Plot")

## Warning in plot.window(...): "id.method" is not a graphical parameter

## Warning in plot.xy(xy, type, ...): "id.method" is not a graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "id.method" is not
## a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "id.method" is not
## a graphical parameter

## Warning in box(...): "id.method" is not a graphical parameter

## Warning in title(...): "id.method" is not a graphical parameter

## Warning in plot.xy(xy.coords(x, y), type = type, ...): "id.method" is not a
## graphical parameter

##          StudRes          Hat        CookD
## 17565  4.8006301 0.0005941029 0.0656367655
## 17566  4.6589084 0.0005604766 0.0561341427
## 37620  0.3113419 0.0015423801 0.0000454839
## 38165 -1.1768290 0.0015581563 0.0001011813

There are some outliers influencing the model but none exceed the thresholds for Cook’s Distance (> 0.5 or 1). Now we can evaluate the effect size of foraging during transit dives.

emm <- emmeans(transit_m2, ~ DiveType)
summary(emm)

##  DiveType    emmean     SE  df asymp.LCL asymp.UCL
##  Benthic      2.012 0.0585 Inf     1.897     2.126
##  Epipelagic   1.407 0.0107 Inf     1.386     1.428
##  Mesopelagic  2.101 0.0333 Inf     2.036     2.166
##  Transit     -0.297 0.0776 Inf    -0.449    -0.145
## 
## Results are given on the log (not the response) scale. 
## Confidence level used: 0.95

# Back-transform emmeans to the response scale
exp_summary <- summary(emm) %>%
  dplyr::mutate(
    rate = exp(emmean), # Number of events per dive
    lower.CL = exp(asymp.LCL),
    upper.CL = exp(asymp.UCL)
  )
print(exp_summary)

##      DiveType     emmean         SE  df  asymp.LCL  asymp.UCL      rate
## 1     Benthic  2.0117109 0.05850576 Inf  1.8970417  2.1263801 7.4760974
## 2  Epipelagic  1.4067465 0.01068669 Inf  1.3858009  1.4276920 4.0826507
## 3 Mesopelagic  2.1012803 0.03326547 Inf  2.0360812  2.1664794 8.1766317
## 4     Transit -0.2965481 0.07757256 Inf -0.4485875 -0.1445087 0.7433799
##    lower.CL  upper.CL
## 1 6.6661450 8.3844610
## 2 3.9980268 4.1690658
## 3 7.6605300 8.7275040
## 4 0.6385294 0.8654474

library(ggplot2)
ggplot(exp_summary, aes(x = DiveType, y = rate)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  labs(y = "Estimated prey capture rate per dive", x = "Dive Type") +
  theme_minimal()

For results:

I used a negative binomial generalized linear model to evaluate whether transit dives exhibited a meaningful level of foraging attempts. The model included dive type as a categorical predictor and dive duration as a log-transformed offset term to account for dive duration. On the response scale, mesopelagic dives exhibited the highest estimated rate of prey capture (mean = 8.18 events/dive, 95% CI: 7.66–8.73), followed closely by benthic dives (mean = 7.48 events/dive, 95% CI: 6.67–8.38). Epipelagic dives had lower prey capture rates (mean = 4.08 events/dive, 95% CI: 4.00–4.17), while transit dives had substantially fewer prey capture events (mean = 0.74 events/dive, 95% CI: 0.64–0.87). Although prey capture was not entirely absent from transit dives, the estimated rate was nearly an order of magnitude lower (~5.5x to ~11x lower) than other dive types. Based on this, I excluded transit dives from subsequent analyses focused on testing hypotheses related to foraging dive behavior.

Now, we can filter out transit dives and move forward to model fitting.

# Filter for foraging dives and to remove transit dives
foraging_dives <- dive_summary %>%
  filter(DiveEvents >= 1) %>%
  filter(DiveType != "NA") %>% # Remove transit dives
  filter(DiveType != "Transit") %>% # Remove transit dives
  ungroup()

na_dives_removed <- results %>%
  filter(DiveType != "NA")

# Check the outputs
nrow(na_dives_removed)

## [1] 39136

nrow(foraging_dives)

## [1] 17398

There are 17398 dives with foraging attempts (transit dives excluded) out of 39136 total dives (44.45%). There were 3810 “NA” dives, where birds did not have GPS data and therefore could not be paired to bathymetry.

# Create the dive_summary_time table with parsed datetime objects and create relative time structures
dive_summary_time <- results %>%
  filter(TotalEvents >= 1) %>%  # Foraging dives only
  filter(DiveType != "NA") %>%  # Removes NA dive types
  filter(DiveType != "Transit") %>%  # Removes transit dives
  mutate(
    Year = substr(BirdID, 1, 2),
    DiveStartTime_parsed = as.POSIXct(strptime(DiveStartTime, format = "%d-%b-%Y %H:%M:%OS", tz = "UTC")),
    DiveEndTime_parsed   = as.POSIXct(strptime(DiveEndTime,   format = "%d-%b-%Y %H:%M:%OS", tz = "UTC")),
    DiveEvents = TotalEvents  # Simply rename/alias
  ) %>%
  group_by(BirdID) %>%
  mutate(
    time_rel_hours = as.numeric(difftime(DiveStartTime_parsed, min(DiveStartTime_parsed), units = "hours")),
    time_rel_mins  = as.numeric(difftime(DiveStartTime_parsed, min(DiveStartTime_parsed), units = "mins"))
  ) %>%
  ungroup()

head(dive_summary_time)

## # A tibble: 6 × 22
##   BirdID   DiveNumber DiveStartTime DiveEndTime DiveDuration ToEvents DestEvents
##   <chr>         <dbl> <chr>         <chr>              <dbl>    <dbl>      <dbl>
## 1 19EP_30…          4 29-Oct-2019 … 29-Oct-201…        107.         3          0
## 2 19EP_30…          6 29-Oct-2019 … 29-Oct-201…        151.         0          0
## 3 19EP_30…          9 29-Oct-2019 … 29-Oct-201…        188.         0          3
## 4 19EP_30…         11 29-Oct-2019 … 29-Oct-201…         22.3        1          1
## 5 19EP_30…         12 29-Oct-2019 … 29-Oct-201…         50.6        2          0
## 6 19EP_30…         17 29-Oct-2019 … 29-Oct-201…         36.1        0          2
## # ℹ 15 more variables: FromEvents <dbl>, TotalEvents <dbl>, ODBA_Total <dbl>,
## #   ToPhaseDuration <dbl>, DestPhaseDuration <dbl>, FromPhaseDuration <dbl>,
## #   DiveType <chr>, Sex <chr>, StartMass <dbl>, Year <chr>,
## #   DiveStartTime_parsed <dttm>, DiveEndTime_parsed <dttm>, DiveEvents <dbl>,
## #   time_rel_hours <dbl>, time_rel_mins <dbl>

Now we can fit the full model. I included the smoothers which account for the random effect of Bird ID and relative time for each bird.

1.1.3 Model fitting

# Make sure BirdID is a factor
dive_summary_time$BirdID <- as.factor(dive_summary_time$BirdID)

h1_model1 <- bam(DiveEvents ~ DiveType + StartMass + Sex + Year +
                   s(BirdID, bs = "re") + # "bs = re" indicates a random effect for BirdID
                   s(time_rel_hours, by = BirdID, k = 15) + # Creates a separate smooth term for each BirdID - each bird's time series is allowed to be a nonlinear function of time, with a maximum complexity of 14 degrees of freedom (k = 15 - 1)
                   offset(log(DiveDuration)),
                   family = nb(),
                   data = dive_summary_time,
                   method = "fREML",
                   discrete = TRUE)

summary(h1_model1)

## 
## Family: Negative Binomial(2.031) 
## Link function: log 
## 
## Formula:
## DiveEvents ~ DiveType + StartMass + Sex + Year + s(BirdID, bs = "re") + 
##     s(time_rel_hours, by = BirdID, k = 15) + offset(log(DiveDuration))
## 
## Parametric coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -4.74957    1.02073  -4.653  3.3e-06 ***
## DiveTypeEpipelagic  -0.03265    0.03379  -0.966   0.3339    
## DiveTypeMesopelagic -0.02786    0.03708  -0.751   0.4526    
## StartMass            0.05940    0.04388   1.354   0.1758    
## SexM                -0.20041    0.14193  -1.412   0.1580    
## Year22               0.13907    0.07442   1.869   0.0617 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                       edf Ref.df      F  p-value    
## s(BirdID)                          0.6879 17.000  0.072 0.016770 *  
## s(time_rel_hours):BirdID19EP_302d 11.2741 12.220  8.581  < 2e-16 ***
## s(time_rel_hours):BirdID19EP_302e 12.1238 13.384 15.373  < 2e-16 ***
## s(time_rel_hours):BirdID19EP_303b  8.0800  8.842 14.653  < 2e-16 ***
## s(time_rel_hours):BirdID19EP_303d 10.9334 12.253 21.718  < 2e-16 ***
## s(time_rel_hours):BirdID19EP_303e  1.7642  1.947  4.079 0.035529 *  
## s(time_rel_hours):BirdID19EP_303g  6.1670  6.782 51.953  < 2e-16 ***
## s(time_rel_hours):BirdID19EP_304e  8.7594  9.286 23.953  < 2e-16 ***
## s(time_rel_hours):BirdID19EP_304f  7.2964  8.059 11.391  < 2e-16 ***
## s(time_rel_hours):BirdID22EP_306c  9.2198  9.954  8.562  < 2e-16 ***
## s(time_rel_hours):BirdID22EP_307b  3.2695  3.994  6.060 6.58e-05 ***
## s(time_rel_hours):BirdID22EP_307d  6.6843  6.946 15.163  < 2e-16 ***
## s(time_rel_hours):BirdID22EP_307g  1.1245  1.229  0.329 0.704961    
## s(time_rel_hours):BirdID22EP_308a  9.9049 10.955 32.060  < 2e-16 ***
## s(time_rel_hours):BirdID22EP_308b  8.6613 10.101  3.323 0.000244 ***
## s(time_rel_hours):BirdID22EP_309a  6.8947  7.554 11.516  < 2e-16 ***
## s(time_rel_hours):BirdID22EP_309c  5.3812  6.400  8.767  < 2e-16 ***
## s(time_rel_hours):BirdID22EP_309d  6.9148  7.423 38.873  < 2e-16 ***
## s(time_rel_hours):BirdID22EP_309f  4.8961  5.881 17.429  < 2e-16 ***
## s(time_rel_hours):BirdID22EP_313a  7.8942  8.667 38.025  < 2e-16 ***
## s(time_rel_hours):BirdID22EP_313b 11.0086 12.187 13.660  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.321   Deviance explained = 22.1%
## fREML =  22601  Scale est. = 1         n = 15307

resid_bam <- residuals(h1_model1, type = "pearson")
acf(resid_bam, main = "ACF of Deviance Residuals")

gam.check(h1_model1)

## 
## Method: fREML   Optimizer: perf chol
## $grad
##  [1]  7.215679e-08  1.880361e-08 -1.159156e-07 -1.556409e-08 -1.396612e-07
##  [6] -1.243252e-08 -1.397367e-10  4.186329e-08 -1.565390e-08  7.942885e-08
## [11]  2.425822e-07 -1.430987e-07 -1.000824e-08 -3.493190e-08  1.752466e-06
## [16] -6.918673e-08 -4.797146e-07  2.562596e-08  2.845391e-07 -2.034313e-07
## [21]  2.400932e-07
## 
## $hess
##                [,1]          [,2]          [,3]          [,4]          [,5]
##  [1,]  0.1647932101 -0.0502179433 -1.800505e-02 -4.195966e-04  7.198202e-02
##  [2,] -0.0502179433  2.0887232405  1.873532e-03 -8.719798e-03 -5.699616e-03
##  [3,] -0.0180050482  0.0018735325  2.731932e+00 -3.571513e-03  6.765026e-04
##  [4,] -0.0004195966 -0.0087197983 -3.571513e-03  2.140353e+00  2.387937e-03
##  [5,]  0.0719820157 -0.0056996161  6.765026e-04  2.387937e-03  2.918488e+00
##  [6,]  0.0012788745 -0.0059844554 -1.344940e-03 -3.398417e-04 -2.318818e-03
##  [7,] -0.0018144959  0.0174902621  3.845188e-03  1.563700e-03  6.817981e-03
##  [8,]  0.0045978479  0.0095794954  3.018774e-03  1.450902e-03  1.073067e-02
##  [9,] -0.0012653132 -0.0015175916  1.902217e-04  1.296427e-05 -7.835694e-05
## [10,] -0.0007454097 -0.0013010314  3.493668e-03  1.027706e-03 -1.829892e-03
## [11,] -0.0402356458 -0.0028387152 -4.472452e-03 -2.642333e-03 -9.283556e-03
## [12,]  0.0007716685  0.0002243059  3.335711e-03  2.011446e-03  5.252031e-04
## [13,]  0.0003174086  0.0106535475 -2.496109e-05  3.136991e-05  1.845962e-03
## [14,] -0.0094677760 -0.0180503251 -4.176629e-04 -4.913939e-04 -4.349420e-06
## [15,]  0.0730041746 -0.0247179026 -1.875523e-02 -4.291329e-03  3.704343e-02
## [16,] -0.0006195440 -0.0100912950 -7.828132e-03 -2.326321e-03  3.154168e-03
## [17,]  0.0102238813 -0.0265867745 -5.079762e-03  1.480492e-04  7.521038e-03
## [18,]  0.0008441361 -0.0022276963 -7.842755e-04  8.471845e-05  1.466541e-03
## [19,] -0.0064035614 -0.0009683612  3.112693e-03 -3.693540e-04 -2.956742e-03
## [20,]  0.0038757799 -0.0082482150 -3.597931e-04 -9.337496e-05  1.906443e-03
## [21,]  0.0038361865 -0.1922880611 -5.363938e-03 -6.573531e-03 -2.637701e-02
##                [,6]          [,7]          [,8]          [,9]         [,10]
##  [1,]  1.278874e-03 -1.814496e-03  4.597848e-03 -1.265313e-03 -7.454097e-04
##  [2,] -5.984455e-03  1.749026e-02  9.579495e-03 -1.517592e-03 -1.301031e-03
##  [3,] -1.344940e-03  3.845188e-03  3.018774e-03  1.902217e-04  3.493668e-03
##  [4,] -3.398417e-04  1.563700e-03  1.450902e-03  1.296427e-05  1.027706e-03
##  [5,] -2.318818e-03  6.817981e-03  1.073067e-02 -7.835694e-05 -1.829892e-03
##  [6,]  2.917135e-01  7.850578e-04  5.672078e-04  4.511536e-06  1.634660e-04
##  [7,]  7.850578e-04  2.212668e+00 -2.009043e-03 -1.131840e-05 -5.881226e-04
##  [8,]  5.672078e-04 -2.009043e-03  1.569470e+00  2.081974e-05 -7.108758e-04
##  [9,]  4.511536e-06 -1.131840e-05  2.081974e-05  1.601626e+00  3.849035e-06
## [10,]  1.634660e-04 -5.881226e-04 -7.108758e-04  3.849035e-06  2.817282e+00
## [11,] -3.659396e-04  1.864878e-03  2.875395e-03 -5.366046e-05  1.683882e-03
## [12,]  2.455022e-04 -1.274287e-03 -1.447356e-03 -4.360382e-05 -9.343590e-04
## [13,]  2.396174e-04 -6.170458e-04 -3.317159e-04  3.969261e-05  1.241607e-04
## [14,] -3.322046e-04  9.719045e-04  3.667679e-04  4.002907e-06 -1.399221e-04
## [15,] -1.405469e-03  3.399286e-03 -7.276046e-04  2.599072e-04  3.791983e-03
## [16,] -6.094485e-04  1.908624e-03  1.782446e-03 -3.864867e-05  2.167205e-03
## [17,] -6.940083e-04  1.117233e-03 -1.875126e-04 -1.098559e-04  8.746390e-04
## [18,] -6.499965e-05  3.939914e-05 -1.091191e-04 -1.471077e-05  2.011850e-04
## [19,]  4.792845e-05  3.209271e-04  1.301197e-04  8.433776e-05 -1.161850e-03
## [20,] -1.929662e-04  4.532465e-04 -9.981041e-05  9.099611e-06 -2.197223e-04
## [21,] -4.384436e-03  1.341565e-02  8.323311e-03 -2.975979e-04 -1.712060e-03
##               [,11]         [,12]         [,13]         [,14]         [,15]
##  [1,] -4.023565e-02  7.716685e-04  3.174086e-04 -9.467776e-03  0.0730041746
##  [2,] -2.838715e-03  2.243059e-04  1.065355e-02 -1.805033e-02 -0.0247179026
##  [3,] -4.472452e-03  3.335711e-03 -2.496109e-05 -4.176629e-04 -0.0187552315
##  [4,] -2.642333e-03  2.011446e-03  3.136991e-05 -4.913939e-04 -0.0042913287
##  [5,] -9.283556e-03  5.252031e-04  1.845962e-03 -4.349420e-06  0.0370434334
##  [6,] -3.659396e-04  2.455022e-04  2.396174e-04 -3.322046e-04 -0.0014054691
##  [7,]  1.864878e-03 -1.274287e-03 -6.170458e-04  9.719045e-04  0.0033992862
##  [8,]  2.875395e-03 -1.447356e-03 -3.317159e-04  3.667679e-04 -0.0007276046
##  [9,] -5.366046e-05 -4.360382e-05  3.969261e-05  4.002907e-06  0.0002599072
## [10,]  1.683882e-03 -9.343590e-04  1.241607e-04 -1.399221e-04  0.0037919835
## [11,]  5.382117e-01  2.735505e-03 -1.272324e-03 -2.105092e-03 -0.0177751312
## [12,]  2.735505e-03  1.391889e+00  1.909531e-04  1.967054e-04  0.0025831884
## [13,] -1.272324e-03  1.909531e-04  6.604030e-03  1.485880e-04 -0.0021719502
## [14,] -2.105092e-03  1.967054e-04  1.485880e-04  3.254093e+00 -0.0099850457
## [15,] -1.777513e-02  2.583188e-03 -2.171950e-03 -9.985046e-03  1.0360748502
## [16,] -1.735132e-03  1.841312e-03  4.398452e-04  3.252273e-04 -0.0051180726
## [17,]  7.927154e-03 -1.089784e-03  2.228424e-03  1.358244e-03  0.0064842764
## [18,] -2.506522e-04 -1.772145e-04  2.017869e-05 -1.374791e-04 -0.0027449939
## [19,]  1.064617e-03  4.521486e-04  2.100513e-04 -3.174367e-04  0.0067044538
## [20,]  4.087107e-03 -1.829562e-04  8.730773e-04  4.323079e-04  0.0058627150
## [21,]  1.916408e-02  2.712009e-03  9.967535e-03 -1.905347e-03  0.0443274049
##               [,16]         [,17]         [,18]         [,19]         [,20]
##  [1,] -6.195440e-04  0.0102238813  8.441361e-04 -6.403561e-03  3.875780e-03
##  [2,] -1.009129e-02 -0.0265867745 -2.227696e-03 -9.683612e-04 -8.248215e-03
##  [3,] -7.828132e-03 -0.0050797623 -7.842755e-04  3.112693e-03 -3.597931e-04
##  [4,] -2.326321e-03  0.0001480492  8.471845e-05 -3.693540e-04 -9.337496e-05
##  [5,]  3.154168e-03  0.0075210384  1.466541e-03 -2.956742e-03  1.906443e-03
##  [6,] -6.094485e-04 -0.0006940083 -6.499965e-05  4.792845e-05 -1.929662e-04
##  [7,]  1.908624e-03  0.0011172335  3.939914e-05  3.209271e-04  4.532465e-04
##  [8,]  1.782446e-03 -0.0001875126 -1.091191e-04  1.301197e-04 -9.981041e-05
##  [9,] -3.864867e-05 -0.0001098559 -1.471077e-05  8.433776e-05  9.099611e-06
## [10,]  2.167205e-03  0.0008746390  2.011850e-04 -1.161850e-03 -2.197223e-04
## [11,] -1.735132e-03  0.0079271538 -2.506522e-04  1.064617e-03  4.087107e-03
## [12,]  1.841312e-03 -0.0010897841 -1.772145e-04  4.521486e-04 -1.829562e-04
## [13,]  4.398452e-04  0.0022284237  2.017869e-05  2.100513e-04  8.730773e-04
## [14,]  3.252273e-04  0.0013582443 -1.374791e-04 -3.174367e-04  4.323079e-04
## [15,] -5.118073e-03  0.0064842764 -2.744994e-03  6.704454e-03  5.862715e-03
## [16,]  1.970327e+00 -0.0053852180 -4.237017e-04  2.074295e-03 -9.301711e-04
## [17,] -5.385218e-03  0.6642361383 -6.132056e-04  1.745125e-03 -5.104617e-03
## [18,] -4.237017e-04 -0.0006132056  2.553508e+00  7.344485e-04  1.386423e-04
## [19,]  2.074295e-03  0.0017451247  7.344485e-04  1.753079e+00 -9.986740e-04
## [20,] -9.301711e-04 -0.0051046166  1.386423e-04 -9.986740e-04  1.378208e+00
## [21,] -1.062204e-02 -0.0392032108  1.528411e-03 -1.172181e-02 -1.881269e-02
##               [,21]
##  [1,]  0.0038361865
##  [2,] -0.1922880611
##  [3,] -0.0053639376
##  [4,] -0.0065735309
##  [5,] -0.0263770106
##  [6,] -0.0043844356
##  [7,]  0.0134156534
##  [8,]  0.0083233113
##  [9,] -0.0002975979
## [10,] -0.0017120599
## [11,]  0.0191640814
## [12,]  0.0027120088
## [13,]  0.0099675347
## [14,] -0.0019053470
## [15,]  0.0443274049
## [16,] -0.0106220370
## [17,] -0.0392032108
## [18,]  0.0015284111
## [19,] -0.0117218145
## [20,] -0.0188126852
## [21,]  3.4668760074
## 
## Model rank =  306 / 306 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                       k'    edf k-index p-value    
## s(BirdID)                         20.000  0.688      NA      NA    
## s(time_rel_hours):BirdID19EP_302d 14.000 11.274     0.9  <2e-16 ***
## s(time_rel_hours):BirdID19EP_302e 14.000 12.124     0.9  <2e-16 ***
## s(time_rel_hours):BirdID19EP_303b 14.000  8.080     0.9  <2e-16 ***
## s(time_rel_hours):BirdID19EP_303d 14.000 10.933     0.9  <2e-16 ***
## s(time_rel_hours):BirdID19EP_303e 14.000  1.764     0.9   0.005 ** 
## s(time_rel_hours):BirdID19EP_303g 14.000  6.167     0.9  <2e-16 ***
## s(time_rel_hours):BirdID19EP_304e 14.000  8.759     0.9  <2e-16 ***
## s(time_rel_hours):BirdID19EP_304f 14.000  7.296     0.9  <2e-16 ***
## s(time_rel_hours):BirdID22EP_306c 14.000  9.220     0.9  <2e-16 ***
## s(time_rel_hours):BirdID22EP_307b 14.000  3.270     0.9  <2e-16 ***
## s(time_rel_hours):BirdID22EP_307d 14.000  6.684     0.9  <2e-16 ***
## s(time_rel_hours):BirdID22EP_307g 14.000  1.125     0.9  <2e-16 ***
## s(time_rel_hours):BirdID22EP_308a 14.000  9.905     0.9  <2e-16 ***
## s(time_rel_hours):BirdID22EP_308b 14.000  8.661     0.9  <2e-16 ***
## s(time_rel_hours):BirdID22EP_309a 14.000  6.895     0.9  <2e-16 ***
## s(time_rel_hours):BirdID22EP_309c 14.000  5.381     0.9  <2e-16 ***
## s(time_rel_hours):BirdID22EP_309d 14.000  6.915     0.9  <2e-16 ***
## s(time_rel_hours):BirdID22EP_309f 14.000  4.896     0.9  <2e-16 ***
## s(time_rel_hours):BirdID22EP_313a 14.000  7.894     0.9  <2e-16 ***
## s(time_rel_hours):BirdID22EP_313b 14.000 11.009     0.9  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

That didn’t really help the autocorrelation so I’m going to fit the model with two tensor products. The first (te(time_rel_hours, BirdID…)) models the effect of time changes by individual bird where relative hours is treated as a smooth continuous variable and BirdID is a factor, modeled as a random effect. The second (te(lag1_DiveEvents, time_since_last…)) models the interaction between previous foraging success (lag1) and recovery time between dives (time_since_last).

# Create lag variable and time since last
dive_summary_time <- dive_summary_time %>%
  group_by(BirdID) %>%
  arrange(time_rel_hours) %>%
  mutate(
    lag1_DiveEvents = lag(DiveEvents, 1),
    time_since_last = time_rel_hours - lag(time_rel_hours, 1)
  ) %>%
  ungroup()

h1_model2 <- bam(DiveEvents ~ DiveType + StartMass + Sex + Year +
                   te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(25, 15)) + 
                   te(lag1_DiveEvents, time_since_last, k = c(10, 8)) +
                   offset(log(DiveDuration)),
                   family = nb(),
                   data = dive_summary_time,
                   method = "fREML",
                   discrete = TRUE)

summary(h1_model2)

## 
## Family: Negative Binomial(2.506) 
## Link function: log 
## 
## Formula:
## DiveEvents ~ DiveType + StartMass + Sex + Year + te(time_rel_hours, 
##     BirdID, bs = c("cr", "re"), k = c(25, 15)) + te(lag1_DiveEvents, 
##     time_since_last, k = c(10, 8)) + offset(log(DiveDuration))
## 
## Parametric coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -3.37326    0.62014  -5.440 5.42e-08 ***
## DiveTypeEpipelagic  -0.01126    0.03228  -0.349 0.727215    
## DiveTypeMesopelagic -0.03229    0.03498  -0.923 0.355935    
## StartMass           -0.03206    0.02165  -1.481 0.138760    
## SexM                 0.06125    0.06328   0.968 0.333151    
## Year22               0.17115    0.04747   3.605 0.000313 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                        edf Ref.df       F p-value    
## te(BirdID,time_rel_hours)           235.54 334.00   5.415  <2e-16 ***
## te(time_since_last,lag1_DiveEvents)  15.83  18.61 117.178  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.453   Deviance explained = 33.6%
## fREML =  22646  Scale est. = 1         n = 15287

resid_bam <- residuals(h1_model2, type = "pearson")
acf(resid_bam, main = "ACF of Deviance Residuals")

gam.check(h1_model2)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1]  8.576150e-07  1.069677e-06 -2.537538e-07 -1.643657e-07
## 
## $hess
##             [,1]       [,2]        [,3]        [,4]
## [1,] 52.09167766 9.88923952 0.437898921 0.097331577
## [2,]  9.88923952 9.98017035 0.121851890 0.052701759
## [3,]  0.43789892 0.12185189 3.061525692 0.002964942
## [4,]  0.09733158 0.05270176 0.002964942 1.253115343
## 
## Model rank =  585 / 585 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                        k'   edf k-index p-value  
## te(BirdID,time_rel_hours)           500.0 235.5      NA      NA  
## te(time_since_last,lag1_DiveEvents)  79.0  15.8    0.94   0.065 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

That helped the autocorrelation. Now we need to find the best k-values (to shorten the output, I’m commenting this out so just let me know if you need to see it).

# k_grid <- expand.grid(
#   k1_time = c(10, 15, 20, 25),
#   k1_bird = c(9, 12, 15),
#   k2_lag  = c(8, 10, 12),
#   k2_time = c(6, 8, 10)
# )
# resu <- lapply(1:nrow(k_grid), function(i) {
#   k_vals <- k_grid[i, ]
# 
#   model <- bam(
#     DiveEvents ~ DiveType + StartMass + Sex + Year +
#       te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(k_vals$k1_time, k_vals$k1_bird)) +
#       te(lag1_DiveEvents, time_since_last, k = c(k_vals$k2_lag, k_vals$k2_time)) +
#       offset(log(DiveDuration)),
#     family = nb(),
#     data = dive_summary_time,
#     method = "fREML",
#     discrete = TRUE
#   )
# 
#   list(
#     k1 = c(k_vals$k1_time, k_vals$k1_bird),
#     k2 = c(k_vals$k2_lag, k_vals$k2_time),
#     AIC = AIC(model),
#     DevExpl = summary(model)$dev.expl,
#     EDF = sum(model$edf)
#   )
# })
# 
# # Summarize results with all 4 k values
# summary_table <- do.call(rbind, lapply(resu, function(x) {
#   data.frame(
#     k1_time = x$k1[1],
#     k1_bird = x$k1[2],
#     k2_lag = x$k2[1],
#     k2_time = x$k2[2],
#     AIC = x$AIC,
#     DevExpl = x$DevExpl,
#     EDF = x$EDF
#   )
# }))
# 
# # Sort by AIC
# summary_table <- summary_table[order(summary_table$AIC), ]
# 
# # View the summary
# print(summary_table)
# 
# ggplot(summary_table, aes(x = k1_time, y = AIC, color = as.factor(k1_bird))) +
#   geom_point(size = 3) +
#   geom_line(aes(group = k1_bird)) +
#   facet_grid(k2_lag ~ k2_time, labeller = label_both) +
#   labs(
#     x = "k for time_rel_hours",
#     y = "AIC",
#     color = "k for BirdID",
#     title = "AIC by k values for te(time_rel_hours, BirdID)",
#     subtitle = "Faceted by k values of te(lag1_DiveEvents, time_since_last)"
#   ) +
#   theme_minimal()
# 
# ggplot(summary_table, aes(x = k2_lag, y = AIC, color = as.factor(k2_time))) +
#   geom_point(size = 3) +
#   geom_line(aes(group = k2_time)) +
#   facet_grid(k1_time ~ k1_bird, labeller = label_both) +
#   labs(
#     x = "k for lag1_DiveEvents",
#     y = "AIC",
#     color = "k for time_since_last",
  #   title = "AIC by k values for te(lag1_DiveEvents, time_since_last)",
  #   subtitle = "Faceted by k values of te(time_rel_hours, BirdID)"
  # ) +
  # theme_minimal()

Using the best fit values, we can fit the model again.

best_model <- bam(DiveEvents ~ DiveType + StartMass + Sex + Year +
                    te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(25, 15)) + 
                    te(lag1_DiveEvents, time_since_last, k = c(10, 8)) +                        # random intercept for BirdID
                    offset(log(DiveDuration)),
                    family = nb(),
                    data = dive_summary_time,
                    method = "fREML",
                    discrete = TRUE)

summary(best_model)

## 
## Family: Negative Binomial(2.506) 
## Link function: log 
## 
## Formula:
## DiveEvents ~ DiveType + StartMass + Sex + Year + te(time_rel_hours, 
##     BirdID, bs = c("cr", "re"), k = c(25, 15)) + te(lag1_DiveEvents, 
##     time_since_last, k = c(10, 8)) + offset(log(DiveDuration))
## 
## Parametric coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -3.37326    0.62014  -5.440 5.42e-08 ***
## DiveTypeEpipelagic  -0.01126    0.03228  -0.349 0.727215    
## DiveTypeMesopelagic -0.03229    0.03498  -0.923 0.355935    
## StartMass           -0.03206    0.02165  -1.481 0.138760    
## SexM                 0.06125    0.06328   0.968 0.333151    
## Year22               0.17115    0.04747   3.605 0.000313 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                        edf Ref.df       F p-value    
## te(BirdID,time_rel_hours)           235.54 334.00   5.415  <2e-16 ***
## te(time_since_last,lag1_DiveEvents)  15.83  18.61 117.178  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.453   Deviance explained = 33.6%
## fREML =  22646  Scale est. = 1         n = 15287

resid_bam <- residuals(best_model, type = "pearson")
acf(resid_bam, main = "ACF of Deviance Residuals")

gam.check(best_model)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1]  8.576150e-07  1.069677e-06 -2.537538e-07 -1.643657e-07
## 
## $hess
##             [,1]       [,2]        [,3]        [,4]
## [1,] 52.09167766 9.88923952 0.437898921 0.097331577
## [2,]  9.88923952 9.98017035 0.121851890 0.052701759
## [3,]  0.43789892 0.12185189 3.061525692 0.002964942
## [4,]  0.09733158 0.05270176 0.002964942 1.253115343
## 
## Model rank =  585 / 585 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                        k'   edf k-index p-value  
## te(BirdID,time_rel_hours)           500.0 235.5      NA      NA  
## te(time_since_last,lag1_DiveEvents)  79.0  15.8    0.94    0.07 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Now, we can move forward to simplifying the fixed effects. We will start by removing DiveType because it is the least significant.

best_model_no_type <- update(best_model, . ~ . - DiveType)

anova(best_model, best_model_no_type, test = "Chisq")

## Analysis of Deviance Table
## 
## Model 1: DiveEvents ~ DiveType + StartMass + Sex + Year + te(time_rel_hours, 
##     BirdID, bs = c("cr", "re"), k = c(25, 15)) + te(lag1_DiveEvents, 
##     time_since_last, k = c(10, 8)) + offset(log(DiveDuration))
## Model 2: DiveEvents ~ StartMass + Sex + Year + te(time_rel_hours, BirdID, 
##     bs = c("cr", "re"), k = c(25, 15)) + te(lag1_DiveEvents, 
##     time_since_last, k = c(10, 8)) + offset(log(DiveDuration))
##   Resid. Df Resid. Dev     Df Deviance Pr(>Chi)
## 1     14986     100980                         
## 2     14987     100981 -1.833  -1.3556   0.4664

AIC(best_model, best_model_no_type)

##                          df      AIC
## best_model         261.1916 101502.0
## best_model_no_type 259.3924 101499.7

Removing DiveType improved the AIC by about 3, but it’s marginal so I will retain it. Next, I’ll remove Sex.

dive_summary_time_sex <- dive_summary_time %>%
  filter(Sex != "NA")

best_model_na_sex <- bam(DiveEvents ~ DiveType + StartMass + Sex + Year +
                    te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(25, 15)) + 
                    te(lag1_DiveEvents, time_since_last, k = c(10, 8)) +                        # random intercept for BirdID
                    offset(log(DiveDuration)),
                    family = nb(),
                    data = dive_summary_time_sex,
                    method = "fREML",
                    discrete = TRUE)

best_model_minus_sex <- bam(DiveEvents ~ DiveType + StartMass + Year +
                    te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(25, 15)) + 
                    te(lag1_DiveEvents, time_since_last, k = c(10, 8)) +                        # random intercept for BirdID
                    offset(log(DiveDuration)),
                    family = nb(),
                    data = dive_summary_time_sex,
                    method = "fREML",
                    discrete = TRUE)

AIC(best_model_na_sex, best_model_minus_sex)

##                            df      AIC
## best_model_na_sex    261.1916 101502.0
## best_model_minus_sex 261.0338 101502.3

anova(best_model_na_sex, best_model_minus_sex)

## Analysis of Deviance Table
## 
## Model 1: DiveEvents ~ DiveType + StartMass + Sex + Year + te(time_rel_hours, 
##     BirdID, bs = c("cr", "re"), k = c(25, 15)) + te(lag1_DiveEvents, 
##     time_since_last, k = c(10, 8)) + offset(log(DiveDuration))
## Model 2: DiveEvents ~ DiveType + StartMass + Year + te(time_rel_hours, 
##     BirdID, bs = c("cr", "re"), k = c(25, 15)) + te(lag1_DiveEvents, 
##     time_since_last, k = c(10, 8)) + offset(log(DiveDuration))
##   Resid. Df Resid. Dev        Df Deviance
## 1     14986     100980                   
## 2     14986     100980 -0.060575 -0.63807

Removing sex does not worsen or improve fit. Now, we will remove Mass.

# Rerun with all birds
best_model_minus_sex <- bam(DiveEvents ~ DiveType + StartMass + Year +
                    te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(25, 15)) + 
                    te(lag1_DiveEvents, time_since_last, k = c(10, 8)) +                        # random intercept for BirdID
                    offset(log(DiveDuration)),
                    family = nb(),
                    data = dive_summary_time,
                    method = "fREML",
                    discrete = TRUE)

best_model_na_mass <- bam(DiveEvents ~ DiveType + Year +
                    te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(25, 15)) + 
                    te(lag1_DiveEvents, time_since_last, k = c(10, 8)) +                        # random intercept for BirdID
                    offset(log(DiveDuration)),
                    family = nb(),
                    data = dive_summary_time,
                    method = "fREML",
                    discrete = TRUE)

anova(best_model_minus_sex, best_model_na_mass)

## Analysis of Deviance Table
## 
## Model 1: DiveEvents ~ DiveType + StartMass + Year + te(time_rel_hours, 
##     BirdID, bs = c("cr", "re"), k = c(25, 15)) + te(lag1_DiveEvents, 
##     time_since_last, k = c(10, 8)) + offset(log(DiveDuration))
## Model 2: DiveEvents ~ DiveType + Year + te(time_rel_hours, BirdID, bs = c("cr", 
##     "re"), k = c(25, 15)) + te(lag1_DiveEvents, time_since_last, 
##     k = c(10, 8)) + offset(log(DiveDuration))
##   Resid. Df Resid. Dev        Df Deviance
## 1     17034     114689                   
## 2     17034     114689 -0.043002  0.22708

AIC(best_model_minus_sex, best_model_na_mass)

##                            df      AIC
## best_model_minus_sex 295.9725 115280.8
## best_model_na_mass   295.9723 115280.6

Removing mass does not significantly worsen or improve model fit. We will retain Year because it’s significant.

best_model_final <- bam(DiveEvents ~ DiveType + Year +
                          te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(25, 15)) + 
                          te(lag1_DiveEvents, time_since_last, k = c(10, 8)) +
                          offset(log(DiveDuration)),
                        family = nb(),
                        data = dive_summary_time,
                        method = "fREML",
                        discrete = TRUE)


summary(best_model_final)

## 
## Family: Negative Binomial(2.517) 
## Link function: log 
## 
## Formula:
## DiveEvents ~ DiveType + Year + te(time_rel_hours, BirdID, bs = c("cr", 
##     "re"), k = c(25, 15)) + te(lag1_DiveEvents, time_since_last, 
##     k = c(10, 8)) + offset(log(DiveDuration))
## 
## Parametric coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -4.00483    0.26246 -15.259  < 2e-16 ***
## DiveTypeEpipelagic   0.01468    0.03033   0.484  0.62825    
## DiveTypeMesopelagic -0.03304    0.03288  -1.005  0.31490    
## Year22               0.13496    0.04633   2.913  0.00359 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                        edf Ref.df       F p-value    
## te(BirdID,time_rel_hours)           272.09 383.00   5.773  <2e-16 ***
## te(time_since_last,lag1_DiveEvents)  16.05  18.82 136.190  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.455   Deviance explained = 33.6%
## fREML =  25744  Scale est. = 1         n = 17375

gam.check(best_model_final)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1]  8.089894e-08  4.583122e-08 -4.757090e-09  1.151457e-08
## 
## $hess
##             [,1]        [,2]      [,3]       [,4]
## [1,] 59.94839052 11.74436155 0.4265288 0.07549227
## [2,] 11.74436155 11.66724913 0.1415708 0.06002812
## [3,]  0.42652881  0.14157079 3.2525919 0.09731380
## [4,]  0.07549227  0.06002812 0.0973138 1.35117527
## 
## Model rank =  658 / 658 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                        k'   edf k-index p-value  
## te(BirdID,time_rel_hours)           575.0 272.1      NA      NA  
## te(time_since_last,lag1_DiveEvents)  79.0  16.1    0.94   0.025 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(best_model_final)$r.sq

## [1] 0.4554605

summary(best_model_final)$dev.expl

## [1] 0.3363782

# Residuals vs Fitted
plot(fitted(best_model_final), residuals(best_model_final, type = "pearson"),
     xlab = "Fitted values", ylab = "Pearson residuals",
     main = "Residuals vs Fitted")
abline(h = 0, col = "red")

# Q-Q Plot
qqnorm(residuals(best_model_final, type = "pearson"))
qqline(residuals(best_model_final, type = "pearson"), col = "red")

# ACF plot
acf(residuals(best_model_final, type = "pearson"), main = "ACF of Residuals")

# Plot smooth interaction
plot(best_model_final, select = "s(time_rel_hours):BirdID19EP_302c")
plot_smooth(best_model_final, view = "lag1_DiveEvents", cond = list(time_rel_hours = median(dive_summary_time$time_rel_hours)))

## Summary:
##  * DiveType : factor; set to the value(s): Epipelagic. 
##  * Year : factor; set to the value(s): 22. 
##  * DiveDuration : numeric predictor; set to the value(s): 302.239999999991. 
##  * time_rel_hours : numeric predictor; set to the value(s): 115.232811111079. (Might be canceled as random effect, check below.) 
##  * BirdID : factor; set to the value(s): 19EP_303d. (Might be canceled as random effect, check below.) 
##  * lag1_DiveEvents : numeric predictor; with 30 values ranging from 1.000000 to 86.000000. 
##  * time_since_last : numeric predictor; set to the value(s): 0.127016666664019. 
##  * NOTE : The following random effects columns are canceled: te(BirdID,time_rel_hours)
##

vis.gam(best_model_final,
        view = c("lag1_DiveEvents", "time_since_last"),
        plot.type = "persp",
        color = "terrain",
        theta = 140, phi = 25,
        main = "Effect Surface: lag1_DiveEvents × time_since_last")

appraise(best_model_final) +
  patchwork::plot_layout(ncol = 2)

plot(best_model_final, select = 21, scheme = 2, shade = TRUE,
     main = "3D Smooth: lag1_TotalEvents × time_since_last",
     xlab = "lag1_TotalEvents", ylab = "time_since_last (min)")

plot(best_model_final, select = 21, scheme = 1,
     main = "Contour: lag1_TotalEvents × time_since_last",
     xlab = "lag1_TotalEvents", ylab = "time_since_last (min)")

1.1.4 Interpretation

A generalized additive model (GAM) was fit using the bam() function from the mgcv package to evaluate how dive type, year, and both temporal and behavioral autocorrelation influenced the number of prey capture attempts per dive, while accounting for dive duration via an offset. The final model included a fixed effect for dive type (epipelagic, mesopelagic, and benthic) and year, tensor smooth products for time elapsed per individual and the interaction between previous dive performance and the inter-dive interval. A negative binomial distribution was used to accommodate overdispersion.

The model explained 33.6% of the deviance (adjusted R^2 = 0.455). Year was a significant predictor, with penguins exhibiting a higher rate of prey captures in 2022 compared to 2019 (estimate = 0.135, p = 0.0036). Dive type was not a significant predictor, with no meaningful difference in prey capture rates between mesopelagic or epipelagic dives relative to benthic dives. The smooth term for time elapsed per individual was highly significant (edf = 272.1, p < 0.001), indicating strong individual=level temporal structure in foraging behavior. Similarly, the interaction between the previous dive’s prey capture attempts and inter-dive interval was highly significant (edf = 16.1, p < 0.001). This demonstrates that the expected number of prey capture attempts was highest when dives occured shortly after a previously successful dive and declined markedly when dives followed longer inter-dive intervals, particularly after highly active prior dives.

Figures

# Ensure 'Year' is a factor
dive_summary_time <- dive_summary_time %>%
  mutate(Year = as.factor(Year))

# Set a typical dive duration value
median_duration <- median(dive_summary_time$DiveDuration, na.rm = TRUE)

# --- Partial effect surface for lag1_DiveEvents × time_since_last
draw(best_model_final, 
     select = "te(time_since_last,lag1_DiveEvents)", 
     contour = TRUE, raster = TRUE) +
  scale_fill_viridis_c(option = "B", name = "Partial Effect") +
  labs(
    title = "Partial Effect of Previous Foraging & Recovery Time",
    x = "Previous Dive Events",
    y = "Time Since Last Dive (min)"
  ) +
  theme_minimal(base_size = 14) +
  theme(legend.position = "right")

## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

# Evaluate the smooth
bird_time_smooth <- smooth_estimates(best_model_final, 
                                     smooth = "te(BirdID,time_rel_hours)", 
                                     n = 100)

## Warning: The `smooth` argument of `smooth_estimates()` is deprecated as of gratia
## 0.8.9.9.
## ℹ Please use the `select` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Confirm structure
head(bird_time_smooth)

## # A tibble: 6 × 7
##   .smooth                   .type    .by   .estimate   .se BirdID time_rel_hours
##   <chr>                     <chr>    <chr>     <dbl> <dbl> <fct>           <dbl>
## 1 te(BirdID,time_rel_hours) Tensor … <NA>      0.205 0.206 19EP_…         0.0106
## 2 te(BirdID,time_rel_hours) Tensor … <NA>      0.349 0.146 19EP_…         3.74  
## 3 te(BirdID,time_rel_hours) Tensor … <NA>      0.480 0.113 19EP_…         7.47  
## 4 te(BirdID,time_rel_hours) Tensor … <NA>      0.584 0.109 19EP_…        11.2   
## 5 te(BirdID,time_rel_hours) Tensor … <NA>      0.648 0.111 19EP_…        14.9   
## 6 te(BirdID,time_rel_hours) Tensor … <NA>      0.662 0.105 19EP_…        18.7

ggplot(bird_time_smooth, aes(x = time_rel_hours, y = .estimate)) +
  geom_line(color = "black", linewidth = 0.6) +
  geom_ribbon(aes(ymin = .estimate - 2 * .se, ymax = .estimate + 2 * .se), 
              alpha = 0.2, fill = "grey60") +
  facet_wrap(~ BirdID, scales = "free_y") +
  labs(
    title = "Bird-specific Temporal Smooths",
    x = "Time Since Deployment (hours)",
    y = "Partial Effect on Dive Events"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    strip.background = element_blank(),
    strip.text = element_text(size = 12, face = "bold"),
    plot.title = element_text(size = 16, face = "bold"),
    axis.title = element_text(size = 14)
  )

ggplot(bird_time_smooth, aes(x = time_rel_hours, y = .estimate, color = BirdID)) +
  geom_line(alpha = 0.8, linewidth = 1) +
  labs(
    title = "Temporal Trends in Dive Events Across Individuals",
    x = "Time Since Deployment (hours)",
    y = "Partial Effect",
    color = "Bird ID"
  ) +
  theme_minimal(base_size = 14)

dive_summary_time <- dive_summary_time %>%
  mutate(
    Year = factor(Year),
    BirdID = factor(BirdID)
  )

best_model_final <- bam(
  DiveEvents ~ DiveType + Year + 
    te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(25, 15)) + 
    te(lag1_DiveEvents, time_since_last, k = c(10, 8)) +
    offset(log(DiveDuration)),
  family = nb(),
  data = dive_summary_time,
  method = "fREML",
  discrete = TRUE
)
median_duration <- median(dive_summary_time$DiveDuration, na.rm = TRUE)

gg_year <- ggemmeans(best_model_final, terms = "Year")

## Model uses a transformed offset term. Predictions may not be correct.
##   It is recommended to fix the offset term using the `condition` argument,
##   e.g. `condition = c(lag1_DiveEvents = 1)`.
##   You could also transform the offset variable before fitting the model
##   and use `offset(lag1_DiveEvents)` in the model formula.

## Error in `[.data.frame`(tbl, , vars, drop = FALSE) : 
##   undefined columns selected

plot(gg_year) +
  labs(
    title = "Estimated Effect of Year on Dive Events",
    x = "Year",
    y = "Predicted Dive Events"
  ) +
  theme_minimal(base_size = 14)

# --- Model diagnostics from gratia
appraise(best_model_final) +
  patchwork::plot_layout(ncol = 2)

# Make sure Year is a factor
dive_summary_time$Year <- factor(dive_summary_time$Year)

# Violin plot
ggplot(dive_summary_time, aes(x = Year, y = DiveEvents, fill = Year)) +
  geom_violin(trim = FALSE, alpha = 0.7) +
  geom_boxplot(width = 0.1, outlier.shape = NA, color = "black", fill = "white") +
  scale_fill_manual(values = c("steelblue", "darkorange")) +
  labs(
    title = "Distribution of Dive Events per Dive by Year",
    x = "Year",
    y = "Dive Events"
  ) +
  theme_minimal(base_size = 14) +
  theme(legend.position = "none")

1.2 Hypothesis 2: The rate of prey capture attempts will be highest in the bottom phase of the dive, where emperor penguins are believed to be primarily hunting.

Reformat results.

# Join lag, time_since_last, and time_rel_hours into results
results_enriched <- results %>%
  left_join(
    dive_summary_time %>%
      dplyr::select(BirdID, DiveNumber, lag1_DiveEvents, time_since_last, time_rel_hours),
    by = c("BirdID", "DiveNumber")
  )

# Prepare the dataset: remove NA and Transit dives, and filter for dives with ≥ 1 foraging event
phase_long <- results_enriched %>%
  filter(
    !is.na(DiveType),
    DiveType != "Transit",
    TotalEvents >= 1
  ) %>%
  dplyr::select(
    BirdID, DiveNumber, DiveType, Sex, StartMass, Year,
    lag1_DiveEvents, time_since_last, time_rel_hours,
    ToEvents, DestEvents, FromEvents,
    ToPhaseDuration, DestPhaseDuration, FromPhaseDuration
  ) %>%
  pivot_longer(
    cols = c(ToEvents, DestEvents, FromEvents),
    names_to = "Phase",
    values_to = "Events"
  ) %>%
  pivot_longer(
    cols = c(ToPhaseDuration, DestPhaseDuration, FromPhaseDuration),
    names_to = "PhaseDur",
    values_to = "Duration"
  ) %>%
  filter(
    (Phase == "ToEvents"   & PhaseDur == "ToPhaseDuration")   |
    (Phase == "DestEvents" & PhaseDur == "DestPhaseDuration") |
    (Phase == "FromEvents" & PhaseDur == "FromPhaseDuration")
  ) %>%
  mutate(
    Phase = dplyr::recode(Phase,
                          "ToEvents" = "Descent",
                          "DestEvents" = "Bottom",
                          "FromEvents" = "Ascent")
  ) %>%
  dplyr::select(
    BirdID, DiveNumber, DiveType, Sex, StartMass, Year,
    lag1_DiveEvents, time_since_last, time_rel_hours,
    Phase, Events, Duration
  )

# View structure and first rows
glimpse(phase_long)

## Rows: 52,194
## Columns: 12
## $ BirdID          <chr> "19EP_302c", "19EP_302c", "19EP_302c", "19EP_302c", "1…
## $ DiveNumber      <dbl> 4, 4, 4, 6, 6, 6, 9, 9, 9, 11, 11, 11, 12, 12, 12, 17,…
## $ DiveType        <chr> "Epipelagic", "Epipelagic", "Epipelagic", "Epipelagic"…
## $ Sex             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ StartMass       <dbl> 23.5, 23.5, 23.5, 23.5, 23.5, 23.5, 23.5, 23.5, 23.5, …
## $ Year            <chr> "19", "19", "19", "19", "19", "19", "19", "19", "19", …
## $ lag1_DiveEvents <dbl> NA, NA, NA, 3, 3, 3, 9, 9, 9, 3, 3, 3, 2, 2, 2, 2, 2, …
## $ time_since_last <dbl> NA, NA, NA, 0.080844444, 0.080844444, 0.080844444, 0.1…
## $ time_rel_hours  <dbl> 0.00000000, 0.00000000, 0.00000000, 0.08084444, 0.0808…
## $ Phase           <chr> "Descent", "Bottom", "Ascent", "Descent", "Bottom", "A…
## $ Events          <dbl> 3, 0, 0, 0, 0, 9, 0, 3, 0, 1, 1, 0, 2, 0, 0, 0, 2, 0, …
## $ Duration        <dbl> 33.52, 52.74, 20.82, 34.72, 30.26, 86.40, 86.16, 41.78…

head(phase_long)

## # A tibble: 6 × 12
##   BirdID    DiveNumber DiveType   Sex   StartMass Year  lag1_DiveEvents
##   <chr>          <dbl> <chr>      <chr>     <dbl> <chr>           <dbl>
## 1 19EP_302c          4 Epipelagic <NA>       23.5 19                 NA
## 2 19EP_302c          4 Epipelagic <NA>       23.5 19                 NA
## 3 19EP_302c          4 Epipelagic <NA>       23.5 19                 NA
## 4 19EP_302c          6 Epipelagic <NA>       23.5 19                  3
## 5 19EP_302c          6 Epipelagic <NA>       23.5 19                  3
## 6 19EP_302c          6 Epipelagic <NA>       23.5 19                  3
## # ℹ 5 more variables: time_since_last <dbl>, time_rel_hours <dbl>, Phase <chr>,
## #   Events <dbl>, Duration <dbl>

1.2.1 Explore your data (EDA) (Zuur. pg 12)

ggplot(phase_long, aes(x = Phase, y = Events, fill = Phase)) +
  geom_boxplot(outlier.shape = NA, alpha = 0.7) +
  geom_jitter(width = 0.2, alpha = 0.2, color = "black") +
  labs(
    title = "Prey Capture Events by Dive Phase",
    x = "Dive Phase",
    y = "Number of Prey Capture Events"
  ) +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal(base_size = 14) +
  theme(legend.position = "none")

ggplot(phase_long, aes(x = Phase, y = Events, fill = DiveType)) +
  geom_violin(trim = FALSE, alpha = 0.7) +
  geom_jitter(width = 0.2, size = 0.5, alpha = 0.2) +
  labs(
    title = "Prey Capture by Dive Phase and Dive Type",
    x = "Dive Phase",
    y = "Number of Prey Capture Events",
    fill = "Dive Type"
  ) +
  theme_minimal(base_size = 14)

library(dplyr)

summary_phase <- phase_long %>%
  group_by(Phase) %>%
  summarise(
    mean_events = mean(Events, na.rm = TRUE),
    se = sd(Events, na.rm = TRUE) / sqrt(n())
  )

ggplot(summary_phase, aes(x = Phase, y = mean_events, fill = Phase)) +
  geom_col(alpha = 0.8) +
  geom_errorbar(aes(ymin = mean_events - se, ymax = mean_events + se), width = 0.2) +
  labs(
    title = "Mean Prey Capture Attempts by Dive Phase",
    x = "Dive Phase",
    y = "Mean Events ± SE"
  ) +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal(base_size = 14) +
  theme(legend.position = "none")

1.2.2 Model fitting

# Reformat to factor
phase_long <- phase_long %>%
  mutate(
    BirdID = factor(BirdID),
    Phase = factor(Phase, levels = c("Descent", "Bottom", "Ascent")),
    DiveType = factor(DiveType),
    Year = factor(Year),
    Sex = factor(Sex)
  )

# Fit the full model
h2_model1 <- bam(
  Events ~ Phase + DiveType + Year + StartMass + Sex +
    offset(log(Duration)),
  data = phase_long,
  family = nb(),
  method = "fREML")

summary(h2_model1)

## 
## Family: Negative Binomial(0.656) 
## Link function: log 
## 
## Formula:
## Events ~ Phase + DiveType + Year + StartMass + Sex + offset(log(Duration))
## 
## Parametric coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -3.653382   0.145348 -25.135  < 2e-16 ***
## PhaseBottom          1.989572   0.016743 118.832  < 2e-16 ***
## PhaseAscent          0.992929   0.017157  57.874  < 2e-16 ***
## DiveTypeEpipelagic   0.373829   0.028713  13.020  < 2e-16 ***
## DiveTypeMesopelagic  0.089571   0.032032   2.796  0.00517 ** 
## Year22               0.197638   0.013499  14.641  < 2e-16 ***
## StartMass           -0.052278   0.006029  -8.671  < 2e-16 ***
## SexM                 0.120735   0.017851   6.763 1.36e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## R-sq.(adj) =  0.366   Deviance explained =   23%
## -REML =  42226  Scale est. = 1         n = 45921

resid_bam <- residuals(h2_model1, type = "pearson")
acf(resid_bam, main = "ACF of Deviance Residuals")

gam.check(h2_model1)

## 
## Method: REML   Optimizer: outer newton
## Model required no smoothing parameter selectionModel rank =  8 / 8

We need to address the autocorrelation.

# First, identify start of each time series (i.e., beginning of a new dive)
# Ensure correct factor formatting
phase_long <- phase_long %>%
  mutate(
    BirdID = factor(BirdID),
    Phase = factor(Phase, levels = c("Descent", "Bottom", "Ascent")),
    DiveType = factor(DiveType),
    Year = factor(Year),
    Sex = factor(Sex)
  ) %>%
  arrange(BirdID, time_rel_hours) %>%
  group_by(BirdID, DiveNumber) %>%
  mutate(AR.start = row_number() == 1) %>%
  ungroup()

# Estimate autocorrelation from previous model
acf_res <- acf(residuals(h2_model1, type = "pearson"), lag.max = 1, plot = FALSE)
rho_est <- as.numeric(acf_res$acf[2])

# Refit model with phase-specific temporal smooth
h2_model2 <- bam(
  Events ~ Phase + DiveType + Year + StartMass + Sex +
    te(time_rel_hours, BirdID, by = Phase, bs = c("cr", "re"), k = c(10, 10)) +
    s(DiveNumber, BirdID, bs = "re") +        
    s(Phase, DiveNumber, BirdID, bs = "re") + 
    offset(log(Duration)),
  data = phase_long,
  family = nb(),
  method = "fREML",
  discrete = TRUE,
  rho = rho_est,
  AR.start = phase_long$AR.start
)

# Evaluate autocorrelation again
resid_bam <- residuals(h2_model2, type = "pearson")
acf(resid_bam, main = "ACF of Deviance Residuals")

summary(h2_model2)

## 
## Family: Negative Binomial(0.864) 
## Link function: log 
## 
## Formula:
## Events ~ Phase + DiveType + Year + StartMass + Sex + te(time_rel_hours, 
##     BirdID, by = Phase, bs = c("cr", "re"), k = c(10, 10)) + 
##     s(DiveNumber, BirdID, bs = "re") + s(Phase, DiveNumber, BirdID, 
##     bs = "re") + offset(log(Duration))
## 
## Parametric coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -3.66037    1.00315  -3.649 0.000264 ***
## PhaseBottom          2.01110    0.11711  17.172  < 2e-16 ***
## PhaseAscent          1.12256    0.11233   9.993  < 2e-16 ***
## DiveTypeEpipelagic   0.37831    0.03314  11.414  < 2e-16 ***
## DiveTypeMesopelagic  0.04923    0.03669   1.342 0.179638    
## Year22               0.33741    0.09409   3.586 0.000336 ***
## StartMass           -0.06967    0.04208  -1.656 0.097783 .  
## SexM                 0.12184    0.12181   1.000 0.317170    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                            edf Ref.df      F  p-value    
## te(time_rel_hours,BirdID):PhaseDescent 113.965    173 70.207  < 2e-16 ***
## te(time_rel_hours,BirdID):PhaseBottom  111.065    170 51.154 0.000697 ***
## te(time_rel_hours,BirdID):PhaseAscent  104.978    169 25.186 0.000985 ***
## s(DiveNumber,BirdID)                     3.996     20  0.300 0.000410 ***
## s(Phase,DiveNumber,BirdID)              12.215     60  0.304  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.499   Deviance explained = 35.1%
## fREML =  66938  Scale est. = 1         n = 45921

gam.check(h2_model2)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] -8.952838e-13 -1.666223e-11 -4.476419e-13  5.293543e-12  5.098144e-13
## [6]  1.659828e-11 -1.567901e-11 -1.561240e-11
## 
## $hess
##              [,1]        [,2]        [,3]        [,4]        [,5]         [,6]
## [1,]  5.940966968  5.30368527  0.08359450  0.07910430 -0.02477515 -0.003862005
## [2,]  5.303685267 28.54436613  0.06130363  0.70680744 -0.03439187 -0.600994564
## [3,]  0.083594501  0.06130363  5.53677728  5.04167445 -0.01418738 -0.014025423
## [4,]  0.079104296  0.70680744  5.04167445 23.24704637  0.03510852  0.506298686
## [5,] -0.024775147 -0.03439187 -0.01418738  0.03510852  5.03099871  5.048400925
## [6,] -0.003862005 -0.60099456 -0.01402542  0.50629869  5.04840093 25.828651697
## [7,] -0.014342005  0.34889317  0.01071009 -0.23170295 -0.08490455  0.435771710
## [8,]  0.006376078  0.75299931 -0.16368982  1.00336410 -0.23976214  0.848620094
##             [,7]         [,8]
## [1,] -0.01434201  0.006376078
## [2,]  0.34889317  0.752999313
## [3,]  0.01071009 -0.163689818
## [4,] -0.23170295  1.003364101
## [5,] -0.08490455 -0.239762138
## [6,]  0.43577171  0.848620094
## [7,]  0.42598922  0.322320247
## [8,]  0.32232025  1.557986918
## 
## Model rank =  688 / 688 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                           k'   edf k-index p-value
## te(time_rel_hours,BirdID):PhaseDescent 200.0 114.0      NA      NA
## te(time_rel_hours,BirdID):PhaseBottom  200.0 111.1      NA      NA
## te(time_rel_hours,BirdID):PhaseAscent  200.0 105.0      NA      NA
## s(DiveNumber,BirdID)                    20.0   4.0      NA      NA
## s(Phase,DiveNumber,BirdID)              60.0  12.2      NA      NA

Now, we can move towards simplifying the fixed effects. The least significant is sex, so we can start with that.

phase_long_na_sex <- phase_long %>%
  filter(Sex != "NA")

h2_model2_na_sex <- bam(
  Events ~ Phase + DiveType + Year + StartMass + Sex +
    te(time_rel_hours, BirdID, by = Phase, bs = c("cr", "re"), k = c(10, 10)) +
    s(DiveNumber, BirdID, bs = "re") +        
    s(Phase, DiveNumber, BirdID, bs = "re") + 
    offset(log(Duration)),
  data = phase_long_na_sex,
  family = nb(),
  method = "fREML",
  discrete = TRUE,
  rho = rho_est,
  AR.start = phase_long_na_sex$AR.start)

h2_model2_minus_sex <- bam(
  Events ~ Phase + DiveType + Year + StartMass +
    te(time_rel_hours, BirdID, by = Phase, bs = c("cr", "re"), k = c(10, 10)) +
    s(DiveNumber, BirdID, bs = "re") +        
    s(Phase, DiveNumber, BirdID, bs = "re") + 
    offset(log(Duration)),
  data = phase_long_na_sex,
  family = nb(),
  method = "fREML",
  discrete = TRUE,
  rho = rho_est,
  AR.start = phase_long_na_sex$AR.start)

AIC(h2_model2_na_sex, h2_model2_minus_sex)

##                           df      AIC
## h2_model2_na_sex    358.2323 195813.2
## h2_model2_minus_sex 358.0867 195814.1

anova(h2_model2_na_sex, h2_model2_minus_sex)

## Analysis of Deviance Table
## 
## Model 1: Events ~ Phase + DiveType + Year + StartMass + Sex + te(time_rel_hours, 
##     BirdID, by = Phase, bs = c("cr", "re"), k = c(10, 10)) + 
##     s(DiveNumber, BirdID, bs = "re") + s(Phase, DiveNumber, BirdID, 
##     bs = "re") + offset(log(Duration))
## Model 2: Events ~ Phase + DiveType + Year + StartMass + te(time_rel_hours, 
##     BirdID, by = Phase, bs = c("cr", "re"), k = c(10, 10)) + 
##     s(DiveNumber, BirdID, bs = "re") + s(Phase, DiveNumber, BirdID, 
##     bs = "re") + offset(log(Duration))
##   Resid. Df Resid. Dev       Df Deviance
## 1     45532     195097                  
## 2     45532     195098 -0.12211  -1.2213

Sex does not improve model fit, so we can remove it. Now, we can move onto mass.

h2_model2_mass <- bam(
  Events ~ Phase + DiveType + Year + StartMass +
    te(time_rel_hours, BirdID, by = Phase, bs = c("cr", "re"), k = c(10, 10)) +
    s(DiveNumber, BirdID, bs = "re") +        
    s(Phase, DiveNumber, BirdID, bs = "re") + 
    offset(log(Duration)),
  data = phase_long,
  family = nb(),
  method = "fREML",
  discrete = TRUE,
  rho = rho_est,
  AR.start = phase_long$AR.start
)

h2_model2_minus_mass <- bam(
  Events ~ Phase + DiveType + Year +
    te(time_rel_hours, BirdID, by = Phase, bs = c("cr", "re"), k = c(10, 10)) +
    s(DiveNumber, BirdID, bs = "re") +        
    s(Phase, DiveNumber, BirdID, bs = "re") + 
    offset(log(Duration)),
  data = phase_long,
  family = nb(),
  method = "fREML",
  discrete = TRUE,
  rho = rho_est,
  AR.start = phase_long$AR.start
)

anova(h2_model2_mass, h2_model2_minus_mass)

## Analysis of Deviance Table
## 
## Model 1: Events ~ Phase + DiveType + Year + StartMass + te(time_rel_hours, 
##     BirdID, by = Phase, bs = c("cr", "re"), k = c(10, 10)) + 
##     s(DiveNumber, BirdID, bs = "re") + s(Phase, DiveNumber, BirdID, 
##     bs = "re") + offset(log(Duration))
## Model 2: Events ~ Phase + DiveType + Year + te(time_rel_hours, BirdID, 
##     by = Phase, bs = c("cr", "re"), k = c(10, 10)) + s(DiveNumber, 
##     BirdID, bs = "re") + s(Phase, DiveNumber, BirdID, bs = "re") + 
##     offset(log(Duration))
##   Resid. Df Resid. Dev        Df Deviance
## 1     51748     221240                   
## 2     51748     221242 -0.037231  -1.9576

AIC(h2_model2_mass, h2_model2_minus_mass)

##                            df      AIC
## h2_model2_mass       411.6797 222063.6
## h2_model2_minus_mass 411.6314 222065.5

Mass does not improve model, so we can remove that as well.

# 1. Fit a temporary version without rho to estimate residuals
temp_model <- bam(
  Events ~ Phase + DiveType + Year +
    te(time_rel_hours, BirdID, by = Phase, bs = c("cr", "re"), k = c(10, 10)) +
    s(DiveNumber, BirdID, bs = "re") +
    s(Phase, DiveNumber, BirdID, bs = "re") +
    offset(log(Duration)),
  data = phase_long,
  family = nb(),
  method = "fREML",
  discrete = TRUE
)

# 2. Estimate rho from lag-1 autocorrelation
acf_res <- acf(residuals(temp_model, type = "pearson"), lag.max = 1, plot = FALSE)
rho_est <- as.numeric(acf_res$acf[2])

# 3. Refit with updated rho
h2_best_model <- bam(
  Events ~ Phase + DiveType + Year +
    te(time_rel_hours, BirdID, by = Phase, bs = c("cr", "re"), k = c(10, 10)) +
    s(DiveNumber, BirdID, bs = "re") +
    s(Phase, DiveNumber, BirdID, bs = "re") +
    offset(log(Duration)),
  data = phase_long,
  family = nb(),
  method = "fREML",
  discrete = TRUE,
  rho = rho_est,
  AR.start = phase_long$AR.start
)

summary(h2_best_model)

## 
## Family: Negative Binomial(0.874) 
## Link function: log 
## 
## Formula:
## Events ~ Phase + DiveType + Year + te(time_rel_hours, BirdID, 
##     by = Phase, bs = c("cr", "re"), k = c(10, 10)) + s(DiveNumber, 
##     BirdID, bs = "re") + s(Phase, DiveNumber, BirdID, bs = "re") + 
##     offset(log(Duration))
## 
## Parametric coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -5.22943    0.11619 -45.008  < 2e-16 ***
## PhaseBottom          1.97122    0.11655  16.914  < 2e-16 ***
## PhaseAscent          1.11127    0.11145   9.971  < 2e-16 ***
## DiveTypeEpipelagic   0.37986    0.03107  12.225  < 2e-16 ***
## DiveTypeMesopelagic  0.01909    0.03418   0.559  0.57647    
## Year22               0.26372    0.09009   2.927  0.00342 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                            edf Ref.df       F  p-value    
## te(time_rel_hours,BirdID):PhaseDescent 132.647    174 118.504 5.62e-07 ***
## te(time_rel_hours,BirdID):PhaseBottom  127.388    176  83.407 0.008422 ** 
## te(time_rel_hours,BirdID):PhaseAscent  120.239    175  40.565 0.003293 ** 
## s(DiveNumber,BirdID)                     5.952     23   0.448 0.000228 ***
## s(Phase,DiveNumber,BirdID)              15.349     69   0.342  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.499   Deviance explained = 35.3%
## fREML =  76076  Scale est. = 1         n = 52194

resid_bam <- residuals(h2_best_model, type = "pearson")
acf(resid_bam, main = "ACF of Deviance Residuals")

summary(h2_best_model)

## 
## Family: Negative Binomial(0.874) 
## Link function: log 
## 
## Formula:
## Events ~ Phase + DiveType + Year + te(time_rel_hours, BirdID, 
##     by = Phase, bs = c("cr", "re"), k = c(10, 10)) + s(DiveNumber, 
##     BirdID, bs = "re") + s(Phase, DiveNumber, BirdID, bs = "re") + 
##     offset(log(Duration))
## 
## Parametric coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -5.22943    0.11619 -45.008  < 2e-16 ***
## PhaseBottom          1.97122    0.11655  16.914  < 2e-16 ***
## PhaseAscent          1.11127    0.11145   9.971  < 2e-16 ***
## DiveTypeEpipelagic   0.37986    0.03107  12.225  < 2e-16 ***
## DiveTypeMesopelagic  0.01909    0.03418   0.559  0.57647    
## Year22               0.26372    0.09009   2.927  0.00342 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                            edf Ref.df       F  p-value    
## te(time_rel_hours,BirdID):PhaseDescent 132.647    174 118.504 5.62e-07 ***
## te(time_rel_hours,BirdID):PhaseBottom  127.388    176  83.407 0.008422 ** 
## te(time_rel_hours,BirdID):PhaseAscent  120.239    175  40.565 0.003293 ** 
## s(DiveNumber,BirdID)                     5.952     23   0.448 0.000228 ***
## s(Phase,DiveNumber,BirdID)              15.349     69   0.342  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.499   Deviance explained = 35.3%
## fREML =  76076  Scale est. = 1         n = 52194

gam.check(h2_best_model)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] -2.685852e-12 -8.498091e-12 -3.911538e-12  2.732037e-11  1.385558e-13
## [6] -1.084999e-11 -1.146150e-11 -2.113865e-12
## 
## $hess
##             [,1]        [,2]          [,3]        [,4]        [,5]        [,6]
## [1,]  6.35973834  5.46340894  7.446602e-02  0.10452447 -0.02634447 -0.02043474
## [2,]  5.46340894 31.49918892  1.294214e-02  0.93086218 -0.04954641 -0.81201933
## [3,]  0.07446602  0.01294214  5.530229e+00  5.23614351 -0.01175926 -0.05959611
## [4,]  0.10452447  0.93086218  5.236144e+00 26.26660051  0.05192667  0.52346448
## [5,] -0.02634447 -0.04954641 -1.175926e-02  0.05192667  4.50628852  5.33231716
## [6,] -0.02043474 -0.81201933 -5.959611e-02  0.52346448  5.33231716 30.21946516
## [7,] -0.02713974  0.56817018 -3.869646e-05 -0.42850632 -0.08894909  0.59504180
## [8,]  0.02245292  0.72720196 -1.494796e-01  0.92049562 -0.21909747  1.24325318
##               [,7]        [,8]
## [1,] -2.713974e-02  0.02245292
## [2,]  5.681702e-01  0.72720196
## [3,] -3.869646e-05 -0.14947958
## [4,] -4.285063e-01  0.92049562
## [5,] -8.894909e-02 -0.21909747
## [6,]  5.950418e-01  1.24325318
## [7,]  7.255864e-01  0.47571022
## [8,]  4.757102e-01  1.98326452
## 
## Model rank =  788 / 788 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                            k'    edf k-index p-value
## te(time_rel_hours,BirdID):PhaseDescent 230.00 132.65      NA      NA
## te(time_rel_hours,BirdID):PhaseBottom  230.00 127.39      NA      NA
## te(time_rel_hours,BirdID):PhaseAscent  230.00 120.24      NA      NA
## s(DiveNumber,BirdID)                    23.00   5.95      NA      NA
## s(Phase,DiveNumber,BirdID)              69.00  15.35      NA      NA

# --- Model diagnostics from gratia
appraise(h2_best_model) +
  patchwork::plot_layout(ncol = 2)

1.2.3 Interpretation

I fit a generalized addition model (GAM) with a negative binomial distribution to account for model overdispersion. The model evaluated the relationship between prey capture attempts at the dive phase level with a fixed effect for dive phase, dive type, and year of tag deployment. Temporal trends in foraging behavior were modeled with phase-specific smooth terms over relative deployment time within each individual bird. I also included random intercepts for each dive, and for each phase nested within dives to account for repeated measures and temporal autocorrelation. An offset term for the natural logarithm of phase duration was included to model prey capture rate per unit time. Temporal autocorrelation was also addressed using an AR(1) structure (rho = 0.045) and phase-level time series initialization. The final model explained 35.3% of the deviance and had an adjusted R^2 of 0.499.

Dive phase had a strong effect on the number of prey capture attempts (all phases p < 0.001). Compared to the descent phase, penguins exhibited significantly higher prey capture rates during the bottom (estimate = 1.97, SE = 0.12, p < 0.001) and ascent phase (estimate = 1.11, SE = 0.11, p < 0.001), confirming that foraging is concentrated in the latter parts of the dive. Smooth terms revealed substantial non-linear variation in prey capture attempts over time, varying by both individual and dive phase. Temporal smooths were significant for all phases, with the greatest variation observed in the bottom phase (edf = 127.4). Random intercepts at both the dive and dive-phase level also explained additional variance in foraging effort (p < 0.001 for both smooths), indicating meaningful heterogeneity in foraging behavior at fine temporal scales.

Although dive type alone was not a significant predictor of prey capture attempts in the intial dive-level model (p = 0.63), the refined phase-level model revealed a significant postitive association between epipelagic and prey capture attempts (estimate = 0.38, SE = 0.03, p < 0.001). This difference likely reflects improved resolution at the dive-phase scale and enhanced ability to model individual and temporal variation in foraging effort. Mesopelagic dives did not significantly differ from benthic dives (Estimate = 0.02, SE = 0.03, p = 0.576). Penguins deployed in 2022 had higher prey capture attempt rates than those deployed in 2019 (Estimate = 0.26, SE = 0.09, p = 0.003), suggesting interannual differences in prey availability or behavior.

The key components of the model include:

Retained fixed effects for dive type, year, and dive phase
[te(time_rel_hours, BirdID, by = Phase…)] — Phase-specific smooth trends of time since deployment by bird, allows for nonlinear patterns over deployment for each phase
[s(DiveNumber, BirdID, bs = “re”)] — Random intercept for each dive nested in bird
[s(Phase, DiveNumber, BirdID, bs = “re)] — Random effect for each phase within each dive nested in bird, accounts for repeated measures within dives
[offset(log(Duration))] — Calculates rate
[rho + AR.start] — Rho is the derived autocorrelation at lag 1 in the residuals (the correlation between adjacent observations in time within the same series), with AR.Start identifying the start of each new dive, this models within-dive serial correlation in prey capture attempts while avoiding assumption of correlation across distinct dives

Figures

# --- Load libraries ---
library(mgcv)
library(ggeffects)
library(ggplot2)
library(patchwork)
library(gratia)
library(dplyr)

# --- Clean and reformat data for plotting ---
phase_long_plot <- phase_long %>%
  dplyr::select(-AR.start) %>%
  mutate(
    BirdID = factor(BirdID),
    Phase = factor(Phase, levels = c("Descent", "Bottom", "Ascent")),
    DiveType = factor(DiveType, levels = c("Epipelagic", "Mesopelagic", "Benthic")),
    Year = factor(Year),
    Sex = factor(Sex)
  )

# --- Refit model for plotting ---
h2_best_model_plot <- bam(
  Events ~ Phase + DiveType + Year +
    te(time_rel_hours, BirdID, by = Phase, bs = c("cr", "re"), k = c(10, 10)) +
    s(DiveNumber, BirdID, bs = "re") +
    s(Phase, DiveNumber, BirdID, bs = "re") +
    offset(log(Duration)),
  data = phase_long_plot,
  family = nb(),
  method = "fREML",
  discrete = TRUE
)

# --- Marginal predictions at median duration ---
median_duration <- median(phase_long_plot$Duration, na.rm = TRUE)

pred_phase <- ggpredict(h2_best_model_plot, terms = "Phase", condition = c(Duration = median_duration))
pred_type  <- ggpredict(h2_best_model_plot, terms = "DiveType", condition = c(Duration = median_duration))
pred_year  <- ggpredict(h2_best_model_plot, terms = "Year", condition = c(Duration = median_duration))

# --- Helper function for prediction panels ---
plot_preds <- function(df, title) {
  ggplot(df, aes(x = x, y = predicted)) +
    geom_point(size = 3) +
    geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.2) +
    labs(
      title = title,
      x = NULL,
      y = "Predicted Prey Capture Attempts"
    ) +
    ylim(0, max(df$conf.high, na.rm = TRUE) + 1) +
    theme_minimal(base_size = 14) +
    theme(
      plot.title = element_text(size = 14, face = "bold"),
      axis.title.y = element_text(face = "bold"),
      axis.text.x = element_text(angle = 45, hjust = 1)
    )
}

# --- Combine marginal prediction panels ---
p1 <- plot_preds(pred_phase, "Dive Phase")
p2 <- plot_preds(pred_type, "Dive Type")
p3 <- plot_preds(pred_year, "Year")

(p1 | p2 | p3) +
  plot_annotation(
    title = "Predicted Prey Capture Attempts",
    subtitle = paste("Predictions at median duration:", round(median_duration, 1), "sec"),
    theme = theme(
      plot.title = element_text(size = 18, face = "bold"),
      plot.subtitle = element_text(size = 13)
    )
  )

# --- Smooth estimates by phase using gratia ---
smooths_all <- smooth_estimates(h2_best_model_plot)
tensor_smooths <- smooths_all %>%
  filter(grepl("te\\(time_rel_hours,BirdID\\):Phase", .smooth)) %>%
  mutate(
    Phase = case_when(
      grepl("Descent", .smooth) ~ "Descent",
      grepl("Bottom", .smooth) ~ "Bottom",
      grepl("Ascent", .smooth) ~ "Ascent"
    )
  )

# --- Plot tensor smooths by phase ---
plot_tensor <- function(phase_name) {
  ggplot(filter(tensor_smooths, Phase == phase_name), aes(x = time_rel_hours, y = .estimate)) +
    geom_line(color = "black") +
    geom_ribbon(aes(ymin = .estimate - 2 * .se, ymax = .estimate + 2 * .se), alpha = 0.2) +
    labs(
      title = paste("Phase:", phase_name),
      x = "Time Since Deployment (hours)",
      y = "Partial Effect"
    ) +
    theme_minimal(base_size = 14) +
    theme(plot.title = element_text(face = "bold"))
}

p_descent <- plot_tensor("Descent")
p_bottom  <- plot_tensor("Bottom")
p_ascent  <- plot_tensor("Ascent")

(p_descent | p_bottom | p_ascent) +
  plot_annotation(
    title = "Phase-specific Temporal Smooths",
    subtitle = "Effect of Deployment Time on Prey Capture Attempts",
    theme = theme(
      plot.title = element_text(size = 18, face = "bold"),
      plot.subtitle = element_text(size = 13)
    )
  )

# --- Observed vs Predicted ---
phase_long_plot$predicted <- predict(h2_best_model_plot, type = "response")

ggplot(phase_long_plot, aes(x = predicted, y = Events)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", color = "red") +
  labs(
    title = "Observed vs Predicted Prey Capture Attempts",
    x = "Predicted",
    y = "Observed"
  ) +
  theme_minimal(base_size = 14)

## `geom_smooth()` using formula = 'y ~ x'

1.3 Hypothesis 3: Energy expenditure (activity) will increase with prey capture rate, depending on dive type.

Reformat.

odba_results <- results_enriched %>%
  filter(
    !is.na(DiveType),                 # Remove NA dive types
    DiveType != "Transit",           # Remove transit dives
    TotalEvents >= 1                 # Keep only foraging dives
  )

1.3.1 Explore your data (EDA) (Zuur. pg 12)

ggplot(odba_results, aes(x = TotalEvents, y = ODBA_Total, color = DiveType)) +
  geom_point(alpha = 0.4) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "Total Prey Capture Attempts",
    y = "Total ODBA (Energy Expenditure)",
    title = "Relationship Between Prey Capture and Activity by Dive Type"
  ) +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

ggplot(odba_results, aes(x = TotalEvents, y = ODBA_Total)) +
  geom_point(alpha = 0.4) +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  facet_wrap(~ DiveType) +
  labs(
    x = "Total Prey Capture Attempts",
    y = "Total ODBA",
    title = "Prey Capture vs Activity by Dive Type"
  ) +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

ggplot(odba_results, aes(x = DiveType, y = ODBA_Total, fill = DiveType)) +
  geom_boxplot() +
  labs(
    y = "Total ODBA",
    title = "Energy Expenditure Across Dive Types"
  ) +
  theme_minimal()

ggplot(odba_results, aes(x = DiveType, y = TotalEvents, fill = DiveType)) +
  geom_boxplot() +
  labs(
    y = "Total Prey Capture Attempts",
    title = "Prey Capture Across Dive Types"
  ) +
  theme_minimal()

ggplot(odba_results, aes(x = TotalEvents, y = ODBA_Total)) +
  geom_hex(bins = 30) +
  facet_wrap(~ DiveType) +
  scale_fill_viridis_c() +
  labs(
    x = "Prey Capture Attempts",
    y = "ODBA",
    title = "Density of Activity vs Prey Capture"
  ) +
  theme_minimal()

odba_results %>%
  group_by(DiveType, TotalEventsBin = cut(TotalEvents, breaks = c(1, 5, 10, 20, 40, 80))) %>%
  summarise(mean_odba = mean(ODBA_Total, na.rm = TRUE),
            n = n()) %>%
  ggplot(aes(x = TotalEventsBin, y = mean_odba, group = DiveType, color = DiveType)) +
  geom_line() +
  geom_point() +
  labs(x = "Binned Prey Capture Attempts", y = "Mean ODBA", title = "ODBA Increases with Prey Capture Across Dive Types") +
  theme_minimal()

## `summarise()` has grouped output by 'DiveType'. You can override using the
## `.groups` argument.

ggplot(odba_results, aes(x = TotalEvents, y = ODBA_Total)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm") +
  facet_wrap(~DiveType) +
  labs(title = "Linear Fit Residuals by Dive Type") +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

ggplot(odba_results, aes(x = TotalEvents, y = ODBA_Total)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "gam", formula = y ~ s(x)) +
  facet_wrap(~DiveType) +
  labs(title = "Nonlinear Fit: ODBA vs Prey Capture Attempts") +
  theme_minimal()

1.3.2 Model fitting

# 1. Clean and coerce factors
odba_results <- odba_results %>%
  filter(!is.na(DiveType), !is.na(ODBA_Total), !is.na(TotalEvents), !is.na(DiveDuration)) %>%
  mutate(
    DiveType = factor(DiveType),
    Year = factor(Year),
    Sex = factor(Sex),
    BirdID = factor(BirdID)
  )

# 2. Refit with separate smooths per DiveType (excluding reference)
h3_model1 <- bam(
  ODBA_Total ~ 
    s(TotalEvents, by = DiveType, k = 10) +  # smooths for non-reference levels
    DiveType + Year + Sex + StartMass +
    s(BirdID, bs = "re") +
    offset(log(DiveDuration)),
  data = odba_results,
  family = Gamma(link = "log"),
  method = "fREML",
  discrete = TRUE
)

summary(h3_model1)

## 
## Family: Gamma 
## Link function: log 
## 
## Formula:
## ODBA_Total ~ s(TotalEvents, by = DiveType, k = 10) + DiveType + 
##     Year + Sex + StartMass + s(BirdID, bs = "re") + offset(log(DiveDuration))
## 
## Parametric coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          2.664542   0.466371   5.713 1.13e-08 ***
## DiveTypeEpipelagic   0.025787   0.018768   1.374    0.169    
## DiveTypeMesopelagic  0.009089   0.020232   0.449    0.653    
## Year22               0.062731   0.043966   1.427    0.154    
## SexM                 0.061989   0.055509   1.117    0.264    
## StartMass           -0.008331   0.019672  -0.423    0.672    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                       edf Ref.df      F p-value    
## s(TotalEvents):DiveTypeBenthic      2.079  2.677  60.17  <2e-16 ***
## s(TotalEvents):DiveTypeEpipelagic   4.529  5.334 383.35  <2e-16 ***
## s(TotalEvents):DiveTypeMesopelagic  3.250  4.040 151.91  <2e-16 ***
## s(BirdID)                          15.752 17.000 996.30  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.806   Deviance explained = 80.8%
## fREML = -616.36  Scale est. = 0.074293  n = 15307

resid_bam <- residuals(h3_model1, type = "pearson")
acf(resid_bam, main = "ACF of Deviance Residuals")

gam.check(h3_model1)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] -4.926014e-05 -2.597422e-07 -3.446425e-09  7.790298e-07  1.565495e-04
## 
## $hess
##            [,1]          [,2]         [,3]          [,4]         [,5]
##    0.3794346476  0.0003329144 -0.000266935 -0.0003772046   -0.5396845
##    0.0003329144  1.1465972193 -0.001032645  0.0095946799   -1.7644383
##   -0.0002669350 -0.0010326448  1.273110183  0.0021533478   -1.1250771
##   -0.0003772046  0.0095946799  0.002153348  7.4945925060   -7.8759556
## d -0.5396845070 -1.7644383334 -1.125077094 -7.8759555681 7648.9998435
## 
## Model rank =  53 / 53 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                       k'   edf k-index p-value
## s(TotalEvents):DiveTypeBenthic      9.00  2.08       1    0.66
## s(TotalEvents):DiveTypeEpipelagic   9.00  4.53       1    0.68
## s(TotalEvents):DiveTypeMesopelagic  9.00  3.25       1    0.67
## s(BirdID)                          20.00 15.75      NA      NA

There is autocorrelation so we can add a smooth for time.

h3_model2 <- bam(
  ODBA_Total ~ s(TotalEvents, by = DiveType, k = 10) +
    DiveType + Year + Sex + StartMass +
    s(time_rel_hours, BirdID, bs = "fs", k = 20) +
    offset(log(DiveDuration)),
  data = odba_results,
  family = Gamma(link = "log"),
  method = "fREML",
  discrete = TRUE
)

summary(h3_model2)

## 
## Family: Gamma 
## Link function: log 
## 
## Formula:
## ODBA_Total ~ s(TotalEvents, by = DiveType, k = 10) + DiveType + 
##     Year + Sex + StartMass + s(time_rel_hours, BirdID, bs = "fs", 
##     k = 20) + offset(log(DiveDuration))
## 
## Parametric coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          2.308021   0.338446   6.819 9.48e-12 ***
## DiveTypeEpipelagic   0.050705   0.021228   2.389   0.0169 *  
## DiveTypeMesopelagic  0.048235   0.024135   1.999   0.0457 *  
## Year22               0.072134   0.030970   2.329   0.0199 *  
## SexM                -0.024082   0.043104  -0.559   0.5764    
## StartMass            0.006448   0.014208   0.454   0.6500    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                        edf  Ref.df       F p-value    
## s(TotalEvents):DiveTypeBenthic       3.010   3.808  53.072  <2e-16 ***
## s(TotalEvents):DiveTypeEpipelagic    3.882   4.683 461.839  <2e-16 ***
## s(TotalEvents):DiveTypeMesopelagic   3.303   4.102 178.733  <2e-16 ***
## s(time_rel_hours,BirdID)           175.815 340.000   8.708  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.839   Deviance explained =   84%
## fREML = -1788.2  Scale est. = 0.057881  n = 15307

resid_bam <- residuals(h3_model2, type = "pearson")
acf(resid_bam, main = "ACF of Deviance Residuals")

gam.check(h3_model2)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1]  1.527192e-07 -8.937445e-07 -1.317201e-07  7.042488e-05 -9.470336e-06
## [6] -1.581461e-04  3.258101e-04
## 
## $hess
##            [,1]          [,2]          [,3]          [,4]          [,5]
##    9.614635e-01  6.814194e-05  2.663904e-04 -4.538823e-02 -1.193434e-03
##    6.814194e-05  8.259197e-01 -1.745913e-03  5.448132e-02  1.874420e-03
##    2.663904e-04 -1.745913e-03  1.265256e+00 -9.723623e-03 -3.959699e-03
##   -4.538823e-02  5.448132e-02 -9.723623e-03  3.186118e+01  7.583244e-01
##   -1.193434e-03  1.874420e-03 -3.959699e-03  7.583244e-01  4.171835e+00
##   -1.060118e-06  6.666110e-07 -2.659468e-07 -9.925934e-06  8.006286e-06
## d -1.004750e+00 -1.441177e+00 -1.151440e+00 -8.293738e+01 -4.970150e+00
##            [,6]          [,7]
##   -1.060118e-06 -1.004750e+00
##    6.666110e-07 -1.441177e+00
##   -2.659468e-07 -1.151440e+00
##   -9.925934e-06 -8.293738e+01
##    8.006286e-06 -4.970150e+00
##    1.581018e-04 -8.062162e-05
## d -8.062162e-05  7.649000e+03
## 
## Model rank =  433 / 433 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                        k'    edf k-index p-value  
## s(TotalEvents):DiveTypeBenthic       9.00   3.01    0.99   0.405  
## s(TotalEvents):DiveTypeEpipelagic    9.00   3.88    0.99   0.345  
## s(TotalEvents):DiveTypeMesopelagic   9.00   3.30    0.99   0.465  
## s(time_rel_hours,BirdID)           400.00 175.82    0.97   0.015 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

There is still autocorrelation.

# Prepare data: add lag and AR.start indicator
odba_results <- odba_results %>%
  arrange(BirdID, DiveNumber) %>%   # ensure proper ordering
  group_by(BirdID) %>%
  mutate(
    lag1_TotalEvents = lag(TotalEvents, default = NA),
    StartOfDive = row_number() == 1   # AR.start: TRUE for start of new individual
  ) %>%
  ungroup()

# Fit GAM with autoregressive structure
h3_model3 <- bam(
  ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) +
    DiveType + Year + Sex + StartMass +
    te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(30, 9)) +
    s(BirdID, bs = "re") +
    s(lag1_TotalEvents, by = DiveType, k = 12) +
    offset(log(DiveDuration)),
  data = odba_results,
  family = Gamma(link = "log"),
  method = "fREML",
  discrete = TRUE,
  rho = 0.3,                             # try adjusting this (e.g., 0.3–0.6)
  AR.start = odba_results$StartOfDive
)

# Summary and diagnostics
summary(h3_model3)

## 
## Family: Gamma 
## Link function: log 
## 
## Formula:
## ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) + DiveType + 
##     Year + Sex + StartMass + te(time_rel_hours, BirdID, bs = c("cr", 
##     "re"), k = c(30, 9)) + s(BirdID, bs = "re") + s(lag1_TotalEvents, 
##     by = DiveType, k = 12) + offset(log(DiveDuration))
## 
## Parametric coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          2.57750    0.39116   6.589 4.57e-11 ***
## DiveTypeEpipelagic   0.07044    0.02349   2.999  0.00272 ** 
## DiveTypeMesopelagic  0.05312    0.02681   1.981  0.04759 *  
## Year22               0.06156    0.03662   1.681  0.09275 .  
## SexM                 0.02933    0.04726   0.621  0.53481    
## StartMass           -0.00549    0.01647  -0.333  0.73886    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                             edf  Ref.df       F  p-value    
## s(TotalEvents):DiveTypeBenthic            3.142   3.989  39.137  < 2e-16 ***
## s(TotalEvents):DiveTypeEpipelagic         3.886   4.793 354.140  < 2e-16 ***
## s(TotalEvents):DiveTypeMesopelagic        3.281   4.131 164.011  < 2e-16 ***
## te(BirdID,time_rel_hours)               242.784 396.000   4.864 2.58e-05 ***
## s(BirdID)                                12.410  17.000   2.813  < 2e-16 ***
## s(lag1_TotalEvents):DiveTypeBenthic       1.000   1.001   2.865   0.0905 .  
## s(lag1_TotalEvents):DiveTypeEpipelagic    1.003   1.007  21.159 4.70e-06 ***
## s(lag1_TotalEvents):DiveTypeMesopelagic   1.306   1.550   1.423   0.1369    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =   0.85   Deviance explained = 84.9%
## fREML = -2858.1  Scale est. = 0.057601  n = 15287

resid_bam <- residuals(h3_model3, type = "pearson")
acf(resid_bam, main = "ACF of Deviance Residuals")

gam.check(h3_model3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
##  [1]  7.803092e-12  8.186563e-12  6.368017e-12  1.738712e-09  4.988078e-09
##  [6] -2.334914e-10 -4.317564e-05 -5.618711e-05  3.967465e-12 -5.022775e-08
## 
## $hess
##            [,1]          [,2]          [,3]          [,4]          [,5]
##    1.064377e+00 -3.794960e-04  7.760756e-05 -4.897919e-02 -1.058148e-02
##   -3.794960e-04  6.851597e-01 -2.777677e-03  5.938617e-02  6.809935e-03
##    7.760756e-05 -2.777677e-03  1.231265e+00  1.185002e-02  1.621902e-02
##   -4.897919e-02  5.938617e-02  1.185002e-02  4.908080e+01  9.586308e+00
##   -1.058148e-02  6.809935e-03  1.621902e-02  9.586308e+00  4.787446e+00
##   -2.684419e-03  4.996928e-03 -5.225195e-04  2.375852e+00 -1.354302e-01
##    3.183729e-06 -1.294274e-06  5.443543e-08  3.572889e-05  1.038265e-05
##    1.146087e-04  4.536835e-05  5.077511e-05  2.830016e-04 -4.554476e-05
##   -1.863708e-04 -1.376167e-03 -4.661305e-03  5.386488e-04  9.107058e-03
## d -1.070824e+00 -1.442979e+00 -1.140628e+00 -9.075046e+01 -3.064154e+01
##            [,6]          [,7]          [,8]          [,9]         [,10]
##   -2.684419e-03  3.183729e-06  1.146087e-04 -1.863708e-04 -1.070824e+00
##    4.996928e-03 -1.294274e-06  4.536835e-05 -1.376167e-03 -1.442979e+00
##   -5.225195e-04  5.443543e-08  5.077511e-05 -4.661305e-03 -1.140628e+00
##    2.375852e+00  3.572889e-05  2.830016e-04  5.386488e-04 -9.075046e+01
##   -1.354302e-01  1.038265e-05 -4.554476e-05  9.107058e-03 -3.064154e+01
##    3.081528e+00  1.699895e-07 -3.605230e-06 -1.262591e-03 -6.204923e+00
##    1.699895e-07  4.321808e-05 -6.647807e-09  8.350100e-08 -1.523888e-04
##   -3.605230e-06 -6.647807e-09  5.866251e-05  1.431542e-05 -1.663978e-03
##   -1.262591e-03  8.350100e-08  1.431542e-05  3.423877e-02 -1.528121e-01
## d -6.204923e+00 -1.523888e-04 -1.663978e-03 -1.528121e-01  7.637500e+03
## 
## Model rank =  692 / 692 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                             k'    edf k-index p-value  
## s(TotalEvents):DiveTypeBenthic           11.00   3.14    0.97   0.050 *
## s(TotalEvents):DiveTypeEpipelagic        11.00   3.89    0.97   0.025 *
## s(TotalEvents):DiveTypeMesopelagic       11.00   3.28    0.97   0.035 *
## te(BirdID,time_rel_hours)               600.00 242.78      NA      NA  
## s(BirdID)                                20.00  12.41      NA      NA  
## s(lag1_TotalEvents):DiveTypeBenthic      11.00   1.00    0.98   0.065 .
## s(lag1_TotalEvents):DiveTypeEpipelagic   11.00   1.00    0.98   0.105  
## s(lag1_TotalEvents):DiveTypeMesopelagic  11.00   1.31    0.98   0.065 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

I’m going to try removing the lag smooths and the tensor smooths and check model fit.

odba_results_complete <- odba_results %>%
  filter(!is.na(lag1_TotalEvents))  # Optionally, also check other predictors if needed

h3_model3 <- bam(
  ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) +
    DiveType + Year + Sex + StartMass +
    te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(30, 9)) +
    s(BirdID, bs = "re") +
    s(lag1_TotalEvents, by = DiveType, k = 12) +
    offset(log(DiveDuration)),
  data = odba_results_complete,
  family = Gamma(link = "log"),
  method = "fREML",
  discrete = TRUE,
  rho = 0.3,
  AR.start = odba_results_complete$StartOfDive
)

h3_model_nolag <- bam(
  ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) +
    DiveType + Year + Sex + StartMass +
    te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(30, 9)) +
    s(BirdID, bs = "re") +
    offset(log(DiveDuration)),
  data = odba_results_complete,
  family = Gamma(link = "log"),
  method = "fREML",
  discrete = TRUE,
  rho = 0.3,
  AR.start = odba_results_complete$StartOfDive
)

AIC(h3_model3, h3_model_nolag)

##                     df      AIC
## h3_model3      281.338 238145.1
## h3_model_nolag 279.598 238047.4

anova(h3_model3, h3_model_nolag, test = "Chisq")

## Analysis of Deviance Table
## 
## Model 1: ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) + DiveType + 
##     Year + Sex + StartMass + te(time_rel_hours, BirdID, bs = c("cr", 
##     "re"), k = c(30, 9)) + s(BirdID, bs = "re") + s(lag1_TotalEvents, 
##     by = DiveType, k = 12) + offset(log(DiveDuration))
## Model 2: ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) + DiveType + 
##     Year + Sex + StartMass + te(time_rel_hours, BirdID, bs = c("cr", 
##     "re"), k = c(30, 9)) + s(BirdID, bs = "re") + offset(log(DiveDuration))
##   Resid. Df Resid. Dev     Df Deviance Pr(>Chi)
## 1     14944     595.63                         
## 2     14947     593.55 -2.174   2.0737

h3_model_notensor <- bam(
  ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) +
    DiveType + Year + Sex + StartMass +
    s(BirdID, bs = "re") +
    s(lag1_TotalEvents, by = DiveType, k = 12) +
    offset(log(DiveDuration)),
  data = odba_results_complete,
  family = Gamma(link = "log"),
  method = "fREML",
  discrete = TRUE,
  rho = 0.3,
  AR.start = odba_results_complete$StartOfDive
)

AIC(h3_model3, h3_model_notensor)

##                          df      AIC
## h3_model3         281.33797 238145.1
## h3_model_notensor  41.21209 241022.3

anova(h3_model3, h3_model_notensor, test = "Chisq")

## Analysis of Deviance Table
## 
## Model 1: ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) + DiveType + 
##     Year + Sex + StartMass + te(time_rel_hours, BirdID, bs = c("cr", 
##     "re"), k = c(30, 9)) + s(BirdID, bs = "re") + s(lag1_TotalEvents, 
##     by = DiveType, k = 12) + offset(log(DiveDuration))
## Model 2: ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) + DiveType + 
##     Year + Sex + StartMass + s(BirdID, bs = "re") + s(lag1_TotalEvents, 
##     by = DiveType, k = 12) + offset(log(DiveDuration))
##   Resid. Df Resid. Dev      Df Deviance  Pr(>Chi)    
## 1     14944     595.63                               
## 2     15243     744.35 -298.45  -148.72 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Comparing these models showed that the lag does not contribute to the model, but that the tensor smooth improves the model fit substantially.

odba_results_complete <- odba_results %>%
  arrange(BirdID, DiveNumber) %>%
  group_by(BirdID) %>%
  mutate(
    lag1_TotalEvents = lag(TotalEvents, default = NA),
    StartOfDive = row_number() == 1
  ) %>%
  ungroup() %>%
  filter(!is.na(lag1_TotalEvents))

h3_model_final <- bam(
  ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) +
    DiveType + Year + Sex + StartMass +
    te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(30, 9)) +
    s(BirdID, bs = "re") +
    offset(log(DiveDuration)),
  data = odba_results_complete,
  family = Gamma(link = "log"),
  method = "fREML",
  discrete = TRUE,
  rho = 0.3,
  AR.start = odba_results_complete$StartOfDive
)

# Summary and diagnostics
summary(h3_model_final)

## 
## Family: Gamma 
## Link function: log 
## 
## Formula:
## ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) + DiveType + 
##     Year + Sex + StartMass + te(time_rel_hours, BirdID, bs = c("cr", 
##     "re"), k = c(30, 9)) + s(BirdID, bs = "re") + offset(log(DiveDuration))
## 
## Parametric coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          2.57447    0.39432   6.529 6.84e-11 ***
## DiveTypeEpipelagic   0.07619    0.02083   3.657 0.000256 ***
## DiveTypeMesopelagic  0.06265    0.02366   2.648 0.008111 ** 
## Year22               0.06871    0.03693   1.861 0.062825 .  
## SexM                 0.02420    0.04763   0.508 0.611488    
## StartMass           -0.00650    0.01661  -0.391 0.695512    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                        edf  Ref.df       F p-value    
## s(TotalEvents):DiveTypeBenthic       3.258   4.125  39.967  <2e-16 ***
## s(TotalEvents):DiveTypeEpipelagic    3.953   4.870 367.930  <2e-16 ***
## s(TotalEvents):DiveTypeMesopelagic   3.310   4.167 169.920  <2e-16 ***
## te(BirdID,time_rel_hours)          245.744 435.000   7.020  <2e-16 ***
## s(BirdID)                           12.574  17.000   1.839  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =   0.85   Deviance explained =   85%
## fREML = -2887.5  Scale est. = 0.056943  n = 15287

resid_bam <- residuals(h3_model_final, type = "pearson")
acf(resid_bam, main = "ACF of Deviance Residuals")

gam.check(h3_model_final)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1]  2.691403e-11  2.147060e-11  2.435607e-12  1.593179e-10 -1.115552e-10
## [6]  9.687362e-11 -9.415453e-08
## 
## $hess
##            [,1]          [,2]          [,3]         [,4]          [,5]
##    1.140960e+00 -0.0002087795 -4.206392e-05  -0.04649694  -0.008485174
##   -2.087795e-04  0.7130448741 -2.644623e-03   0.06037134   0.005381698
##   -4.206392e-05 -0.0026446232  1.244042e+00   0.01433581   0.016183237
##   -4.649694e-02  0.0603713439  1.433581e-02  51.99797794   9.298893648
##   -8.485174e-03  0.0053816976  1.618324e-02   9.29889365   4.268258230
##   -3.091952e-03  0.0052941626 -6.512993e-04   2.32679176  -0.120370515
## d -1.128997e+00 -1.4766177443 -1.155169e+00 -94.85426987 -28.017871205
##            [,6]        [,7]
##   -0.0030919523   -1.128997
##    0.0052941626   -1.476618
##   -0.0006512993   -1.155169
##    2.3267917556  -94.854270
##   -0.1203705147  -28.017871
##    3.1954113412   -6.286836
## d -6.2868359004 7639.000000
## 
## Model rank =  659 / 659 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                        k'    edf k-index p-value
## s(TotalEvents):DiveTypeBenthic      11.00   3.26    1.01    0.93
## s(TotalEvents):DiveTypeEpipelagic   11.00   3.95    1.01    0.90
## s(TotalEvents):DiveTypeMesopelagic  11.00   3.31    1.01    0.93
## te(BirdID,time_rel_hours)          600.00 245.74      NA      NA
## s(BirdID)                           20.00  12.57      NA      NA

Now we can simplify the fixed effects. We’ll start with Mass because it’s the least significant.

h3_model_no_mass <- bam(
  ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) +
    DiveType + Year + Sex +
    te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(30, 9)) +
    s(BirdID, bs = "re") +
    offset(log(DiveDuration)),
  data = odba_results_complete,
  family = Gamma(link = "log"),
  method = "fREML",
  discrete = TRUE,
  rho = 0.3,
  AR.start = odba_results_complete$StartOfDive
)

AIC(h3_model_final, h3_model_no_mass)

##                        df      AIC
## h3_model_final   279.5980 238047.4
## h3_model_no_mass 279.7129 238026.7

anova(h3_model_final, h3_model_no_mass)

## Analysis of Deviance Table
## 
## Model 1: ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) + DiveType + 
##     Year + Sex + StartMass + te(time_rel_hours, BirdID, bs = c("cr", 
##     "re"), k = c(30, 9)) + s(BirdID, bs = "re") + offset(log(DiveDuration))
## Model 2: ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) + DiveType + 
##     Year + Sex + te(time_rel_hours, BirdID, bs = c("cr", "re"), 
##     k = c(30, 9)) + s(BirdID, bs = "re") + offset(log(DiveDuration))
##   Resid. Df Resid. Dev       Df Deviance      F    Pr(>F)    
## 1     14947     593.55                                       
## 2     14947     592.95 0.066888   0.6026 158.54 2.927e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Removing mass improved the model fit significantly. Next, we can remove Sex.

odba_na_sex <- odba_results_complete %>%
  filter(Sex != "NA")

h3_model_no_mass_na_sex <- bam(
  ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) +
    DiveType + Year + Sex +
    te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(30, 9)) +
    s(BirdID, bs = "re") +
    offset(log(DiveDuration)),
  data = odba_na_sex,
  family = Gamma(link = "log"),
  method = "fREML",
  discrete = TRUE,
  rho = 0.3,
  AR.start = odba_na_sex$StartOfDive
)

h3_model_no_sex <- bam(
  ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) +
    DiveType + Year +
    te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(30, 9)) +
    s(BirdID, bs = "re") +
    offset(log(DiveDuration)),
  data = odba_na_sex,
  family = Gamma(link = "log"),
  method = "fREML",
  discrete = TRUE,
  rho = 0.3,
  AR.start = odba_na_sex$StartOfDive
)

AIC(h3_model_no_mass_na_sex, h3_model_no_sex)

##                               df      AIC
## h3_model_no_mass_na_sex 279.7129 238026.7
## h3_model_no_sex         279.5524 238025.7

anova(h3_model_no_mass_na_sex, h3_model_no_sex)

## Analysis of Deviance Table
## 
## Model 1: ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) + DiveType + 
##     Year + Sex + te(time_rel_hours, BirdID, bs = c("cr", "re"), 
##     k = c(30, 9)) + s(BirdID, bs = "re") + offset(log(DiveDuration))
## Model 2: ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) + DiveType + 
##     Year + te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(30, 
##     9)) + s(BirdID, bs = "re") + offset(log(DiveDuration))
##   Resid. Df Resid. Dev       Df Deviance F Pr(>F)
## 1     14947     592.95                           
## 2     14947     592.94 -0.21235  0.01315

The effect of sex is negligible, so we will remove it to simplify the model. Last effect is year.

h3_model_withyear <- bam(
  ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) +
    DiveType +
    te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(30, 9)) +
    s(BirdID, bs = "re") +
    offset(log(DiveDuration)),
  data = odba_results_complete,
  family = Gamma(link = "log"),
  method = "fREML",
  discrete = TRUE,
  rho = 0.3,
  AR.start = odba_results_complete$StartOfDive
)

h3_model_withoutyear <- bam(
  ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) +
    DiveType + Year +
    te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(30, 9)) +
    s(BirdID, bs = "re") +
    offset(log(DiveDuration)),
  data = odba_results_complete,
  family = Gamma(link = "log"),
  method = "fREML",
  discrete = TRUE,
  rho = 0.3,
  AR.start = odba_results_complete$StartOfDive
)

AIC(h3_model_withyear, h3_model_withoutyear)

##                            df      AIC
## h3_model_withyear    314.7879 270765.7
## h3_model_withoutyear 316.1023 270668.8

anova(h3_model_withyear, h3_model_withoutyear)

## Analysis of Deviance Table
## 
## Model 1: ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) + DiveType + 
##     te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(30, 
##         9)) + s(BirdID, bs = "re") + offset(log(DiveDuration))
## Model 2: ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) + DiveType + 
##     Year + te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(30, 
##     9)) + s(BirdID, bs = "re") + offset(log(DiveDuration))
##   Resid. Df Resid. Dev      Df Deviance      F    Pr(>F)    
## 1     16989     704.75                                      
## 2     16988     700.71 0.88585   4.0488 77.991 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Year contributed positively to the model, with a high F-test statistic despite having the AIC lower in the model without year.

h3_model_final <-  bam(
  ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) +
    DiveType + Year +
    te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(30, 9)) +
    s(BirdID, bs = "re") +
    offset(log(DiveDuration)),
  data = odba_results_complete,
  family = Gamma(link = "log"),
  method = "fREML",
  discrete = TRUE,
  rho = 0.3,
  AR.start = odba_results_complete$StartOfDive
)

summary(h3_model_final)

## 
## Family: Gamma 
## Link function: log 
## 
## Formula:
## ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) + DiveType + 
##     Year + te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(30, 
##     9)) + s(BirdID, bs = "re") + offset(log(DiveDuration))
## 
## Parametric coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          2.42122    0.02910  83.200  < 2e-16 ***
## DiveTypeEpipelagic   0.08744    0.02082   4.199 2.69e-05 ***
## DiveTypeMesopelagic  0.06119    0.02335   2.620   0.0088 ** 
## Year22               0.05765    0.02982   1.933   0.0532 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                        edf  Ref.df       F p-value    
## s(TotalEvents):DiveTypeBenthic       3.374   4.265  49.545  <2e-16 ***
## s(TotalEvents):DiveTypeEpipelagic    4.274   5.236 387.016  <2e-16 ***
## s(TotalEvents):DiveTypeMesopelagic   3.333   4.192 196.632  <2e-16 ***
## te(BirdID,time_rel_hours)          280.848 487.000   7.140  <2e-16 ***
## s(BirdID)                           15.739  22.000   1.537  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.852   Deviance explained = 84.9%
## fREML = -2995.1  Scale est. = 0.058604  n = 17375

# --- Model diagnostics from gratia
appraise(h3_model_final) +
  patchwork::plot_layout(ncol = 2)

1.3.3 Interpretation

To evaluate whether energy expenditure (approximated by ODBA) increased with prey capture attempts, I fit a generalized additive model (GAM) using a Gamma distribution with a log link and dive duration as an offset. The model included nonlinear smooths of prey capture attempts by dive type, fixed effects for dive type and year, a tensor product smooth over time and individual, and a random effect for individual bird, along with an AR(1) correlation structure for residuals autocorrelation (rho = 0.3). The model explained 85% of the deviance in the data (R^2 adjusted = 0.85).

There were significant nonlinear relationships between total prey capture attempts and ODBA across all dive types (p < 0.001 for all smooths), with the strongest effect observed in epipelagic dives (edf = 4.27), followed by mesopelagic dives (edf = 3.33), and benthic dives (edf = 3.37). Epipelagic dives were associated with 9.1% higher energy expenditure (estimate = 0.087, p < 0.001), mesopelagic dives with 6.3% higher expenditure (estimate = 0.061), p = 0.0088), compared to benthic dives. Penguins in 2022 exhibited marginally higher ODBA than in 2019 (estimate = 0.058, p = 0.053).

Figures

# === 1. Median dive duration for offset (for prediction only) ===
med_duration <- median(odba_results_complete$DiveDuration, na.rm = TRUE)

# === 2. Refit model for prediction without AR.start ===
h3_model_pred <- bam(
  ODBA_Total ~ s(TotalEvents, by = DiveType, k = 12) +
    DiveType + Year +
    te(time_rel_hours, BirdID, bs = c("cr", "re"), k = c(30, 9)) +
    s(BirdID, bs = "re") +
    offset(log(DiveDuration)),
  data = odba_results_complete,
  family = Gamma(link = "log"),
  method = "fREML",
  discrete = TRUE
)

# === 3. Plot smooth terms from final model ===
draw(h3_model_final, select = 1:3) + 
  plot_annotation(
    title = "Effect of Prey Capture Attempts on Energy Expenditure (ODBA)",
    subtitle = "Smooth terms estimated by dive type"
  )

## Warning in grep(p.terms[!ind], m.terms[off]): argument 'pattern' has length > 1
## and only the first element will be used
## Warning in grep(p.terms[!ind], m.terms[off]): argument 'pattern' has length > 1
## and only the first element will be used
## Warning in grep(p.terms[!ind], m.terms[off]): argument 'pattern' has length > 1
## and only the first element will be used

# === 4. Predict ODBA across TotalEvents by dive type ===
pred_grid <- ggpredict(
  h3_model_pred,
  terms = c("TotalEvents [0:75 by=5]", "DiveType"),
  condition = c(DiveDuration = med_duration)
)

# === 5. Plot prediction curves with 95% CI ribbons ===
plot_curve <- ggplot(pred_grid, aes(x = x, y = predicted, color = group)) +
  geom_line(size = 1.2) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = group), alpha = 0.2, color = NA) +
  labs(
    title = "Predicted Energy Expenditure (ODBA) by Prey Capture Attempts",
    subtitle = "Predictions at median dive duration",
    x = "Total Prey Capture Attempts",
    y = "Predicted ODBA",
    color = "Dive Type",
    fill = "Dive Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    legend.position = "right"
  )

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# === 6. Bar plot: Predicted ODBA at TotalEvents = 5 ===
pred_fixed <- ggpredict(
  h3_model_pred,
  terms = c("DiveType", "TotalEvents[5]"),
  condition = c(DiveDuration = med_duration)
)

plot_bar <- ggplot(pred_fixed, aes(x = x, y = predicted, fill = x)) +
  geom_col() +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.2) +
  labs(
    x = "Dive Type",
    y = "Predicted ODBA at 5 Prey Capture Attempts",
    title = "Energy Expenditure by Dive Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold")
  )

# === 7. Pairwise comparisons of dive types at 5 prey capture attempts ===
em <- emmeans(
  h3_model_pred,
  ~ DiveType,
  at = list(TotalEvents = 5, DiveDuration = med_duration),
  type = "response",
  data = odba_results_complete  # Supply the data explicitly
)

# 8. Run pairwise Tukey-adjusted comparisons
pairwise_results <- pairs(em, adjust = "tukey")

# View results
pairwise_results

##  contrast                 ratio      SE    df null t.ratio p.value
##  Benthic / Epipelagic     0.953 0.01400 17004    1  -3.288  0.0029
##  Benthic / Mesopelagic    0.983 0.01620 17004    1  -1.068  0.5337
##  Epipelagic / Mesopelagic 1.031 0.00933 17004    1   3.407  0.0019
## 
## Results are averaged over the levels of: Year 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log scale

# === 8. Display plots ===
plot_curve + plot_bar

# === 9. View pairwise results ===
pairwise_results

##  contrast                 ratio      SE    df null t.ratio p.value
##  Benthic / Epipelagic     0.953 0.01400 17004    1  -3.288  0.0029
##  Benthic / Mesopelagic    0.983 0.01620 17004    1  -1.068  0.5337
##  Epipelagic / Mesopelagic 1.031 0.00933 17004    1   3.407  0.0019
## 
## Results are averaged over the levels of: Year 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log scale

modeling_take4

Taylor Azizeh

2025-04-13

1 Thesis hypotheses modelling

1.1 Hypothesis 1: Pelagic dives will have a higher rate of prey capture attempts, compared to benthic dives, because benthic prey are predicted to be of a higher caloric content.

1.1.1 Explore your data (EDA) (Zuur. pg 12)

1.1.2 Evaluation of transit dives:

1.1.3 Model fitting

1.1.4 Interpretation

1.2 Hypothesis 2: The rate of prey capture attempts will be highest in the bottom phase of the dive, where emperor penguins are believed to be primarily hunting.

1.2.1 Explore your data (EDA) (Zuur. pg 12)

1.2.2 Model fitting

1.2.3 Interpretation

1.3 Hypothesis 3: Energy expenditure (activity) will increase with prey capture rate, depending on dive type.

1.3.1 Explore your data (EDA) (Zuur. pg 12)

1.3.2 Model fitting

1.3.3 Interpretation