Introduction
This analysis focuses on understanding the habitat use and behavior
of Herring Gulls (Larus argentatus) equipped with GPS tags. By examining
fine-scale movement data collected between May and October 2025, I aim
to explore how individual gulls interact with their environment across
varying tidal stages and habitats.
The workflow begins by cleaning and formatting the GPS dataset, then
integrating hourly tidal data from Bar Harbor to capture environmental
context. I then use the Expectation–Maximization binary Clustering
(EMbC) package to classify behaviors (e.g., resting, local
searching, traveling, active foraging) based on movement patterns.
Finally, I apply track2KBA, a tool designed to identify
important foraging and space-use areas for seabirds, to estimate core
use areas and visualize habitat utilization.
 
Downloading Data and General Setup
Here, I’m setting up my working environment and importing the raw GPS
dataset collected from the gull tagged on Great Duck Island (“P9K”). The
data spans from May 15th, 2025 through October 25th, 2025, corresponding
roughly to the bird’s breeding and post-breeding period.
#setting base directory
knitr::opts_chunk$set(echo = TRUE)
csv_path <- "C:/Users/pauly/Desktop/gps_data/gull_gps_analysis/data/data-export-p9k-10272025.csv"
gull_raw <- read.csv(csv_path, na.strings = c("","NA","nan"))
 
Before cleaning or analyzing the data, I load several R packages that
handle spatial data, data wrangling, and mapping.
#general packages
if(!require(tidyverse)) install.packages("tidyverse")
library(tidyverse)
if(!require(sf)) install.packages("sf")
library(sf)
#embc packages
if(!require(EMbC)) install.packages("EMbC")
library(EMbC)
if(!require(sp)) install.packages("sp")
library(sp)
if(!require(suntools)) install.packages("suntools")
library(suntools) #do I actually need this?
if(!require(RColorBrewer)) install.packages("RColorBrewer")
library(RColorBrewer) 
#leaflet maps
if(!require(leaflet)) install.packages("leaflet")
library(leaflet)
# modeling approaches (optional) - intentially hid this code chunk
if(!require(nnet)) install.packages("nnet")
library(nnet)
 
Cleaning Data Files
The GPS export contains many columns, so I first rename them to
intuitive names and convert important variables into the correct data
types.
I also filter out data points that have low positional accuracy
(hdop < 5) or recorded any error codes, to
remove unreliable fixes. This is building off of the methodology of
O'Hanlon et al. 2022 and
Enners et al. 2018.
gps_clean <- gull_raw %>%
  rename(time_at_fix = time_at_fix,
         hardware_id = hardware_id,
         alias = alias,
         latitude = latitude,
         longitude = longitude,
         altitude = altitude,
         speed_knots = speed,          # speed in knots
         cog = cog,
         hdop = hdop,
         vdop = vdop,
         pdop = pdop,
         satellite_count = satellite_count,
         time_to_fix = time_to_fix,
         fix_quality = fix_quality,
         error = error,
         activity = activity,
         battery_mv = battery_mv,
         instantaneous_solar_mv = instantaneous_solar_mv,
         solar_current_since_last_fix_ma = solar_current_since_last_fix_ma,
         temperature_c = temperature_c,
         connect_at = connect_at,
         ctt_device_type = ctt_device_type) %>%
  mutate(
    time_at_fix = as.POSIXct(gull_raw$time_at_fix, format = "%Y-%m-%dT%H:%M:%OSZ"), #converting into time variable
    connect_at = ymd_hms(connect_at, tz = "UTC"), #connection time
    latitude = as.numeric(latitude), #latitude
    longitude = as.numeric(longitude), #longitude
    satellite_count = as.integer(satellite_count), #number of satellites connected
    speed_knots = as.numeric(speed_knots), # knots
    speed_mph = as.numeric(speed_knots) * 0.514444, #mph
    hardware_id = as.character(hardware_id), #hardware_id, similar to 'alias'
    date = as.Date(time_at_fix) #creating date column
  )
#cleaning up data for erroneous datapoints
gps_clean <- gps_clean %>% 
  filter(hdop < 5 | is.na(hdop)) %>% 
  filter(error == 0 | is.na(error)) %>%
  filter(!is.na(latitude), !is.na(longitude), !is.na(time_at_fix)) #removing any empty, unusable cells
 
Because I’m primarily interested in daytime foraging and movement, I
filter the data to include only fixes between 6 AM and 6 PM, across the
May–October 2025 breeding season. This also helps reduce dataset size
for computational efficiency.
These times will vary with a dependence on the timeframe being
analyzed.
#filters between may 15th and october 27th - guess-timated "breeding season"... need to verify this
gps_clean <- gps_clean %>% 
  filter(date > "2025-5-15") 
#filter out nighttime - save dataframe space
gps_clean$time <- format(gps_clean$time_at_fix, format = "%H:%M:%S")
#next, remove times before 06:00 and after 18:00 -- I will change this as I see fit...
## I chose these times for the time frame that I was analyzing P9K's data through: May 15th through October 26th... use sunrise/sunset (haha) resource to determine twilight/no sunlit times
gps_clean <- gps_clean %>% 
  filter(time > "06:00:00") %>% #filtering out before daylight
  filter(time < "18:00:00") #filtering out after daylight
 
To perform spatial analysis and mapping, I convert the cleaned
dataset into an sf (simple features) object, which can
later be exported as a shapefile or visualized in GIS software.
#making simple feature, as one does
gps_sf <- st_as_sf(gps_clean, coords = c("longitude", "latitude"), crs = 4326)
 
 
 
Integrating Tidal Data
Because I want to understand how the gull’s activity changes with Bar
Harbor tidal cycles, I manually downloaded hourly tide predictions for
the same date range (May 15–Oct 27, 2025). I’ll now import and clean
this tidal dataset.
#csv path for tidal data
tidal_csv <- "C:/Users/pauly/Desktop/gps_data/gull_gps_analysis/data/tide_data_0515_to_10_27.csv"
#downloading raw dataset
tide_raw <- read.csv(tidal_csv)
 
Cleaning Tidal Data
Here, I rename the columns and combine date/time information into a
single datetime column for easier joining with the GPS dataset.
#renaming columns intuitively
tides <- tide_raw %>% 
  rename(
    date = Date,
    time_gmt = Time..GMT.,
    predicted_ft = Predicted..ft.,
    preliminary_ft = Preliminary..ft.,
    verified_ft = Verified..ft.
  ) %>%
  mutate(
    date = as.Date(date), 
    datetime = as.POSIXct(paste(date, time_gmt), format = "%Y-%m-%d %H:%M")) 
 
 
Classifying Tidal Stage and Direction
To examine whether gull behavior differs between high, mid, and low
tide periods, I divide the predicted tide heights into three equal
ranges.
I also determine whether the tide was flowing in (flooding) or ebbing
(receding) by comparing the change in height between consecutive
hours.
# compute thresholds
range_min <- min(tides$predicted_ft, na.rm = TRUE)
range_max <- max(tides$predicted_ft, na.rm = TRUE)
tidal_third <- (range_max - range_min) / 3
# define cut points
low_cut  <- range_min + tidal_third
high_cut <- range_max - tidal_third
tides <- tides %>%
  mutate(
    tide_stage = case_when(
      predicted_ft <= low_cut  ~ "Low",
      predicted_ft >= high_cut ~ "High",
      TRUE ~ "Mid"
    )
  )
#ebbing and flowing tide
tides <- tides %>%
  arrange(datetime) %>%
  mutate(
    next_height = lead(predicted_ft),
    diff_height = next_height - predicted_ft,
    tide_direction = case_when(
      diff_height > 0 ~ "Flowing",   # Tide rising (flowing in)
      diff_height < 0 ~ "Ebbing",    # Tide falling (flowing out)
      TRUE ~ NA_character_
    )
  )
 
 
Joining GPS file with Tidal Data
Finally, I merge the hourly tidal information with the gull’s GPS
fixes. I first round each GPS timestamp to the nearest hour, then
perform a left_join() so each location record inherits the corresponding
tidal height, stage, and direction at that time.
#adding tidal data to GPS file
gps_clean <- gps_clean %>% 
  mutate(datetime = floor_date(time_at_fix, unit = "hour"))
#left_join(), my beloved
gps_tides <- gps_clean %>%
  left_join(tides, by = "datetime")
 
 
 
Expectation-maximization binary clustering (EMbC)
https://cran.r-project.org/web/packages/EMbC/refman/EMbC.html
The EMbC (Expectation-Maximization Binary Clustering)
algorithm is a data-driven method for automatically classifying animal
movement trajectories into distinct behavioral modes. It does so using
only basic movement metrics like speed (velocity) and turning angle… two
core descriptors of animal movement patterns.
In other words, EMbC looks at how fast an animal is moving and how
sharply it turns between GPS fixes, and from that it assigns a
behavioral “state.”
These states correspond nicely to intuitive behaviors, such as:
- LL (Low speed, Low turning) → Resting or roosting
 
- LH (Low speed, High turning) → Local searching or area-restricted
search
 
- HL (High speed, Low turning) → Traveling or commuting
 
- HH (High speed, High turning) → Active foraging or pursuit
behavior
 
These four “binary” clusters (high/low × high/low) form the
foundation of the method.
Prepare the GPS Data
Here we’re extracting the key spatial and temporal variables that
EMbC needs. The function stbc() (spatio-temporal binary
clustering) will use these to calculate:
- Step length (distance between consecutive fixes)
 
- Turning angle (change in direction between consecutive fixes)
 
These two variables will form the basis of the behavioral
clustering.
#making first three columns timestamp, long, and lat
p9k <- data.frame(
  dTm = gps_tides$time_at_fix,
  lon = gps_tides$longitude,
  lat = gps_tides$latitude
)
#checking dataframe
colnames(p9k)
 
## [1] "dTm" "lon" "lat"
##       dTm                              lon              lat       
##  Min.   :2025-05-16 06:04:12.00   Min.   :-68.48   Min.   :44.08  
##  1st Qu.:2025-06-25 11:58:32.25   1st Qu.:-68.34   1st Qu.:44.13  
##  Median :2025-08-01 14:32:14.50   Median :-68.32   Median :44.14  
##  Mean   :2025-08-02 18:31:25.25   Mean   :-68.32   Mean   :44.18  
##  3rd Qu.:2025-09-07 17:39:12.50   3rd Qu.:-68.25   3rd Qu.:44.27  
##  Max.   :2025-10-25 15:59:38.00   Max.   :-68.17   Max.   :44.43
 
Run EMbC to Cluster Movement Behaviors
This code automatically computes step lengths and turning angles
between consecutive GPS points.
It does so by using the Expectation-Maximization Binary Clustering
algorithm to assign each point to one of four Gaussian clusters ->
two binary dimensions: low/high speed x low/high turning.
This produces an object of class binClstPath that stores
estimated velocity, Turning angle, and clustered behavioral states
(e.g. LL, LH, HL, HH), as well as model parameters and diagnostics.
mybcp <- stbc(p9k, info=-1)
 
## [1]   0  -0.0000e+00       4      7038
 
Exploring Results
View summary statistics
This displays the mean and variance for velocity and turning in each
behavioral cluster, giving you a sense of how distinct the movement
states are.
##          X1.mn    X1.sd    X2.mn    X2.sd     kn  kn(%)
##  1 LL     0.04     0.10     1.07     0.73   3154  44.81 
##  2 LH     0.05     0.10     2.78     0.28   1821  25.87 
##  3 HL     4.90     3.71     0.25     0.19    628   8.92 
##  4 HH     2.51     2.37     1.89     0.86   1434  20.38
 
Visualize the velocity–turning space
This scatterplot shows how your data points distribute in
velocity–turning space.
We’ll see clear clusters — for example, “LL” points grouped at low
speed and low turning, “HH” points spread across higher speed and
turning.

 
Visualize how behavior changes over time
This plot shows behavioral state as a time series — useful for
identifying when the gull switches from resting to traveling to
foraging.

 
Visualize behaviors spatially
This displays the gull’s movement path over space, colored by
behavior type.

 
 
Optional - include daytime covariate
Here, scv="height" (solar height) allows EMbC to factor
in whether it’s day or night when assigning behavioral clusters —
helpful if you suspect diel patterns in foraging… I need to try this
before taking away points between the hours of 18:00 and 06:00…
mybcp3 <- stbc(p9k, scv="height", info=-1)
 
## [1]   0  -0.0000e+00       8      7038
## [1] ... Stable clustering
# sctr(mybcp3)
# lblp(mybcp3)
# view(mybcp3)
 
Allow us to compare…
#velocity-turn space
sctr(mybcp)
 


#behavior over time
lblp(mybcp)
 


#behaviors spatially
view(mybcp)
 


Add Behaviors Back into GPS Dataframe
#adding behaviors to p9k dataframe
p9k$behavior <- mybcp@A
#labeling behaviors
p9k$behavior_label <- factor(p9k$behavior, 
                             levels = 1:4, 
                             labels = c("LL", "LH", "HL", "HH"))
#getting rid of nas (optional)
# p9k$behavior_label <- ifelse(p9k$behavior_label %in% c("LH","HL"), "foraging", "non-foraging")
 
 
 
 
Creating Habitat Raster Files
This workflow extracts elevation data for each GPS fix using a
digital elevation model (DEM), then classifies each fix into land,
intertidal, or subtidal habitat zones. This elevation raster provides
the foundation for classifying habitat: distinguishing between
terrestrial and marine areas.
# habitat raster files - core spatial and data libraries
if(!require(elevatr)) install.packages("elevatr")
library(elevatr)
if(!require(terra)) install.packages("terra")
library(terra)
if(!require(dplyr)) install.packages("dplyr")
library(dplyr) #suit yourself, I'm easy
if(!require(ggplot2)) install.packages("ggplot2")
library(ggplot2) 
 
This allows us to explore when and where our Great Duck Island gull
uses different habitat types. For example, whether it forages in the
intertidal zone during low tide, or rests on land between foraging
trips.
# Define extent from your GPS data
p9k_sf <- st_as_sf(p9k, coords = c("lon", "lat"), crs = 4326, remove = FALSE)
#retrieve raster within my coordinates
dem_raster <- elevatr::get_elev_raster(
  locations = p9k_sf,
  z = 10,                # zoom level: you can adjust (higher = finer resolution)
  clip = "bbox",         # clip the raster to bounding box of your points
  src = "aws"            # use AWS Terrain Tiles (default)
)
dem_spat <- terra::rast(dem_raster)
dem_spat
 
## class       : SpatRaster 
## size        : 631, 546, 1  (nrow, ncol, nlyr)
## resolution  : 0.0005592455, 0.0005592455  (x, y)
## extent      : -68.47751, -68.17216, 44.07932, 44.4322  (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=longlat +datum=WGS84 +no_defs 
## source(s)   : memory
## name        : file92504ec62f32 
## min value   :              -87 
## max value   :              463
plot(dem_spat, main = "DEM/ Terrain from elevatr")
 

st_as_sf() converts our GPS dataset into an sf (simple
features) object, allowing for easy spatial manipulation.
get_elev_raster() downloads a DEM (digital elevation
model) from online sources (AWS here) using the bounding box of our
gull’s movement track.
Each pixel in this raster contains an elevation value in meters above
sea level.
Plotting the DEM helps us confirm that the terrain covers the
relevant area (Great Duck Island and surrounding marine zones).
Classify Habitat Based on Elevation
This classification provides a static habitat variable for every GPS
fix.
p9k <- p9k %>% 
  mutate(habitat_static = case_when(
    elev_m > 0 ~ "land", 
    elev_m <= 0 & elev_m > -5 ~ "intertidal", 
    elev_m <= -5 ~ "subtidal", 
    TRUE ~ NA_character_
  ))
table(p9k$habitat_static, useNA = "ifany")
 
## 
## intertidal       land   subtidal       <NA> 
##       1768       4702        565          3
This step translates raw elevation values into categorical habitat
types: - Land → typically nesting/resting areas (e.g., island terrain) -
Intertidal → shallow coastal zones exposed at low tide, often key for
foraging - Subtidal → permanently submerged areas used for commuting or
pelagic foraging
Thresholds (0 m and -5 m) can be adjusted depending on the tidal
amplitude around Bar Harbor.
 
Visualize Habitat Use with an Interactive Map
This interactive map allows you to visually explore where our gull
spends time across different habitats.
#Using Leaflet
## Habitat Color Pallete
pal <- colorFactor(
  palette = c("subtidal" = "#2b6fb3",
              "intertidal" = "#d8c48f",
              "land" = "#66c266"),
  domain = p9k$habitat_static
)
## Leaflet map for all behavior types
leaflet(data = p9k) %>%
  addProviderTiles(providers$Esri.WorldImagery, group = "Imagery") %>%
  addProviderTiles(providers$Esri.WorldTopoMap, group = "Topographic") %>%
  addCircleMarkers(
    lng = ~lon, lat = ~lat,
    color = ~pal(habitat_static),
    fillOpacity = 0.8, radius = 3,
    popup = ~paste0(
      "<b>Habitat:</b> ", habitat_static, "<br>",
      "<b>Elevation (m):</b> ", round(elev_m, 2)
    )
  ) %>%
  addLegend("bottomright",
            pal = pal,
            values = ~habitat_static,
            title = "Habitat Type",
            opacity = 1) %>%
  addLayersControl(
    baseGroups = c("Imagery", "Topographic"),
    options = layersControlOptions(collapsed = TRUE)
  )
 
 
 
Integrating Tidal Data
#ensuring that the left_join() will work
p9k <- p9k %>% 
  mutate(datetime = dTm) %>% 
  mutate(datetime = floor_date(datetime, unit = "hour"))
#joining datasets...
p9k_tides <- p9k %>% 
  left_join(tides, by = "datetime")
#getting rid of na values
p9k_tides <- p9k_tides %>% 
  filter(behavior_label == "HH" | behavior_label == "HL" | behavior_label == "LH" | behavior_label == "LL") %>% 
  filter(habitat_static == "intertidal" | habitat_static == "land" | habitat_static == "subtidal")
#creating tidal column including stage and direction as one value
p9k_tides$tide_all <- paste(p9k_tides$tide_stage, p9k_tides$tide_direction, sep = "_")
#glimpsing dataframe
glimpse(p9k_tides)
 
## Rows: 7,034
## Columns: 18
## $ dTm            <dttm> 2025-05-16 06:04:12, 2025-05-16 06:19:20, 2025-05-16 0…
## $ lon            <dbl> -68.39765, -68.39777, -68.39821, -68.39901, -68.39729, …
## $ lat            <dbl> 44.29755, 44.29876, 44.29973, 44.30061, 44.31041, 44.39…
## $ behavior       <dbl> 1, 1, 1, 4, 3, 3, 1, 1, 4, 1, 3, 1, 4, 1, 2, 4, 1, 4, 2…
## $ behavior_label <fct> LL, LL, LL, HH, HL, HL, LL, LL, HH, LL, HL, LL, HH, LL,…
## $ elev_m         <dbl> 11, 11, 11, 11, 11, 22, 0, 0, 0, 0, 0, 0, 0, 3, 6, 6, 0…
## $ habitat_static <chr> "land", "land", "land", "land", "land", "land", "intert…
## $ datetime       <dttm> 2025-05-16 06:00:00, 2025-05-16 06:00:00, 2025-05-16 0…
## $ date           <date> 2025-05-16, 2025-05-16, 2025-05-16, 2025-05-16, 2025-0…
## $ time_gmt       <chr> "06:00", "06:00", "06:00", "06:00", "07:00", "07:00", "…
## $ predicted_ft   <dbl> 10.738, 10.738, 10.738, 10.738, 9.590, 9.590, 9.590, 9.…
## $ preliminary_ft <chr> "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", …
## $ verified_ft    <chr> "11.52", "11.52", "11.52", "11.52", "10.38", "10.38", "…
## $ tide_stage     <chr> "High", "High", "High", "High", "High", "High", "High",…
## $ next_height    <dbl> 9.590, 9.590, 9.590, 9.590, 7.522, 7.522, 7.522, 7.522,…
## $ diff_height    <dbl> -1.148, -1.148, -1.148, -1.148, -2.068, -2.068, -2.068,…
## $ tide_direction <chr> "Ebbing", "Ebbing", "Ebbing", "Ebbing", "Ebbing", "Ebbi…
## $ tide_all       <chr> "High_Ebbing", "High_Ebbing", "High_Ebbing", "High_Ebbi…
Behavior, Tides, and Land Usage
I have a dataset now that contains data on a gull’s foraging
behavior, including columns like “behavior_level” (“LL =
resting/perching = low velocity - low turning LH = local
movement/searching = low velocity - high turning HL = traveling = high
velocity - low turning HH = active foraging = high velocity - high
turning”), “tide_stage” (High, Low, Mid), “tide_direction” (Ebbing,
Flowing), and “tide_all” (High_Ebbing, High_Flowing, Mid_Ebbing,
Mid_Flowing, etc…), and “habitat_static” (intertidal, subtidal, land). I
want to see if his behavior changes during different tidal cycles as
well as his on shore usage… how can I do this statistically? how can I
visualize this?
Behavior vs. Tidal Stage
These show how often each behavior occurs under different tidal
conditions.
# Behavior vs Tide Stage
ggplot(p9k_tides, aes(x = tide_stage, fill = behavior_label)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Proportion of Behaviors by Tide Stage",
    x = "Tide Stage",
    y = "Proportion of Fixes",
    fill = "Behavior"
  ) +
  theme_minimal()
 

# Behavior vs Tide Direction
ggplot(p9k_tides, aes(x = tide_direction, fill = behavior_label)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Behavior Composition by Tidal Direction",
    x = "Tide Direction",
    y = "Proportion"
  ) +
  theme_minimal()
 

 
Tide + Habitat interactions
Try to figure out how to remove nas from this
ggplot(p9k_tides, aes(x = tide_all, fill = behavior_label)) +
  geom_bar(position = "fill") +
  facet_wrap(~ habitat_static) +
  scale_y_continuous(labels = scales::percent) +
  theme_minimal() +
  labs(
    title = "Behavior Across Tide–Habitat Combinations",
    x = "Tide Stage + Direction",
    y = "Proportion"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
 

 
Chi-square
# Contingency tables
table_tide <- table(p9k_tides$behavior_label, p9k_tides$tide_stage)
chisq.test(table_tide)   # If counts are big enough
 
## 
##  Pearson's Chi-squared test
## 
## data:  table_tide
## X-squared = 29.857, df = 6, p-value = 4.184e-05
# If any cell < 5:
# fisher.test(table_tide)
# Repeat for habitat
table_habitat <- table(p9k_tides$behavior_label, p9k_tides$habitat_static)
chisq.test(table_habitat)
 
## 
##  Pearson's Chi-squared test
## 
## data:  table_habitat
## X-squared = 751.54, df = 6, p-value < 2.2e-16
A significant p-value means behavior distribution is not random with
respect to tide stage (or habitat).
 
Modeling Approaches
# Convert to factors
p9k_tides <- p9k_tides %>%
  mutate(
    behavior_level = factor(behavior_label),
    tide_stage = factor(tide_stage, levels = c("Low", "Mid", "High")),
    tide_direction = factor(tide_direction),
    habitat_static = factor(habitat_static)
  )
# Model: predict behavior from tide and habitat
m1 <- multinom(
  behavior_label ~ tide_stage + tide_direction + habitat_static,
  data = p9k_tides
)
 
## # weights:  28 (18 variable)
## initial  value 9751.194536 
## iter  10 value 8667.178236
## iter  20 value 8462.191181
## final  value 8455.211321 
## converged
## Call:
## multinom(formula = behavior_label ~ tide_stage + tide_direction + 
##     habitat_static, data = p9k_tides)
## 
## Coefficients:
##    (Intercept) tide_stageMid tide_stageHigh tide_directionFlowing
## LH  -0.5641331    0.01561319    -0.07003388            0.02281036
## HL  -1.4709468   -0.40280884    -0.58393904           -0.03035355
## HH  -0.5151345   -0.18195776    -0.14964282            0.08707945
##    habitat_staticland habitat_staticsubtidal
## LH          0.0325796             -0.1592899
## HL         -0.2206743              2.4259160
## HH         -0.5034544              1.3875479
## 
## Std. Errors:
##    (Intercept) tide_stageMid tide_stageHigh tide_directionFlowing
## LH  0.07928860    0.07088278     0.07568942            0.06026695
## HL  0.11826539    0.10755217     0.11738366            0.09325332
## HH  0.08214747    0.07842070     0.08218212            0.06657850
##    habitat_staticland habitat_staticsubtidal
## LH         0.06940692              0.1840597
## HL         0.11246366              0.1558958
## HH         0.07338109              0.1339484
## 
## Residual Deviance: 16910.42 
## AIC: 16946.42
# effect sizes
exp(coef(m1))  # odds ratios
 
##    (Intercept) tide_stageMid tide_stageHigh tide_directionFlowing
## LH   0.5688530     1.0157357      0.9323622             1.0230725
## HL   0.2297079     0.6684399      0.5576972             0.9701025
## HH   0.5974202     0.8336366      0.8610155             1.0909834
##    habitat_staticland habitat_staticsubtidal
## LH          1.0331161              0.8527491
## HL          0.8019778             11.3125870
## HH          0.6044391              4.0050174
# if(!require(glmmTMB)) install.packages("glmmTMB")
# library(glmmTMB)
# 
# m2 <- glmmTMB(
#   behavior_label ~ tide_stage + tide_direction + habitat_static + (1|id),
#   family = multinom2(),
#   data = p9k_tides
# )
# summary(m2)
 
p9k_tides %>%
  mutate(hour = hour(datetime)) %>%
  group_by(hour, behavior_label, tide_all) %>%
  summarise(count = n()) %>%
  group_by(hour) %>%
  mutate(prop = count / sum(count)) %>%
  ggplot(aes(x = hour, y = prop, fill = behavior_label)) +
  facet_grid(behavior_label~tide_all) +
  geom_area(position = "stack") +
  theme_minimal() +
  labs(
    title = "Behavior Distribution by Hour of Day (Tide-Linked)",
    x = "Hour",
    y = "Proportion"
  )
 
## `summarise()` has grouped output by 'hour', 'behavior_label'. You can override
## using the `.groups` argument.

 
 
 
Using track2KBA
https://cran.r-project.org/web/packages/track2KBA/refman/track2KBA.html
The R package track2KBA (Identifying Important Areas
from Animal Tracking Data) is a tool developed primarily for identifying
spatial areas of importance (for example for conservation) from
animal‐tracking data.
It’s a package oriented toward spatial patterns of use rather than
direct behavioral modelling of e.g., tide × behavior interaction. Its
original focus is where animals spend time and how that informs
important sites for the species/population.
How to use it with our GPS dataset
Identify core use areas: If we want to know where our gull(s) spend
most of their time (e.g., intertidal flats vs subtidal zones vs
terrestrial areas), we could use track2KBA to compute utilization
distributions (UDs) and then compare space use across individuals,
across behavior states, or across tidal phases.
Trip‐based summaries: If our gulls make trips (departing from colony,
returning), we can use tripSplit() + tripSummary() to define those
trips, then relate trip characteristics (distance, duration, etc) to
tide/habitat/behavior.
Assess sample representativeness / overlap: If we decide to integrate
multiple gulls into one dataset, we can use indEffectTest() or
repAssess() to see whether the tracked sample represents the population
and whether space‐use overlap is more within‐ vs
between‐individuals…helpful if we are thinking about population‐level
inference.
 
Important notes
We are already focusing more on behavioral states (resting,
searching, foraging) and environmental covariates (tide stage,
direction, habitat, weather) rather than purely where they go. While
track2KBA is spatially strong, our analysis is more ecological /
behavioral than purely spatial site‐identification.
Many of our tasks (behaviour ~ tide/habitat) are better handled by
statistical modelling (multinomial logistic regression, mixed models)
rather than just spatial KDE/UD. So track2KBA may be an additional tool
rather than the central one.
If our sampling resolution is high and covers short time‐scales
(hourly GPS + behavior states + tide), then the typical “core‐area” or
“important area” use of track2KBA may be less relevant, unless you want
to identify foraging hotspots… which we do.
if (!require(track2KBA)) install.packages("track2KBA")
library(track2KBA)
if (!require(lubridate)) install.packages("lubridate")
library(lubridate)
if (!require(leaflet))  install.packages("leaflet")
library(leaflet)
if (!require(devtools)) install.packages("devtools") 
# devtools::install_github("BirdLifeInternational/track2kba", dependencies=TRUE) # development version - add argument 'build_vignettes = FALSE' to speed it up 
library(devtools)
 
Main Functions
Estimating space‐use/utilization distributions (UDs) from tracking
datasets: e.g., the function estSpaceUse() takes a set of
tracks (locations over time) and computes a kernel density estimate
(KDE) of where animals spend time.
Finding an appropriate spatial “scale” for KDE (via findScale()),
which helps decide how wide the smoothing should be given the data
resolution.
Splitting tracking data into discrete “trips” from a central place
(via tripSplit()), summarizing them (tripSummary()), projecting tracks
into equal‐area coordinate systems (projectTracks()), etc.
Once UDs/trips are summarized, identifying sites of aggregation—i.e.,
areas used by a large proportion of tracked individuals or by many
individuals—via findSite(), which can be used to evaluate “Key
Biodiversity Areas” (KBAs) or other conservation‐relevant zones.
Map‐making… mapKDE(), mapTrips(), mapSite() to visualize UDs, trips,
and candidate important areas.
 
 
Trip Parameters
Now that the dataset is clean, I identify discrete foraging trips
away from the colony. This step helps separate periods when the gulls
are at the colony (resting, incubating, or roosting) from times when
they are actively foraging.
Herring Gulls typically travel several kilometers out to sea, so I
define a trip as any movement that extends more than 1 km from the
colony and lasts at least 2 hours. The innerBuff parameter
defines when a bird is considered to have “left” the colony, and
returnBuff defines when it is considered to have
returned.
#creating trips 
trips <- tripSplit(
  dataGroup  = dataGroup,
  colony = colony,
  innerBuff  = 1,      # km — has left the colony
  returnBuff = 2,     # km — returning home
  duration   = 2
)
 
## track 35681210422055101 starts out on trip
## track 356812104220551116 does not return to the colony
#mapping trips -- learn how to facet this by trip?
mapTrips(trips = trips, colony = colony)
 
## Trips colored by completeness. Indicate colorBy=='trip' to color by
##     trips.
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the track2KBA package.
##   Please report the issue at
##   <https://github.com/BirdLifeInternational/track2kba/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

 
Estimating Space Use with Kernel Density Estimation (KDE)
Once the trips are identified, I summarize their characteristics to
see how many trips were recorded per individual, how long they lasted,
and how far the birds traveled from the colony. I also filter out any
incomplete trips (those that don’t have a recorded return).
trips <- subset(trips, trips$Returns == "Yes" )
sumTrips <- tripSummary(trips = trips, colony = colony)
sumTrips
 
 
Projecting Tracks for Spatial Analysis
Before estimating space use, it’s important to project the data into
an equal-area coordinate system. Geographic coordinates (latitude and
longitude) distort distances, especially over larger spatial scales,
which can bias area-based estimates like kernel densities.
I use an azimuthal projection centered on my dataset… this minimizes
distortion and ensures that the distances and areas calculated by
Track2KBA are accurate.
# projecting tracks using an azimuthal projection
tracks_prj <- projectTracks(dataGroup = trips, projType = "azim", custom = TRUE)
 
## NOTE: projection is data specific
tracks <- projectTracks( dataGroup = trips, projType = "azim", custom = TRUE)
 
## NOTE: projection is data specific
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"
 
Determining Kernel Density Parameters
Kernel Density Estimation (KDE) requires a smoothing parameter that
defines how tightly or loosely the utilization distribution (UD) will
fit the data. I use the findScale() function to determine
an appropriate smoothing factor based on the birds’ movement
patterns.
This parameter ensures that my space-use estimates are biologically
realistic, neither overestimating nor underestimating the area the birds
used.
# estimating the appropriate scale (bandwidth) for KDE
hVals <- findScale(
  tracks = tracks, 
  scaleARS = TRUE, 
  sumTrips = sumTrips
)
 
## No 'res' was specified. Movement scale in the data was compared
##         to a 500-cell grid with cell size of 0.083 km squared.

 
Estimating and Mapping Space Use
Finally, I estimate the gulls’ space use during foraging trips using
kernel density estimation. This produces utilization distributions
(UDs), which represent the probability of a bird being found in a given
area.
I exclude all points within 1 km of the colony (since those represent
resting or nesting rather than foraging activity) and focus on the 50%
UD — the “core” foraging area where each bird spent most of its
time.
I then visualize these foraging zones to see the main areas of
activity around Great Duck Island.
# Removing non-foraging points close to the colony
tracks <- tracks[tracks$ColDist > 1, ]
# Estimating the 50% utilization distribution (core foraging area)
KDE <- estSpaceUse(
  tracks = tracks, 
  scale = hVals$mag, 
  levelUD = 50, #you can change this
  polyOut = TRUE
)
 
## No grid resolution ('res') was specified, or the specified resolution was
##     >99 km and therefore ignored. Space use was calculated on a 500-cell grid,
##     with cells of 0.105 square km
# Mapping core foraging areas relative to the colony
mapKDE(KDE = KDE$UDPolygons, colony = colony)
 

 
Writing Foraging Habitats as an ArcGIS shapefile
library(sf)
UD_polygons <- KDE$UDPolygons
print(UD_polygons)
 
## Simple feature collection with 1 feature and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -68.38871 ymin: 44.1044 xmax: -68.218 ymax: 44.32005
## Geodetic CRS:  WGS 84
##                              id    area                       geometry
## 356812104220551 356812104220551 131.991 MULTIPOLYGON (((-68.35995 4...
# checking names
names(UD_polygons)
 
## [1] "id"       "area"     "geometry"
# Write the 50% core use area polygons to a shapefile
st_write(KDE$UDPolygons, "GDI_p9k_foraging_KDE_50.shp", delete_layer = TRUE)
 
## Deleting layer `GDI_p9k_foraging_KDE_50' using driver `ESRI Shapefile'
## Writing layer `GDI_p9k_foraging_KDE_50' to data source 
##   `GDI_p9k_foraging_KDE_50.shp' using driver `ESRI Shapefile'
## Writing 1 features with 2 fields and geometry type Multi Polygon.
 
Visualize KDE polygons in a Leaflet map
library(leaflet)
leaflet() %>%
  addProviderTiles(providers$Esri.WorldImagery) %>%
  addPolygons(
    data = UD_polygons,
    color = "red",
    weight = 2,
    opacity = 0.8,
    fillColor = "red",
    fillOpacity = 0.3,
    popup = ~paste0("Polygon ID: ", id, "<br>",
                    "Area (m²): ", round(area, 2))
  ) %>%
  addCircleMarkers(
    data = colony_sf,
    color = "yellow",
    radius = 5,
    label = "Great Duck Island (Colony)"
  )
 
 
 
For Foraging Behavior
#setting field ID
p9k_tides$fieldID <- "p9k"
p9k_foraging <- p9k_tides %>% 
  filter(behavior_label != "LL")
#formatting for track2KBA
dataGroup <- formatFields(
  dataGroup = p9k_foraging, 
  fieldID = "fieldID", 
  fieldDateTime = "dTm",
  fieldLon = "lon", 
  fieldLat = "lat"
)
## Trip Parameters
#creating trips 
trips <- tripSplit(
  dataGroup  = dataGroup,
  colony = colony,
  innerBuff  = 1,      # km — has left the colony
  returnBuff = 2,     # km — returning home
  duration   = 2
)
 
## track p9k01 starts out on trip
## track p9k99 does not return to the colony
#mapping trips -- learn how to facet this by trip?
mapTrips(trips = trips, colony = colony)
 
## Trips colored by completeness. Indicate colorBy=='trip' to color by
##     trips.

## Estimating Space Use with Kernel Density Estimation (KDE)
trips <- subset(trips, trips$Returns == "Yes" )
sumTrips <- tripSummary(trips = trips, colony = colony)
sumTrips
 
## Projecting Tracks for Spatial Analysis
# projecting tracks using an azimuthal projection
tracks_prj <- projectTracks(dataGroup = trips, projType = "azim", custom = TRUE)
 
## NOTE: projection is data specific
tracks <- projectTracks( dataGroup = trips, projType = "azim", custom = TRUE)
 
## NOTE: projection is data specific
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"
## Determining Kernel Density Parameters
# estimating the appropriate scale (bandwidth) for KDE
hVals <- findScale(
  tracks = tracks, 
  scaleARS = TRUE, 
  sumTrips = sumTrips
)
 
## No 'res' was specified. Movement scale in the data was compared
##         to a 500-cell grid with cell size of 0.076 km squared.

## Estimating and mapping space use 
# Removing non-foraging points close to the colony
tracks <- tracks[tracks$ColDist > 1, ]
# Estimating the 50% utilization distribution (core foraging area)
KDE <- estSpaceUse(
  tracks = tracks, 
  scale = hVals$mag, 
  levelUD = 50, #you can change this
  polyOut = TRUE
)
 
## No grid resolution ('res') was specified, or the specified resolution was
##     >99 km and therefore ignored. Space use was calculated on a 500-cell grid,
##     with cells of 0.099 square km
# Mapping core foraging areas relative to the colony
mapKDE(KDE = KDE$UDPolygons, colony = colony)
 

Writing Foraging Habitats as an ArcGIS shapefile
library(sf)
UD_polygons <- KDE$UDPolygons
print(UD_polygons)
 
## Simple feature collection with 1 feature and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -68.38148 ymin: 44.09717 xmax: -68.21757 ymax: 44.31908
## Geodetic CRS:  WGS 84
##      id     area                       geometry
## p9k p9k 142.1934 MULTIPOLYGON (((-68.36818 4...
# checking names
names(UD_polygons)
 
## [1] "id"       "area"     "geometry"
# Write the 50% core use area polygons to a shapefile
st_write(KDE$UDPolygons, "GDI_p9k_foraging_KDE_50.shp", delete_layer = TRUE)
 
## Deleting layer `GDI_p9k_foraging_KDE_50' using driver `ESRI Shapefile'
## Writing layer `GDI_p9k_foraging_KDE_50' to data source 
##   `GDI_p9k_foraging_KDE_50.shp' using driver `ESRI Shapefile'
## Writing 1 features with 2 fields and geometry type Multi Polygon.
 
Visualize KDE polygons in a Leaflet map
library(leaflet)
leaflet() %>%
  addProviderTiles(providers$Esri.WorldImagery) %>%
  addPolygons(
    data = UD_polygons,
    color = "red",
    weight = 2,
    opacity = 0.8,
    fillColor = "red",
    fillOpacity = 0.3,
    popup = ~paste0("Polygon ID: ", id, "<br>",
                    "Area (m²): ", round(area, 2))
  ) %>%
  addCircleMarkers(
    data = colony_sf,
    color = "yellow",
    radius = 5,
    label = "Great Duck Island (Colony)"
  )
 
 
 
CODE JAIL
CODE JAIL
Hidden Markov Model
https://arxiv.org/abs/1710.03786?utm_
https://cran.r-project.org/web/packages/momentuHMM/vignettes/momentuHMM.pdf
A hidden markov model assumes that an animal switches between hidden
behavioral states (e.g. foraging, resting, flying), that you can’t
directly observe – but you can observe movement patterns (like speed,
turning angle, or step length) that depend on those states.
hidden state refers to the behavioral mode of the animal
that is not observed directly.
observable data refers to the movement metrics you do
observe — step length (distance between fixes) and turning angle (change
in direction).
emission distribution refers to the probability
distribution assigned with each behavior for what the step lengths and
angles look like. For example, foraging might have been short, random
steps while commuting has long, straight steps.
transition probability refers to the probability of
switching from one behavior to another; e.g. a gull has a 10% chance of
switching from foraging to commuting every 15 minutes.
# #install.packages("momentuHMM")
# library(momentuHMM)
# library(dplyr)
# library(sf)
 
# p9k_markov <- p9k_raw
# 
# p9k_markov <- st_as_sf(p9k_markov, coords = c("longitude", "latitude"), crs = 4326)
# p9k_markov <- st_transform(p9k_markov, 32619)
# coords <- st_coordinates(p9k_markov)
# p9k_markov$x <- coords[,1]
# p9k_markov$y <- coords[,2]
# p9k_markov$time <- as.POSIXct(p9k_markov$time_at_fix)
# 
# #preparing data
# gull_prep <- prepData(p9k_markov, type = "UTM", coordNames = c("x", "y"))
# 
# # #fit 3-state hmm: resting/commuting/foraging
# # m3 <- fitHMM(
# #   data = gull_prep,
# #   nbStates = 3,
# #   stepPar0 = c(10, 100, 500, 50, 200, 1000, 0, 1500, 750, 250), # initial step mean/sd
# #   anglePar0 = c(0, 0, 0, 1, 1, 1)) # initial turning angle mean/sd
 
Keeping zeros and supplying zero-mass parameters before fitting the
model
You must supply stepPar0 with (nbStates × 2) parameters for the step
distribution (mean & sd for gamma) plus (nbStates) zero-mass
parameters (probabilities of a zero step for each state). For nbStates =
3 that’s 3*2 + 3 = 9 numbers.
# # step means for states 1,2,3:
# means <- c(5, 100, 500)
# 
# # step sds for states 1,2,3:
# sds   <- c(5, 200, 1000)
# 
# # zero-mass probs for states 1,2,3 (between 0 and 1)
# zeroProb <- c(0.5, 0.01, 0.05)  # e.g., state1 = resting (lots of zeros)
# 
# stepPar0 <- c(means, sds, zeroProb)  # length 9
# 
# anglePar0 <- c(0, 0, 0, 1, 1, 1)  # means (radians) then concentrations (for 3 states)
# 
# m3 <- fitHMM(
#   data = gull_prep,
#   nbStates = 3,
#   stepPar0 = stepPar0,
#   anglePar0 = anglePar0,
#   stepDist = "gamma")
# 
# m3
 
 
 
Auto-generate sensible intial parameters
# # create a small helper to produce initial parameters
# 
# make_init_pars <- function(pdata, nbStates = 3){
#   steps <- pdata$step
#   # replace 0 with small positive for clustering
#   steps_for_clust <- ifelse(steps == 0, min(steps[steps > 0], na.rm=TRUE)/10, steps)
#   cl <- kmeans(log(steps_for_clust + 1), centers = nbStates)
#   # cluster centers -> approximate means on original scale
#   cluster_means <- tapply(steps, cl$cluster, function(x) mean(x[x>0], na.rm=TRUE))
#   cluster_sds   <- tapply(steps, cl$cluster, function(x) sd(x[x>0], na.rm=TRUE))
#   # order states by increasing mean to make them interpretable
#   ord <- order(cluster_means)
#   means <- as.numeric(cluster_means[ord])
#   sds   <- as.numeric(cluster_sds[ord])
#   # zero-mass probabilities per state
#   zeroProb <- tapply(steps == 0, cl$cluster, mean)[ord]
#   # if any NA in sd (e.g., single point), replace
#   sds[is.na(sds)] <- pmax(1, means[is.na(sds)]/2)
#   return(list(stepPar0 = c(means, sds, zeroProb)))
# }
# 
# init <- make_init_pars(gull_prep, nbStates = 3)
# init$stepPar0  # look at what it chose
 
 
Interpretting Markov Model
# # Summaries
# print(m3)
# plot(m3, ask=FALSE)
# m3$stateProbs
# 
# # adding states
# gull_prep$state <- viterbi(m3)
# table(gull_prep$state)
 
 
Next steps
# table(gull_prep$behavior)