##Install Packages

##Load Libraries

knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(leaflet)
library(sf)
library(rgbif)
library(dplyr)
library(terra)
library(geodata)
library(dbscan)
library(maps)

Connecting to GBIF and selecting bird species for migration pattern exploration.

The bird I selected to examine its migration patterns is the Great Blue Heron, also known as “Ardea Herodias”.

heron_name <- name_backbone(name = "Ardea herodias")
heron_name
## # A tibble: 1 × 30
##   usageKey scientificName            canonicalName authorship rank  code  status
## * <chr>    <chr>                     <chr>         <chr>      <chr> <chr> <chr> 
## 1 9630752  Ardea herodias Linnaeus,… Ardea herodi… Linnaeus,… SPEC… ZOOL… ACCEP…
## # ℹ 23 more variables: genericName <chr>, specificEpithet <chr>, type <chr>,
## #   formattedName <chr>, matchType <chr>, confidence <int>, timeTaken <int>,
## #   kingdom <chr>, phylum <chr>, class <chr>, order <chr>, family <chr>,
## #   genus <chr>, species <chr>, kingdomKey <chr>, phylumKey <chr>,
## #   classKey <chr>, orderKey <chr>, familyKey <chr>, genusKey <chr>,
## #   speciesKey <chr>, value <lgl>, verbatim_name <chr>
heron_data <- occ_search(
  scientificName = "Ardea herodias",
  hasCoordinate = TRUE,
  limit = 500
)

heron_df <- heron_data$data
head(heron_df)
## # A tibble: 6 × 96
##   key        scientificName   decimalLatitude decimalLongitude issues datasetKey
##   <chr>      <chr>                      <dbl>            <dbl> <chr>  <chr>     
## 1 5938027555 Ardea herodias …            39.4            -76.2 cdc,c… 50c9509d-…
## 2 5938028341 Ardea herodias …            30.7            -97.6 cdc,c… 50c9509d-…
## 3 5938028631 Ardea herodias …            19.4            -99.1 cdc,c… 50c9509d-…
## 4 5938029860 Ardea herodias …            49.1           -123.  cdc,c… 50c9509d-…
## 5 5938029966 Ardea herodias …            34.4           -120.  cdc,c… 50c9509d-…
## 6 5938030094 Ardea herodias …            26.5            -80.2 cdc,c… 50c9509d-…
## # ℹ 90 more variables: publishingOrgKey <chr>, installationKey <chr>,
## #   hostingOrganizationKey <chr>, publishingCountry <chr>, protocol <chr>,
## #   lastCrawled <chr>, lastParsed <chr>, crawlId <int>, basisOfRecord <chr>,
## #   occurrenceStatus <chr>, lifeStage <chr>, taxonKey <int>, kingdomKey <int>,
## #   phylumKey <int>, classKey <int>, orderKey <int>, familyKey <int>,
## #   genusKey <int>, speciesKey <int>, acceptedTaxonKey <int>,
## #   acceptedScientificName <chr>, kingdom <chr>, phylum <chr>, order <chr>, …
heron_clean <- heron_df %>%
  dplyr::select(
    species,
    decimalLatitude,
    decimalLongitude,
    year,
    month,
    eventDate,
    countryCode,
    stateProvince
  ) %>%
  filter(
    !is.na(decimalLatitude),
    !is.na(decimalLongitude),
    !is.na(month),
    !is.na(year)
  )
heron_clean <- dplyr::rename(heron_clean, 
                             Latitude = decimalLatitude,
                             Longitude = decimalLongitude,
                             Year = year,
                             Month = month,
                             Date = eventDate,
                             Country_Code = countryCode,
                             State_Province = stateProvince,
                             Species = species)
head(heron_clean)
## # A tibble: 6 × 8
##   Species       Latitude Longitude  Year Month Date  Country_Code State_Province
##   <chr>            <dbl>     <dbl> <int> <int> <chr> <chr>        <chr>         
## 1 Ardea herodi…     39.4     -76.2  2026     1 2026… US           Maryland      
## 2 Ardea herodi…     30.7     -97.6  2026     1 2026… US           Texas         
## 3 Ardea herodi…     19.4     -99.1  2026     1 2026… MX           Distrito Fede…
## 4 Ardea herodi…     49.1    -123.   2026     1 2026… CA           British Colum…
## 5 Ardea herodi…     34.4    -120.   2026     1 2026… US           California    
## 6 Ardea herodi…     26.5     -80.2  2026     1 2026… US           Florida
heron_sf <- st_as_sf(
  heron_clean,
  coords = c("Longitude", "Latitude"),
  crs = 4326
)

heron_sf
## Simple feature collection with 500 features and 6 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -124.0907 ymin: 8.359034 xmax: -61.48649 ymax: 49.31423
## Geodetic CRS:  WGS 84
## # A tibble: 500 × 7
##    Species         Year Month Date                Country_Code State_Province  
##  * <chr>          <int> <int> <chr>               <chr>        <chr>           
##  1 Ardea herodias  2026     1 2026-01-03T13:08:10 US           Maryland        
##  2 Ardea herodias  2026     1 2026-01-03T07:44    US           Texas           
##  3 Ardea herodias  2026     1 2026-01-04T14:30:40 MX           Distrito Federal
##  4 Ardea herodias  2026     1 2026-01-01T15:04:36 CA           British Columbia
##  5 Ardea herodias  2026     1 2026-01-01T14:18:58 US           California      
##  6 Ardea herodias  2026     1 2026-01-04T22:04    US           Florida         
##  7 Ardea herodias  2026     1 2026-01-03T16:44    US           Tennessee       
##  8 Ardea herodias  2026     1 2026-01-03T18:13    US           Florida         
##  9 Ardea herodias  2026     1 2026-01-03T10:00    US           Colorado        
## 10 Ardea herodias  2026     1 2026-01-04T10:29:37 US           Pennsylvania    
## # ℹ 490 more rows
## # ℹ 1 more variable: geometry <POINT [°]>
coords <- heron_clean %>%
  dplyr::select(Longitude, Latitude)
clust <- dbscan::dbscan(coords, eps = 1.5, minPts = 20)
heron_clustered <- heron_clean %>%
  mutate(cluster = clust$cluster) %>%
  filter(cluster != 0)
largest_cluster_id <- heron_clustered %>%
  count(cluster, sort = TRUE) %>%
  slice(1) %>%
  pull(cluster)
largest_cluster <- heron_clustered %>%
  filter(cluster == largest_cluster_id)
cluster_center <- largest_cluster %>%
  summarise(
    lon = mean(Longitude, na.rm = TRUE),
    lat = mean(Latitude, na.rm = TRUE)
  )
cluster_center
## # A tibble: 1 × 2
##     lon   lat
##   <dbl> <dbl>
## 1 -81.2  27.2
head(largest_cluster)
## # A tibble: 6 × 9
##   Species       Latitude Longitude  Year Month Date  Country_Code State_Province
##   <chr>            <dbl>     <dbl> <int> <int> <chr> <chr>        <chr>         
## 1 Ardea herodi…     26.5     -80.2  2026     1 2026… US           Florida       
## 2 Ardea herodi…     27.0     -82.2  2026     1 2026… US           Florida       
## 3 Ardea herodi…     26.6     -80.3  2026     1 2026… US           Florida       
## 4 Ardea herodi…     25.8     -80.8  2026     1 2026… US           Florida       
## 5 Ardea herodi…     27.1     -82.4  2026     1 2026… US           Florida       
## 6 Ardea herodi…     29.6     -81.2  2026     1 2026… US           Florida       
## # ℹ 1 more variable: cluster <int>
xmin <- min(heron_clean$Longitude) - 3
xmax <- max(heron_clean$Longitude) + 3
ymin <- min(heron_clean$Latitude) - 3
ymax <- max(heron_clean$Latitude) + 3
region_ext <- terra::ext(xmin, xmax, ymin, ymax)
temp <- geodata::worldclim_global(var = "tavg", res = 10, path = tempdir())
temp_layer <- temp[[1]]
temp_crop <- terra::crop(temp_layer, region_ext)
temp_pal <- colorNumeric(
  palette = "magma",
  domain = values(temp_crop),
  na.color = "transparent"
)
elev <- geodata::elevation_global(res = 10, path = tempdir())
elev_crop <- terra::crop(elev, region_ext)
elev_pal <- colorNumeric(
  palette = "viridis",
  domain = terra::values(elev_crop),
  na.color = "transparent"
)
data("us.cities", package = "maps")
cities_df <- us.cities %>%
  dplyr::transmute(
    city = name,
    pop = pop,
    lon = long,
    lat = lat
  )
cities_df <- cities_df %>%
  mutate(
    distance = sqrt((lon - cluster_center$lon)^2 + (lat - cluster_center$lat)^2)
  )
cities_df <- cities_df %>%
  arrange(distance) %>%
  slice(1:25) %>%
  arrange(desc(pop)) %>%
  slice(1:3)
cat("
<style>
.leaflet-control-layers {
  font-size: 12px;
  width: 150px;
}

.leaflet-control.legend {
  font-size: 10px;
  width: 100px;
}
</style>
")
## 
## <style>
## .leaflet-control-layers {
##   font-size: 12px;
##   width: 150px;
## }
## 
## .leaflet-control.legend {
##   font-size: 10px;
##   width: 100px;
## }
## </style>
leaflet(heron_clean) %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~Longitude,
    lat = ~Latitude,
    radius = 2,
    color = "maroon",
    stroke = FALSE,
    fillOpacity = 0.4,
    group = "Ardea Herodias Observations",
    popup = ~paste("Great Blue Heron")
  ) %>%
  addCircleMarkers(
    data = largest_cluster,
    lng = ~Longitude,
    lat = ~Latitude,
    radius = 3,
    color = "maroon",
    stroke = TRUE,
    fillOpacity = 0.6,
    group = "Largest Cluster",
    popup = ~paste("Largest Observation Cluster")
  ) %>%
  addLabelOnlyMarkers(
    data = cities_df,
    lng = ~lon,
    lat = ~lat,
    label = ~city,
    group = "Nearby Cities",
    labelOptions = labelOptions(
      noHide = TRUE,
      direction = "top",
      textOnly = TRUE,
      style = list(
        "color" = "white",
        "font-size" = "10px"
      )
    )
  ) %>%
  addRasterImage(
    temp_crop,
    colors = temp_pal,
    opacity = 0.6,
    group = "Temperature"
  ) %>%
  addRasterImage(
    elev_crop,
    colors = elev_pal,
    opacity = 0.6,
    group = "Elevation"
  ) %>%
  addLegend(
    pal = temp_pal,
    values = values(temp_crop),
    title = "Temperature",
    position = "bottomright",
    group = "Temperature"
  ) %>%
  addLegend(
    pal = elev_pal,
    values = values(elev_crop),
    title = "Elevation",
    position = "bottomleft",
    group = "Elevation"
  ) %>%
  addLayersControl(
    overlayGroups = c("Temperature", "Elevation", "Ardea Herodias Observations", "Largest Cluster", "Nearby Cities"),
    options = layersControlOptions(collapsed = FALSE)
  )