# 1. Importar datos de aves de Colombia de eBird a R

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(rnaturalearth)
library(sf)
## Linking to GEOS 3.11.2, GDAL 3.7.2, PROJ 9.3.0; sf_use_s2() is TRUE
# GeoPackage that will receive the spatial layers; create its folder if needed
gpkg_file <- "data/gis-data.gpkg"
gpkg_file |>
  dirname() |>
  dir.create(recursive = TRUE, showWarnings = FALSE)

# Political boundaries ----
# Land area: country polygons (lakes removed) for North and South America,
# merged with st_union()
ne_land <- ne_download(
  scale = 50,
  category = "cultural",
  type = "admin_0_countries_lakes",
  returnclass = "sf"
)
ne_land <- filter(ne_land, CONTINENT %in% c("North America", "South America"))
# set the coordinate precision before computing the union
ne_land <- st_union(st_set_precision(ne_land, 1e6))
## Reading layer `ne_50m_admin_0_countries_lakes' from data source 
##   `C:\Users\pc.laboratorio.dz\AppData\Local\Temp\RtmpQbCqon\ne_50m_admin_0_countries_lakes.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 242 features and 168 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -89.99893 xmax: 180 ymax: 83.59961
## Geodetic CRS:  WGS 84
# Country polygons, keeping only the country name and its ISO_A2 code
# NOTE(review): this is the same Natural Earth layer already downloaded for
# `ne_land`; the download could probably be shared — confirm before changing
ne_countries <- ne_download(
  scale = 50,
  category = "cultural",
  type = "admin_0_countries_lakes",
  returnclass = "sf"
)
ne_countries <- select(ne_countries, country = ADMIN, country_code = ISO_A2)
## Reading layer `ne_50m_admin_0_countries_lakes' from data source 
##   `C:\Users\pc.laboratorio.dz\AppData\Local\Temp\RtmpQbCqon\ne_50m_admin_0_countries_lakes.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 242 features and 168 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -89.99893 xmax: 180 ymax: 83.59961
## Geodetic CRS:  WGS 84
# State polygons, restricted to rows whose iso_a2 is "US"
ne_states <- ne_download(
  scale = 50,
  category = "cultural",
  type = "admin_1_states_provinces",
  returnclass = "sf"
)
ne_states <- filter(ne_states, iso_a2 == "US")
# keep only the state name and its ISO 3166-2 code
ne_states <- select(ne_states, state = name, state_code = iso_3166_2)
## Reading layer `ne_50m_admin_1_states_provinces' from data source 
##   `C:\Users\pc.laboratorio.dz\AppData\Local\Temp\RtmpQbCqon\ne_50m_admin_1_states_provinces.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 294 features and 121 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -46.96289 xmax: 180 ymax: 83.11611
## Geodetic CRS:  WGS 84
# Country border lines ----
# Downloaded for the whole world and reduced to bare geometries; the lines are
# later restricted to the `ne_land` area using st_intersects()
ne_country_lines <- st_geometry(
  ne_download(
    scale = 50,
    category = "cultural",
    type = "admin_0_boundary_lines_land",
    returnclass = "sf"
  )
)
## Reading layer `ne_50m_admin_0_boundary_lines_land' from data source 
##   `C:\Users\pc.laboratorio.dz\AppData\Local\Temp\RtmpQbCqon\ne_50m_admin_0_boundary_lines_land.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 390 features and 39 fields
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: -141.0021 ymin: -55.114 xmax: 145.9402 ymax: 70.06482
## Geodetic CRS:  WGS 84
# Flag the border lines that intersect `ne_land`, then keep only those
lines_on_land <- as.logical(
  st_intersects(ne_country_lines, ne_land, sparse = FALSE)
)
ne_country_lines <- ne_country_lines[lines_on_land]
# State/province border lines for the United States and Canada
ne_state_lines <- ne_download(scale = 50, category = "cultural",
                              type = "admin_1_states_provinces_lines",
                              returnclass = "sf") |>
  filter(ADM0_A3 %in% c("USA", "CAN")) |>
  # translate the 3-letter ADM0_A3 code into the 2-letter ISO code stored in
  # `country_code`; Canada's alpha-2 code is "CA" (the previous "CAN" left a
  # 3-letter code in a 2-letter column)
  mutate(iso_a2 = recode(ADM0_A3, USA = "US", CAN = "CA")) |>
  select(country = ADM0_NAME, country_code = iso_a2)
## Reading layer `ne_50m_admin_1_states_provinces_lines' from data source 
##   `C:\Users\pc.laboratorio.dz\AppData\Local\Temp\RtmpQbCqon\ne_50m_admin_1_states_provinces_lines.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 581 features and 43 fields
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: -139.0565 ymin: -38.0716 xmax: 174.4685 ymax: 78.68672
## Geodetic CRS:  WGS 84
# Export every layer to the GeoPackage ----
# remove any existing file first, then write each object under its own name
unlink(gpkg_file)
gpkg_layers <- list(
  ne_land = ne_land,
  ne_countries = ne_countries,
  ne_states = ne_states,
  ne_country_lines = ne_country_lines,
  ne_state_lines = ne_state_lines
)
for (layer_name in names(gpkg_layers)) {
  write_sf(gpkg_layers[[layer_name]], gpkg_file, layer_name)
}
library(auk)
## auk 0.7.0 is designed for EBD files downloaded after 2023-10-25. 
## No EBD data directory set, see ?auk_set_ebd_path to set EBD_PATH 
## eBird taxonomy version:  2023
library(dplyr)
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(readr)
library(sf)

# Read the eBird sampling (checklist-level) file and preview its columns
# NOTE(review): absolute, machine-specific path — adjust before re-running
f_sed <- "L:/Usuarios/pc.laboratorio.dz/Desktop/project_mp/ebd_CO_smp_relJan-2024_sampling.txt"
checklists <- f_sed |>
  read_sampling()
checklists |>
  glimpse()
## Rows: 608,818
## Columns: 31
## $ checklist_id              <chr> "S60360053", "S61357410", "S51579402", "S599…
## $ last_edited_date          <chr> "2023-11-25 21:52:57.837266", "2023-11-25 21…
## $ country                   <chr> "Colombia", "Colombia", "Colombia", "Colombi…
## $ country_code              <chr> "CO", "CO", "CO", "CO", "CO", "CO", "CO", "C…
## $ state                     <chr> "Antioquia", "Antioquia", "Antioquia", "Anti…
## $ state_code                <chr> "CO-ANT", "CO-ANT", "CO-ANT", "CO-ANT", "CO-…
## $ county                    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ county_code               <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ iba_code                  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ bcr_code                  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ usfws_code                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ atlas_block               <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ locality                  <chr> "Urbanización Atalaya de San Jorge, Envigado…
## $ locality_id               <chr> "L6664911", "L6664911", "L6664911", "L666491…
## $ locality_type             <chr> "P", "P", "P", "P", "P", "P", "P", "P", "P",…
## $ latitude                  <dbl> 6.161704, 6.161704, 6.161704, 6.161704, 6.16…
## $ longitude                 <dbl> -75.57647, -75.57647, -75.57647, -75.57647, …
## $ observation_date          <date> 2019-10-05, 2019-11-11, 2019-01-12, 2019-09…
## $ time_observations_started <chr> "07:59:00", "07:29:00", "07:05:00", "07:42:0…
## $ observer_id               <chr> "obs968709", "obs968709", "obs968709", "obs9…
## $ sampling_event_identifier <chr> "S60360053", "S61357410", "S51579402", "S599…
## $ protocol_type             <chr> "Traveling", "Traveling", "Stationary", "Tra…
## $ protocol_code             <chr> "P22", "P22", "P21", "P22", "P22", "P22", "P…
## $ project_code              <chr> "EBIRD_COL", "EBIRD_COL", "EBIRD_COL", "EBIR…
## $ duration_minutes          <int> 32, 39, 24, 45, 29, 27, 26, 36, 39, 19, 35, …
## $ effort_distance_km        <dbl> 0.40, 0.54, NA, 0.50, 0.31, 0.44, NA, 0.33, …
## $ effort_area_ha            <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ number_observers          <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1,…
## $ all_species_reported      <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TR…
## $ group_identifier          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ trip_comments             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
# Number of columns in the checklist table
num_columnas <- dim(checklists)[2L]
print(num_columnas)
## [1] 31
# Number of rows in the checklist table
num_filas <- dim(checklists)[1L]
print(num_filas)
## [1] 608818
# Keep only the checklists that report a travelled distance
checklists_con_distancia <- filter(checklists, !is.na(effort_distance_km))

# Histogram of travelled distance, using 10 km wide bins
histograma_distancias <- ggplot(
  data = checklists_con_distancia,
  mapping = aes(x = effort_distance_km)
) +
  geom_histogram(
    binwidth = 10,
    alpha = 0.7,
    color = "black",
    fill = "blue"
  ) +
  labs(
    title = "Distribución de Distancias Recorridas",
    x = "Distancia Recorrida (km)",
    y = "Frecuencia"
  ) +
  theme_minimal()

# Render the plot
print(histograma_distancias)

# Read the eBird observation-level file
# NOTE(review): absolute, machine-specific path — adjust before re-running
f_ebd <- "L:/Usuarios/pc.laboratorio.dz/Desktop/project_mp/ebd_CO_smp_relJan-2024.txt"
observations <- f_ebd |>
  read_ebd()
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
# Column-wise preview of the imported observations
observations |>
  glimpse()
## Rows: 5,085,161
## Columns: 48
## $ checklist_id              <chr> "G10011281", "G10011281", "G10011281", "G100…
## $ global_unique_identifier  <chr> "URN:CornellLabOfOrnithology:EBIRD:OBS168761…
## $ last_edited_date          <chr> "2023-04-16 13:52:38.819029", "2023-04-16 13…
## $ taxonomic_order           <dbl> 8293, 4329, 4334, 34709, 16581, 16603, 16615…
## $ category                  <chr> "species", "species", "species", "species", …
## $ taxon_concept_id          <chr> "avibase-79F3C681", "avibase-51F7C361", "avi…
## $ common_name               <chr> "Broad-winged Hawk", "Sparkling Violetear", …
## $ scientific_name           <chr> "Buteo platypterus", "Colibri coruscans", "C…
## $ exotic_code               <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ observation_count         <chr> "2", "3", "1", "1", "1", "1", "1", "1", "1",…
## $ breeding_code             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ breeding_category         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ behavior_code             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ age_sex                   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ country                   <chr> "Colombia", "Colombia", "Colombia", "Colombi…
## $ country_code              <chr> "CO", "CO", "CO", "CO", "CO", "CO", "CO", "C…
## $ state                     <chr> "Distrito Capital de Bogotá", "Distrito Capi…
## $ state_code                <chr> "CO-DC", "CO-DC", "CO-DC", "CO-DC", "CO-DC",…
## $ county                    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ county_code               <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ iba_code                  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ bcr_code                  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ usfws_code                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ atlas_block               <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ locality                  <chr> "Jardin Botànico Josè Celestino Mutis", "Jar…
## $ locality_id               <chr> "L8005942", "L8005942", "L8005942", "L800594…
## $ locality_type             <chr> "H", "H", "H", "H", "H", "H", "H", "H", "H",…
## $ latitude                  <dbl> 4.667211, 4.667211, 4.667211, 4.667211, 4.66…
## $ longitude                 <dbl> -74.09980, -74.09980, -74.09980, -74.09980, …
## $ observation_date          <date> 2018-12-04, 2018-12-04, 2018-12-04, 2018-12…
## $ time_observations_started <chr> "07:55:00", "07:55:00", "07:55:00", "07:55:0…
## $ observer_id               <chr> "obsr738968,obsr738968,obsr675713,obsr675713…
## $ sampling_event_identifier <chr> "S133990738,S133990738,S50417460,S50417460",…
## $ protocol_type             <chr> "Traveling", "Traveling", "Traveling", "Trav…
## $ protocol_code             <chr> "P22", "P22", "P22", "P22", "P22", "P22", "P…
## $ project_code              <chr> "EBIRD_COL", "EBIRD_COL", "EBIRD_COL", "EBIR…
## $ duration_minutes          <int> 145, 145, 145, 145, 145, 145, 145, 145, 145,…
## $ effort_distance_km        <dbl> 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5,…
## $ effort_area_ha            <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ number_observers          <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,…
## $ all_species_reported      <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TR…
## $ group_identifier          <chr> "G10011281", "G10011281", "G10011281", "G100…
## $ has_media                 <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FA…
## $ approved                  <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TR…
## $ reviewed                  <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FA…
## $ reason                    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ trip_comments             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ species_comments          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
# Number of columns in the observation table
num_columnas_obs <- dim(observations)[2L]
print(num_columnas_obs)
## [1] 48
# Number of rows in the observation table
num_filas_obs <- dim(observations)[1L]
print(num_filas_obs)
## [1] 5085161
# Re-read the sampling file with unique = FALSE
checklists_shared <- read_sampling(f_sed, unique = FALSE)
# List the checklists that carry a group identifier, ordered by that group
checklists_shared |>
  select(sampling_event_identifier, group_identifier) |>
  filter(!is.na(group_identifier)) |>
  arrange(group_identifier)
## # A tibble: 490,700 × 2
##    sampling_event_identifier group_identifier
##    <chr>                     <chr>           
##  1 S145629701                G10000154       
##  2 S134619855                G10000154       
##  3 S134506890                G10000154       
##  4 S134601555                G10000154       
##  5 S134511374                G10000154       
##  6 S133733369                G10000154       
##  7 S134506894                G10000154       
##  8 S134517060                G10000154       
##  9 S134523767                G10000154       
## 10 S155342137                G10000154       
## # ℹ 490,690 more rows
# Run auk_unique() on the shared checklists (checklist-level data only)
checklists_unique <- checklists_shared |>
  auk_unique(checklists_only = TRUE)
# Row counts before and after
dim(checklists_shared)[1L]
## [1] 925571
dim(checklists_unique)[1L]
## [1] 608818
# First and last checklist identifiers of the result
head(checklists_unique[["checklist_id"]])
## [1] "S60360053" "S61357410" "S51579402" "S59971734" "S52070627" "S51579119"
tail(checklists_unique[["checklist_id"]])
## [1] "G7635775" "G7636828" "G7637078" "G7637309" "G7637526" "G7637523"
# Load one of auk's bundled example datasets without rolling up the taxonomy
obs_ex <- read_ebd(
  system.file("extdata/ebd-rollup-ex.txt", package = "auk"),
  rollup = FALSE
)
# Apply the taxonomic rollup as a separate step
obs_ex_rollup <- obs_ex |>
  auk_rollup()

# Taxonomic categories present before and after the rollup
unique(obs_ex[["category"]])
## [1] "domestic"   "form"       "hybrid"     "intergrade" "slash"     
## [6] "spuh"       "species"    "issf"
unique(obs_ex_rollup[["category"]])
## [1] "species"
# Without the rollup: Yellow-rumped Warbler rows as recorded
obs_ex |>
  select(
    checklist_id,
    category,
    common_name,
    subspecies_common_name,
    observation_count
  ) |>
  filter(common_name == "Yellow-rumped Warbler")
## # A tibble: 4 × 5
##   checklist_id category   common_name   subspecies_common_name observation_count
##   <chr>        <chr>      <chr>         <chr>                  <chr>            
## 1 S44943108    intergrade Yellow-rumpe… Yellow-rumped Warbler… 1                
## 2 S129851825   species    Yellow-rumpe… <NA>                   1                
## 3 S129851825   issf       Yellow-rumpe… Yellow-rumped Warbler… 1                
## 4 S129851825   issf       Yellow-rumpe… Yellow-rumped Warbler… 2
# With the rollup: the same species after the records have been combined
obs_ex_rollup |>
  select(checklist_id, category, common_name, observation_count) |>
  filter(common_name == "Yellow-rumped Warbler")
## # A tibble: 2 × 4
##   checklist_id category common_name           observation_count
##   <chr>        <chr>    <chr>                 <chr>            
## 1 S129851825   species  Yellow-rumped Warbler 4                
## 2 S44943108    species  Yellow-rumped Warbler 1