1. Importar datos de aves de Colombia de eBird a R
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(rnaturalearth)
library(sf)
## Linking to GEOS 3.11.2, GDAL 3.7.2, PROJ 9.3.0; sf_use_s2() is TRUE
# GeoPackage file that will hold every spatial layer built in this script
gpkg_file <- "data/gis-data.gpkg"
# create the parent folder (and any missing ancestors); no warning is raised
# when the folder is already there
dir.create(path = dirname(gpkg_file), recursive = TRUE, showWarnings = FALSE)
# Land mask for the Americas, built from the Natural Earth 1:50m
# "admin_0_countries_lakes" layer (country polygons with lakes removed):
# keep the countries of North and South America, set the coordinate
# precision, and merge everything with st_union().
ne_land <- local({
  world <- ne_download(
    type = "admin_0_countries_lakes",
    category = "cultural",
    scale = 50,
    returnclass = "sf"
  )
  americas <- filter(
    world,
    CONTINENT %in% c("North America", "South America")
  )
  st_union(st_set_precision(americas, 1e6))
})
## Reading layer `ne_50m_admin_0_countries_lakes' from data source
## `C:\Users\pc.laboratorio.dz\AppData\Local\Temp\RtmpQbCqon\ne_50m_admin_0_countries_lakes.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 242 features and 168 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -180 ymin: -89.99893 xmax: 180 ymax: 83.59961
## Geodetic CRS: WGS 84
# Country polygons: same Natural Earth layer as the land mask, keeping only
# the ADMIN and ISO_A2 attributes (renamed to country and country_code)
ne_countries <- select(
  ne_download(
    scale = 50,
    category = "cultural",
    type = "admin_0_countries_lakes",
    returnclass = "sf"
  ),
  country = ADMIN,
  country_code = ISO_A2
)
## Reading layer `ne_50m_admin_0_countries_lakes' from data source
## `C:\Users\pc.laboratorio.dz\AppData\Local\Temp\RtmpQbCqon\ne_50m_admin_0_countries_lakes.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 242 features and 168 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -180 ymin: -89.99893 xmax: 180 ymax: 83.59961
## Geodetic CRS: WGS 84
# State polygons for the United States: download the first-order
# administrative subdivisions ...
ne_states <- ne_download(
  scale = 50,
  category = "cultural",
  type = "admin_1_states_provinces",
  returnclass = "sf"
)
# ... then keep the rows whose iso_a2 is "US" and only the name and
# iso_3166_2 attributes (renamed to state and state_code)
ne_states <- ne_states |>
  filter(iso_a2 == "US") |>
  select(state = name, state_code = iso_3166_2)
## Reading layer `ne_50m_admin_1_states_provinces' from data source
## `C:\Users\pc.laboratorio.dz\AppData\Local\Temp\RtmpQbCqon\ne_50m_admin_1_states_provinces.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 294 features and 121 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -180 ymin: -46.96289 xmax: 180 ymax: 83.11611
## Geodetic CRS: WGS 84
# International land boundary lines, downloaded for the whole world; only
# the geometry is kept. The lines are later restricted to those that
# intersect ne_land (North and South America) using st_intersects().
ne_country_lines <- st_geometry(
  ne_download(
    scale = 50,
    category = "cultural",
    type = "admin_0_boundary_lines_land",
    returnclass = "sf"
  )
)
## Reading layer `ne_50m_admin_0_boundary_lines_land' from data source
## `C:\Users\pc.laboratorio.dz\AppData\Local\Temp\RtmpQbCqon\ne_50m_admin_0_boundary_lines_land.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 390 features and 39 fields
## Geometry type: MULTILINESTRING
## Dimension: XY
## Bounding box: xmin: -141.0021 ymin: -55.114 xmax: 145.9402 ymax: 70.06482
## Geodetic CRS: WGS 84
# Flag the boundary lines that intersect the land mask. With sparse = FALSE
# st_intersects() returns a logical matrix, which as.logical() flattens to a
# plain vector (ne_land comes from st_union(), so presumably one column).
lines_on_land <- as.logical(
  st_intersects(ne_country_lines, ne_land, sparse = FALSE)
)
# keep only the flagged lines
ne_country_lines <- ne_country_lines[lines_on_land]
# State/province boundary lines for the United States and Canada.
# ADM0_A3 holds three-letter country codes; they are translated to the
# ISO 3166-1 alpha-2 codes ("US", "CA") so that country_code really is a
# two-letter code, like the ISO_A2-based country_code in ne_countries.
ne_state_lines <- ne_download(scale = 50, category = "cultural",
                              type = "admin_1_states_provinces_lines",
                              returnclass = "sf") |>
  filter(ADM0_A3 %in% c("USA", "CAN")) |>
  # Canada's alpha-2 code is "CA"; mapping CAN to "CAN" left the
  # three-letter code in place
  mutate(iso_a2 = recode(ADM0_A3, USA = "US", CAN = "CA")) |>
  select(country = ADM0_NAME, country_code = iso_a2)
## Reading layer `ne_50m_admin_1_states_provinces_lines' from data source
## `C:\Users\pc.laboratorio.dz\AppData\Local\Temp\RtmpQbCqon\ne_50m_admin_1_states_provinces_lines.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 581 features and 43 fields
## Geometry type: MULTILINESTRING
## Dimension: XY
## Bounding box: xmin: -139.0565 ymin: -38.0716 xmax: 174.4685 ymax: 78.68672
## Geodetic CRS: WGS 84
# Write every layer into the GeoPackage. Any previous copy of the file is
# deleted first so the layers are always written into a fresh file.
unlink(gpkg_file)
invisible(Map(
  function(layer_data, layer_name) write_sf(layer_data, gpkg_file, layer_name),
  list(ne_land, ne_countries, ne_states, ne_country_lines, ne_state_lines),
  c("ne_land", "ne_countries", "ne_states", "ne_country_lines",
    "ne_state_lines")
))
library(auk)
## auk 0.7.0 is designed for EBD files downloaded after 2023-10-25.
## No EBD data directory set, see ?auk_set_ebd_path to set EBD_PATH
## eBird taxonomy version: 2023
library(dplyr)
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(readr)
library(sf)
# Path to the eBird sampling event (checklist-level) file.
# NOTE(review): absolute, machine-specific path; confirm before running
# on another computer.
f_sed <- "L:/Usuarios/pc.laboratorio.dz/Desktop/project_mp/ebd_CO_smp_relJan-2024_sampling.txt"
# import the checklists and display the structure of the resulting table
checklists <- f_sed |> read_sampling()
checklists |> glimpse()
## Rows: 608,818
## Columns: 31
## $ checklist_id <chr> "S60360053", "S61357410", "S51579402", "S599…
## $ last_edited_date <chr> "2023-11-25 21:52:57.837266", "2023-11-25 21…
## $ country <chr> "Colombia", "Colombia", "Colombia", "Colombi…
## $ country_code <chr> "CO", "CO", "CO", "CO", "CO", "CO", "CO", "C…
## $ state <chr> "Antioquia", "Antioquia", "Antioquia", "Anti…
## $ state_code <chr> "CO-ANT", "CO-ANT", "CO-ANT", "CO-ANT", "CO-…
## $ county <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ county_code <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ iba_code <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ bcr_code <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ usfws_code <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ atlas_block <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ locality <chr> "Urbanización Atalaya de San Jorge, Envigado…
## $ locality_id <chr> "L6664911", "L6664911", "L6664911", "L666491…
## $ locality_type <chr> "P", "P", "P", "P", "P", "P", "P", "P", "P",…
## $ latitude <dbl> 6.161704, 6.161704, 6.161704, 6.161704, 6.16…
## $ longitude <dbl> -75.57647, -75.57647, -75.57647, -75.57647, …
## $ observation_date <date> 2019-10-05, 2019-11-11, 2019-01-12, 2019-09…
## $ time_observations_started <chr> "07:59:00", "07:29:00", "07:05:00", "07:42:0…
## $ observer_id <chr> "obs968709", "obs968709", "obs968709", "obs9…
## $ sampling_event_identifier <chr> "S60360053", "S61357410", "S51579402", "S599…
## $ protocol_type <chr> "Traveling", "Traveling", "Stationary", "Tra…
## $ protocol_code <chr> "P22", "P22", "P21", "P22", "P22", "P22", "P…
## $ project_code <chr> "EBIRD_COL", "EBIRD_COL", "EBIRD_COL", "EBIR…
## $ duration_minutes <int> 32, 39, 24, 45, 29, 27, 26, 36, 39, 19, 35, …
## $ effort_distance_km <dbl> 0.40, 0.54, NA, 0.50, 0.31, 0.44, NA, 0.33, …
## $ effort_area_ha <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ number_observers <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1,…
## $ all_species_reported <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TR…
## $ group_identifier <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ trip_comments <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
# Number of columns in the checklist table (second element of dim())
num_columnas <- dim(checklists)[2]
print(num_columnas)
## [1] 31
# Number of rows in the checklist table (first element of dim())
num_filas <- dim(checklists)[1]
print(num_filas)
## [1] 608818
# Keep only the checklists that report a travelled distance
checklists_con_distancia <- filter(checklists, !is.na(effort_distance_km))

# Histogram of the travelled distances.
# NOTE(review): binwidth is expressed in the units of effort_distance_km,
# so each bar spans 10 km; confirm this is not too coarse for the data.
histograma_distancias <- ggplot(
  data = checklists_con_distancia,
  mapping = aes(x = effort_distance_km)
) +
  geom_histogram(
    alpha = 0.7,
    color = "black",
    fill = "blue",
    binwidth = 10
  ) +
  labs(
    y = "Frecuencia",
    x = "Distancia Recorrida (km)",
    title = "Distribución de Distancias Recorridas"
  ) +
  theme_minimal()

# draw the histogram
print(histograma_distancias)

# Path to the eBird Basic Dataset (observation-level) file.
# NOTE(review): absolute, machine-specific path; confirm before running
# on another computer.
f_ebd <- "L:/Usuarios/pc.laboratorio.dz/Desktop/project_mp/ebd_CO_smp_relJan-2024.txt"
# import the observations
observations <- f_ebd |> read_ebd()
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
# display the structure of the observation table
observations |> glimpse()
## Rows: 5,085,161
## Columns: 48
## $ checklist_id <chr> "G10011281", "G10011281", "G10011281", "G100…
## $ global_unique_identifier <chr> "URN:CornellLabOfOrnithology:EBIRD:OBS168761…
## $ last_edited_date <chr> "2023-04-16 13:52:38.819029", "2023-04-16 13…
## $ taxonomic_order <dbl> 8293, 4329, 4334, 34709, 16581, 16603, 16615…
## $ category <chr> "species", "species", "species", "species", …
## $ taxon_concept_id <chr> "avibase-79F3C681", "avibase-51F7C361", "avi…
## $ common_name <chr> "Broad-winged Hawk", "Sparkling Violetear", …
## $ scientific_name <chr> "Buteo platypterus", "Colibri coruscans", "C…
## $ exotic_code <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ observation_count <chr> "2", "3", "1", "1", "1", "1", "1", "1", "1",…
## $ breeding_code <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ breeding_category <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ behavior_code <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ age_sex <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ country <chr> "Colombia", "Colombia", "Colombia", "Colombi…
## $ country_code <chr> "CO", "CO", "CO", "CO", "CO", "CO", "CO", "C…
## $ state <chr> "Distrito Capital de Bogotá", "Distrito Capi…
## $ state_code <chr> "CO-DC", "CO-DC", "CO-DC", "CO-DC", "CO-DC",…
## $ county <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ county_code <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ iba_code <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ bcr_code <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ usfws_code <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ atlas_block <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ locality <chr> "Jardin Botànico Josè Celestino Mutis", "Jar…
## $ locality_id <chr> "L8005942", "L8005942", "L8005942", "L800594…
## $ locality_type <chr> "H", "H", "H", "H", "H", "H", "H", "H", "H",…
## $ latitude <dbl> 4.667211, 4.667211, 4.667211, 4.667211, 4.66…
## $ longitude <dbl> -74.09980, -74.09980, -74.09980, -74.09980, …
## $ observation_date <date> 2018-12-04, 2018-12-04, 2018-12-04, 2018-12…
## $ time_observations_started <chr> "07:55:00", "07:55:00", "07:55:00", "07:55:0…
## $ observer_id <chr> "obsr738968,obsr738968,obsr675713,obsr675713…
## $ sampling_event_identifier <chr> "S133990738,S133990738,S50417460,S50417460",…
## $ protocol_type <chr> "Traveling", "Traveling", "Traveling", "Trav…
## $ protocol_code <chr> "P22", "P22", "P22", "P22", "P22", "P22", "P…
## $ project_code <chr> "EBIRD_COL", "EBIRD_COL", "EBIRD_COL", "EBIR…
## $ duration_minutes <int> 145, 145, 145, 145, 145, 145, 145, 145, 145,…
## $ effort_distance_km <dbl> 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5,…
## $ effort_area_ha <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ number_observers <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,…
## $ all_species_reported <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TR…
## $ group_identifier <chr> "G10011281", "G10011281", "G10011281", "G100…
## $ has_media <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FA…
## $ approved <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TR…
## $ reviewed <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FA…
## $ reason <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ trip_comments <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ species_comments <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
# Number of columns in the observation table (second element of dim())
num_columnas_obs <- dim(observations)[2]
print(num_columnas_obs)
## [1] 48
# Number of rows in the observation table (first element of dim())
num_filas_obs <- dim(observations)[1]
print(num_filas_obs)
## [1] 5085161
# Read the sampling file again, this time with unique = FALSE
checklists_shared <- read_sampling(f_sed, unique = FALSE)
# Show the shared checklists: the rows that carry a group identifier,
# ordered by that identifier, with only the two identifier columns
checklists_shared |>
  select(sampling_event_identifier, group_identifier) |>
  filter(!is.na(group_identifier)) |>
  arrange(group_identifier)
## # A tibble: 490,700 × 2
## sampling_event_identifier group_identifier
## <chr> <chr>
## 1 S145629701 G10000154
## 2 S134619855 G10000154
## 3 S134506890 G10000154
## 4 S134601555 G10000154
## 5 S134511374 G10000154
## 6 S133733369 G10000154
## 7 S134506894 G10000154
## 8 S134517060 G10000154
## 9 S134523767 G10000154
## 10 S155342137 G10000154
## # ℹ 490,690 more rows
# Run auk_unique() on the checklist-only table (checklists_only = TRUE)
checklists_unique <- checklists_shared |>
  auk_unique(checklists_only = TRUE)
# row count before auk_unique()
dim(checklists_shared)[1]
## [1] 925571
# row count after auk_unique()
dim(checklists_unique)[1]
## [1] 608818
# first checklist identifiers of the table returned by auk_unique()
head(checklists_unique[["checklist_id"]])
## [1] "S60360053" "S61357410" "S51579402" "S59971734" "S52070627" "S51579119"
# last checklist identifiers of the table returned by auk_unique()
tail(checklists_unique[["checklist_id"]])
## [1] "G7635775" "G7636828" "G7637078" "G7637309" "G7637526" "G7637523"
# Import one of the example datasets bundled with auk, without rolling up
# the taxonomy
obs_ex <- read_ebd(
  system.file("extdata/ebd-rollup-ex.txt", package = "auk"),
  rollup = FALSE
)
# apply the taxonomic rollup as a separate step
obs_ex_rollup <- obs_ex |> auk_rollup()
# Taxonomic categories present in each dataset; first the one imported
# without rollup
unique(obs_ex[["category"]])
## [1] "domestic" "form" "hybrid" "intergrade" "slash"
## [6] "spuh" "species" "issf"
# taxonomic categories present after the rollup
unique(obs_ex_rollup[["category"]])
## [1] "species"
# Without the rollup there are four Yellow-rumped Warbler observations
select(
  filter(obs_ex, common_name == "Yellow-rumped Warbler"),
  checklist_id,
  category,
  common_name,
  subspecies_common_name,
  observation_count
)
## # A tibble: 4 × 5
## checklist_id category common_name subspecies_common_name observation_count
## <chr> <chr> <chr> <chr> <chr>
## 1 S44943108 intergrade Yellow-rumpe… Yellow-rumped Warbler… 1
## 2 S129851825 species Yellow-rumpe… <NA> 1
## 3 S129851825 issf Yellow-rumpe… Yellow-rumped Warbler… 1
## 4 S129851825 issf Yellow-rumpe… Yellow-rumped Warbler… 2
# With the rollup those observations have been combined
select(
  filter(obs_ex_rollup, common_name == "Yellow-rumped Warbler"),
  checklist_id,
  category,
  common_name,
  observation_count
)
## # A tibble: 2 × 4
## checklist_id category common_name observation_count
## <chr> <chr> <chr> <chr>
## 1 S129851825 species Yellow-rumped Warbler 4
## 2 S44943108 species Yellow-rumped Warbler 1