Introduction

This pipeline computes species richness from iNaturalist observations at Las Opunitias across elevational bands derived from a local DEM. We also compute rarefied richness to compare bands at equal sampling effort and create several plots for comparison.

Download the data required here: https://drive.google.com/drive/folders/1dDc1h0E69Iqb0tS9ZVS_cbp5hATpSY9Q?usp=sharing

Save the two files to a folder on your local disk and then navigate to the folder in R.


1. Load packages

# ---- packages ----
# install.packages(c("tidyverse","sf","terra","vegan","purrr","dplyr","patchwork"))  # first time only
# Load required packages
library(tidyverse)
library(sf)
library(terra)
library(ggplot2)
library(patchwork)

2. Load / clean data and set parameters.

This section defines the key input files and analysis parameters used throughout the workflow.

infile is the iNaturalist observations CSV that contains coordinates, species names, and metadata.

dem_path is the digital elevation model (DEM) used to extract elevation values for each observation.

band_width_m sets the width (in meters) of each elevational band used to group records.

min_points_per_band specifies the minimum number of species observations required for a band to be included in the analysis.

These parameters can be adjusted as needed depending on the resolution of your DEM and the density of iNaturalist records.

#load data and set parameters
infile   <- "observations_below_250m.csv"
dem_path <- "DEM_no_sea3.tif"
band_width_m <- 50
min_points_per_band <- 1
raw <- readr::read_csv(infile, guess_max = 200000, show_col_types = FALSE)

Take the raw iNaturalist data, convert lat/long to numeric, treat empty species names as missing, remove any records missing coordinates or species, and save the result as obs.

#Clean up data
obs <- raw %>%
  mutate(
    latitude  = as.numeric(latitude),
    longitude = as.numeric(longitude),
    scientific_name = na_if(scientific_name, "")
  ) %>%
  filter(!is.na(latitude), !is.na(longitude), !is.na(scientific_name))

Load in the DEM

# ---- load DEM and extract elevation ----
dem <- terra::rast(dem_path)

if (is.na(terra::crs(dem))) {
  stop("DEM has no CRS defined. Please set the correct CRS on DEM_no_sea3.tif before proceeding.")
}

# Make SpatVector of points in WGS84
pts <- terra::vect(
  obs[, c("longitude","latitude")],
  geom = c("longitude","latitude"),
  crs = "EPSG:4326"
)

# Reproject points to DEM CRS if needed
if (!terra::same.crs(pts, dem)) {
  pts_dem_crs <- terra::project(pts, terra::crs(dem))
} else {
  pts_dem_crs <- pts
}

3. Map the data

R can work as GIS, so let’s plot these iNaturalist data on a map of the island to check they look OK.

# ---- map: DEM + raw iNat observation points (no elevation data yet) ----

# Make sure DEM has a CRS
if (is.na(terra::crs(dem))) {
  stop("DEM has no CRS defined. Please set a valid CRS before mapping.")
}

# Build hillshade for better terrain visualization
slope  <- terra::terrain(dem, v = "slope",  unit = "radians")
aspect <- terra::terrain(dem, v = "aspect", unit = "radians")
hill   <- terra::shade(slope, aspect, angle = 45, direction = 315)

# Define DEM color palette
dem_pal <- hcl.colors(50, "Terrain")

# Plot DEM with hillshade background
par(mar = c(3, 3, 2, 5) + 0.1)
plot(hill, col = gray.colors(100, start = 0, end = 1), legend = FALSE,
     axes = FALSE, box = FALSE, main = "DEM with iNaturalist observation points")
plot(dem, col = dem_pal, alpha = 0.55, legend = TRUE, add = TRUE)

# Overlay observation points (simple black or blue dots)
points(pts_dem_crs, pch = 21, bg = "steelblue", col = "black", cex = 0.6)

# Optional legend
legend("right", inset = 0.02, legend = "iNaturalist records",
       pt.bg = "steelblue", pch = 21, pt.cex = 1.2,
       bg = "white", bty = "o", cex = 0.9)

4. Get elevations for observations and assign to bands.

For each iNaturalist observation, find the elevation by intersecting its coordinates with the DEM.

# ----  extract elevation ----

# Extract elevations; returns a data.frame with "ID" and the DEM values
ext_df <- terra::extract(dem, pts_dem_crs)

# Find the value column (everything except "ID")
val_col <- setdiff(names(ext_df), "ID")
if (length(val_col) != 1) stop("Unexpected extract() output; couldn't identify elevation column.")

obs$elevation_m <- ext_df[[val_col]]

# Drop NAs (outside DEM extent or nodata)
obs <- filter(obs, !is.na(elevation_m))

Now assign observations to elevational bands.

# build elevational bands ----
rng <- range(obs$elevation_m, na.rm = TRUE)
breaks <- seq(
  floor(rng[1] / band_width_m) * band_width_m,
  ceiling(rng[2] / band_width_m) * band_width_m,
  by = band_width_m
)

obs <- obs %>%
  mutate(elev_band = cut(
    elevation_m,
    breaks = breaks,
    right = FALSE,
    include.lowest = TRUE,
    ordered_result = TRUE
  ))

5. Calculate species richness in bands.

Now we all our iNat observations assigned to elevational bands, we can start to do more interesting things. Let’s start by calculating species richness in each band.

# ---- species richness per band ----
rich <- obs %>%
  group_by(elev_band) %>%
  summarise(
    richness = n_distinct(scientific_name),
    n_records = n(),
    .groups = "drop"
  ) %>%
  filter(richness >= min_points_per_band)

# Preserve band order for plotting
rich$elev_band <- factor(rich$elev_band, levels = levels(obs$elev_band))

Plot the resulting data

p <- ggplot(rich, aes(x = elev_band, y = richness)) +
  geom_col(fill = "steelblue", alpha = 0.8) +
  labs(
    x = sprintf("Elevation band (%d m)", band_width_m),
    y = "Species richness (unique scientific_name)",
    title = "iNaturalist species richness by elevational band"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title.y = element_text(color = "steelblue"),
    plot.title = element_text(face = "bold")
  )

print(p)

6. Including sampling effort

This next code block plots the number of observations in each band next to the species richness

# Build long table for paired bars (create rich_long first)
rich_long <- rich %>%
  dplyr::select(elev_band, richness, n_records) %>%
  tidyr::pivot_longer(c(richness, n_records),
                      names_to = "metric", values_to = "value") %>%
  dplyr::mutate(metric = factor(metric, levels = c("richness", "n_records")))

# Plot richness (blue) vs abundance (red)
ggplot(rich_long, aes(x = elev_band, y = value, fill = metric)) +
  geom_col(position = position_dodge(preserve = "single")) +
  scale_fill_manual(
    name = "",
    values = c(richness = "steelblue", n_records = "darkred"),
    labels = c(richness = "Richness", n_records = "Observations")
  ) +
  labs(
    x = sprintf("Elevation band (%d m)", band_width_m),
    y = "Count",
    title = "Richness (blue) and abundance (red) by elevation"
  ) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

7. Rarefaction

The term rarefaction comes from the word rarefy, meaning “to make less dense” or “to thin out.” In the context of ecology and biodiversity analysis, rarefaction literally means “making a sample less dense” by subsampling — drawing fewer individuals or samples from a dataset to standardize effort.

Here, we will subsampling each elevational band to the sample size of the band with the fewest observations, so that we can compare richness across the transect.

# Find smallest sample size across elevation bands
target_n <- min(rich$n_records, na.rm = TRUE)
cat("Rarefying all bands to", target_n, "records per band.\n")

# Compute rarefied richness per band
rarefied_vals <- map_dbl(
  split(obs, obs$elev_band),
  function(df) {
    counts <- as.numeric(table(df$scientific_name))
    if (sum(counts) >= target_n && length(counts) > 1) {
      vegan::rarefy(counts, sample = target_n)
    } else {
      NA_real_
    }
  }
)

# Add rarefied values to the richness table
rich <- rich %>%
  mutate(rarefied = rarefied_vals)

# Observed richness plot
p_obs <- ggplot(rich, aes(x = elev_band, y = richness)) +
  geom_col(fill = "steelblue", alpha = 0.8) +
  labs(
    x = sprintf("Elevation band (%d m)", band_width_m),
    y = "Species richness",
    title = "Observed richness by elevation"
  ) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Rarefied richness plot
p_rare <- ggplot(rich, aes(x = elev_band, y = rarefied)) +
  geom_col(fill = "steelblue", alpha = 0.8, na.rm = TRUE) +
  labs(
    x = sprintf("Elevation band (%d m)", band_width_m),
    y = sprintf("Rarefied richness (n = %d records per band)", target_n),
    title = "Rarefied richness by elevation"
  ) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Combine side by side

p_obs + p_rare