Data retrieval

Tropicos

Specimen records were downloaded from TROPICOS by querying each country in the location field. For each accepted name, associated synonyms were retrieved using the tp_synonyms() function from the taxize R package.

Code

Load libraries:

library(readxl)
library(dplyr)
library(tidyr)
library(purrr)
library(taxize)

Load source data and retrieve synonyms (French Guiana):

## ----------------------------
## French Guiana
## ----------------------------

# Specimen search results exported from TROPICOS for French Guiana.
FRG <- read.csv(
  "../Data/TROPICOS/Specimen Search Results 20260127143117_French_Guiana.csv",
  sep = ",", fileEncoding = "Latin1"
)

# Unique TROPICOS name ids, kept in order of first appearance.
# The order matters: later in this file the position of each id is used as
# the key (acc_id) that links it to the retrieved synonym list.
ids <- FRG$Scientific.Name.Id
pp <- !duplicated(ids)
tropicos_id <- ids[pp]

# Synonym retrieval -- kept commented out to avoid querying the API on
# every run; the result is cached as an RDS file instead.
# The API key is read from the TROPICOS_KEY environment variable rather than
# being written into the script.
# NOTE(review): a key was previously hard-coded here; treat it as exposed
# and rotate it.
# The cache is written to the same path that readRDS() uses further down.
#
# info_names_acc <- lapply(
#   tropicos_id,
#   function(id) tp_synonyms(id, key = Sys.getenv("TROPICOS_KEY"))
# )
# saveRDS(info_names_acc, file = "../Data/TROPICOS/FRG_acc_names.RData")

Load source data and retrieve synonyms (Guyana):

## ----------------------------
## Guyana
## ----------------------------

# Specimen search results exported from TROPICOS for Guyana.
GUY <- read.csv(
  "../Data/TROPICOS/Specimen Search Results 20260127143117_Guyana.csv",
  sep = ",", fileEncoding = "Latin1"
)

# Unique TROPICOS name ids, kept in order of first appearance.
# The order matters: later in this file the position of each id is used as
# the key (acc_id) that links it to the retrieved synonym list.
GUY_pp <- !duplicated(GUY$Scientific.Name.Id)
tropicos_id_GUY <- GUY$Scientific.Name.Id[GUY_pp]

# Synonym retrieval -- kept commented out to avoid querying the API on
# every run; the result is cached as an RDS file instead.
# The API key is read from the TROPICOS_KEY environment variable rather than
# being written into the script.
# NOTE(review): a key was previously hard-coded here; treat it as exposed
# and rotate it.
# The cache is written to the same path that readRDS() uses further down.
#
# info_names_acc_GUY <- lapply(
#   tropicos_id_GUY,
#   function(id) tp_synonyms(id, key = Sys.getenv("TROPICOS_KEY"))
# )
# saveRDS(info_names_acc_GUY, file = "../Data/TROPICOS/GUY_acc_names.RData")

Load source data and retrieve synonyms (Suriname):

## ----------------------------
## Suriname
## ----------------------------

# Specimen search results exported from TROPICOS for Suriname.
SUR <- read.csv(
  "../Data/TROPICOS/Specimen Search Results 20260127143117_Suriname.csv",
  sep = ",", fileEncoding = "Latin1"
)

# Unique TROPICOS name ids, kept in order of first appearance.
# The order matters: later in this file the position of each id is used as
# the key (acc_id) that links it to the retrieved synonym list.
SUR_pp <- !duplicated(SUR$Scientific.Name.Id)
tropicos_id_SUR <- SUR$Scientific.Name.Id[SUR_pp]

# Synonym retrieval -- kept commented out to avoid querying the API on
# every run; the result is cached as an RDS file instead.
# The API key is read from the TROPICOS_KEY environment variable rather than
# being written into the script.
# NOTE(review): a key was previously hard-coded here; treat it as exposed
# and rotate it.
# The cache is written to the same path that readRDS() uses further down.
#
# info_names_acc_SUR <- lapply(
#   tropicos_id_SUR,
#   function(id) tp_synonyms(id, key = Sys.getenv("TROPICOS_KEY"))
# )
# saveRDS(info_names_acc_SUR, file = "../Data/TROPICOS/SUR_acc_names.RData")

Read saved files:

# Cached synonym lists, one per country. The files are RDS objects
# (despite the .RData extension), so they are restored with readRDS().
FRG_tropicos <- readRDS(file = "../Data/TROPICOS/FRG_acc_names.RData")
GUY_tropicos <- readRDS(file = "../Data/TROPICOS/GUY_acc_names.RData")
SUR_tropicos <- readRDS(file = "../Data/TROPICOS/SUR_acc_names.RData")

Now, we need to unnest each file:

# Flatten a list of cached synonym lookups into a single tibble.
#
# tropicos_list: a list with one element per queried name id. Each element
#   is expected to hold two data frames, `accepted` and `synonyms`, each
#   with a `nameid` column (assumed to be the structure returned by
#   tp_synonyms() -- confirm against the cached files).
#
# Returns a tibble with one row per combination of accepted-name row and
# synonym row within each element, containing:
#   - acc_id: position of the element in `tropicos_list`, preserved so the
#     rows can be linked back to the id that was queried;
#   - the accepted-name fields, named `accepted_acc<field>`;
#   - the synonym fields, named `synonyms_syn<field>`.
unnest_tropicos_synonyms <- function(tropicos_list) {
  # nameid does not have the same type in every element; coerce it to
  # character so the per-element data frames can be combined.
  nameid_to_chr <- function(df) mutate(df, nameid = as.character(nameid))

  tibble(entry = tropicos_list) %>%
    mutate(acc_id = row_number()) %>%      # preserve association
    # widen top-level list
    unnest_wider(entry) %>%                # creates: accepted, synonyms
    # --- FIX TYPE INCONSISTENCIES ---
    mutate(
      accepted = map(accepted, nameid_to_chr),
      synonyms = map(synonyms, nameid_to_chr)
    ) %>%
    # unnest accepted
    unnest_longer(accepted) %>%
    unnest_wider(accepted, names_sep = "_acc") %>%
    # unnest synonyms
    unnest_longer(synonyms) %>%
    unnest_wider(synonyms, names_sep = "_syn")
}

## FRENCH GUIANA ##
FRG_acc_syn <- unnest_tropicos_synonyms(FRG_tropicos)

## GUYANA ##
GUY_acc_syn <- unnest_tropicos_synonyms(GUY_tropicos)

## SURINAME ##
SUR_acc_syn <- unnest_tropicos_synonyms(SUR_tropicos)

The next step is to build an index, 1:length(tropicos_id), and use it to join the queried scientific name ids with the unnested synonym data. This is necessary because the list retrieved from Tropicos does not preserve the scientific name id of names without synonyms.

# Lookup tables pairing each position in the retrieved synonym list (acc_id)
# with the TROPICOS name id that was queried at that position (tp_id).
#
# The pairing is purely positional, so it is only valid when the cached list
# has exactly one element per unique id. Stop here if the lengths differ
# (e.g. the cache was built from a different specimen download) instead of
# silently attaching the wrong ids.
stopifnot(
  length(FRG_tropicos) == length(tropicos_id),
  length(GUY_tropicos) == length(tropicos_id_GUY),
  length(SUR_tropicos) == length(tropicos_id_SUR)
)

FRG_id <- data.frame(
  acc_id = seq_along(tropicos_id),
  tp_id = tropicos_id
)

GUY_id <- data.frame(
  acc_id = seq_along(tropicos_id_GUY),
  tp_id = tropicos_id_GUY
)

SUR_id <- data.frame(
  acc_id = seq_along(tropicos_id_SUR),
  tp_id = tropicos_id_SUR
)

# Attach the queried TROPICOS name id (tp_id) to every unnested row.
# The id table is the left-hand side of each join, so every queried id is
# kept, including names without synonyms.
FRG_acc_syn2 <- left_join(FRG_id, FRG_acc_syn, by = "acc_id")
GUY_acc_syn2 <- left_join(GUY_id, GUY_acc_syn, by = "acc_id")
SUR_acc_syn2 <- left_join(SUR_id, SUR_acc_syn, by = "acc_id")