library(readxl)
library(dplyr)
library(tidyr)
library(purrr)
library(taxize)

# Data retrieval
# Tropicos
# Specimen records were downloaded from TROPICOS by querying each country in
# the location field. For each accepted name, the associated synonyms were
# retrieved using the tp_synonyms() function from the taxize R package.
# Code
# Load libraries:
# Load source data and retrieve synonyms (French Guiana):
## ----------------------------
## French Guiana
## ----------------------------
# TROPICOS specimen search results for French Guiana.
FRG <- read.csv(
  file = "../Data/TROPICOS/Specimen Search Results 20260127143117_French_Guiana.csv",
  sep = ",",
  fileEncoding = "Latin1"
)

# Scientific-name IDs, one per specimen record (repeats included).
ids <- FRG$Scientific.Name.Id
# TRUE at the first occurrence of each ID, FALSE at every later repeat.
pp <- !duplicated(ids)
# De-duplicated IDs, in order of first appearance in the CSV.
tropicos_id <- FRG[pp, "Scientific.Name.Id"]

# Synonym retrieval: one tp_synonyms() call per unique ID. Kept commented
# out to avoid re-running it. The API key is read from the TROPICOS_KEY
# environment variable rather than being written into the script.
# info_names_acc <- list()
# ll_acc <- list()
# for (i in seq_along(tropicos_id)) {
#   info_names_acc[[i]] <- tp_synonyms(tropicos_id[[i]],
#                                      key = Sys.getenv("TROPICOS_KEY"))
# }
# NOTE(review): this saves into the working directory, while the file is
# read back from ../Data/TROPICOS/ later in the script; confirm the path.
# saveRDS(info_names_acc, file = "FRG_acc_names.RData")

# Load source data and retrieve synonyms (Guyana):
## ----------------------------
## Guyana
## ----------------------------
# TROPICOS specimen search results for Guyana.
GUY <- read.csv(
  file = "../Data/TROPICOS/Specimen Search Results 20260127143117_Guyana.csv",
  sep = ",",
  fileEncoding = "Latin1"
)

# TRUE at the first occurrence of each scientific-name ID.
GUY_pp <- !duplicated(GUY$Scientific.Name.Id)
# De-duplicated IDs, in order of first appearance in the CSV.
tropicos_id_GUY <- GUY[GUY_pp, "Scientific.Name.Id"]

# Synonym retrieval: one tp_synonyms() call per unique ID. Kept commented
# out to avoid re-running it. The API key is read from the TROPICOS_KEY
# environment variable rather than being written into the script.
# info_names_acc_GUY <- list()
# ll_acc_GUY <- list()
# for (i in seq_along(tropicos_id_GUY)) {
#   info_names_acc_GUY[[i]] <- tp_synonyms(tropicos_id_GUY[[i]],
#                                          key = Sys.getenv("TROPICOS_KEY"))
# }
# NOTE(review): this saves into the working directory, while the file is
# read back from ../Data/TROPICOS/ later in the script; confirm the path.
# saveRDS(info_names_acc_GUY, file = "GUY_acc_names.RData")

# Load source data and retrieve synonyms (Suriname):
## ----------------------------
## Suriname
## ----------------------------
# TROPICOS specimen search results for Suriname.
SUR <- read.csv(
  file = "../Data/TROPICOS/Specimen Search Results 20260127143117_Suriname.csv",
  sep = ",",
  fileEncoding = "Latin1"
)

# TRUE at the first occurrence of each scientific-name ID.
SUR_pp <- !duplicated(SUR$Scientific.Name.Id)
# De-duplicated IDs, in order of first appearance in the CSV.
tropicos_id_SUR <- SUR[SUR_pp, "Scientific.Name.Id"]

# Synonym retrieval: one tp_synonyms() call per unique ID. Kept commented
# out to avoid re-running it. The API key is read from the TROPICOS_KEY
# environment variable rather than being written into the script.
# info_names_acc_SUR <- list()
# ll_acc_SUR <- list()
# for (i in seq_along(tropicos_id_SUR)) {
#   info_names_acc_SUR[[i]] <- tp_synonyms(tropicos_id_SUR[[i]],
#                                          key = Sys.getenv("TROPICOS_KEY"))
# }
# NOTE(review): this saves into the working directory, while the file is
# read back from ../Data/TROPICOS/ later in the script; confirm the path.
# saveRDS(info_names_acc_SUR, file = "SUR_acc_names.RData")

# Read saved files:
# Load the previously saved synonym lists for each country. The files carry
# an .RData extension but are read with readRDS(), matching the saveRDS()
# calls shown in the commented-out retrieval code.
FRG_tropicos <- readRDS("../Data/TROPICOS/FRG_acc_names.RData")
GUY_tropicos <- readRDS("../Data/TROPICOS/GUY_acc_names.RData")
SUR_tropicos <- readRDS("../Data/TROPICOS/SUR_acc_names.RData")

# Now, we need to unnest each file:
# Flatten a list of Tropicos synonym results into a single tibble.
#
# tropicos_list: a list whose elements each hold an `accepted` and a
#   `synonyms` data frame (presumably the values returned by
#   taxize::tp_synonyms(); confirm against the saved files).
#
# Returns a tibble with one row per accepted-name/synonym combination:
#   - acc_id: position of the originating element in `tropicos_list`,
#     kept so rows can be matched back to the queried IDs;
#   - the accepted-name fields, prefixed "accepted_acc";
#   - the synonym fields, prefixed "synonyms_syn".
unnest_tropicos <- function(tropicos_list) {
  # `nameid` does not have the same type in every element, which prevents
  # the data frames from being combined; coerce it to character first.
  nameid_as_character <- function(df) {
    mutate(df, nameid = as.character(nameid))
  }

  tibble(result = tropicos_list) %>%
    mutate(acc_id = row_number()) %>% # preserve association
    # widen top-level list; creates: accepted, synonyms
    unnest_wider(result) %>%
    mutate(
      accepted = map(accepted, nameid_as_character),
      synonyms = map(synonyms, nameid_as_character)
    ) %>%
    # unnest accepted
    unnest_longer(accepted) %>%
    unnest_wider(accepted, names_sep = "_acc") %>%
    # unnest synonyms
    unnest_longer(synonyms) %>%
    unnest_wider(synonyms, names_sep = "_syn")
}

## FRENCH GUIANA ##
FRG_acc_syn <- unnest_tropicos(FRG_tropicos)

## GUYANA ##
GUY_acc_syn <- unnest_tropicos(GUY_tropicos)

## SURINAME ##
SUR_acc_syn <- unnest_tropicos(SUR_tropicos)

# The next step is to add an index running from 1 to length(tropicos_id) so
# that the unnested data can be joined with the original Tropicos name IDs.
# This is necessary because the list retrieved from Tropicos does not
# preserve the scientific name ID of names without synonyms.
# For each country, build an index table pairing every unique Tropicos ID
# (tp_id) with its position in the ID vector (acc_id), then attach the
# unnested synonym data by acc_id. A left join keeps every row of the index
# table, so each ID is retained even when it has no matching row.
# NOTE(review): assumes the saved synonym lists are in the same order as
# the ID vectors built from the CSV files; confirm.

# French Guiana
FRG_id <- data.frame(acc_id = seq_along(tropicos_id), tp_id = tropicos_id)
FRG_acc_syn2 <- left_join(FRG_id, FRG_acc_syn, by = "acc_id")

# Guyana
GUY_id <- data.frame(
  acc_id = seq_along(tropicos_id_GUY),
  tp_id = tropicos_id_GUY
)
GUY_acc_syn2 <- left_join(GUY_id, GUY_acc_syn, by = "acc_id")

# Suriname
SUR_id <- data.frame(
  acc_id = seq_along(tropicos_id_SUR),
  tp_id = tropicos_id_SUR
)
SUR_acc_syn2 <- left_join(SUR_id, SUR_acc_syn, by = "acc_id")