knitr::opts_chunk$set(echo = TRUE)
load packages
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
library(purrr)
library(readr)
Load and manipulate the sPlot dataset
load("~/Library/CloudStorage/OneDrive-UniversitadegliStudiRomaTre/POSTDOC/splot dataset/sPlotOpen.RData")
# Obtain unique levels for the PlotObservationID and Species columns
plot_levels <- unique(DT2.oa$PlotObservationID)
species_levels <- unique(DT2.oa$Species)
# Calculate the number of unique species in the dataset
unique_species_count <- DT2.oa %>%
summarise(unique_count = n_distinct(Species)) # 42529 species
# Identify species that are present in at least 5% of the rows
species_min_05per_rows <- as.data.frame(as.data.frame(names(which(table(DT2.oa$Species) >= 475))))
species_min_05per_rows <- species_min_05per_rows %>%
rename(Species = 1)
# Identify species that have a Relative_cover value greater than 5% at least once
species_abundance_5 <- as.data.frame(unique(DT2.oa$Species[DT2.oa$Relative_cover > 0.05]))
species_abundance_5 <- species_abundance_5 %>%
rename(Species = 1)
# Identify species that have a Relative_cover value greater than 10% at least once
species_abundance_10 <- as.data.frame(unique(DT2.oa$Species[DT2.oa$Relative_cover > 0.10]))
species_abundance_10 <- species_abundance_10 %>%
rename(Species = 1)
# Identify species that have a Relative_cover value greater than 20% at least once
species_abundance_20 <- as.data.frame(unique(DT2.oa$Species[DT2.oa$Relative_cover > 0.20]))
species_abundance_20 <- species_abundance_20 %>%
rename(Species = 1)
# Identify species that occur at least 475 times
species_min_05per_rows <- as.data.frame(names(which(table(DT2.oa$Species) >= 475)))
species_min_05per_rows <- species_min_05per_rows %>%
rename(Species = 1)
# Identify species that have a Relative_cover value greater than 40% at least once
species_abundance_40 <- as.data.frame(unique(DT2.oa$Species[DT2.oa$Relative_cover > 0.4]))
species_abundance_40 <- species_abundance_40 %>%
rename(Species = 1)
# Combine species sets using an OR condition while maintaining all columns
species_comb_filt <- full_join(species_min_05per_rows, species_abundance_40, by = "Species")
# Remove any duplicates
species_comb_filt <- unique(species_comb_filt)
# Create a dataframe with Species and a Category column describing the applied filters
species_comb_filt <- as.data.frame(species_comb_filt)
colnames(species_comb_filt) <- "Species"
# Check the classes of the species datasets
class(species_min_05per_rows)
## [1] "data.frame"
class(species_abundance_5)
## [1] "data.frame"
class(species_abundance_10)
## [1] "data.frame"
class(species_abundance_20)
## [1] "data.frame"
class(species_comb_filt)
## [1] "data.frame"
# Combine all species dataframes into one
all_species <- bind_rows(
species_min_05per_rows %>% mutate(Category = ">= 0.5% rows"),
species_abundance_5 %>% mutate(Category = "> 5% abundance"),
species_abundance_10 %>% mutate(Category = "> 10% abundance"),
species_abundance_20 %>% mutate(Category = "> 20% abundance"),
species_comb_filt %>% mutate(Category = "combined 5% rows OR > 40% abundance")
)
# Export the combined dataframe as a CSV file
write.csv(all_species, "species_criteria.csv", row.names = FALSE)
Load Flamits data
# Load the Flamits dataset from a specified file path
flamits <- read_delim("~/Library/CloudStorage/OneDrive-UniversitadegliStudiRomaTre/POSTDOC/flamit/data_file.csv", delim = ";", escape_double = FALSE, trim_ws = TRUE)
## Rows: 19972 Columns: 33
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (28): taxon_name, original_name, var_name, flam_dimension, burning_devic...
## dbl (5): ID, taxon_ID, var_value, biome, fire
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Rename the taxon_name column to Species for consistency
flamits <- flamits %>%
rename(Species = taxon_name)
# Count the number of unique species in the Flamits dataset
num_unique_species <- flamits %>%
summarise(unique_species = n_distinct(Species)) %>%
pull(unique_species)
# Print the number of unique species found
print(num_unique_species)
## [1] 1790
# Create a list of datasets and their corresponding names
datasets <- list(
species_min_05per_rows = species_min_05per_rows,
species_abundance_5 = species_abundance_5,
species_abundance_10 = species_abundance_10,
species_abundance_20 = species_abundance_20,
species_comb_filt = species_comb_filt
)
# Function to count matches between datasets and Flamits, and calculate the percentage
count_matching_species <- function(dataset, name) {
matching_records <- dataset %>%
semi_join(flamits, by = "Species") # Join based on Species column
count_matching <- nrow(matching_records) # Count of matching records
total_species <- n_distinct(dataset$Species) # Total number of species in the dataset
# Calculate the percentage of matching species
percentage <- ifelse(total_species > 0, count_matching / total_species, 0)
# Return a dataframe with the dataset name, count, and ratio
return(data.frame(Dataset = name, Count = count_matching, Ratio = percentage))
}
# Execute the matching function for each dataset and combine the results
results_species_flamits <- map2_dfr(datasets, names(datasets), count_matching_species)
# Print the matching results
print(results_species_flamits)
## Dataset Count Ratio
## 1 species_min_05per_rows 150 0.21994135
## 2 species_abundance_5 905 0.04350334
## 3 species_abundance_10 779 0.05389512
## 4 species_abundance_20 617 0.06696332
## 5 species_comb_filt 445 0.08541267
# Ensure the correct column name for genus extraction
flamits <- flamits %>%
mutate(Genus = word(Species, 1)) # Create a new column for the genus
# List of datasets and their corresponding names
datasets <- list(
species_min_05per_rows = species_min_05per_rows,
species_abundance_5 = species_abundance_5,
species_abundance_10 = species_abundance_10,
species_abundance_20 = species_abundance_20,
species_comb_filt = species_comb_filt
)
# Function to count matches by genus and calculate the percentage
count_matching_genus <- function(dataset, name) {
dataset <- dataset %>%
mutate(Genus = word(Species, 1)) # Add a column for genus extraction
# Find unique genera that match with Flamits
matching_records <- dataset %>%
semi_join(flamits, by = "Genus") %>%
distinct(Genus) # Remove duplicate genera
# Count matching genera and calculate percentage
count_matching <- nrow(matching_records) # Count of matching genera
total_genus <- n_distinct(dataset$Genus) # Total number of genera in the dataset
# Calculate the percentage of matching genera
percentage <- ifelse(total_genus > 0, count_matching / total_genus, 0)
# Return a dataframe with the dataset name, count, and ratio
return(data.frame(Dataset = name, Count = count_matching, Ratio = percentage))
}
# Execute the genus matching function for each dataset and combine the results
results_genus_flamits <- map2_dfr(datasets, names(datasets), count_matching_genus)
# Print the results
print(results_genus_flamits)
## Dataset Count Ratio
## 1 species_min_05per_rows 172 0.4467532
## 2 species_abundance_5 734 0.1588401
## 3 species_abundance_10 684 0.1865794
## 4 species_abundance_20 589 0.2183908
## 5 species_comb_filt 449 0.2542469
# Combine the results from species and genus matching
combined_results <- bind_rows(results_species_flamits, results_genus_flamits)
# Assign source labels to the combined results
combined_results_flamits <- bind_rows(
results_species_flamits %>% mutate(Source = "Flamits_Species"),
results_genus_flamits %>% mutate(Source = "Flamits_Genus")
)
Try data
# Load the TRY dataset
library(readxl)
av_TRY <- read_excel("~/Library/CloudStorage/OneDrive-UniversitadegliStudiRomaTre/POSTDOC/TRY_AV.xlsx")
# List of datasets and their corresponding names
datasets <- list(
species_min_05per_rows = species_min_05per_rows,
species_abundance_5 = species_abundance_5,
species_abundance_10 = species_abundance_10,
species_abundance_20 = species_abundance_20,
species_comb_filt = species_comb_filt
)
# Function to filter and calculate proportions of TRUE values for traits
process_dataset <- function(dataset_name, species_df) {
filtered_av_TRY <- av_TRY %>%
filter(Species %in% species_df$Species)
traits_TF <- filtered_av_TRY %>%
mutate(across(-Species, ~ . > 0)) # Apply condition "greater than 0" to all columns except 'Species'
proportions_TF <- traits_TF %>%
summarize(across(-Species, ~ mean(.))) # Calculate the mean (proportion of TRUE) for each column except 'Species'
proportions_TF <- proportions_TF %>%
mutate(dataset = dataset_name) # Add a column to identify the dataset
# Reorder columns to place 'dataset' first
proportions_TF %>%
select(dataset, everything())
}
# Apply the function to each dataset
results_10 <- process_dataset("species_abundance_10", species_abundance_10)
results_20 <- process_dataset("species_abundance_20", species_abundance_20)
results_5 <- process_dataset("species_abundance_5", species_abundance_5)
results_min_5 <- process_dataset("species_min_05per_rows", species_min_05per_rows)
results_comb <- process_dataset("species_comb_filt", species_comb_filt)
# Combine all results into a single dataframe
final_results_TRY <- bind_rows(results_10, results_20, results_5, results_min_5, results_comb)
# Print the final results
print(final_results_TRY)
## # A tibble: 5 × 20
## dataset .Leaf area per leaf …¹ .Leaf area per leaf …² .Leaf area per leaf …³
## <chr> <dbl> <dbl> <dbl>
## 1 species_… 0.282 0.349 0.523
## 2 species_… 0.308 0.376 0.550
## 3 species_… 0.262 0.318 0.494
## 4 species_… 0.748 0.831 0.917
## 5 species_… 0.355 0.428 0.592
## # ℹ abbreviated names:
## # ¹`.Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): petiole excluded.`,
## # ²`.Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): petiole included.`,
## # ³`.Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): undefined if petiole is in- or exclu.`
## # ℹ 16 more variables: `.Plant height vegetative.` <dbl>,
## # `.Seed dry mass.` <dbl>, `.Plant growth rate.` <dbl>,
## # `.Plant life form (Raunkiaer life form).` <dbl>, …
# Load necessary libraries for string manipulation
library(dplyr)
library(stringr) # For string manipulation functions like stringr::word()
# Function to extract genus, calculate proportions of TRUE for traits, and add dataset information
process_dataset_by_genus <- function(dataset_name, species_df) {
filtered_av_TRY <- av_TRY %>%
filter(Species %in% species_df$Species) %>%
mutate(Genus = word(Species, 1)) # Extract genus (first word of Species)
# Create a T/F table for values > 0
traits_TF <- filtered_av_TRY %>%
mutate(across(-c(Species, Genus), ~ . > 0))
# Group by Genus and calculate the proportion of TRUE for each trait
proportions_TF <- traits_TF %>%
group_by(Genus) %>%
summarize(across(-c(Species), ~ mean(.))) # Proportion of TRUE for each trait
# Add a column to identify the dataset
proportions_TF <- proportions_TF %>%
mutate(dataset = dataset_name)
# Reorder columns to place 'dataset' and 'Genus' first
proportions_TF %>%
select(dataset, Genus, everything())
}
# Apply the function to each dataset
results_10 <- process_dataset_by_genus("species_abundance_10", species_abundance_10)
results_20 <- process_dataset_by_genus("species_abundance_20", species_abundance_20)
results_5 <- process_dataset_by_genus("species_abundance_5", species_abundance_5)
results_min_5 <- process_dataset_by_genus("species_min_05per_rows", species_min_05per_rows)
results_comb <- process_dataset_by_genus("species_comb_filt", species_comb_filt)
# Combine all results into a single dataframe
final_results_by_genus <- bind_rows(results_10, results_20, results_5, results_min_5, results_comb)
# Assign TRUE if the value is greater than 0 across the entire dataframe
final_results_by_genus <- final_results_by_genus %>%
mutate(across(-c(dataset, Genus), ~ . > 0))
# Calculate the percentage of TRUE for each trait based on the dataset
percentage_T <- final_results_by_genus %>%
group_by(dataset) %>%
summarize(across(-Genus, ~ mean(.))) # Calculate the percentage of TRUE
# Combine results from TRY datasets based on genus and species
combined_results_TRY <- bind_rows(
percentage_T %>% mutate(Source = "TRY_genus"),
final_results_TRY %>% mutate(Source = "TRY_species")
)
# Combine results from Flamits based on species and genus
combined_results_flamits <- bind_rows(
results_species_flamits %>% mutate(Source = "Flamits_Species"),
results_genus_flamits %>% mutate(Source = "Flamits_Genus")
)
# Export combined results to CSV files
write.csv(combined_results_TRY, "TRY_coverage.csv")
write.csv(combined_results_flamits, "flamits_coverage.csv")
# Export combined results to an Excel file
library(openxlsx)
write.xlsx(combined_results_TRY, "TRY_coverage.xlsx")