Load Required Libraries
Load TRY Species Data
# Load the TRY species table as a plain data.frame.
# The path is written relative to the home directory (`~`), matching the other
# input paths in this script, so it no longer embeds one user's account name.
try_species_df <- as.data.frame(
  read_rds("~/Library/CloudStorage/OneDrive-UniversitadegliStudiRomaTre/WiFln/elaboration/splot_explorative/try_species_df.RDS")
)
Load and Process Flamits Data
# Read the semicolon-delimited Flamits table
flamits <- read_delim(
  file = "~/Library/CloudStorage/OneDrive-UniversitadegliStudiRomaTre/WiFln/data/flamit/data_file.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
)
## Rows: 19972 Columns: 33
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (28): taxon_name, original_name, var_name, flam_dimension, burning_devic...
## dbl (5): ID, taxon_ID, var_value, biome, fire
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Expose the taxon name under the column name `Species`
flamits <- rename(flamits, Species = taxon_name)

# Number of distinct species present in Flamits
num_unique_species <- n_distinct(flamits$Species)
cat("Number of unique species in Flamits dataset:", num_unique_species, "\n")
## Number of unique species in Flamits dataset: 1790
# Count how many distinct species of `dataset` also occur in a reference table
# and express that count as a share of the dataset's distinct species.
#
# Arguments:
#   dataset       Data frame with a `Species` column.
#   reference     Data frame with a `Species` column to match against.
#                 Defaults to the global `flamits` table.
#   dataset_name  Label written to the `Dataset` column of the result.
#
# Returns:
#   A one-row data.frame with columns `Dataset`, `Count` (number of distinct
#   matching species) and `Ratio` (Count / distinct species in `dataset`,
#   a value in [0, 1]; NA when `dataset` has no species).
count_matching_species <- function(dataset, reference = flamits,
                                   dataset_name = "try_species_df") {
  stopifnot(
    "Species" %in% names(dataset),
    "Species" %in% names(reference)
  )

  # Work on distinct species so numerator and denominator use the same unit;
  # counting matching rows would inflate the ratio when species are repeated.
  dataset_species <- unique(dataset[["Species"]])
  total_species <- length(dataset_species)
  count_matching <- sum(dataset_species %in% reference[["Species"]])

  # Ratio (not a percentage) of matching species; undefined for empty input
  ratio <- if (total_species > 0L) count_matching / total_species else NA_real_

  data.frame(Dataset = dataset_name, Count = count_matching, Ratio = ratio)
}
# Summarise the overlap between the TRY species list and Flamits, then show it
results_species_flamits <- try_species_df %>%
  count_matching_species()
print(results_species_flamits)
## Dataset Count Ratio
## 1 try_species_df 387 0.1127294
Load and Combine TRY Data
# Load the TRY trait tables (semicolon-delimited); first file
TRY_AV4_1 <- read_delim(
  file = "~/Library/CloudStorage/OneDrive-UniversitadegliStudiRomaTre/WiFln/data/try/TRY_AV4_1.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
)
## Rows: 58004 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (1): Species
## dbl (20): .Bark thickness., .Species tolerance to drought., .Leaf dry mass p...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Second TRY trait file, read with the same delimiter settings
TRY_AV4_2 <- read_delim(
  file = "~/Library/CloudStorage/OneDrive-UniversitadegliStudiRomaTre/WiFln/data/try/TRY_AV4_2.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
)
## Rows: 82690 Columns: 20
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (1): Species
## dbl (19): .Budbank height distribution., .Dispersal syndrome., .Leaf area pe...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Combine the two TRY tables on Species, keeping species found in either file.
# Trait values missing after the join are set to 0. Only numeric columns are
# filled, so a missing Species key is never overwritten with 0.
TRY_AV4 <- full_join(TRY_AV4_1, TRY_AV4_2, by = "Species") %>%
  mutate(across(where(is.numeric), ~ coalesce(.x, 0)))
# Save combined dataset
write_csv(TRY_AV4, "~/Library/CloudStorage/OneDrive-UniversitadegliStudiRomaTre/WiFln/data/try/TRY_AV_4.csv")
Analyze TRY Traits
# For every trait column, compute the proportion of the selected species whose
# trait value is greater than 0.
#
# Arguments:
#   species_df    Data frame with a `Species` column; only these species are
#                 kept from the trait table.
#   traits        Trait table with a `Species` column and one numeric column
#                 per trait. Defaults to the global `TRY_AV4` table.
#   dataset_name  Label written to the `dataset` column of the result.
#
# Returns:
#   A one-row table: `dataset` first, then one proportion per trait column.
process_dataset <- function(species_df, traits = TRY_AV4,
                            dataset_name = "try_species_df") {
  stopifnot(
    "Species" %in% names(species_df),
    "Species" %in% names(traits)
  )

  traits %>%
    filter(Species %in% species_df[["Species"]]) %>%
    # A missing value counts as "not greater than 0", the same outcome as the
    # 0 that the combined TRY table stores for missing traits; this keeps a
    # stray NA from turning the whole proportion into NA.
    summarise(across(-Species, ~ mean(!is.na(.x) & .x > 0))) %>%
    mutate(dataset = dataset_name) %>%
    select(dataset, everything())
}
# Trait proportions for the TRY species list
results_TRY <- process_dataset(try_species_df)

# Make column names syntactic, then strip the `.x` / `.y` suffixes
colnames(results_TRY) <- colnames(results_TRY) %>%
  make.names(unique = TRUE) %>%
  gsub("\\.x$|\\.y$", "", .)

# Where stripping produced repeated names, keep only the first such column
results_TRY <- results_TRY[, !duplicated(names(results_TRY))]

# Export to Excel and show the table
write.xlsx(results_TRY, "result_TRY.xlsx")
print(results_TRY)
## # A tibble: 1 × 37
## dataset .Bark.thickness. .Species.tolerance.to…¹ .Leaf.dry.mass.per.l…²
## <chr> <dbl> <dbl> <dbl>
## 1 try_species_df 0.0602 0.273 0.619
## # ℹ abbreviated names: ¹.Species.tolerance.to.drought.,
## # ².Leaf.dry.mass.per.leaf.fresh.mass..leaf.dry.matter.content..LDMC..
## # ℹ 33 more variables:
## # .Leaf.area.per.leaf.dry.mass..specific.leaf.area..SLA.or.1.LMA...undefined.if.petiole.is.in..or.exclu. <dbl>,
## # .Crown..canopy..structure. <dbl>,
## # .Shoot.branching.type..shoot.branching.architecture. <dbl>,
## # .Stem.specific.density..SSD..stem.dry.mass.per.stem.fresh.volume..or.wood.density. <dbl>, …