Database Coverage Analysis

Load Required Libraries

Load TRY Species Data

# Load the TRY species list and coerce it to a plain data.frame.
# The path is written relative to the home directory ("~"), matching the
# other inputs of this analysis, instead of hard-coding one user's absolute
# home directory.
try_species_df <- as.data.frame(
  read_rds("~/Library/CloudStorage/OneDrive-UniversitadegliStudiRomaTre/WiFln/elaboration/splot_explorative/try_species_df.RDS")
)

Load and Process Flamits Data

# Read the semicolon-delimited Flamits table, trimming whitespace around fields
flamits <- read_delim(
  file = "~/Library/CloudStorage/OneDrive-UniversitadegliStudiRomaTre/WiFln/data/flamit/data_file.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
)

## Rows: 19972 Columns: 33
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (28): taxon_name, original_name, var_name, flam_dimension, burning_devic...
## dbl  (5): ID, taxon_ID, var_value, biome, fire
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Expose the taxon name under `Species`, the key used for the species matching
flamits <- rename(flamits, Species = taxon_name)

# Number of distinct species names present in Flamits
num_unique_species <- n_distinct(flamits$Species)

cat("Number of unique species in Flamits dataset:", num_unique_species, "\n")

## Number of unique species in Flamits dataset: 1790

# Count how many distinct species of `dataset` also occur in a reference
# table, and express that count as a share of the dataset's distinct species.
#
# Arguments:
#   dataset      Data frame with a `Species` column.
#   reference    Data frame with a `Species` column to match against.
#                Defaults to the `flamits` table from the calling environment.
#   dataset_name Label written to the `Dataset` column of the result.
#
# Returns a one-row data.frame with columns:
#   Dataset  the label given in `dataset_name`
#   Count    number of distinct species of `dataset` found in `reference`
#   Ratio    Count divided by the number of distinct species in `dataset`
#            (a proportion in [0, 1]; NA when `dataset` has no species)
count_matching_species <- function(dataset,
                                   reference = flamits,
                                   dataset_name = "try_species_df") {
  stopifnot(
    "Species" %in% names(dataset),
    "Species" %in% names(reference)
  )

  # Work on distinct species on both sides so that numerator and denominator
  # use the same unit; counting matching *rows* would inflate the ratio
  # whenever a species appears on more than one row of `dataset`.
  dataset_species <- unique(dataset[["Species"]])
  reference_species <- unique(reference[["Species"]])

  count_matching <- sum(dataset_species %in% reference_species)
  total_species <- length(dataset_species)

  # Avoid 0 / 0 (NaN) for an empty dataset
  ratio <- if (total_species > 0) count_matching / total_species else NA_real_

  data.frame(Dataset = dataset_name, Count = count_matching, Ratio = ratio)
}

# Overlap between the TRY species list and Flamits
results_species_flamits <- try_species_df %>%
  count_matching_species()
print(results_species_flamits)

##          Dataset Count     Ratio
## 1 try_species_df   387 0.1127294

Load and Combine TRY Data

# First TRY trait table (semicolon-delimited, one `Species` key column)
TRY_AV4_1 <- read_delim(
  file = "~/Library/CloudStorage/OneDrive-UniversitadegliStudiRomaTre/WiFln/data/try/TRY_AV4_1.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
)

## Rows: 58004 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr  (1): Species
## dbl (20): .Bark thickness., .Species tolerance to drought., .Leaf dry mass p...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Second TRY trait table, read with the same settings as the first
TRY_AV4_2 <- read_delim(
  file = "~/Library/CloudStorage/OneDrive-UniversitadegliStudiRomaTre/WiFln/data/try/TRY_AV4_2.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
)

## Rows: 82690 Columns: 20
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr  (1): Species
## dbl (19): .Budbank height distribution., .Dispersal syndrome., .Leaf area pe...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Combine both TRY tables on `Species`, keeping species found in either one.
# Trait cells that are missing after the join are set to 0. The fill is
# limited to the trait columns so that the character key `Species` is never
# overwritten with a numeric 0.
# NOTE(review): trait columns present in both inputs are kept as separate
# ".x" / ".y" columns by the join rather than merged - confirm this is intended.
TRY_AV4 <- full_join(TRY_AV4_1, TRY_AV4_2, by = "Species") %>%
  mutate(across(-Species, ~ coalesce(.x, 0)))

# Save combined dataset
write_csv(TRY_AV4, "~/Library/CloudStorage/OneDrive-UniversitadegliStudiRomaTre/WiFln/data/try/TRY_AV_4.csv")

Analyze TRY Traits

# For each trait column, compute the share of rows with a value above zero
# among the trait-table rows whose species occurs in `species_df`.
#
# Arguments:
#   species_df   Data frame with a `Species` column selecting the rows to keep.
#   trait_data   Trait table with a `Species` column plus numeric trait
#                columns. Defaults to `TRY_AV4` from the calling environment.
#   dataset_name Label written to the leading `dataset` column of the result.
#
# Returns a one-row table: `dataset` followed by one proportion per trait
# column. The proportions are NaN when no row of `trait_data` matches.
process_dataset <- function(species_df,
                            trait_data = TRY_AV4,
                            dataset_name = "try_species_df") {
  trait_data %>%
    filter(Species %in% species_df$Species) %>%
    # mean() of a logical vector is the proportion of TRUE values
    summarise(across(-Species, ~ mean(.x > 0))) %>%
    mutate(dataset = dataset_name) %>%
    select(dataset, everything())
}

# Trait coverage for the TRY species list
results_TRY <- try_species_df %>%
  process_dataset()

# Convert the trait headers into syntactic, unique R names
names(results_TRY) <- make.names(names(results_TRY), unique = TRUE)

# Strip a trailing ".x" / ".y" from the names, then keep only the first
# column for every name that now occurs more than once.
# NOTE(review): the suffixes are presumably join suffixes; the later column of
# each such pair is discarded here, not combined - confirm this is intended.
names(results_TRY) <- gsub("\\.x$|\\.y$", "", names(results_TRY))
results_TRY <- results_TRY[, !duplicated(names(results_TRY))]

# Export the coverage table to Excel, then show it
write.xlsx(results_TRY, "result_TRY.xlsx")

print(results_TRY)

## # A tibble: 1 × 37
##   dataset        .Bark.thickness. .Species.tolerance.to…¹ .Leaf.dry.mass.per.l…²
##   <chr>                     <dbl>                   <dbl>                  <dbl>
## 1 try_species_df           0.0602                   0.273                  0.619
## # ℹ abbreviated names: ¹.Species.tolerance.to.drought.,
## #   ².Leaf.dry.mass.per.leaf.fresh.mass..leaf.dry.matter.content..LDMC..
## # ℹ 33 more variables:
## #   .Leaf.area.per.leaf.dry.mass..specific.leaf.area..SLA.or.1.LMA...undefined.if.petiole.is.in..or.exclu. <dbl>,
## #   .Crown..canopy..structure. <dbl>,
## #   .Shoot.branching.type..shoot.branching.architecture. <dbl>,
## #   .Stem.specific.density..SSD..stem.dry.mass.per.stem.fresh.volume..or.wood.density. <dbl>, …

Database Coverage Analysis

Andrea De Toma

2024-11-26

Load Required Libraries

Load TRY Species Data

Load and Process Flamits Data

Load and Combine TRY Data

Analyze TRY Traits