Introduction

This document presents a comprehensive comparative biodiversity analysis of fungal spore communities from two locations: Collores and Estancias. The analysis includes diversity indices, community composition, statistical comparisons, and multivariate approaches to understand differences and similarities between the two locations.

Load Required Libraries

# Install packages if needed
# install.packages(c("vegan", "tidyverse", "reshape2", "knitr", "kableExtra", "ggpubr"))

library(vegan)       # For biodiversity analysis
library(tidyverse)   # For data manipulation and visualization
library(reshape2)    # For data reshaping
library(knitr)       # For nice tables
library(kableExtra)  # For enhanced tables
library(ggpubr)      # For statistical comparisons and publication-ready plots

Data Import and Preparation

# Read the data
collores_data <- read.csv("collores_franja.csv", check.names = FALSE, row.names = 1)
estancias_data <- read.csv("estancias_franja.csv", check.names = FALSE, row.names = 1)

# Replace NA values with 0
collores_data[is.na(collores_data)] <- 0
estancias_data[is.na(estancias_data)] <- 0

# Convert to numeric matrices
collores_matrix <- as.matrix(collores_data)
estancias_matrix <- as.matrix(estancias_data)

# Display data dimensions
cat("COLLORES Dataset:\n")
## COLLORES Dataset:
cat("  Number of samples:", nrow(collores_matrix), "\n")
##   Number of samples: 12
cat("  Number of taxa:", ncol(collores_matrix), "\n")
##   Number of taxa: 41
cat("  Total spore count:", sum(collores_matrix), "\n\n")
##   Total spore count: 381
cat("ESTANCIAS Dataset:\n")
## ESTANCIAS Dataset:
cat("  Number of samples:", nrow(estancias_matrix), "\n")
##   Number of samples: 12
cat("  Number of taxa:", ncol(estancias_matrix), "\n")
##   Number of taxa: 45
cat("  Total spore count:", sum(estancias_matrix), "\n")
##   Total spore count: 283

Data Overview

overview_df <- data.frame(
  Location = c("Collores", "Estancias"),
  Samples = c(nrow(collores_matrix), nrow(estancias_matrix)),
  Taxa = c(ncol(collores_matrix), ncol(estancias_matrix)),
  Total_Spores = c(sum(collores_matrix), sum(estancias_matrix)),
  Mean_Spores_per_Sample = c(round(mean(rowSums(collores_matrix)), 2),
                              round(mean(rowSums(estancias_matrix)), 2))
)

kable(overview_df,
      caption = "Overview of Datasets by Location",
      col.names = c("Location", "# Samples", "# Taxa", "Total Spores", "Mean Spores/Sample")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)
Overview of Datasets by Location
Location # Samples # Taxa Total Spores Mean Spores/Sample
Collores 12 41 381 31.75
Estancias 12 45 283 23.58

Basic Statistics by Location

Summary Statistics

# Calculate basic statistics for each location
collores_richness <- rowSums(collores_matrix > 0)
collores_abundance <- rowSums(collores_matrix)

estancias_richness <- rowSums(estancias_matrix > 0)
estancias_abundance <- rowSums(estancias_matrix)

# Create summary table
stats_summary <- data.frame(
  Metric = c("Mean Richness", "Median Richness", "SD Richness", "Min Richness", "Max Richness",
             "Mean Abundance", "Median Abundance", "SD Abundance", "Min Abundance", "Max Abundance"),
  Collores = c(
    round(mean(collores_richness), 2),
    median(collores_richness),
    round(sd(collores_richness), 2),
    min(collores_richness),
    max(collores_richness),
    round(mean(collores_abundance), 2),
    median(collores_abundance),
    round(sd(collores_abundance), 2),
    min(collores_abundance),
    max(collores_abundance)
  ),
  Estancias = c(
    round(mean(estancias_richness), 2),
    median(estancias_richness),
    round(sd(estancias_richness), 2),
    min(estancias_richness),
    max(estancias_richness),
    round(mean(estancias_abundance), 2),
    median(estancias_abundance),
    round(sd(estancias_abundance), 2),
    min(estancias_abundance),
    max(estancias_abundance)
  )
)

kable(stats_summary,
      caption = "Summary Statistics by Location") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)
Summary Statistics by Location
Metric Collores Estancias
Mean Richness 7.75 8.08
Median Richness 8.00 9.00
SD Richness 3.41 3.29
Min Richness 2.00 3.00
Max Richness 14.00 12.00
Mean Abundance 31.75 23.58
Median Abundance 29.50 23.50
SD Abundance 17.20 15.68
Min Abundance 3.00 4.00
Max Abundance 61.00 57.00

Boxplots: Richness and Abundance

# Prepare data for boxplots
richness_df <- data.frame(
  Location = c(rep("Collores", length(collores_richness)),
               rep("Estancias", length(estancias_richness))),
  Richness = c(collores_richness, estancias_richness)
)

abundance_df <- data.frame(
  Location = c(rep("Collores", length(collores_abundance)),
               rep("Estancias", length(estancias_abundance))),
  Abundance = c(collores_abundance, estancias_abundance)
)

# Richness boxplot
p1 <- ggplot(richness_df, aes(x = Location, y = Richness, fill = Location)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 16, outlier.size = 2) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
  stat_compare_means(method = "wilcox.test", label = "p.format", label.x = 1.5, size = 6) +
  scale_fill_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
  theme_minimal() +
  labs(title = "Species Richness by Location",
       y = "Number of Taxa per Sample",
       x = "") +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 16),      # Title size
    axis.title.y = element_text(size = 14),    # Y-axis label size
    axis.text.x = element_text(size = 16),     # X-axis text (Collores, Estancias)
    axis.text.y = element_text(size = 12))

# Abundance boxplot
p2 <- ggplot(abundance_df, aes(x = Location, y = Abundance, fill = Location)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 16, outlier.size = 2) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
  stat_compare_means(method = "wilcox.test", label = "p.format", label.x = 1.5, size = 6) +
  scale_fill_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
  theme_minimal() +
  labs(title = "Spore Abundance by Location",
       y = "Total Spores per Sample",
       x = "", size = 6) +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 16),      # Title size
    axis.title.y = element_text(size = 14),    # Y-axis label size
    axis.text.x = element_text(size = 16),     # X-axis text (Collores, Estancias)
    axis.text.y = element_text(size = 12))

# Combine plots
ggarrange(p1, p2, ncol = 2)

# save graphs
ggsave("boxplots_richness_abundance.png", width = 12, height = 6)

Pooled Biodiversity Indices by Location

# Calculate pooled diversity for each location
collores_pooled <- colSums(collores_matrix)
collores_pooled <- collores_pooled[collores_pooled > 0]

estancias_pooled <- colSums(estancias_matrix)
estancias_pooled <- estancias_pooled[estancias_pooled > 0]

# Calculate indices
pooled_indices <- data.frame(
  Index = c("Gamma Diversity (Total Richness)",
            "Total Abundance",
            "Shannon Index (H')",
            "Simpson Index (D)",
            "Inverse Simpson (1/D)",
            "Pielou's Evenness (J')"),
  Collores = c(
    length(collores_pooled),
    sum(collores_pooled),
    round(diversity(collores_pooled, index = "shannon"), 3),
    round(diversity(collores_pooled, index = "simpson"), 3),
    round(diversity(collores_pooled, index = "invsimpson"), 3),
    round(diversity(collores_pooled, index = "shannon") / log(length(collores_pooled)), 3)
  ),
  Estancias = c(
    length(estancias_pooled),
    sum(estancias_pooled),
    round(diversity(estancias_pooled, index = "shannon"), 3),
    round(diversity(estancias_pooled, index = "simpson"), 3),
    round(diversity(estancias_pooled, index = "invsimpson"), 3),
    round(diversity(estancias_pooled, index = "shannon") / log(length(estancias_pooled)), 3)
  )
)

kable(pooled_indices,
      caption = "Pooled Biodiversity Indices by Location") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)
Pooled Biodiversity Indices by Location
Index Collores Estancias
Gamma Diversity (Total Richness) 41.000 45.000
Total Abundance 381.000 283.000
Shannon Index (H’) 3.112 2.827
Simpson Index (D) 0.936 0.897
Inverse Simpson (1/D) 15.627 9.730
Pielou’s Evenness (J’) 0.838 0.743

Visualization: Pooled Diversity Comparison

# Prepare data for plotting (exclude richness and abundance for scale)
pooled_plot_df <- data.frame(
  Location = rep(c("Collores", "Estancias"), each = 3),
  Index = rep(c("Shannon", "Simpson", "Evenness"), 2),
  Value = c(
    diversity(collores_pooled, index = "shannon"),
    diversity(collores_pooled, index = "simpson"),
    diversity(collores_pooled, index = "shannon") / log(length(collores_pooled)),
    diversity(estancias_pooled, index = "shannon"),
    diversity(estancias_pooled, index = "simpson"),
    diversity(estancias_pooled, index = "shannon") / log(length(estancias_pooled))
  )
)

ggplot(pooled_plot_df, aes(x = Index, y = Value, fill = Location)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  scale_fill_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
  theme_minimal() +
  labs(title = "Pooled Diversity Indices Comparison",
       x = "Diversity Index",
       y = "Value",
       fill = "Location") +
  theme(legend.position = "top")

Diversity Indices by Sample

Calculate Indices for Each Sample

# Collores
collores_shannon <- diversity(collores_matrix, index = "shannon")
collores_simpson <- diversity(collores_matrix, index = "simpson")
collores_invsimpson <- diversity(collores_matrix, index = "invsimpson")
collores_evenness <- collores_shannon / log(collores_richness)
collores_evenness[is.infinite(collores_evenness) | is.nan(collores_evenness)] <- NA

collores_div_df <- data.frame(
  Sample = rownames(collores_matrix),
  Location = "Collores",
  Richness = collores_richness,
  Abundance = collores_abundance,
  Shannon = round(collores_shannon, 3),
  Simpson = round(collores_simpson, 3),
  InvSimpson = round(collores_invsimpson, 3),
  Evenness = round(collores_evenness, 3)
)

# Estancias
estancias_shannon <- diversity(estancias_matrix, index = "shannon")
estancias_simpson <- diversity(estancias_matrix, index = "simpson")
estancias_invsimpson <- diversity(estancias_matrix, index = "invsimpson")
estancias_evenness <- estancias_shannon / log(estancias_richness)
estancias_evenness[is.infinite(estancias_evenness) | is.nan(estancias_evenness)] <- NA

estancias_div_df <- data.frame(
  Sample = rownames(estancias_matrix),
  Location = "Estancias",
  Richness = estancias_richness,
  Abundance = estancias_abundance,
  Shannon = round(estancias_shannon, 3),
  Simpson = round(estancias_simpson, 3),
  InvSimpson = round(estancias_invsimpson, 3),
  Evenness = round(estancias_evenness, 3)
)

# Combine
all_diversity <- rbind(collores_div_df, estancias_div_df)

kable(all_diversity,
      caption = "Diversity Indices by Sample and Location") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE) %>%
  scroll_box(height = "400px")
Diversity Indices by Sample and Location
Sample Location Richness Abundance Shannon Simpson InvSimpson Evenness
1 1 Collores 2 3 0.637 0.444 1.800 0.918
2 2 Collores 4 21 0.685 0.331 1.495 0.494
3 3 Collores 5 30 0.578 0.244 1.324 0.359
4 4 Collores 11 35 2.100 0.846 6.481 0.876
5 5 Collores 10 48 1.741 0.740 3.853 0.756
6 6 Collores 5 10 1.471 0.740 3.846 0.914
7 7 Collores 8 34 1.682 0.763 4.219 0.809
8 8 Collores 7 27 1.765 0.801 5.028 0.907
9 9 Collores 14 57 1.964 0.773 4.408 0.744
10 10 Collores 11 29 2.055 0.832 5.965 0.857
11 11 Collores 8 26 1.740 0.778 4.507 0.837
12 12 Collores 8 61 0.946 0.414 1.708 0.455
13 1 Estancias 4 4 1.386 0.750 4.000 1.000
21 2 Estancias 3 5 0.950 0.560 2.273 0.865
31 3 Estancias 3 6 1.011 0.611 2.571 0.921
41 4 Estancias 9 13 2.032 0.840 6.259 0.925
51 5 Estancias 6 23 1.394 0.669 3.023 0.778
61 6 Estancias 9 24 1.600 0.694 3.273 0.728
71 7 Estancias 11 39 1.994 0.828 5.828 0.832
81 8 Estancias 9 28 1.900 0.819 5.521 0.865
91 9 Estancias 9 19 1.941 0.820 5.554 0.883
101 10 Estancias 12 33 2.016 0.812 5.312 0.811
111 11 Estancias 10 32 1.845 0.764 4.231 0.801
121 12 Estancias 12 57 1.755 0.734 3.765 0.706

Statistical Summary by Location

# Calculate summary statistics for diversity indices
diversity_summary <- all_diversity %>%
  group_by(Location) %>%
  summarise(
    Mean_Shannon = round(mean(Shannon, na.rm = TRUE), 3),
    SD_Shannon = round(sd(Shannon, na.rm = TRUE), 3),
    Mean_Simpson = round(mean(Simpson, na.rm = TRUE), 3),
    SD_Simpson = round(sd(Simpson, na.rm = TRUE), 3),
    Mean_Evenness = round(mean(Evenness, na.rm = TRUE), 3),
    SD_Evenness = round(sd(Evenness, na.rm = TRUE), 3)
  )

kable(diversity_summary,
      caption = "Mean Diversity Indices by Location (± SD)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)
Mean Diversity Indices by Location (± SD)
Location Mean_Shannon SD_Shannon Mean_Simpson SD_Simpson Mean_Evenness SD_Evenness
Collores 1.447 0.575 0.642 0.217 0.744 0.196
Estancias 1.652 0.385 0.742 0.092 0.843 0.085

Diversity Indices Comparison Plots

# Shannon diversity
p1 <- ggplot(all_diversity, aes(x = Location, y = Shannon, fill = Location)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 16) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
  stat_compare_means(method = "wilcox.test", label = "p.format") +
  scale_fill_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
  theme_minimal() +
  labs(title = "Shannon Diversity Index",
       y = "Shannon H'") +
  theme(legend.position = "none")

# Simpson diversity
p2 <- ggplot(all_diversity, aes(x = Location, y = Simpson, fill = Location)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 16) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
  stat_compare_means(method = "wilcox.test", label = "p.format") +
  scale_fill_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
  theme_minimal() +
  labs(title = "Simpson Diversity Index",
       y = "Simpson D") +
  theme(legend.position = "none")

# Evenness
p3 <- ggplot(all_diversity, aes(x = Location, y = Evenness, fill = Location)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 16) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
  stat_compare_means(method = "wilcox.test", label = "p.format") +
  scale_fill_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
  theme_minimal() +
  labs(title = "Pielou's Evenness",
       y = "Evenness J'") +
  theme(legend.position = "none")

# Combine plots
ggarrange(p1, p2, p3, ncol = 1, nrow = 3)

Statistical Tests for Differences Between Locations

Wilcoxon Rank-Sum Tests

# Perform Wilcoxon tests for each diversity metric
richness_test <- wilcox.test(collores_richness, estancias_richness)
abundance_test <- wilcox.test(collores_abundance, estancias_abundance)
shannon_test <- wilcox.test(collores_shannon, estancias_shannon)
simpson_test <- wilcox.test(collores_simpson, estancias_simpson)
evenness_test <- wilcox.test(collores_evenness, estancias_evenness, na.rm = TRUE)

# Create results table
test_results <- data.frame(
  Metric = c("Richness", "Abundance", "Shannon", "Simpson", "Evenness"),
  W_statistic = c(richness_test$statistic, abundance_test$statistic,
                  shannon_test$statistic, simpson_test$statistic,
                  evenness_test$statistic),
  P_value = c(richness_test$p.value, abundance_test$p.value,
              shannon_test$p.value, simpson_test$p.value,
              evenness_test$p.value),
  Significance = c(
    ifelse(richness_test$p.value < 0.001, "***",
           ifelse(richness_test$p.value < 0.01, "**",
                  ifelse(richness_test$p.value < 0.05, "*", "ns"))),
    ifelse(abundance_test$p.value < 0.001, "***",
           ifelse(abundance_test$p.value < 0.01, "**",
                  ifelse(abundance_test$p.value < 0.05, "*", "ns"))),
    ifelse(shannon_test$p.value < 0.001, "***",
           ifelse(shannon_test$p.value < 0.01, "**",
                  ifelse(shannon_test$p.value < 0.05, "*", "ns"))),
    ifelse(simpson_test$p.value < 0.001, "***",
           ifelse(simpson_test$p.value < 0.01, "**",
                  ifelse(simpson_test$p.value < 0.05, "*", "ns"))),
    ifelse(evenness_test$p.value < 0.001, "***",
           ifelse(evenness_test$p.value < 0.01, "**",
                  ifelse(evenness_test$p.value < 0.05, "*", "ns")))
  )
)

kable(test_results,
      digits = 4,
      caption = "Wilcoxon Rank-Sum Tests: Collores vs. Estancias (*** p<0.001, ** p<0.01, * p<0.05, ns = not significant)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)
Wilcoxon Rank-Sum Tests: Collores vs. Estancias (*** p
Metric W_statistic P_value Significance
Richness 64.0 0.6634 ns
Abundance 92.5 0.2481 ns
Shannon 58.0 0.4428 ns
Simpson 60.0 0.5137 ns
Evenness 55.0 0.3474 ns

PERMANOVA: Community Composition Differences

# Combine matrices for PERMANOVA
# Need to match taxa between datasets
all_taxa <- unique(c(colnames(collores_matrix), colnames(estancias_matrix)))

# Create standardized matrices with all taxa
collores_full <- matrix(0, nrow = nrow(collores_matrix), ncol = length(all_taxa))
colnames(collores_full) <- all_taxa
rownames(collores_full) <- rownames(collores_matrix)
collores_full[, colnames(collores_matrix)] <- collores_matrix

estancias_full <- matrix(0, nrow = nrow(estancias_matrix), ncol = length(all_taxa))
colnames(estancias_full) <- all_taxa
rownames(estancias_full) <- rownames(estancias_matrix)
estancias_full[, colnames(estancias_matrix)] <- estancias_matrix

# Combine
combined_matrix <- rbind(collores_full, estancias_full)
location_factor <- factor(c(rep("Collores", nrow(collores_full)),
                             rep("Estancias", nrow(estancias_full))))

# Perform PERMANOVA
set.seed(123)
permanova_result <- adonis2(combined_matrix ~ location_factor,
                            method = "bray",
                            permutations = 999)

kable(permanova_result,
      digits = 4,
      caption = "PERMANOVA: Testing for Differences in Community Composition Between Locations") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)
PERMANOVA: Testing for Differences in Community Composition Between Locations
Df SumOfSqs R2 F Pr(>F)
Model 1 0.9428 0.0996 2.4346 0.001
Residual 22 8.5195 0.9004 NA NA
Total 23 9.4623 1.0000 NA NA

ANOSIM: Analysis of Similarities

# Perform ANOSIM
set.seed(123)
anosim_result <- anosim(combined_matrix, location_factor,
                        distance = "bray",
                        permutations = 999)

cat("ANOSIM Results:\n")
## ANOSIM Results:
cat("R statistic:", round(anosim_result$statistic, 4), "\n")
## R statistic: 0.1967
cat("p-value:", round(anosim_result$signif, 4), "\n\n")
## p-value: 0.001
cat("Interpretation:\n")
## Interpretation:
cat("  R close to 1: Locations are well separated\n")
##   R close to 1: Locations are well separated
cat("  R close to 0: Locations are not separated\n")
##   R close to 0: Locations are not separated
cat("  R < 0: Within-location variation > between-location variation\n")
##   R < 0: Within-location variation > between-location variation

Species Composition Analysis

Most Abundant Taxa by Location

# Top 15 taxa for each location
collores_top <- head(sort(collores_pooled, decreasing = TRUE), 15)
estancias_top <- head(sort(estancias_pooled, decreasing = TRUE), 15)

# Create comparison table
max_length <- max(length(collores_top), length(estancias_top))

comparison_df <- data.frame(
  Rank = 1:max_length,
  Collores_Taxon = c(names(collores_top), rep("", max_length - length(collores_top))),
  Collores_Abundance = c(as.numeric(collores_top), rep(NA, max_length - length(collores_top))),
  Estancias_Taxon = c(names(estancias_top), rep("", max_length - length(estancias_top))),
  Estancias_Abundance = c(as.numeric(estancias_top), rep(NA, max_length - length(estancias_top)))
)

kable(comparison_df,
      caption = "Top 15 Most Abundant Taxa by Location",
      col.names = c("Rank", "Taxon", "Abundance", "Taxon", "Abundance")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE) %>%
  add_header_above(c(" " = 1, "Collores" = 2, "Estancias" = 2))
Top 15 Most Abundant Taxa by Location
Collores
Estancias
Rank Taxon Abundance Taxon Abundance
1 Diatrypella 49 Xylariaceae ascopores 66
2 m39 46 m15 39
3 Leptosphaeria 37 Leptosphaeria 28
4 m26 26 Aspergillus/Penicillium 23
5 m25 23 Ganoderma 17
6 Trematosphaeria pertusa/Venturia 23 Cercophora/Podospora 16
7 Arthrinium/Chaetomium 22 Bipolaris/Drechslera/Helminthosporium/Exserohilum 12
8 Bipolaris/Drechslera/Helminthosporium/Exserohilum 15 Scoleco-ascospore 12
9 Delitschia/Amphisphaeria 15 m18 7
10 m13 10 m10 5
11 m31 10 Drechslera 4
12 Xylariaceae ascopores 10 m21 4
13 m15 8 m7 4
14 Scoleco-ascopores 8 Trematosphaeria pertusa/Venturia 4
15 m38 7 Fusarium 3

Horizontal Bar Plots: Top Taxa

# Prepare data for Collores
collores_bar_df <- data.frame(
  Taxon = factor(names(collores_top), levels = rev(names(collores_top))),
  Abundance = as.numeric(collores_top),
  Location = "Collores"
)

# Prepare data for Estancias
estancias_bar_df <- data.frame(
  Taxon = factor(names(estancias_top), levels = rev(names(estancias_top))),
  Abundance = as.numeric(estancias_top),
  Location = "Estancias"
)

# Plot for Collores
p1 <- ggplot(collores_bar_df, aes(x = Taxon, y = Abundance)) +
  geom_bar(stat = "identity", fill = "#E69F00", alpha = 0.8) +
  coord_flip() +
  theme_minimal() +
  labs(title = "Collores: Top 15 Most Abundant Taxa",
       x = "",
       y = "Total Abundance") +
  theme(axis.text.y = element_text(size = 9))

# Plot for Estancias
p2 <- ggplot(estancias_bar_df, aes(x = Taxon, y = Abundance)) +
  geom_bar(stat = "identity", fill = "#56B4E9", alpha = 0.8) +
  coord_flip() +
  theme_minimal() +
  labs(title = "Estancias: Top 15 Most Abundant Taxa",
       x = "",
       y = "Total Abundance") +
  theme(axis.text.y = element_text(size = 9))

# Combine plots
ggarrange(p1, p2, ncol = 1, nrow = 2)

# save graphs
ggsave("horizontal_barplots_top_taxa.png", width = 10, height = 10)

Rank-Abundance Curves

# Prepare data
collores_rank <- data.frame(
  Rank = 1:length(collores_pooled),
  Abundance = as.numeric(sort(collores_pooled, decreasing = TRUE)),
  Location = "Collores"
)

estancias_rank <- data.frame(
  Rank = 1:length(estancias_pooled),
  Abundance = as.numeric(sort(estancias_pooled, decreasing = TRUE)),
  Location = "Estancias"
)

rank_combined <- rbind(collores_rank, estancias_rank)

# Plot
ggplot(rank_combined, aes(x = Rank, y = Abundance, color = Location, group = Location)) +
  geom_line(size = 1.2) +
  geom_point(size = 2, alpha = 0.6) +
  scale_y_log10() +
  scale_color_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
  theme_minimal() +
  labs(title = "Rank-Abundance Curves by Location",
       x = "Species Rank",
       y = "Abundance (log scale)",
       color = "Location") +
  annotation_logticks(sides = "l") +
  theme(legend.position = "top")

Shared vs. Unique Taxa

# Identify shared and unique taxa
collores_taxa <- names(collores_pooled)
estancias_taxa <- names(estancias_pooled)

shared_taxa <- intersect(collores_taxa, estancias_taxa)
collores_unique <- setdiff(collores_taxa, estancias_taxa)
estancias_unique <- setdiff(estancias_taxa, collores_taxa)

# Create summary
taxa_summary <- data.frame(
  Category = c("Total taxa in Collores",
               "Total taxa in Estancias",
               "Shared taxa",
               "Unique to Collores",
               "Unique to Estancias",
               "% Shared (of Collores)",
               "% Shared (of Estancias)"),
  Count = c(
    length(collores_taxa),
    length(estancias_taxa),
    length(shared_taxa),
    length(collores_unique),
    length(estancias_unique),
    round(length(shared_taxa) / length(collores_taxa) * 100, 1),
    round(length(shared_taxa) / length(estancias_taxa) * 100, 1)
  )
)

kable(taxa_summary,
      caption = "Shared and Unique Taxa Between Locations",
      col.names = c("Category", "Value")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)
Shared and Unique Taxa Between Locations
Category Value
Total taxa in Collores 41.0
Total taxa in Estancias 45.0
Shared taxa 13.0
Unique to Collores 28.0
Unique to Estancias 32.0
% Shared (of Collores) 31.7
% Shared (of Estancias) 28.9

Venn Diagram Data

cat("Shared taxa (", length(shared_taxa), "):\n")
## Shared taxa ( 13 ):
cat(paste(shared_taxa, collapse = ", "), "\n\n")
## Bipolaris/Drechslera/Helminthosporium/Exserohilum, Curvularia, Ganoderma, Leptosphaeria, m10, m13, m14, m15, m23 (cf. Gibberella zeae), m24, Torula, Trematosphaeria pertusa/Venturia, Xylariaceae ascopores
cat("Unique to Collores (", length(collores_unique), "):\n")
## Unique to Collores ( 28 ):
cat(paste(collores_unique, collapse = ", "), "\n\n")
## Arthrinium/Chaetomium, Chaetosphaerella, Delitschia/Amphisphaeria, Diatrypella, Helminthosporium, Kalmusia, Lophiostoma caulium, Lophiostoma/Massarina, m25, m26, m27, m28, m28 (cf. Bertiella), m29, m30, m31, m32, m33, m34, m35, m36, m37, m38, m39, Microsporum gypseum, Pleospora, Rinodina, Scoleco-ascopores
cat("Unique to Estancias (", length(estancias_unique), "):\n")
## Unique to Estancias ( 32 ):
cat(paste(estancias_unique, collapse = ", "), "\n")
## Aspergillus/Penicillium, Cacumisporium capitulatum, Cercophora/Podospora, Cladosporium, Clasterosporium, Drechslera, Fusarium, m1, m11, m12, m16, m17, m18, m19, m2, m20, m21, m22, m3, m4 (cf. Sporidesmium adscendens), m5, m6, m7, m9, Massarina, Nigrospora, Phaeosphaeria fuckelii, Phaeosphaeria oryzae/Leptosphaeria, Phaeosphaeria viridella/Leptosphaeria, Savoryella curvispora, Scoleco-ascospore, Sporidesmium/Distoseptispora

Species Accumulation Curves

# Calculate species accumulation curves
set.seed(123)
collores_accum <- specaccum(collores_matrix, method = "random", permutations = 100)
estancias_accum <- specaccum(estancias_matrix, method = "random", permutations = 100)

# Prepare data for plotting
collores_accum_df <- data.frame(
  Sites = collores_accum$sites,
  Richness = collores_accum$richness,
  SD = collores_accum$sd,
  Location = "Collores"
)

estancias_accum_df <- data.frame(
  Sites = estancias_accum$sites,
  Richness = estancias_accum$richness,
  SD = estancias_accum$sd,
  Location = "Estancias"
)

accum_combined <- rbind(collores_accum_df, estancias_accum_df)

# Plot
ggplot(accum_combined, aes(x = Sites, y = Richness, color = Location, fill = Location)) +
  geom_ribbon(aes(ymin = Richness - SD, ymax = Richness + SD), alpha = 0.2, color = NA) +
  geom_line(size = 1.2) +
  geom_point(size = 2) +
  scale_color_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
  scale_fill_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
  theme_minimal() +
  labs(title = "Species Accumulation Curves by Location",
       x = "Number of Samples",
       y = "Cumulative Number of Taxa",
       color = "Location",
       fill = "Location") +
  theme(legend.position = "top")

# save graph
ggsave("species_accumulation_curves.png", width = 10, height = 6)

Accumulation Curve Analysis

# Calculate sampling completeness
collores_completeness <- (collores_accum$richness[length(collores_accum$richness)] / 
                          length(collores_pooled)) * 100
estancias_completeness <- (estancias_accum$richness[length(estancias_accum$richness)] / 
                           length(estancias_pooled)) * 100

accum_summary <- data.frame(
  Location = c("Collores", "Estancias"),
  Observed_Richness = c(collores_accum$richness[length(collores_accum$richness)],
                        estancias_accum$richness[length(estancias_accum$richness)]),
  Total_Richness = c(length(collores_pooled), length(estancias_pooled)),
  Completeness_Percent = round(c(collores_completeness, estancias_completeness), 1),
  Slope_Last_Point = c(
    diff(tail(collores_accum$richness, 2)),
    diff(tail(estancias_accum$richness, 2))
  )
)

kable(accum_summary,
      caption = "Species Accumulation Curve Analysis",
      col.names = c("Location", "Observed Richness", "Total Richness", 
                    "Completeness (%)", "Slope (last interval)")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)
Species Accumulation Curve Analysis
Location Observed Richness Total Richness Completeness (%) Slope (last interval)
Collores 41 41 100 1.61
Estancias 45 45 100 2.58
cat("\nInterpretation:\n")
## 
## Interpretation:
cat("- Completeness near 100% suggests adequate sampling\n")
## - Completeness near 100% suggests adequate sampling
cat("- Slope near 0 suggests curve is approaching asymptote\n")
## - Slope near 0 suggests curve is approaching asymptote
cat("- Positive slope suggests more sampling would reveal more taxa\n")
## - Positive slope suggests more sampling would reveal more taxa

Multivariate Analysis

NMDS Ordination

# Perform NMDS on combined data
set.seed(123)
nmds <- metaMDS(combined_matrix, distance = "bray", k = 2, trymax = 100)
## Wisconsin double standardization
## Run 0 stress 8.9739e-05 
## Run 1 stress 9.247582e-05 
## ... Procrustes: rmse 0.000113254  max resid 0.0002050938 
## ... Similar to previous best
## Run 2 stress 8.920808e-05 
## ... New best solution
## ... Procrustes: rmse 0.0001110418  max resid 0.0003101921 
## ... Similar to previous best
## Run 3 stress 8.792056e-05 
## ... New best solution
## ... Procrustes: rmse 9.446217e-05  max resid 0.0001788452 
## ... Similar to previous best
## Run 4 stress 6.73586e-05 
## ... New best solution
## ... Procrustes: rmse 5.550749e-05  max resid 0.000103968 
## ... Similar to previous best
## Run 5 stress 9.074792e-05 
## ... Procrustes: rmse 0.0001090724  max resid 0.0002344792 
## ... Similar to previous best
## Run 6 stress 8.943947e-05 
## ... Procrustes: rmse 8.640841e-05  max resid 0.0001484969 
## ... Similar to previous best
## Run 7 stress 8.7144e-05 
## ... Procrustes: rmse 8.137084e-05  max resid 0.0002306804 
## ... Similar to previous best
## Run 8 stress 7.075302e-05 
## ... Procrustes: rmse 7.797697e-05  max resid 0.0001990423 
## ... Similar to previous best
## Run 9 stress 8.739171e-05 
## ... Procrustes: rmse 0.0001183726  max resid 0.0002027679 
## ... Similar to previous best
## Run 10 stress 8.07646e-05 
## ... Procrustes: rmse 6.293758e-05  max resid 0.000143062 
## ... Similar to previous best
## Run 11 stress 7.392939e-05 
## ... Procrustes: rmse 0.0001034971  max resid 0.0002137053 
## ... Similar to previous best
## Run 12 stress 9.237838e-05 
## ... Procrustes: rmse 9.960249e-05  max resid 0.0002060827 
## ... Similar to previous best
## Run 13 stress 8.469947e-05 
## ... Procrustes: rmse 6.249446e-05  max resid 0.0001534326 
## ... Similar to previous best
## Run 14 stress 9.002e-05 
## ... Procrustes: rmse 9.759811e-05  max resid 0.0001915827 
## ... Similar to previous best
## Run 15 stress 9.900744e-05 
## ... Procrustes: rmse 0.0001244791  max resid 0.0002304147 
## ... Similar to previous best
## Run 16 stress 9.530368e-05 
## ... Procrustes: rmse 0.0001209258  max resid 0.0002173887 
## ... Similar to previous best
## Run 17 stress 7.04336e-05 
## ... Procrustes: rmse 5.020936e-05  max resid 0.0001236886 
## ... Similar to previous best
## Run 18 stress 8.585764e-05 
## ... Procrustes: rmse 7.513399e-05  max resid 0.0001773248 
## ... Similar to previous best
## Run 19 stress 9.227046e-05 
## ... Procrustes: rmse 7.635758e-05  max resid 0.0001481881 
## ... Similar to previous best
## Run 20 stress 9.852011e-05 
## ... Procrustes: rmse 6.876544e-05  max resid 0.0001355137 
## ... Similar to previous best
## *** Best solution repeated 17 times
# Extract scores
nmds_scores <- as.data.frame(scores(nmds, display = "sites"))
nmds_scores$Sample <- rownames(nmds_scores)
nmds_scores$Location <- location_factor

# Plot NMDS
ggplot(nmds_scores, aes(x = NMDS1, y = NMDS2, color = Location, shape = Location)) +
  geom_point(size = 4, alpha = 0.7) +
  stat_ellipse(aes(group = Location), level = 0.95, linetype = 2) +
  scale_color_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
  theme_minimal() +
  labs(title = paste("NMDS Ordination (Stress =", round(nmds$stress, 3), ")"),
       subtitle = "Based on Bray-Curtis dissimilarity; ellipses show 95% confidence",
       x = "NMDS1",
       y = "NMDS2") +
  theme(legend.position = "top")

cat("\nNMDS Stress value:", round(nmds$stress, 3), "\n")
## 
## NMDS Stress value: 0
cat("Stress interpretation:\n")
## Stress interpretation:
cat("  < 0.05: Excellent\n")
##   < 0.05: Excellent
cat("  < 0.10: Good\n")
##   < 0.10: Good
cat("  < 0.20: Acceptable\n")
##   < 0.20: Acceptable
cat("  > 0.20: Poor (use with caution)\n")
##   > 0.20: Poor (use with caution)

Hierarchical Clustering

# Calculate Bray-Curtis dissimilarity
dist_matrix <- vegdist(combined_matrix, method = "bray")

# Hierarchical clustering
cluster <- hclust(dist_matrix, method = "average")

# Plot dendrogram with location colors
plot(cluster,
     main = "Hierarchical Clustering of All Samples\n(Bray-Curtis Dissimilarity, UPGMA)",
     xlab = "Sample",
     ylab = "Bray-Curtis Dissimilarity",
     hang = -1,
     cex = 0.7)

# Color labels by location
location_colors <- ifelse(location_factor == "Collores", "#E69F00", "#56B4E9")
library(dendextend)
dend <- as.dendrogram(cluster)
labels_colors(dend) <- location_colors[order.dendrogram(dend)]
plot(dend, main = "Hierarchical Clustering (Colored by Location)")
legend("topright", legend = c("Collores", "Estancias"), 
       col = c("#E69F00", "#56B4E9"), pch = 15, cex = 0.8)

Beta Diversity Comparison

# Calculate within-location beta diversity
collores_beta <- betadiver(collores_matrix, method = "w")
estancias_beta <- betadiver(estancias_matrix, method = "w")

# Summary
beta_summary <- data.frame(
  Location = c("Collores", "Estancias"),
  Mean_Beta = c(round(mean(collores_beta), 3), round(mean(estancias_beta), 3)),
  SD_Beta = c(round(sd(collores_beta), 3), round(sd(estancias_beta), 3)),
  Min_Beta = c(round(min(collores_beta), 3), round(min(estancias_beta), 3)),
  Max_Beta = c(round(max(collores_beta), 3), round(max(estancias_beta), 3))
)

kable(beta_summary,
      caption = "Within-Location Beta Diversity (Whittaker's β)",
      col.names = c("Location", "Mean", "SD", "Min", "Max")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)
Within-Location Beta Diversity (Whittaker’s β)
Location Mean SD Min Max
Collores 0.780 0.186 0.333 1
Estancias 0.766 0.199 0.429 1
# Test for difference
beta_test <- wilcox.test(collores_beta, estancias_beta)
cat("\nWilcoxon test for beta diversity difference:\n")
## 
## Wilcoxon test for beta diversity difference:
cat("W =", beta_test$statistic, ", p-value =", round(beta_test$p.value, 4), "\n")
## W = 2282.5 , p-value = 0.6314

Summary and Conclusions

Key Findings Summary

summary_results <- data.frame(
  Metric = c(
    "Number of samples",
    "Gamma diversity (total taxa)",
    "Mean alpha diversity (taxa/sample)",
    "Beta diversity (multiplicative)",
    "Total spore count",
    "Mean spore abundance/sample",
    "Pooled Shannon diversity",
    "Mean Shannon diversity/sample",
    "Pooled Simpson diversity",
    "Pooled evenness",
    "Most abundant unique taxon",
    "Number of unique taxa"
  ),
  Collores = c(
    nrow(collores_matrix),
    length(collores_pooled),
    round(mean(collores_richness), 2),
    round(length(collores_pooled) / mean(collores_richness), 2),
    sum(collores_matrix),
    round(mean(collores_abundance), 2),
    round(diversity(collores_pooled, index = "shannon"), 3),
    round(mean(collores_shannon), 3),
    round(diversity(collores_pooled, index = "simpson"), 3),
    round(diversity(collores_pooled, index = "shannon") / log(length(collores_pooled)), 3),
    names(collores_pooled)[1],
    length(collores_unique)
  ),
  Estancias = c(
    nrow(estancias_matrix),
    length(estancias_pooled),
    round(mean(estancias_richness), 2),
    round(length(estancias_pooled) / mean(estancias_richness), 2),
    sum(estancias_matrix),
    round(mean(estancias_abundance), 2),
    round(diversity(estancias_pooled, index = "shannon"), 3),
    round(mean(estancias_shannon), 3),
    round(diversity(estancias_pooled, index = "simpson"), 3),
    round(diversity(estancias_pooled, index = "shannon") / log(length(estancias_pooled)), 3),
    names(estancias_pooled)[1],
    length(estancias_unique)
  )
)

kable(summary_results,
      caption = "Comprehensive Summary: Collores vs. Estancias") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)
Comprehensive Summary: Collores vs. Estancias
Metric Collores Estancias
Number of samples 12 12
Gamma diversity (total taxa) 41 45
Mean alpha diversity (taxa/sample) 7.75 8.08
Beta diversity (multiplicative) 5.29 5.57
Total spore count 381 283
Mean spore abundance/sample 31.75 23.58
Pooled Shannon diversity 3.112 2.827
Mean Shannon diversity/sample 1.447 1.652
Pooled Simpson diversity 0.936 0.897
Pooled evenness 0.838 0.743
Most abundant unique taxon Arthrinium/Chaetomium Aspergillus/Penicillium
Number of unique taxa 28 32

Conclusions

Based on the comparative biodiversity analysis of fungal spore communities:

1. Overall Diversity Patterns

  • Collores has a gamma diversity of 41 taxa with pooled Shannon index of 3.112
  • Estancias has a gamma diversity of 45 taxa with pooled Shannon index of 2.827

2. Statistical Differences

  • Richness comparison: Not significantly different (p = 0.6634)
  • Shannon diversity: Not significantly different (p = 0.4428)
  • Community composition (PERMANOVA): Significantly different (p = 0.001)

3. Species Composition

  • 13 taxa are shared between locations (31.7% of Collores, 28.9% of Estancias)
  • 28 taxa unique to Collores
  • 32 taxa unique to Estancias

4. Beta Diversity

  • Within-location turnover is similar between locations
  • Collores mean β = 0.78, Estancias mean β = 0.766

5. Sampling Adequacy

  • Collores: 100% completeness
  • Estancias: 100% completeness

6. Ecological Interpretation

The two locations show distinct fungal spore communities, with limited overlap in species composition. The differences may reflect variations in environmental conditions, substrate availability, or spatial heterogeneity between locations.

Session Information

sessionInfo()
## R version 4.5.0 (2025-04-11)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/Puerto_Rico
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] dendextend_1.19.1 ggpubr_0.6.0      kableExtra_1.4.0  knitr_1.50       
##  [5] reshape2_1.4.4    lubridate_1.9.4   forcats_1.0.0     stringr_1.5.1    
##  [9] dplyr_1.1.4       purrr_1.0.4       readr_2.1.5       tidyr_1.3.1      
## [13] tibble_3.2.1      ggplot2_3.5.2     tidyverse_2.0.0   vegan_2.7-2      
## [17] permute_0.9-8    
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6      xfun_0.52         bslib_0.9.0       rstatix_0.7.2    
##  [5] lattice_0.22-6    tzdb_0.5.0        vctrs_0.6.5       tools_4.5.0      
##  [9] generics_0.1.3    parallel_4.5.0    cluster_2.1.8.1   pkgconfig_2.0.3  
## [13] Matrix_1.7-3      lifecycle_1.0.4   farver_2.1.2      compiler_4.5.0   
## [17] textshaping_1.0.0 munsell_0.5.1     carData_3.0-5     htmltools_0.5.8.1
## [21] sass_0.4.10       yaml_2.3.10       Formula_1.2-5     pillar_1.10.2    
## [25] car_3.1-3         jquerylib_0.1.4   MASS_7.3-65       cachem_1.1.0     
## [29] viridis_0.6.5     abind_1.4-8       nlme_3.1-168      tidyselect_1.2.1 
## [33] digest_0.6.37     stringi_1.8.7     labeling_0.4.3    splines_4.5.0    
## [37] cowplot_1.1.3     fastmap_1.2.0     grid_4.5.0        colorspace_2.1-1 
## [41] cli_3.6.4         magrittr_2.0.3    broom_1.0.8       withr_3.0.2      
## [45] scales_1.3.0      backports_1.5.0   timechange_0.3.0  rmarkdown_2.29   
## [49] gridExtra_2.3     ggsignif_0.6.4    ragg_1.4.0        hms_1.1.3        
## [53] evaluate_1.0.3    viridisLite_0.4.2 mgcv_1.9-1        rlang_1.1.6      
## [57] Rcpp_1.0.14       glue_1.8.0        xml2_1.3.8        svglite_2.2.1    
## [61] rstudioapi_0.17.1 jsonlite_2.0.0    R6_2.6.1          plyr_1.8.9       
## [65] systemfonts_1.3.1

Export Results

# Save all diversity indices
write.csv(all_diversity, "diversity_indices_all_samples.csv", row.names = FALSE)

# Save pooled diversity
write.csv(pooled_indices, "pooled_diversity_by_location.csv", row.names = FALSE)

# Save statistical test results
write.csv(test_results, "statistical_tests_results.csv", row.names = FALSE)

# Save species composition
write.csv(comparison_df, "top_taxa_comparison.csv", row.names = FALSE)

# Save shared/unique taxa
write.csv(taxa_summary, "shared_unique_taxa_summary.csv", row.names = FALSE)

# Save NMDS scores
write.csv(nmds_scores, "nmds_scores_all_samples.csv", row.names = FALSE)

cat("All results exported successfully!\n")