Introduction

This document presents a comprehensive comparative biodiversity analysis of fungal spore communities from two locations: Collores and Estancias. The analysis includes diversity indices, community composition, statistical comparisons, and multivariate approaches to understand differences and similarities between the two locations.

Load Required Libraries

# Install packages if needed
# install.packages(c("vegan", "tidyverse", "reshape2", "knitr", "kableExtra", "ggpubr"))

library(vegan)       # For biodiversity analysis
library(tidyverse)   # For data manipulation and visualization
library(reshape2)    # For data reshaping
library(knitr)       # For nice tables
library(kableExtra)  # For enhanced tables
library(ggpubr)      # For statistical comparisons and publication-ready plots

Data Import and Preparation

# Read the data
collores_data <- read.csv("collores_franja.csv", check.names = FALSE, row.names = 1)
estancias_data <- read.csv("estancias_franja.csv", check.names = FALSE, row.names = 1)

# Replace NA values with 0
collores_data[is.na(collores_data)] <- 0
estancias_data[is.na(estancias_data)] <- 0

# Convert to numeric matrices
collores_matrix <- as.matrix(collores_data)
estancias_matrix <- as.matrix(estancias_data)

# Display data dimensions
cat("COLLORES Dataset:\n")

## COLLORES Dataset:

cat("  Number of samples:", nrow(collores_matrix), "\n")

##   Number of samples: 12

cat("  Number of taxa:", ncol(collores_matrix), "\n")

##   Number of taxa: 41

cat("  Total spore count:", sum(collores_matrix), "\n\n")

##   Total spore count: 381

cat("ESTANCIAS Dataset:\n")

## ESTANCIAS Dataset:

cat("  Number of samples:", nrow(estancias_matrix), "\n")

##   Number of samples: 12

cat("  Number of taxa:", ncol(estancias_matrix), "\n")

##   Number of taxa: 45

cat("  Total spore count:", sum(estancias_matrix), "\n")

##   Total spore count: 283

Data Overview

overview_df <- data.frame(
  Location = c("Collores", "Estancias"),
  Samples = c(nrow(collores_matrix), nrow(estancias_matrix)),
  Taxa = c(ncol(collores_matrix), ncol(estancias_matrix)),
  Total_Spores = c(sum(collores_matrix), sum(estancias_matrix)),
  Mean_Spores_per_Sample = c(round(mean(rowSums(collores_matrix)), 2),
                              round(mean(rowSums(estancias_matrix)), 2))
)

kable(overview_df,
      caption = "Overview of Datasets by Location",
      col.names = c("Location", "# Samples", "# Taxa", "Total Spores", "Mean Spores/Sample")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)

Overview of Datasets by Location
Location	# Samples	# Taxa	Total Spores	Mean Spores/Sample
Collores	12	41	381	31.75
Estancias	12	45	283	23.58

Basic Statistics by Location

Summary Statistics

# Calculate basic statistics for each location
collores_richness <- rowSums(collores_matrix > 0)
collores_abundance <- rowSums(collores_matrix)

estancias_richness <- rowSums(estancias_matrix > 0)
estancias_abundance <- rowSums(estancias_matrix)

# Create summary table
stats_summary <- data.frame(
  Metric = c("Mean Richness", "Median Richness", "SD Richness", "Min Richness", "Max Richness",
             "Mean Abundance", "Median Abundance", "SD Abundance", "Min Abundance", "Max Abundance"),
  Collores = c(
    round(mean(collores_richness), 2),
    median(collores_richness),
    round(sd(collores_richness), 2),
    min(collores_richness),
    max(collores_richness),
    round(mean(collores_abundance), 2),
    median(collores_abundance),
    round(sd(collores_abundance), 2),
    min(collores_abundance),
    max(collores_abundance)
  ),
  Estancias = c(
    round(mean(estancias_richness), 2),
    median(estancias_richness),
    round(sd(estancias_richness), 2),
    min(estancias_richness),
    max(estancias_richness),
    round(mean(estancias_abundance), 2),
    median(estancias_abundance),
    round(sd(estancias_abundance), 2),
    min(estancias_abundance),
    max(estancias_abundance)
  )
)

kable(stats_summary,
      caption = "Summary Statistics by Location") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)

Summary Statistics by Location
Metric	Collores	Estancias
Mean Richness	7.75	8.08
Median Richness	8.00	9.00
SD Richness	3.41	3.29
Min Richness	2.00	3.00
Max Richness	14.00	12.00
Mean Abundance	31.75	23.58
Median Abundance	29.50	23.50
SD Abundance	17.20	15.68
Min Abundance	3.00	4.00
Max Abundance	61.00	57.00

Boxplots: Richness and Abundance

# Prepare data for boxplots
richness_df <- data.frame(
  Location = c(rep("Collores", length(collores_richness)),
               rep("Estancias", length(estancias_richness))),
  Richness = c(collores_richness, estancias_richness)
)

abundance_df <- data.frame(
  Location = c(rep("Collores", length(collores_abundance)),
               rep("Estancias", length(estancias_abundance))),
  Abundance = c(collores_abundance, estancias_abundance)
)

# Richness boxplot
p1 <- ggplot(richness_df, aes(x = Location, y = Richness, fill = Location)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 16, outlier.size = 2) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
  stat_compare_means(method = "wilcox.test", label = "p.format", label.x = 1.5, size = 6) +
  scale_fill_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
  theme_minimal() +
  labs(title = "Species Richness by Location",
       y = "Number of Taxa per Sample",
       x = "") +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 16),      # Title size
    axis.title.y = element_text(size = 14),    # Y-axis label size
    axis.text.x = element_text(size = 16),     # X-axis text (Collores, Estancias)
    axis.text.y = element_text(size = 12))

# Abundance boxplot
p2 <- ggplot(abundance_df, aes(x = Location, y = Abundance, fill = Location)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 16, outlier.size = 2) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
  stat_compare_means(method = "wilcox.test", label = "p.format", label.x = 1.5, size = 6) +
  scale_fill_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
  theme_minimal() +
  labs(title = "Spore Abundance by Location",
       y = "Total Spores per Sample",
       x = "", size = 6) +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 16),      # Title size
    axis.title.y = element_text(size = 14),    # Y-axis label size
    axis.text.x = element_text(size = 16),     # X-axis text (Collores, Estancias)
    axis.text.y = element_text(size = 12))

# Combine plots
ggarrange(p1, p2, ncol = 2)

# save graphs
ggsave("boxplots_richness_abundance.png", width = 12, height = 6)

Pooled Biodiversity Indices by Location

# Calculate pooled diversity for each location
collores_pooled <- colSums(collores_matrix)
collores_pooled <- collores_pooled[collores_pooled > 0]

estancias_pooled <- colSums(estancias_matrix)
estancias_pooled <- estancias_pooled[estancias_pooled > 0]

# Calculate indices
pooled_indices <- data.frame(
  Index = c("Gamma Diversity (Total Richness)",
            "Total Abundance",
            "Shannon Index (H')",
            "Simpson Index (D)",
            "Inverse Simpson (1/D)",
            "Pielou's Evenness (J')"),
  Collores = c(
    length(collores_pooled),
    sum(collores_pooled),
    round(diversity(collores_pooled, index = "shannon"), 3),
    round(diversity(collores_pooled, index = "simpson"), 3),
    round(diversity(collores_pooled, index = "invsimpson"), 3),
    round(diversity(collores_pooled, index = "shannon") / log(length(collores_pooled)), 3)
  ),
  Estancias = c(
    length(estancias_pooled),
    sum(estancias_pooled),
    round(diversity(estancias_pooled, index = "shannon"), 3),
    round(diversity(estancias_pooled, index = "simpson"), 3),
    round(diversity(estancias_pooled, index = "invsimpson"), 3),
    round(diversity(estancias_pooled, index = "shannon") / log(length(estancias_pooled)), 3)
  )
)

kable(pooled_indices,
      caption = "Pooled Biodiversity Indices by Location") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)

Pooled Biodiversity Indices by Location
Index	Collores	Estancias
Gamma Diversity (Total Richness)	41.000	45.000
Total Abundance	381.000	283.000
Shannon Index (H’)	3.112	2.827
Simpson Index (D)	0.936	0.897
Inverse Simpson (1/D)	15.627	9.730
Pielou’s Evenness (J’)	0.838	0.743

Visualization: Pooled Diversity Comparison

# Prepare data for plotting (exclude richness and abundance for scale)
pooled_plot_df <- data.frame(
  Location = rep(c("Collores", "Estancias"), each = 3),
  Index = rep(c("Shannon", "Simpson", "Evenness"), 2),
  Value = c(
    diversity(collores_pooled, index = "shannon"),
    diversity(collores_pooled, index = "simpson"),
    diversity(collores_pooled, index = "shannon") / log(length(collores_pooled)),
    diversity(estancias_pooled, index = "shannon"),
    diversity(estancias_pooled, index = "simpson"),
    diversity(estancias_pooled, index = "shannon") / log(length(estancias_pooled))
  )
)

ggplot(pooled_plot_df, aes(x = Index, y = Value, fill = Location)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  scale_fill_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
  theme_minimal() +
  labs(title = "Pooled Diversity Indices Comparison",
       x = "Diversity Index",
       y = "Value",
       fill = "Location") +
  theme(legend.position = "top")

Diversity Indices by Sample

Calculate Indices for Each Sample

# Collores
collores_shannon <- diversity(collores_matrix, index = "shannon")
collores_simpson <- diversity(collores_matrix, index = "simpson")
collores_invsimpson <- diversity(collores_matrix, index = "invsimpson")
collores_evenness <- collores_shannon / log(collores_richness)
collores_evenness[is.infinite(collores_evenness) | is.nan(collores_evenness)] <- NA

collores_div_df <- data.frame(
  Sample = rownames(collores_matrix),
  Location = "Collores",
  Richness = collores_richness,
  Abundance = collores_abundance,
  Shannon = round(collores_shannon, 3),
  Simpson = round(collores_simpson, 3),
  InvSimpson = round(collores_invsimpson, 3),
  Evenness = round(collores_evenness, 3)
)

# Estancias
estancias_shannon <- diversity(estancias_matrix, index = "shannon")
estancias_simpson <- diversity(estancias_matrix, index = "simpson")
estancias_invsimpson <- diversity(estancias_matrix, index = "invsimpson")
estancias_evenness <- estancias_shannon / log(estancias_richness)
estancias_evenness[is.infinite(estancias_evenness) | is.nan(estancias_evenness)] <- NA

estancias_div_df <- data.frame(
  Sample = rownames(estancias_matrix),
  Location = "Estancias",
  Richness = estancias_richness,
  Abundance = estancias_abundance,
  Shannon = round(estancias_shannon, 3),
  Simpson = round(estancias_simpson, 3),
  InvSimpson = round(estancias_invsimpson, 3),
  Evenness = round(estancias_evenness, 3)
)

# Combine
all_diversity <- rbind(collores_div_df, estancias_div_df)

kable(all_diversity,
      caption = "Diversity Indices by Sample and Location") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE) %>%
  scroll_box(height = "400px")

Diversity Indices by Sample and Location
	Sample	Location	Richness	Abundance	Shannon	Simpson	InvSimpson	Evenness
1	1	Collores	2	3	0.637	0.444	1.800	0.918
2	2	Collores	4	21	0.685	0.331	1.495	0.494
3	3	Collores	5	30	0.578	0.244	1.324	0.359
4	4	Collores	11	35	2.100	0.846	6.481	0.876
5	5	Collores	10	48	1.741	0.740	3.853	0.756
6	6	Collores	5	10	1.471	0.740	3.846	0.914
7	7	Collores	8	34	1.682	0.763	4.219	0.809
8	8	Collores	7	27	1.765	0.801	5.028	0.907
9	9	Collores	14	57	1.964	0.773	4.408	0.744
10	10	Collores	11	29	2.055	0.832	5.965	0.857
11	11	Collores	8	26	1.740	0.778	4.507	0.837
12	12	Collores	8	61	0.946	0.414	1.708	0.455
13	1	Estancias	4	4	1.386	0.750	4.000	1.000
21	2	Estancias	3	5	0.950	0.560	2.273	0.865
31	3	Estancias	3	6	1.011	0.611	2.571	0.921
41	4	Estancias	9	13	2.032	0.840	6.259	0.925
51	5	Estancias	6	23	1.394	0.669	3.023	0.778
61	6	Estancias	9	24	1.600	0.694	3.273	0.728
71	7	Estancias	11	39	1.994	0.828	5.828	0.832
81	8	Estancias	9	28	1.900	0.819	5.521	0.865
91	9	Estancias	9	19	1.941	0.820	5.554	0.883
101	10	Estancias	12	33	2.016	0.812	5.312	0.811
111	11	Estancias	10	32	1.845	0.764	4.231	0.801
121	12	Estancias	12	57	1.755	0.734	3.765	0.706

Statistical Summary by Location

# Calculate summary statistics for diversity indices
diversity_summary <- all_diversity %>%
  group_by(Location) %>%
  summarise(
    Mean_Shannon = round(mean(Shannon, na.rm = TRUE), 3),
    SD_Shannon = round(sd(Shannon, na.rm = TRUE), 3),
    Mean_Simpson = round(mean(Simpson, na.rm = TRUE), 3),
    SD_Simpson = round(sd(Simpson, na.rm = TRUE), 3),
    Mean_Evenness = round(mean(Evenness, na.rm = TRUE), 3),
    SD_Evenness = round(sd(Evenness, na.rm = TRUE), 3)
  )

kable(diversity_summary,
      caption = "Mean Diversity Indices by Location (± SD)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)

Mean Diversity Indices by Location (± SD)
Location	Mean_Shannon	SD_Shannon	Mean_Simpson	SD_Simpson	Mean_Evenness	SD_Evenness
Collores	1.447	0.575	0.642	0.217	0.744	0.196
Estancias	1.652	0.385	0.742	0.092	0.843	0.085

Diversity Indices Comparison Plots

# Shannon diversity
p1 <- ggplot(all_diversity, aes(x = Location, y = Shannon, fill = Location)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 16) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
  stat_compare_means(method = "wilcox.test", label = "p.format") +
  scale_fill_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
  theme_minimal() +
  labs(title = "Shannon Diversity Index",
       y = "Shannon H'") +
  theme(legend.position = "none")

# Simpson diversity
p2 <- ggplot(all_diversity, aes(x = Location, y = Simpson, fill = Location)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 16) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
  stat_compare_means(method = "wilcox.test", label = "p.format") +
  scale_fill_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
  theme_minimal() +
  labs(title = "Simpson Diversity Index",
       y = "Simpson D") +
  theme(legend.position = "none")

# Evenness
p3 <- ggplot(all_diversity, aes(x = Location, y = Evenness, fill = Location)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 16) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
  stat_compare_means(method = "wilcox.test", label = "p.format") +
  scale_fill_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
  theme_minimal() +
  labs(title = "Pielou's Evenness",
       y = "Evenness J'") +
  theme(legend.position = "none")

# Combine plots
ggarrange(p1, p2, p3, ncol = 1, nrow = 3)

Statistical Tests for Differences Between Locations

Wilcoxon Rank-Sum Tests

# Perform Wilcoxon tests for each diversity metric
richness_test <- wilcox.test(collores_richness, estancias_richness)
abundance_test <- wilcox.test(collores_abundance, estancias_abundance)
shannon_test <- wilcox.test(collores_shannon, estancias_shannon)
simpson_test <- wilcox.test(collores_simpson, estancias_simpson)
evenness_test <- wilcox.test(collores_evenness, estancias_evenness, na.rm = TRUE)

# Create results table
test_results <- data.frame(
  Metric = c("Richness", "Abundance", "Shannon", "Simpson", "Evenness"),
  W_statistic = c(richness_test$statistic, abundance_test$statistic,
                  shannon_test$statistic, simpson_test$statistic,
                  evenness_test$statistic),
  P_value = c(richness_test$p.value, abundance_test$p.value,
              shannon_test$p.value, simpson_test$p.value,
              evenness_test$p.value),
  Significance = c(
    ifelse(richness_test$p.value < 0.001, "***",
           ifelse(richness_test$p.value < 0.01, "**",
                  ifelse(richness_test$p.value < 0.05, "*", "ns"))),
    ifelse(abundance_test$p.value < 0.001, "***",
           ifelse(abundance_test$p.value < 0.01, "**",
                  ifelse(abundance_test$p.value < 0.05, "*", "ns"))),
    ifelse(shannon_test$p.value < 0.001, "***",
           ifelse(shannon_test$p.value < 0.01, "**",
                  ifelse(shannon_test$p.value < 0.05, "*", "ns"))),
    ifelse(simpson_test$p.value < 0.001, "***",
           ifelse(simpson_test$p.value < 0.01, "**",
                  ifelse(simpson_test$p.value < 0.05, "*", "ns"))),
    ifelse(evenness_test$p.value < 0.001, "***",
           ifelse(evenness_test$p.value < 0.01, "**",
                  ifelse(evenness_test$p.value < 0.05, "*", "ns")))
  )
)

kable(test_results,
      digits = 4,
      caption = "Wilcoxon Rank-Sum Tests: Collores vs. Estancias (*** p<0.001, ** p<0.01, * p<0.05, ns = not significant)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)

Wilcoxon Rank-Sum Tests: Collores vs. Estancias (*** p
Metric	W_statistic	P_value	Significance
Richness	64.0	0.6634	ns
Abundance	92.5	0.2481	ns
Shannon	58.0	0.4428	ns
Simpson	60.0	0.5137	ns
Evenness	55.0	0.3474	ns

PERMANOVA: Community Composition Differences

# Combine matrices for PERMANOVA
# Need to match taxa between datasets
all_taxa <- unique(c(colnames(collores_matrix), colnames(estancias_matrix)))

# Create standardized matrices with all taxa
collores_full <- matrix(0, nrow = nrow(collores_matrix), ncol = length(all_taxa))
colnames(collores_full) <- all_taxa
rownames(collores_full) <- rownames(collores_matrix)
collores_full[, colnames(collores_matrix)] <- collores_matrix

estancias_full <- matrix(0, nrow = nrow(estancias_matrix), ncol = length(all_taxa))
colnames(estancias_full) <- all_taxa
rownames(estancias_full) <- rownames(estancias_matrix)
estancias_full[, colnames(estancias_matrix)] <- estancias_matrix

# Combine
combined_matrix <- rbind(collores_full, estancias_full)
location_factor <- factor(c(rep("Collores", nrow(collores_full)),
                             rep("Estancias", nrow(estancias_full))))

# Perform PERMANOVA
set.seed(123)
permanova_result <- adonis2(combined_matrix ~ location_factor,
                            method = "bray",
                            permutations = 999)

kable(permanova_result,
      digits = 4,
      caption = "PERMANOVA: Testing for Differences in Community Composition Between Locations") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)

PERMANOVA: Testing for Differences in Community Composition Between Locations
	Df	SumOfSqs	R2	F	Pr(>F)
Model	1	0.9428	0.0996	2.4346	0.001
Residual	22	8.5195	0.9004	NA	NA
Total	23	9.4623	1.0000	NA	NA

ANOSIM: Analysis of Similarities

# Perform ANOSIM
set.seed(123)
anosim_result <- anosim(combined_matrix, location_factor,
                        distance = "bray",
                        permutations = 999)

cat("ANOSIM Results:\n")

## ANOSIM Results:

cat("R statistic:", round(anosim_result$statistic, 4), "\n")

## R statistic: 0.1967

cat("p-value:", round(anosim_result$signif, 4), "\n\n")

## p-value: 0.001

cat("Interpretation:\n")

## Interpretation:

cat("  R close to 1: Locations are well separated\n")

##   R close to 1: Locations are well separated

cat("  R close to 0: Locations are not separated\n")

##   R close to 0: Locations are not separated

cat("  R < 0: Within-location variation > between-location variation\n")

##   R < 0: Within-location variation > between-location variation

Species Composition Analysis

Most Abundant Taxa by Location

# Top 15 taxa for each location
collores_top <- head(sort(collores_pooled, decreasing = TRUE), 15)
estancias_top <- head(sort(estancias_pooled, decreasing = TRUE), 15)

# Create comparison table
max_length <- max(length(collores_top), length(estancias_top))

comparison_df <- data.frame(
  Rank = 1:max_length,
  Collores_Taxon = c(names(collores_top), rep("", max_length - length(collores_top))),
  Collores_Abundance = c(as.numeric(collores_top), rep(NA, max_length - length(collores_top))),
  Estancias_Taxon = c(names(estancias_top), rep("", max_length - length(estancias_top))),
  Estancias_Abundance = c(as.numeric(estancias_top), rep(NA, max_length - length(estancias_top)))
)

kable(comparison_df,
      caption = "Top 15 Most Abundant Taxa by Location",
      col.names = c("Rank", "Taxon", "Abundance", "Taxon", "Abundance")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE) %>%
  add_header_above(c(" " = 1, "Collores" = 2, "Estancias" = 2))

Top 15 Most Abundant Taxa by Location
	Collores		Estancias
Rank	Taxon	Abundance	Taxon	Abundance
1	Diatrypella	49	Xylariaceae ascopores	66
2	m39	46	m15	39
3	Leptosphaeria	37	Leptosphaeria	28
4	m26	26	Aspergillus/Penicillium	23
5	m25	23	Ganoderma	17
6	Trematosphaeria pertusa/Venturia	23	Cercophora/Podospora	16
7	Arthrinium/Chaetomium	22	Bipolaris/Drechslera/Helminthosporium/Exserohilum	12
8	Bipolaris/Drechslera/Helminthosporium/Exserohilum	15	Scoleco-ascospore	12
9	Delitschia/Amphisphaeria	15	m18	7
10	m13	10	m10	5
11	m31	10	Drechslera	4
12	Xylariaceae ascopores	10	m21	4
13	m15	8	m7	4
14	Scoleco-ascopores	8	Trematosphaeria pertusa/Venturia	4
15	m38	7	Fusarium	3

Horizontal Bar Plots: Top Taxa

# Prepare data for Collores
collores_bar_df <- data.frame(
  Taxon = factor(names(collores_top), levels = rev(names(collores_top))),
  Abundance = as.numeric(collores_top),
  Location = "Collores"
)

# Prepare data for Estancias
estancias_bar_df <- data.frame(
  Taxon = factor(names(estancias_top), levels = rev(names(estancias_top))),
  Abundance = as.numeric(estancias_top),
  Location = "Estancias"
)

# Plot for Collores
p1 <- ggplot(collores_bar_df, aes(x = Taxon, y = Abundance)) +
  geom_bar(stat = "identity", fill = "#E69F00", alpha = 0.8) +
  coord_flip() +
  theme_minimal() +
  labs(title = "Collores: Top 15 Most Abundant Taxa",
       x = "",
       y = "Total Abundance") +
  theme(axis.text.y = element_text(size = 9))

# Plot for Estancias
p2 <- ggplot(estancias_bar_df, aes(x = Taxon, y = Abundance)) +
  geom_bar(stat = "identity", fill = "#56B4E9", alpha = 0.8) +
  coord_flip() +
  theme_minimal() +
  labs(title = "Estancias: Top 15 Most Abundant Taxa",
       x = "",
       y = "Total Abundance") +
  theme(axis.text.y = element_text(size = 9))

# Combine plots
ggarrange(p1, p2, ncol = 1, nrow = 2)

# save graphs
ggsave("horizontal_barplots_top_taxa.png", width = 10, height = 10)

Rank-Abundance Curves

# Prepare data
collores_rank <- data.frame(
  Rank = 1:length(collores_pooled),
  Abundance = as.numeric(sort(collores_pooled, decreasing = TRUE)),
  Location = "Collores"
)

estancias_rank <- data.frame(
  Rank = 1:length(estancias_pooled),
  Abundance = as.numeric(sort(estancias_pooled, decreasing = TRUE)),
  Location = "Estancias"
)

rank_combined <- rbind(collores_rank, estancias_rank)

# Plot
ggplot(rank_combined, aes(x = Rank, y = Abundance, color = Location, group = Location)) +
  geom_line(size = 1.2) +
  geom_point(size = 2, alpha = 0.6) +
  scale_y_log10() +
  scale_color_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
  theme_minimal() +
  labs(title = "Rank-Abundance Curves by Location",
       x = "Species Rank",
       y = "Abundance (log scale)",
       color = "Location") +
  annotation_logticks(sides = "l") +
  theme(legend.position = "top")

Shared vs. Unique Taxa

# Identify shared and unique taxa
collores_taxa <- names(collores_pooled)
estancias_taxa <- names(estancias_pooled)

shared_taxa <- intersect(collores_taxa, estancias_taxa)
collores_unique <- setdiff(collores_taxa, estancias_taxa)
estancias_unique <- setdiff(estancias_taxa, collores_taxa)

# Create summary
taxa_summary <- data.frame(
  Category = c("Total taxa in Collores",
               "Total taxa in Estancias",
               "Shared taxa",
               "Unique to Collores",
               "Unique to Estancias",
               "% Shared (of Collores)",
               "% Shared (of Estancias)"),
  Count = c(
    length(collores_taxa),
    length(estancias_taxa),
    length(shared_taxa),
    length(collores_unique),
    length(estancias_unique),
    round(length(shared_taxa) / length(collores_taxa) * 100, 1),
    round(length(shared_taxa) / length(estancias_taxa) * 100, 1)
  )
)

kable(taxa_summary,
      caption = "Shared and Unique Taxa Between Locations",
      col.names = c("Category", "Value")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)

Shared and Unique Taxa Between Locations
Category	Value
Total taxa in Collores	41.0
Total taxa in Estancias	45.0
Shared taxa	13.0
Unique to Collores	28.0
Unique to Estancias	32.0
% Shared (of Collores)	31.7
% Shared (of Estancias)	28.9

Venn Diagram Data

cat("Shared taxa (", length(shared_taxa), "):\n")

## Shared taxa ( 13 ):

cat(paste(shared_taxa, collapse = ", "), "\n\n")

## Bipolaris/Drechslera/Helminthosporium/Exserohilum, Curvularia, Ganoderma, Leptosphaeria, m10, m13, m14, m15, m23 (cf. Gibberella zeae), m24, Torula, Trematosphaeria pertusa/Venturia, Xylariaceae ascopores

cat("Unique to Collores (", length(collores_unique), "):\n")

## Unique to Collores ( 28 ):

cat(paste(collores_unique, collapse = ", "), "\n\n")

## Arthrinium/Chaetomium, Chaetosphaerella, Delitschia/Amphisphaeria, Diatrypella, Helminthosporium, Kalmusia, Lophiostoma caulium, Lophiostoma/Massarina, m25, m26, m27, m28, m28 (cf. Bertiella), m29, m30, m31, m32, m33, m34, m35, m36, m37, m38, m39, Microsporum gypseum, Pleospora, Rinodina, Scoleco-ascopores

cat("Unique to Estancias (", length(estancias_unique), "):\n")

## Unique to Estancias ( 32 ):

cat(paste(estancias_unique, collapse = ", "), "\n")

## Aspergillus/Penicillium, Cacumisporium capitulatum, Cercophora/Podospora, Cladosporium, Clasterosporium, Drechslera, Fusarium, m1, m11, m12, m16, m17, m18, m19, m2, m20, m21, m22, m3, m4 (cf. Sporidesmium adscendens), m5, m6, m7, m9, Massarina, Nigrospora, Phaeosphaeria fuckelii, Phaeosphaeria oryzae/Leptosphaeria, Phaeosphaeria viridella/Leptosphaeria, Savoryella curvispora, Scoleco-ascospore, Sporidesmium/Distoseptispora

Species Accumulation Curves

# Calculate species accumulation curves
set.seed(123)
collores_accum <- specaccum(collores_matrix, method = "random", permutations = 100)
estancias_accum <- specaccum(estancias_matrix, method = "random", permutations = 100)

# Prepare data for plotting
collores_accum_df <- data.frame(
  Sites = collores_accum$sites,
  Richness = collores_accum$richness,
  SD = collores_accum$sd,
  Location = "Collores"
)

estancias_accum_df <- data.frame(
  Sites = estancias_accum$sites,
  Richness = estancias_accum$richness,
  SD = estancias_accum$sd,
  Location = "Estancias"
)

accum_combined <- rbind(collores_accum_df, estancias_accum_df)

# Plot
ggplot(accum_combined, aes(x = Sites, y = Richness, color = Location, fill = Location)) +
  geom_ribbon(aes(ymin = Richness - SD, ymax = Richness + SD), alpha = 0.2, color = NA) +
  geom_line(size = 1.2) +
  geom_point(size = 2) +
  scale_color_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
  scale_fill_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
  theme_minimal() +
  labs(title = "Species Accumulation Curves by Location",
       x = "Number of Samples",
       y = "Cumulative Number of Taxa",
       color = "Location",
       fill = "Location") +
  theme(legend.position = "top")

# save graph
ggsave("species_accumulation_curves.png", width = 10, height = 6)

Accumulation Curve Analysis

# Calculate sampling completeness
collores_completeness <- (collores_accum$richness[length(collores_accum$richness)] / 
                          length(collores_pooled)) * 100
estancias_completeness <- (estancias_accum$richness[length(estancias_accum$richness)] / 
                           length(estancias_pooled)) * 100

accum_summary <- data.frame(
  Location = c("Collores", "Estancias"),
  Observed_Richness = c(collores_accum$richness[length(collores_accum$richness)],
                        estancias_accum$richness[length(estancias_accum$richness)]),
  Total_Richness = c(length(collores_pooled), length(estancias_pooled)),
  Completeness_Percent = round(c(collores_completeness, estancias_completeness), 1),
  Slope_Last_Point = c(
    diff(tail(collores_accum$richness, 2)),
    diff(tail(estancias_accum$richness, 2))
  )
)

kable(accum_summary,
      caption = "Species Accumulation Curve Analysis",
      col.names = c("Location", "Observed Richness", "Total Richness", 
                    "Completeness (%)", "Slope (last interval)")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)

Species Accumulation Curve Analysis
Location	Observed Richness	Total Richness	Completeness (%)	Slope (last interval)
Collores	41	41	100	1.61
Estancias	45	45	100	2.58

cat("\nInterpretation:\n")

## 
## Interpretation:

cat("- Completeness near 100% suggests adequate sampling\n")

## - Completeness near 100% suggests adequate sampling

cat("- Slope near 0 suggests curve is approaching asymptote\n")

## - Slope near 0 suggests curve is approaching asymptote

cat("- Positive slope suggests more sampling would reveal more taxa\n")

## - Positive slope suggests more sampling would reveal more taxa

Multivariate Analysis

NMDS Ordination

# Perform NMDS on combined data
set.seed(123)
nmds <- metaMDS(combined_matrix, distance = "bray", k = 2, trymax = 100)

## Wisconsin double standardization
## Run 0 stress 8.9739e-05 
## Run 1 stress 9.247582e-05 
## ... Procrustes: rmse 0.000113254  max resid 0.0002050938 
## ... Similar to previous best
## Run 2 stress 8.920808e-05 
## ... New best solution
## ... Procrustes: rmse 0.0001110418  max resid 0.0003101921 
## ... Similar to previous best
## Run 3 stress 8.792056e-05 
## ... New best solution
## ... Procrustes: rmse 9.446217e-05  max resid 0.0001788452 
## ... Similar to previous best
## Run 4 stress 6.73586e-05 
## ... New best solution
## ... Procrustes: rmse 5.550749e-05  max resid 0.000103968 
## ... Similar to previous best
## Run 5 stress 9.074792e-05 
## ... Procrustes: rmse 0.0001090724  max resid 0.0002344792 
## ... Similar to previous best
## Run 6 stress 8.943947e-05 
## ... Procrustes: rmse 8.640841e-05  max resid 0.0001484969 
## ... Similar to previous best
## Run 7 stress 8.7144e-05 
## ... Procrustes: rmse 8.137084e-05  max resid 0.0002306804 
## ... Similar to previous best
## Run 8 stress 7.075302e-05 
## ... Procrustes: rmse 7.797697e-05  max resid 0.0001990423 
## ... Similar to previous best
## Run 9 stress 8.739171e-05 
## ... Procrustes: rmse 0.0001183726  max resid 0.0002027679 
## ... Similar to previous best
## Run 10 stress 8.07646e-05 
## ... Procrustes: rmse 6.293758e-05  max resid 0.000143062 
## ... Similar to previous best
## Run 11 stress 7.392939e-05 
## ... Procrustes: rmse 0.0001034971  max resid 0.0002137053 
## ... Similar to previous best
## Run 12 stress 9.237838e-05 
## ... Procrustes: rmse 9.960249e-05  max resid 0.0002060827 
## ... Similar to previous best
## Run 13 stress 8.469947e-05 
## ... Procrustes: rmse 6.249446e-05  max resid 0.0001534326 
## ... Similar to previous best
## Run 14 stress 9.002e-05 
## ... Procrustes: rmse 9.759811e-05  max resid 0.0001915827 
## ... Similar to previous best
## Run 15 stress 9.900744e-05 
## ... Procrustes: rmse 0.0001244791  max resid 0.0002304147 
## ... Similar to previous best
## Run 16 stress 9.530368e-05 
## ... Procrustes: rmse 0.0001209258  max resid 0.0002173887 
## ... Similar to previous best
## Run 17 stress 7.04336e-05 
## ... Procrustes: rmse 5.020936e-05  max resid 0.0001236886 
## ... Similar to previous best
## Run 18 stress 8.585764e-05 
## ... Procrustes: rmse 7.513399e-05  max resid 0.0001773248 
## ... Similar to previous best
## Run 19 stress 9.227046e-05 
## ... Procrustes: rmse 7.635758e-05  max resid 0.0001481881 
## ... Similar to previous best
## Run 20 stress 9.852011e-05 
## ... Procrustes: rmse 6.876544e-05  max resid 0.0001355137 
## ... Similar to previous best
## *** Best solution repeated 17 times

# Extract scores
nmds_scores <- as.data.frame(scores(nmds, display = "sites"))
nmds_scores$Sample <- rownames(nmds_scores)
nmds_scores$Location <- location_factor

# Plot NMDS
ggplot(nmds_scores, aes(x = NMDS1, y = NMDS2, color = Location, shape = Location)) +
  geom_point(size = 4, alpha = 0.7) +
  stat_ellipse(aes(group = Location), level = 0.95, linetype = 2) +
  scale_color_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
  theme_minimal() +
  labs(title = paste("NMDS Ordination (Stress =", round(nmds$stress, 3), ")"),
       subtitle = "Based on Bray-Curtis dissimilarity; ellipses show 95% confidence",
       x = "NMDS1",
       y = "NMDS2") +
  theme(legend.position = "top")

cat("\nNMDS Stress value:", round(nmds$stress, 3), "\n")

## 
## NMDS Stress value: 0

cat("Stress interpretation:\n")

## Stress interpretation:

cat("  < 0.05: Excellent\n")

##   < 0.05: Excellent

cat("  < 0.10: Good\n")

##   < 0.10: Good

cat("  < 0.20: Acceptable\n")

##   < 0.20: Acceptable

cat("  > 0.20: Poor (use with caution)\n")

##   > 0.20: Poor (use with caution)

Hierarchical Clustering

# Calculate Bray-Curtis dissimilarity
dist_matrix <- vegdist(combined_matrix, method = "bray")

# Hierarchical clustering
cluster <- hclust(dist_matrix, method = "average")

# Plot dendrogram with location colors
plot(cluster,
     main = "Hierarchical Clustering of All Samples\n(Bray-Curtis Dissimilarity, UPGMA)",
     xlab = "Sample",
     ylab = "Bray-Curtis Dissimilarity",
     hang = -1,
     cex = 0.7)

# Color labels by location
location_colors <- ifelse(location_factor == "Collores", "#E69F00", "#56B4E9")
library(dendextend)
dend <- as.dendrogram(cluster)
labels_colors(dend) <- location_colors[order.dendrogram(dend)]
plot(dend, main = "Hierarchical Clustering (Colored by Location)")
legend("topright", legend = c("Collores", "Estancias"), 
       col = c("#E69F00", "#56B4E9"), pch = 15, cex = 0.8)

Beta Diversity Comparison

# Calculate within-location beta diversity
collores_beta <- betadiver(collores_matrix, method = "w")
estancias_beta <- betadiver(estancias_matrix, method = "w")

# Summary
beta_summary <- data.frame(
  Location = c("Collores", "Estancias"),
  Mean_Beta = c(round(mean(collores_beta), 3), round(mean(estancias_beta), 3)),
  SD_Beta = c(round(sd(collores_beta), 3), round(sd(estancias_beta), 3)),
  Min_Beta = c(round(min(collores_beta), 3), round(min(estancias_beta), 3)),
  Max_Beta = c(round(max(collores_beta), 3), round(max(estancias_beta), 3))
)

kable(beta_summary,
      caption = "Within-Location Beta Diversity (Whittaker's β)",
      col.names = c("Location", "Mean", "SD", "Min", "Max")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)

Within-Location Beta Diversity (Whittaker’s β)
Location	Mean	SD	Min	Max
Collores	0.780	0.186	0.333	1
Estancias	0.766	0.199	0.429	1

# Test for difference
beta_test <- wilcox.test(collores_beta, estancias_beta)
cat("\nWilcoxon test for beta diversity difference:\n")

## 
## Wilcoxon test for beta diversity difference:

cat("W =", beta_test$statistic, ", p-value =", round(beta_test$p.value, 4), "\n")

## W = 2282.5 , p-value = 0.6314

Summary and Conclusions

Key Findings Summary

summary_results <- data.frame(
  Metric = c(
    "Number of samples",
    "Gamma diversity (total taxa)",
    "Mean alpha diversity (taxa/sample)",
    "Beta diversity (multiplicative)",
    "Total spore count",
    "Mean spore abundance/sample",
    "Pooled Shannon diversity",
    "Mean Shannon diversity/sample",
    "Pooled Simpson diversity",
    "Pooled evenness",
    "Most abundant unique taxon",
    "Number of unique taxa"
  ),
  Collores = c(
    nrow(collores_matrix),
    length(collores_pooled),
    round(mean(collores_richness), 2),
    round(length(collores_pooled) / mean(collores_richness), 2),
    sum(collores_matrix),
    round(mean(collores_abundance), 2),
    round(diversity(collores_pooled, index = "shannon"), 3),
    round(mean(collores_shannon), 3),
    round(diversity(collores_pooled, index = "simpson"), 3),
    round(diversity(collores_pooled, index = "shannon") / log(length(collores_pooled)), 3),
    names(collores_pooled)[1],
    length(collores_unique)
  ),
  Estancias = c(
    nrow(estancias_matrix),
    length(estancias_pooled),
    round(mean(estancias_richness), 2),
    round(length(estancias_pooled) / mean(estancias_richness), 2),
    sum(estancias_matrix),
    round(mean(estancias_abundance), 2),
    round(diversity(estancias_pooled, index = "shannon"), 3),
    round(mean(estancias_shannon), 3),
    round(diversity(estancias_pooled, index = "simpson"), 3),
    round(diversity(estancias_pooled, index = "shannon") / log(length(estancias_pooled)), 3),
    names(estancias_pooled)[1],
    length(estancias_unique)
  )
)

kable(summary_results,
      caption = "Comprehensive Summary: Collores vs. Estancias") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)

Comprehensive Summary: Collores vs. Estancias
Metric	Collores	Estancias
Number of samples	12	12
Gamma diversity (total taxa)	41	45
Mean alpha diversity (taxa/sample)	7.75	8.08
Beta diversity (multiplicative)	5.29	5.57
Total spore count	381	283
Mean spore abundance/sample	31.75	23.58
Pooled Shannon diversity	3.112	2.827
Mean Shannon diversity/sample	1.447	1.652
Pooled Simpson diversity	0.936	0.897
Pooled evenness	0.838	0.743
Most abundant unique taxon	Arthrinium/Chaetomium	Aspergillus/Penicillium
Number of unique taxa	28	32

Conclusions

Based on the comparative biodiversity analysis of fungal spore communities:

1. Overall Diversity Patterns

Collores has a gamma diversity of 41 taxa with pooled Shannon index of 3.112
Estancias has a gamma diversity of 45 taxa with pooled Shannon index of 2.827

2. Statistical Differences

Richness comparison: Not significantly different (p = 0.6634)
Shannon diversity: Not significantly different (p = 0.4428)
Community composition (PERMANOVA): Significantly different (p = 0.001)

3. Species Composition

13 taxa are shared between locations (31.7% of Collores, 28.9% of Estancias)
28 taxa unique to Collores
32 taxa unique to Estancias

4. Beta Diversity

Within-location turnover is similar between locations
Collores mean β = 0.78, Estancias mean β = 0.766

5. Sampling Adequacy

Collores: 100% completeness
Estancias: 100% completeness

6. Ecological Interpretation

The two locations show distinct fungal spore communities, with limited overlap in species composition. The differences may reflect variations in environmental conditions, substrate availability, or spatial heterogeneity between locations.

Session Information

sessionInfo()

## R version 4.5.0 (2025-04-11)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/Puerto_Rico
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] dendextend_1.19.1 ggpubr_0.6.0      kableExtra_1.4.0  knitr_1.50       
##  [5] reshape2_1.4.4    lubridate_1.9.4   forcats_1.0.0     stringr_1.5.1    
##  [9] dplyr_1.1.4       purrr_1.0.4       readr_2.1.5       tidyr_1.3.1      
## [13] tibble_3.2.1      ggplot2_3.5.2     tidyverse_2.0.0   vegan_2.7-2      
## [17] permute_0.9-8    
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6      xfun_0.52         bslib_0.9.0       rstatix_0.7.2    
##  [5] lattice_0.22-6    tzdb_0.5.0        vctrs_0.6.5       tools_4.5.0      
##  [9] generics_0.1.3    parallel_4.5.0    cluster_2.1.8.1   pkgconfig_2.0.3  
## [13] Matrix_1.7-3      lifecycle_1.0.4   farver_2.1.2      compiler_4.5.0   
## [17] textshaping_1.0.0 munsell_0.5.1     carData_3.0-5     htmltools_0.5.8.1
## [21] sass_0.4.10       yaml_2.3.10       Formula_1.2-5     pillar_1.10.2    
## [25] car_3.1-3         jquerylib_0.1.4   MASS_7.3-65       cachem_1.1.0     
## [29] viridis_0.6.5     abind_1.4-8       nlme_3.1-168      tidyselect_1.2.1 
## [33] digest_0.6.37     stringi_1.8.7     labeling_0.4.3    splines_4.5.0    
## [37] cowplot_1.1.3     fastmap_1.2.0     grid_4.5.0        colorspace_2.1-1 
## [41] cli_3.6.4         magrittr_2.0.3    broom_1.0.8       withr_3.0.2      
## [45] scales_1.3.0      backports_1.5.0   timechange_0.3.0  rmarkdown_2.29   
## [49] gridExtra_2.3     ggsignif_0.6.4    ragg_1.4.0        hms_1.1.3        
## [53] evaluate_1.0.3    viridisLite_0.4.2 mgcv_1.9-1        rlang_1.1.6      
## [57] Rcpp_1.0.14       glue_1.8.0        xml2_1.3.8        svglite_2.2.1    
## [61] rstudioapi_0.17.1 jsonlite_2.0.0    R6_2.6.1          plyr_1.8.9       
## [65] systemfonts_1.3.1

Export Results

# Save all diversity indices
write.csv(all_diversity, "diversity_indices_all_samples.csv", row.names = FALSE)

# Save pooled diversity
write.csv(pooled_indices, "pooled_diversity_by_location.csv", row.names = FALSE)

# Save statistical test results
write.csv(test_results, "statistical_tests_results.csv", row.names = FALSE)

# Save species composition
write.csv(comparison_df, "top_taxa_comparison.csv", row.names = FALSE)

# Save shared/unique taxa
write.csv(taxa_summary, "shared_unique_taxa_summary.csv", row.names = FALSE)

# Save NMDS scores
write.csv(nmds_scores, "nmds_scores_all_samples.csv", row.names = FALSE)

cat("All results exported successfully!\n")

Comparative Fungal Spore Biodiversity Analysis: Collores vs. Estancias

Your Name

2025-11-16