This document presents a comprehensive comparative biodiversity analysis of fungal spore communities from two locations: Collores and Estancias. The analysis includes diversity indices, community composition, statistical comparisons, and multivariate approaches to understand differences and similarities between the two locations.
# Install packages if needed
# install.packages(c("vegan", "tidyverse", "reshape2", "knitr", "kableExtra", "ggpubr"))
library(vegan) # For biodiversity analysis
library(tidyverse) # For data manipulation and visualization
library(reshape2) # For data reshaping
library(knitr) # For nice tables
library(kableExtra) # For enhanced tables
library(ggpubr) # For statistical comparisons and publication-ready plots
# Read the data
collores_data <- read.csv("collores_franja.csv", check.names = FALSE, row.names = 1)
estancias_data <- read.csv("estancias_franja.csv", check.names = FALSE, row.names = 1)
# Replace NA values with 0
collores_data[is.na(collores_data)] <- 0
estancias_data[is.na(estancias_data)] <- 0
# Convert to numeric matrices
collores_matrix <- as.matrix(collores_data)
estancias_matrix <- as.matrix(estancias_data)
# Display data dimensions
cat("COLLORES Dataset:\n")
## COLLORES Dataset:
cat(" Number of samples:", nrow(collores_matrix), "\n")
## Number of samples: 12
cat(" Number of taxa:", ncol(collores_matrix), "\n")
## Number of taxa: 41
cat(" Total spore count:", sum(collores_matrix), "\n\n")
## Total spore count: 381
cat("ESTANCIAS Dataset:\n")
## ESTANCIAS Dataset:
cat(" Number of samples:", nrow(estancias_matrix), "\n")
## Number of samples: 12
cat(" Number of taxa:", ncol(estancias_matrix), "\n")
## Number of taxa: 45
cat(" Total spore count:", sum(estancias_matrix), "\n")
## Total spore count: 283
overview_df <- data.frame(
Location = c("Collores", "Estancias"),
Samples = c(nrow(collores_matrix), nrow(estancias_matrix)),
Taxa = c(ncol(collores_matrix), ncol(estancias_matrix)),
Total_Spores = c(sum(collores_matrix), sum(estancias_matrix)),
Mean_Spores_per_Sample = c(round(mean(rowSums(collores_matrix)), 2),
round(mean(rowSums(estancias_matrix)), 2))
)
kable(overview_df,
caption = "Overview of Datasets by Location",
col.names = c("Location", "# Samples", "# Taxa", "Total Spores", "Mean Spores/Sample")) %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
| Location | # Samples | # Taxa | Total Spores | Mean Spores/Sample |
|---|---|---|---|---|
| Collores | 12 | 41 | 381 | 31.75 |
| Estancias | 12 | 45 | 283 | 23.58 |
# Calculate basic statistics for each location
collores_richness <- rowSums(collores_matrix > 0)
collores_abundance <- rowSums(collores_matrix)
estancias_richness <- rowSums(estancias_matrix > 0)
estancias_abundance <- rowSums(estancias_matrix)
# Create summary table
stats_summary <- data.frame(
Metric = c("Mean Richness", "Median Richness", "SD Richness", "Min Richness", "Max Richness",
"Mean Abundance", "Median Abundance", "SD Abundance", "Min Abundance", "Max Abundance"),
Collores = c(
round(mean(collores_richness), 2),
median(collores_richness),
round(sd(collores_richness), 2),
min(collores_richness),
max(collores_richness),
round(mean(collores_abundance), 2),
median(collores_abundance),
round(sd(collores_abundance), 2),
min(collores_abundance),
max(collores_abundance)
),
Estancias = c(
round(mean(estancias_richness), 2),
median(estancias_richness),
round(sd(estancias_richness), 2),
min(estancias_richness),
max(estancias_richness),
round(mean(estancias_abundance), 2),
median(estancias_abundance),
round(sd(estancias_abundance), 2),
min(estancias_abundance),
max(estancias_abundance)
)
)
kable(stats_summary,
caption = "Summary Statistics by Location") %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
| Metric | Collores | Estancias |
|---|---|---|
| Mean Richness | 7.75 | 8.08 |
| Median Richness | 8.00 | 9.00 |
| SD Richness | 3.41 | 3.29 |
| Min Richness | 2.00 | 3.00 |
| Max Richness | 14.00 | 12.00 |
| Mean Abundance | 31.75 | 23.58 |
| Median Abundance | 29.50 | 23.50 |
| SD Abundance | 17.20 | 15.68 |
| Min Abundance | 3.00 | 4.00 |
| Max Abundance | 61.00 | 57.00 |
# Prepare data for boxplots
richness_df <- data.frame(
Location = c(rep("Collores", length(collores_richness)),
rep("Estancias", length(estancias_richness))),
Richness = c(collores_richness, estancias_richness)
)
abundance_df <- data.frame(
Location = c(rep("Collores", length(collores_abundance)),
rep("Estancias", length(estancias_abundance))),
Abundance = c(collores_abundance, estancias_abundance)
)
# Richness boxplot
p1 <- ggplot(richness_df, aes(x = Location, y = Richness, fill = Location)) +
geom_boxplot(alpha = 0.7, outlier.shape = 16, outlier.size = 2) +
geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
stat_compare_means(method = "wilcox.test", label = "p.format", label.x = 1.5, size = 6) +
scale_fill_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
theme_minimal() +
labs(title = "Species Richness by Location",
y = "Number of Taxa per Sample",
x = "") +
theme(
legend.position = "none",
plot.title = element_text(size = 16), # Title size
axis.title.y = element_text(size = 14), # Y-axis label size
axis.text.x = element_text(size = 16), # X-axis text (Collores, Estancias)
axis.text.y = element_text(size = 12))
# Abundance boxplot
p2 <- ggplot(abundance_df, aes(x = Location, y = Abundance, fill = Location)) +
geom_boxplot(alpha = 0.7, outlier.shape = 16, outlier.size = 2) +
geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
stat_compare_means(method = "wilcox.test", label = "p.format", label.x = 1.5, size = 6) +
scale_fill_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
theme_minimal() +
labs(title = "Spore Abundance by Location",
y = "Total Spores per Sample",
x = "", size = 6) +
theme(
legend.position = "none",
plot.title = element_text(size = 16), # Title size
axis.title.y = element_text(size = 14), # Y-axis label size
axis.text.x = element_text(size = 16), # X-axis text (Collores, Estancias)
axis.text.y = element_text(size = 12))
# Combine plots
ggarrange(p1, p2, ncol = 2)
# save graphs
ggsave("boxplots_richness_abundance.png", width = 12, height = 6)
# Calculate pooled diversity for each location
collores_pooled <- colSums(collores_matrix)
collores_pooled <- collores_pooled[collores_pooled > 0]
estancias_pooled <- colSums(estancias_matrix)
estancias_pooled <- estancias_pooled[estancias_pooled > 0]
# Calculate indices
pooled_indices <- data.frame(
Index = c("Gamma Diversity (Total Richness)",
"Total Abundance",
"Shannon Index (H')",
"Simpson Index (D)",
"Inverse Simpson (1/D)",
"Pielou's Evenness (J')"),
Collores = c(
length(collores_pooled),
sum(collores_pooled),
round(diversity(collores_pooled, index = "shannon"), 3),
round(diversity(collores_pooled, index = "simpson"), 3),
round(diversity(collores_pooled, index = "invsimpson"), 3),
round(diversity(collores_pooled, index = "shannon") / log(length(collores_pooled)), 3)
),
Estancias = c(
length(estancias_pooled),
sum(estancias_pooled),
round(diversity(estancias_pooled, index = "shannon"), 3),
round(diversity(estancias_pooled, index = "simpson"), 3),
round(diversity(estancias_pooled, index = "invsimpson"), 3),
round(diversity(estancias_pooled, index = "shannon") / log(length(estancias_pooled)), 3)
)
)
kable(pooled_indices,
caption = "Pooled Biodiversity Indices by Location") %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
| Index | Collores | Estancias |
|---|---|---|
| Gamma Diversity (Total Richness) | 41.000 | 45.000 |
| Total Abundance | 381.000 | 283.000 |
| Shannon Index (H’) | 3.112 | 2.827 |
| Simpson Index (D) | 0.936 | 0.897 |
| Inverse Simpson (1/D) | 15.627 | 9.730 |
| Pielou’s Evenness (J’) | 0.838 | 0.743 |
# Prepare data for plotting (exclude richness and abundance for scale)
pooled_plot_df <- data.frame(
Location = rep(c("Collores", "Estancias"), each = 3),
Index = rep(c("Shannon", "Simpson", "Evenness"), 2),
Value = c(
diversity(collores_pooled, index = "shannon"),
diversity(collores_pooled, index = "simpson"),
diversity(collores_pooled, index = "shannon") / log(length(collores_pooled)),
diversity(estancias_pooled, index = "shannon"),
diversity(estancias_pooled, index = "simpson"),
diversity(estancias_pooled, index = "shannon") / log(length(estancias_pooled))
)
)
ggplot(pooled_plot_df, aes(x = Index, y = Value, fill = Location)) +
geom_bar(stat = "identity", position = "dodge", width = 0.7) +
scale_fill_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
theme_minimal() +
labs(title = "Pooled Diversity Indices Comparison",
x = "Diversity Index",
y = "Value",
fill = "Location") +
theme(legend.position = "top")
# Collores
collores_shannon <- diversity(collores_matrix, index = "shannon")
collores_simpson <- diversity(collores_matrix, index = "simpson")
collores_invsimpson <- diversity(collores_matrix, index = "invsimpson")
collores_evenness <- collores_shannon / log(collores_richness)
collores_evenness[is.infinite(collores_evenness) | is.nan(collores_evenness)] <- NA
collores_div_df <- data.frame(
Sample = rownames(collores_matrix),
Location = "Collores",
Richness = collores_richness,
Abundance = collores_abundance,
Shannon = round(collores_shannon, 3),
Simpson = round(collores_simpson, 3),
InvSimpson = round(collores_invsimpson, 3),
Evenness = round(collores_evenness, 3)
)
# Estancias
estancias_shannon <- diversity(estancias_matrix, index = "shannon")
estancias_simpson <- diversity(estancias_matrix, index = "simpson")
estancias_invsimpson <- diversity(estancias_matrix, index = "invsimpson")
estancias_evenness <- estancias_shannon / log(estancias_richness)
estancias_evenness[is.infinite(estancias_evenness) | is.nan(estancias_evenness)] <- NA
estancias_div_df <- data.frame(
Sample = rownames(estancias_matrix),
Location = "Estancias",
Richness = estancias_richness,
Abundance = estancias_abundance,
Shannon = round(estancias_shannon, 3),
Simpson = round(estancias_simpson, 3),
InvSimpson = round(estancias_invsimpson, 3),
Evenness = round(estancias_evenness, 3)
)
# Combine
all_diversity <- rbind(collores_div_df, estancias_div_df)
kable(all_diversity,
caption = "Diversity Indices by Sample and Location") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE) %>%
scroll_box(height = "400px")
| Sample | Location | Richness | Abundance | Shannon | Simpson | InvSimpson | Evenness | |
|---|---|---|---|---|---|---|---|---|
| 1 | 1 | Collores | 2 | 3 | 0.637 | 0.444 | 1.800 | 0.918 |
| 2 | 2 | Collores | 4 | 21 | 0.685 | 0.331 | 1.495 | 0.494 |
| 3 | 3 | Collores | 5 | 30 | 0.578 | 0.244 | 1.324 | 0.359 |
| 4 | 4 | Collores | 11 | 35 | 2.100 | 0.846 | 6.481 | 0.876 |
| 5 | 5 | Collores | 10 | 48 | 1.741 | 0.740 | 3.853 | 0.756 |
| 6 | 6 | Collores | 5 | 10 | 1.471 | 0.740 | 3.846 | 0.914 |
| 7 | 7 | Collores | 8 | 34 | 1.682 | 0.763 | 4.219 | 0.809 |
| 8 | 8 | Collores | 7 | 27 | 1.765 | 0.801 | 5.028 | 0.907 |
| 9 | 9 | Collores | 14 | 57 | 1.964 | 0.773 | 4.408 | 0.744 |
| 10 | 10 | Collores | 11 | 29 | 2.055 | 0.832 | 5.965 | 0.857 |
| 11 | 11 | Collores | 8 | 26 | 1.740 | 0.778 | 4.507 | 0.837 |
| 12 | 12 | Collores | 8 | 61 | 0.946 | 0.414 | 1.708 | 0.455 |
| 13 | 1 | Estancias | 4 | 4 | 1.386 | 0.750 | 4.000 | 1.000 |
| 21 | 2 | Estancias | 3 | 5 | 0.950 | 0.560 | 2.273 | 0.865 |
| 31 | 3 | Estancias | 3 | 6 | 1.011 | 0.611 | 2.571 | 0.921 |
| 41 | 4 | Estancias | 9 | 13 | 2.032 | 0.840 | 6.259 | 0.925 |
| 51 | 5 | Estancias | 6 | 23 | 1.394 | 0.669 | 3.023 | 0.778 |
| 61 | 6 | Estancias | 9 | 24 | 1.600 | 0.694 | 3.273 | 0.728 |
| 71 | 7 | Estancias | 11 | 39 | 1.994 | 0.828 | 5.828 | 0.832 |
| 81 | 8 | Estancias | 9 | 28 | 1.900 | 0.819 | 5.521 | 0.865 |
| 91 | 9 | Estancias | 9 | 19 | 1.941 | 0.820 | 5.554 | 0.883 |
| 101 | 10 | Estancias | 12 | 33 | 2.016 | 0.812 | 5.312 | 0.811 |
| 111 | 11 | Estancias | 10 | 32 | 1.845 | 0.764 | 4.231 | 0.801 |
| 121 | 12 | Estancias | 12 | 57 | 1.755 | 0.734 | 3.765 | 0.706 |
# Calculate summary statistics for diversity indices
diversity_summary <- all_diversity %>%
group_by(Location) %>%
summarise(
Mean_Shannon = round(mean(Shannon, na.rm = TRUE), 3),
SD_Shannon = round(sd(Shannon, na.rm = TRUE), 3),
Mean_Simpson = round(mean(Simpson, na.rm = TRUE), 3),
SD_Simpson = round(sd(Simpson, na.rm = TRUE), 3),
Mean_Evenness = round(mean(Evenness, na.rm = TRUE), 3),
SD_Evenness = round(sd(Evenness, na.rm = TRUE), 3)
)
kable(diversity_summary,
caption = "Mean Diversity Indices by Location (± SD)") %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
| Location | Mean_Shannon | SD_Shannon | Mean_Simpson | SD_Simpson | Mean_Evenness | SD_Evenness |
|---|---|---|---|---|---|---|
| Collores | 1.447 | 0.575 | 0.642 | 0.217 | 0.744 | 0.196 |
| Estancias | 1.652 | 0.385 | 0.742 | 0.092 | 0.843 | 0.085 |
# Shannon diversity
p1 <- ggplot(all_diversity, aes(x = Location, y = Shannon, fill = Location)) +
geom_boxplot(alpha = 0.7, outlier.shape = 16) +
geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
stat_compare_means(method = "wilcox.test", label = "p.format") +
scale_fill_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
theme_minimal() +
labs(title = "Shannon Diversity Index",
y = "Shannon H'") +
theme(legend.position = "none")
# Simpson diversity
p2 <- ggplot(all_diversity, aes(x = Location, y = Simpson, fill = Location)) +
geom_boxplot(alpha = 0.7, outlier.shape = 16) +
geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
stat_compare_means(method = "wilcox.test", label = "p.format") +
scale_fill_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
theme_minimal() +
labs(title = "Simpson Diversity Index",
y = "Simpson D") +
theme(legend.position = "none")
# Evenness
p3 <- ggplot(all_diversity, aes(x = Location, y = Evenness, fill = Location)) +
geom_boxplot(alpha = 0.7, outlier.shape = 16) +
geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
stat_compare_means(method = "wilcox.test", label = "p.format") +
scale_fill_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
theme_minimal() +
labs(title = "Pielou's Evenness",
y = "Evenness J'") +
theme(legend.position = "none")
# Combine plots
ggarrange(p1, p2, p3, ncol = 1, nrow = 3)
# Perform Wilcoxon tests for each diversity metric
richness_test <- wilcox.test(collores_richness, estancias_richness)
abundance_test <- wilcox.test(collores_abundance, estancias_abundance)
shannon_test <- wilcox.test(collores_shannon, estancias_shannon)
simpson_test <- wilcox.test(collores_simpson, estancias_simpson)
evenness_test <- wilcox.test(collores_evenness, estancias_evenness, na.rm = TRUE)
# Create results table
test_results <- data.frame(
Metric = c("Richness", "Abundance", "Shannon", "Simpson", "Evenness"),
W_statistic = c(richness_test$statistic, abundance_test$statistic,
shannon_test$statistic, simpson_test$statistic,
evenness_test$statistic),
P_value = c(richness_test$p.value, abundance_test$p.value,
shannon_test$p.value, simpson_test$p.value,
evenness_test$p.value),
Significance = c(
ifelse(richness_test$p.value < 0.001, "***",
ifelse(richness_test$p.value < 0.01, "**",
ifelse(richness_test$p.value < 0.05, "*", "ns"))),
ifelse(abundance_test$p.value < 0.001, "***",
ifelse(abundance_test$p.value < 0.01, "**",
ifelse(abundance_test$p.value < 0.05, "*", "ns"))),
ifelse(shannon_test$p.value < 0.001, "***",
ifelse(shannon_test$p.value < 0.01, "**",
ifelse(shannon_test$p.value < 0.05, "*", "ns"))),
ifelse(simpson_test$p.value < 0.001, "***",
ifelse(simpson_test$p.value < 0.01, "**",
ifelse(simpson_test$p.value < 0.05, "*", "ns"))),
ifelse(evenness_test$p.value < 0.001, "***",
ifelse(evenness_test$p.value < 0.01, "**",
ifelse(evenness_test$p.value < 0.05, "*", "ns")))
)
)
kable(test_results,
digits = 4,
caption = "Wilcoxon Rank-Sum Tests: Collores vs. Estancias (*** p<0.001, ** p<0.01, * p<0.05, ns = not significant)") %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
| Metric | W_statistic | P_value | Significance |
|---|---|---|---|
| Richness | 64.0 | 0.6634 | ns |
| Abundance | 92.5 | 0.2481 | ns |
| Shannon | 58.0 | 0.4428 | ns |
| Simpson | 60.0 | 0.5137 | ns |
| Evenness | 55.0 | 0.3474 | ns |
# Combine matrices for PERMANOVA
# Need to match taxa between datasets
all_taxa <- unique(c(colnames(collores_matrix), colnames(estancias_matrix)))
# Create standardized matrices with all taxa
collores_full <- matrix(0, nrow = nrow(collores_matrix), ncol = length(all_taxa))
colnames(collores_full) <- all_taxa
rownames(collores_full) <- rownames(collores_matrix)
collores_full[, colnames(collores_matrix)] <- collores_matrix
estancias_full <- matrix(0, nrow = nrow(estancias_matrix), ncol = length(all_taxa))
colnames(estancias_full) <- all_taxa
rownames(estancias_full) <- rownames(estancias_matrix)
estancias_full[, colnames(estancias_matrix)] <- estancias_matrix
# Combine
combined_matrix <- rbind(collores_full, estancias_full)
location_factor <- factor(c(rep("Collores", nrow(collores_full)),
rep("Estancias", nrow(estancias_full))))
# Perform PERMANOVA
set.seed(123)
permanova_result <- adonis2(combined_matrix ~ location_factor,
method = "bray",
permutations = 999)
kable(permanova_result,
digits = 4,
caption = "PERMANOVA: Testing for Differences in Community Composition Between Locations") %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
| Df | SumOfSqs | R2 | F | Pr(>F) | |
|---|---|---|---|---|---|
| Model | 1 | 0.9428 | 0.0996 | 2.4346 | 0.001 |
| Residual | 22 | 8.5195 | 0.9004 | NA | NA |
| Total | 23 | 9.4623 | 1.0000 | NA | NA |
# Perform ANOSIM
set.seed(123)
anosim_result <- anosim(combined_matrix, location_factor,
distance = "bray",
permutations = 999)
cat("ANOSIM Results:\n")
## ANOSIM Results:
cat("R statistic:", round(anosim_result$statistic, 4), "\n")
## R statistic: 0.1967
cat("p-value:", round(anosim_result$signif, 4), "\n\n")
## p-value: 0.001
cat("Interpretation:\n")
## Interpretation:
cat(" R close to 1: Locations are well separated\n")
## R close to 1: Locations are well separated
cat(" R close to 0: Locations are not separated\n")
## R close to 0: Locations are not separated
cat(" R < 0: Within-location variation > between-location variation\n")
## R < 0: Within-location variation > between-location variation
# Top 15 taxa for each location
collores_top <- head(sort(collores_pooled, decreasing = TRUE), 15)
estancias_top <- head(sort(estancias_pooled, decreasing = TRUE), 15)
# Create comparison table
max_length <- max(length(collores_top), length(estancias_top))
comparison_df <- data.frame(
Rank = 1:max_length,
Collores_Taxon = c(names(collores_top), rep("", max_length - length(collores_top))),
Collores_Abundance = c(as.numeric(collores_top), rep(NA, max_length - length(collores_top))),
Estancias_Taxon = c(names(estancias_top), rep("", max_length - length(estancias_top))),
Estancias_Abundance = c(as.numeric(estancias_top), rep(NA, max_length - length(estancias_top)))
)
kable(comparison_df,
caption = "Top 15 Most Abundant Taxa by Location",
col.names = c("Rank", "Taxon", "Abundance", "Taxon", "Abundance")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE) %>%
add_header_above(c(" " = 1, "Collores" = 2, "Estancias" = 2))
| Rank | Taxon | Abundance | Taxon | Abundance |
|---|---|---|---|---|
| 1 | Diatrypella | 49 | Xylariaceae ascopores | 66 |
| 2 | m39 | 46 | m15 | 39 |
| 3 | Leptosphaeria | 37 | Leptosphaeria | 28 |
| 4 | m26 | 26 | Aspergillus/Penicillium | 23 |
| 5 | m25 | 23 | Ganoderma | 17 |
| 6 | Trematosphaeria pertusa/Venturia | 23 | Cercophora/Podospora | 16 |
| 7 | Arthrinium/Chaetomium | 22 | Bipolaris/Drechslera/Helminthosporium/Exserohilum | 12 |
| 8 | Bipolaris/Drechslera/Helminthosporium/Exserohilum | 15 | Scoleco-ascospore | 12 |
| 9 | Delitschia/Amphisphaeria | 15 | m18 | 7 |
| 10 | m13 | 10 | m10 | 5 |
| 11 | m31 | 10 | Drechslera | 4 |
| 12 | Xylariaceae ascopores | 10 | m21 | 4 |
| 13 | m15 | 8 | m7 | 4 |
| 14 | Scoleco-ascopores | 8 | Trematosphaeria pertusa/Venturia | 4 |
| 15 | m38 | 7 | Fusarium | 3 |
# Prepare data for Collores
collores_bar_df <- data.frame(
Taxon = factor(names(collores_top), levels = rev(names(collores_top))),
Abundance = as.numeric(collores_top),
Location = "Collores"
)
# Prepare data for Estancias
estancias_bar_df <- data.frame(
Taxon = factor(names(estancias_top), levels = rev(names(estancias_top))),
Abundance = as.numeric(estancias_top),
Location = "Estancias"
)
# Plot for Collores
p1 <- ggplot(collores_bar_df, aes(x = Taxon, y = Abundance)) +
geom_bar(stat = "identity", fill = "#E69F00", alpha = 0.8) +
coord_flip() +
theme_minimal() +
labs(title = "Collores: Top 15 Most Abundant Taxa",
x = "",
y = "Total Abundance") +
theme(axis.text.y = element_text(size = 9))
# Plot for Estancias
p2 <- ggplot(estancias_bar_df, aes(x = Taxon, y = Abundance)) +
geom_bar(stat = "identity", fill = "#56B4E9", alpha = 0.8) +
coord_flip() +
theme_minimal() +
labs(title = "Estancias: Top 15 Most Abundant Taxa",
x = "",
y = "Total Abundance") +
theme(axis.text.y = element_text(size = 9))
# Combine plots
ggarrange(p1, p2, ncol = 1, nrow = 2)
# save graphs
ggsave("horizontal_barplots_top_taxa.png", width = 10, height = 10)
# Prepare data
collores_rank <- data.frame(
Rank = 1:length(collores_pooled),
Abundance = as.numeric(sort(collores_pooled, decreasing = TRUE)),
Location = "Collores"
)
estancias_rank <- data.frame(
Rank = 1:length(estancias_pooled),
Abundance = as.numeric(sort(estancias_pooled, decreasing = TRUE)),
Location = "Estancias"
)
rank_combined <- rbind(collores_rank, estancias_rank)
# Plot
ggplot(rank_combined, aes(x = Rank, y = Abundance, color = Location, group = Location)) +
geom_line(size = 1.2) +
geom_point(size = 2, alpha = 0.6) +
scale_y_log10() +
scale_color_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
theme_minimal() +
labs(title = "Rank-Abundance Curves by Location",
x = "Species Rank",
y = "Abundance (log scale)",
color = "Location") +
annotation_logticks(sides = "l") +
theme(legend.position = "top")
cat("Shared taxa (", length(shared_taxa), "):\n")
## Shared taxa ( 13 ):
cat(paste(shared_taxa, collapse = ", "), "\n\n")
## Bipolaris/Drechslera/Helminthosporium/Exserohilum, Curvularia, Ganoderma, Leptosphaeria, m10, m13, m14, m15, m23 (cf. Gibberella zeae), m24, Torula, Trematosphaeria pertusa/Venturia, Xylariaceae ascopores
cat("Unique to Collores (", length(collores_unique), "):\n")
## Unique to Collores ( 28 ):
cat(paste(collores_unique, collapse = ", "), "\n\n")
## Arthrinium/Chaetomium, Chaetosphaerella, Delitschia/Amphisphaeria, Diatrypella, Helminthosporium, Kalmusia, Lophiostoma caulium, Lophiostoma/Massarina, m25, m26, m27, m28, m28 (cf. Bertiella), m29, m30, m31, m32, m33, m34, m35, m36, m37, m38, m39, Microsporum gypseum, Pleospora, Rinodina, Scoleco-ascopores
cat("Unique to Estancias (", length(estancias_unique), "):\n")
## Unique to Estancias ( 32 ):
cat(paste(estancias_unique, collapse = ", "), "\n")
## Aspergillus/Penicillium, Cacumisporium capitulatum, Cercophora/Podospora, Cladosporium, Clasterosporium, Drechslera, Fusarium, m1, m11, m12, m16, m17, m18, m19, m2, m20, m21, m22, m3, m4 (cf. Sporidesmium adscendens), m5, m6, m7, m9, Massarina, Nigrospora, Phaeosphaeria fuckelii, Phaeosphaeria oryzae/Leptosphaeria, Phaeosphaeria viridella/Leptosphaeria, Savoryella curvispora, Scoleco-ascospore, Sporidesmium/Distoseptispora
# Calculate species accumulation curves
set.seed(123)
collores_accum <- specaccum(collores_matrix, method = "random", permutations = 100)
estancias_accum <- specaccum(estancias_matrix, method = "random", permutations = 100)
# Prepare data for plotting
collores_accum_df <- data.frame(
Sites = collores_accum$sites,
Richness = collores_accum$richness,
SD = collores_accum$sd,
Location = "Collores"
)
estancias_accum_df <- data.frame(
Sites = estancias_accum$sites,
Richness = estancias_accum$richness,
SD = estancias_accum$sd,
Location = "Estancias"
)
accum_combined <- rbind(collores_accum_df, estancias_accum_df)
# Plot
ggplot(accum_combined, aes(x = Sites, y = Richness, color = Location, fill = Location)) +
geom_ribbon(aes(ymin = Richness - SD, ymax = Richness + SD), alpha = 0.2, color = NA) +
geom_line(size = 1.2) +
geom_point(size = 2) +
scale_color_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
scale_fill_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
theme_minimal() +
labs(title = "Species Accumulation Curves by Location",
x = "Number of Samples",
y = "Cumulative Number of Taxa",
color = "Location",
fill = "Location") +
theme(legend.position = "top")
# save graph
ggsave("species_accumulation_curves.png", width = 10, height = 6)
# Calculate sampling completeness
collores_completeness <- (collores_accum$richness[length(collores_accum$richness)] /
length(collores_pooled)) * 100
estancias_completeness <- (estancias_accum$richness[length(estancias_accum$richness)] /
length(estancias_pooled)) * 100
accum_summary <- data.frame(
Location = c("Collores", "Estancias"),
Observed_Richness = c(collores_accum$richness[length(collores_accum$richness)],
estancias_accum$richness[length(estancias_accum$richness)]),
Total_Richness = c(length(collores_pooled), length(estancias_pooled)),
Completeness_Percent = round(c(collores_completeness, estancias_completeness), 1),
Slope_Last_Point = c(
diff(tail(collores_accum$richness, 2)),
diff(tail(estancias_accum$richness, 2))
)
)
kable(accum_summary,
caption = "Species Accumulation Curve Analysis",
col.names = c("Location", "Observed Richness", "Total Richness",
"Completeness (%)", "Slope (last interval)")) %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
| Location | Observed Richness | Total Richness | Completeness (%) | Slope (last interval) |
|---|---|---|---|---|
| Collores | 41 | 41 | 100 | 1.61 |
| Estancias | 45 | 45 | 100 | 2.58 |
cat("\nInterpretation:\n")
##
## Interpretation:
cat("- Completeness near 100% suggests adequate sampling\n")
## - Completeness near 100% suggests adequate sampling
cat("- Slope near 0 suggests curve is approaching asymptote\n")
## - Slope near 0 suggests curve is approaching asymptote
cat("- Positive slope suggests more sampling would reveal more taxa\n")
## - Positive slope suggests more sampling would reveal more taxa
# Perform NMDS on combined data
set.seed(123)
nmds <- metaMDS(combined_matrix, distance = "bray", k = 2, trymax = 100)
## Wisconsin double standardization
## Run 0 stress 8.9739e-05
## Run 1 stress 9.247582e-05
## ... Procrustes: rmse 0.000113254 max resid 0.0002050938
## ... Similar to previous best
## Run 2 stress 8.920808e-05
## ... New best solution
## ... Procrustes: rmse 0.0001110418 max resid 0.0003101921
## ... Similar to previous best
## Run 3 stress 8.792056e-05
## ... New best solution
## ... Procrustes: rmse 9.446217e-05 max resid 0.0001788452
## ... Similar to previous best
## Run 4 stress 6.73586e-05
## ... New best solution
## ... Procrustes: rmse 5.550749e-05 max resid 0.000103968
## ... Similar to previous best
## Run 5 stress 9.074792e-05
## ... Procrustes: rmse 0.0001090724 max resid 0.0002344792
## ... Similar to previous best
## Run 6 stress 8.943947e-05
## ... Procrustes: rmse 8.640841e-05 max resid 0.0001484969
## ... Similar to previous best
## Run 7 stress 8.7144e-05
## ... Procrustes: rmse 8.137084e-05 max resid 0.0002306804
## ... Similar to previous best
## Run 8 stress 7.075302e-05
## ... Procrustes: rmse 7.797697e-05 max resid 0.0001990423
## ... Similar to previous best
## Run 9 stress 8.739171e-05
## ... Procrustes: rmse 0.0001183726 max resid 0.0002027679
## ... Similar to previous best
## Run 10 stress 8.07646e-05
## ... Procrustes: rmse 6.293758e-05 max resid 0.000143062
## ... Similar to previous best
## Run 11 stress 7.392939e-05
## ... Procrustes: rmse 0.0001034971 max resid 0.0002137053
## ... Similar to previous best
## Run 12 stress 9.237838e-05
## ... Procrustes: rmse 9.960249e-05 max resid 0.0002060827
## ... Similar to previous best
## Run 13 stress 8.469947e-05
## ... Procrustes: rmse 6.249446e-05 max resid 0.0001534326
## ... Similar to previous best
## Run 14 stress 9.002e-05
## ... Procrustes: rmse 9.759811e-05 max resid 0.0001915827
## ... Similar to previous best
## Run 15 stress 9.900744e-05
## ... Procrustes: rmse 0.0001244791 max resid 0.0002304147
## ... Similar to previous best
## Run 16 stress 9.530368e-05
## ... Procrustes: rmse 0.0001209258 max resid 0.0002173887
## ... Similar to previous best
## Run 17 stress 7.04336e-05
## ... Procrustes: rmse 5.020936e-05 max resid 0.0001236886
## ... Similar to previous best
## Run 18 stress 8.585764e-05
## ... Procrustes: rmse 7.513399e-05 max resid 0.0001773248
## ... Similar to previous best
## Run 19 stress 9.227046e-05
## ... Procrustes: rmse 7.635758e-05 max resid 0.0001481881
## ... Similar to previous best
## Run 20 stress 9.852011e-05
## ... Procrustes: rmse 6.876544e-05 max resid 0.0001355137
## ... Similar to previous best
## *** Best solution repeated 17 times
# Extract scores
nmds_scores <- as.data.frame(scores(nmds, display = "sites"))
nmds_scores$Sample <- rownames(nmds_scores)
nmds_scores$Location <- location_factor
# Plot NMDS
ggplot(nmds_scores, aes(x = NMDS1, y = NMDS2, color = Location, shape = Location)) +
geom_point(size = 4, alpha = 0.7) +
stat_ellipse(aes(group = Location), level = 0.95, linetype = 2) +
scale_color_manual(values = c("Collores" = "#E69F00", "Estancias" = "#56B4E9")) +
theme_minimal() +
labs(title = paste("NMDS Ordination (Stress =", round(nmds$stress, 3), ")"),
subtitle = "Based on Bray-Curtis dissimilarity; ellipses show 95% confidence",
x = "NMDS1",
y = "NMDS2") +
theme(legend.position = "top")
cat("\nNMDS Stress value:", round(nmds$stress, 3), "\n")
##
## NMDS Stress value: 0
cat("Stress interpretation:\n")
## Stress interpretation:
cat(" < 0.05: Excellent\n")
## < 0.05: Excellent
cat(" < 0.10: Good\n")
## < 0.10: Good
cat(" < 0.20: Acceptable\n")
## < 0.20: Acceptable
cat(" > 0.20: Poor (use with caution)\n")
## > 0.20: Poor (use with caution)
# Calculate Bray-Curtis dissimilarity
dist_matrix <- vegdist(combined_matrix, method = "bray")
# Hierarchical clustering
cluster <- hclust(dist_matrix, method = "average")
# Plot dendrogram with location colors
plot(cluster,
main = "Hierarchical Clustering of All Samples\n(Bray-Curtis Dissimilarity, UPGMA)",
xlab = "Sample",
ylab = "Bray-Curtis Dissimilarity",
hang = -1,
cex = 0.7)
# Color labels by location
location_colors <- ifelse(location_factor == "Collores", "#E69F00", "#56B4E9")
library(dendextend)
dend <- as.dendrogram(cluster)
labels_colors(dend) <- location_colors[order.dendrogram(dend)]
plot(dend, main = "Hierarchical Clustering (Colored by Location)")
legend("topright", legend = c("Collores", "Estancias"),
col = c("#E69F00", "#56B4E9"), pch = 15, cex = 0.8)
# Calculate within-location beta diversity
collores_beta <- betadiver(collores_matrix, method = "w")
estancias_beta <- betadiver(estancias_matrix, method = "w")
# Summary
beta_summary <- data.frame(
Location = c("Collores", "Estancias"),
Mean_Beta = c(round(mean(collores_beta), 3), round(mean(estancias_beta), 3)),
SD_Beta = c(round(sd(collores_beta), 3), round(sd(estancias_beta), 3)),
Min_Beta = c(round(min(collores_beta), 3), round(min(estancias_beta), 3)),
Max_Beta = c(round(max(collores_beta), 3), round(max(estancias_beta), 3))
)
kable(beta_summary,
caption = "Within-Location Beta Diversity (Whittaker's β)",
col.names = c("Location", "Mean", "SD", "Min", "Max")) %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
| Location | Mean | SD | Min | Max |
|---|---|---|---|---|
| Collores | 0.780 | 0.186 | 0.333 | 1 |
| Estancias | 0.766 | 0.199 | 0.429 | 1 |
# Test for difference
beta_test <- wilcox.test(collores_beta, estancias_beta)
cat("\nWilcoxon test for beta diversity difference:\n")
##
## Wilcoxon test for beta diversity difference:
cat("W =", beta_test$statistic, ", p-value =", round(beta_test$p.value, 4), "\n")
## W = 2282.5 , p-value = 0.6314
summary_results <- data.frame(
Metric = c(
"Number of samples",
"Gamma diversity (total taxa)",
"Mean alpha diversity (taxa/sample)",
"Beta diversity (multiplicative)",
"Total spore count",
"Mean spore abundance/sample",
"Pooled Shannon diversity",
"Mean Shannon diversity/sample",
"Pooled Simpson diversity",
"Pooled evenness",
"Most abundant unique taxon",
"Number of unique taxa"
),
Collores = c(
nrow(collores_matrix),
length(collores_pooled),
round(mean(collores_richness), 2),
round(length(collores_pooled) / mean(collores_richness), 2),
sum(collores_matrix),
round(mean(collores_abundance), 2),
round(diversity(collores_pooled, index = "shannon"), 3),
round(mean(collores_shannon), 3),
round(diversity(collores_pooled, index = "simpson"), 3),
round(diversity(collores_pooled, index = "shannon") / log(length(collores_pooled)), 3),
names(collores_pooled)[1],
length(collores_unique)
),
Estancias = c(
nrow(estancias_matrix),
length(estancias_pooled),
round(mean(estancias_richness), 2),
round(length(estancias_pooled) / mean(estancias_richness), 2),
sum(estancias_matrix),
round(mean(estancias_abundance), 2),
round(diversity(estancias_pooled, index = "shannon"), 3),
round(mean(estancias_shannon), 3),
round(diversity(estancias_pooled, index = "simpson"), 3),
round(diversity(estancias_pooled, index = "shannon") / log(length(estancias_pooled)), 3),
names(estancias_pooled)[1],
length(estancias_unique)
)
)
kable(summary_results,
caption = "Comprehensive Summary: Collores vs. Estancias") %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
| Metric | Collores | Estancias |
|---|---|---|
| Number of samples | 12 | 12 |
| Gamma diversity (total taxa) | 41 | 45 |
| Mean alpha diversity (taxa/sample) | 7.75 | 8.08 |
| Beta diversity (multiplicative) | 5.29 | 5.57 |
| Total spore count | 381 | 283 |
| Mean spore abundance/sample | 31.75 | 23.58 |
| Pooled Shannon diversity | 3.112 | 2.827 |
| Mean Shannon diversity/sample | 1.447 | 1.652 |
| Pooled Simpson diversity | 0.936 | 0.897 |
| Pooled evenness | 0.838 | 0.743 |
| Most abundant unique taxon | Arthrinium/Chaetomium | Aspergillus/Penicillium |
| Number of unique taxa | 28 | 32 |
Based on the comparative biodiversity analysis of fungal spore communities:
The two locations show distinct fungal spore communities, with limited overlap in species composition. The differences may reflect variations in environmental conditions, substrate availability, or spatial heterogeneity between locations.
sessionInfo()
## R version 4.5.0 (2025-04-11)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Puerto_Rico
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] dendextend_1.19.1 ggpubr_0.6.0 kableExtra_1.4.0 knitr_1.50
## [5] reshape2_1.4.4 lubridate_1.9.4 forcats_1.0.0 stringr_1.5.1
## [9] dplyr_1.1.4 purrr_1.0.4 readr_2.1.5 tidyr_1.3.1
## [13] tibble_3.2.1 ggplot2_3.5.2 tidyverse_2.0.0 vegan_2.7-2
## [17] permute_0.9-8
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.6 xfun_0.52 bslib_0.9.0 rstatix_0.7.2
## [5] lattice_0.22-6 tzdb_0.5.0 vctrs_0.6.5 tools_4.5.0
## [9] generics_0.1.3 parallel_4.5.0 cluster_2.1.8.1 pkgconfig_2.0.3
## [13] Matrix_1.7-3 lifecycle_1.0.4 farver_2.1.2 compiler_4.5.0
## [17] textshaping_1.0.0 munsell_0.5.1 carData_3.0-5 htmltools_0.5.8.1
## [21] sass_0.4.10 yaml_2.3.10 Formula_1.2-5 pillar_1.10.2
## [25] car_3.1-3 jquerylib_0.1.4 MASS_7.3-65 cachem_1.1.0
## [29] viridis_0.6.5 abind_1.4-8 nlme_3.1-168 tidyselect_1.2.1
## [33] digest_0.6.37 stringi_1.8.7 labeling_0.4.3 splines_4.5.0
## [37] cowplot_1.1.3 fastmap_1.2.0 grid_4.5.0 colorspace_2.1-1
## [41] cli_3.6.4 magrittr_2.0.3 broom_1.0.8 withr_3.0.2
## [45] scales_1.3.0 backports_1.5.0 timechange_0.3.0 rmarkdown_2.29
## [49] gridExtra_2.3 ggsignif_0.6.4 ragg_1.4.0 hms_1.1.3
## [53] evaluate_1.0.3 viridisLite_0.4.2 mgcv_1.9-1 rlang_1.1.6
## [57] Rcpp_1.0.14 glue_1.8.0 xml2_1.3.8 svglite_2.2.1
## [61] rstudioapi_0.17.1 jsonlite_2.0.0 R6_2.6.1 plyr_1.8.9
## [65] systemfonts_1.3.1
# Save all diversity indices
write.csv(all_diversity, "diversity_indices_all_samples.csv", row.names = FALSE)
# Save pooled diversity
write.csv(pooled_indices, "pooled_diversity_by_location.csv", row.names = FALSE)
# Save statistical test results
write.csv(test_results, "statistical_tests_results.csv", row.names = FALSE)
# Save species composition
write.csv(comparison_df, "top_taxa_comparison.csv", row.names = FALSE)
# Save shared/unique taxa
write.csv(taxa_summary, "shared_unique_taxa_summary.csv", row.names = FALSE)
# Save NMDS scores
write.csv(nmds_scores, "nmds_scores_all_samples.csv", row.names = FALSE)
cat("All results exported successfully!\n")