library(tidyverse)
library(vegan)
library(phyloseq)
library(ggplot2)
library(glmmTMB)
library(car)
library(permute)
setwd("~/Desktop/hi22/analyses/bacteria")
site_colors <- c(
monoculture = "#F7C1BB",
diversified = "#885A5A",
agroforestry = "#84B082",
forest = "#353A47",
Kipuka = "#DC136C",
Monoculture = "#F7C1BB",
Diversified = "#885A5A",
Agroforestry = "#84B082",
Forest = "#353A47"
)
sitetype_levels <- c("monoculture", "diversified", "agroforestry", "forest", "Kipuka")
Note: This document depends on
ps_bactandmeta_ccfrom the bacterial analysis.
Runhi22_bacteria_analysis.Rmdfirst, or load the necessary objects below before knitting.
ps_bact <- readRDS("Intermediate_data/phyloseq_b_clean.RDS")
meta_cc <- readRDS("Intermediate_data/meta_cc.RDS")
# meta_cc is built during the bacteria analysis; if knitting independently,
# source the bacteria Rmd or rebuild it here as needed.
ko_functions <- read.table(
"picrust2_output/KO_metagenome_out/pred_metagenome_unstrat.tsv",
header = TRUE, row.names = 1, sep = "\t")
pathways <- read.table(
"picrust2_output/pathways_out/path_abun_unstrat.tsv",
header = TRUE, row.names = 1, sep = "\t")
cat("KO functions:", dim(ko_functions), "\n")
## KO functions: 8969 67
cat("Pathways:", dim(pathways), "\n")
## Pathways: 556 67
# Transpose so samples are rows
ko_functions_t <- t(ko_functions)
pathways_t <- t(pathways)
PICRUSt2 converts - to . in sample names.
This section maps them back to HI_ IDs so they align with
the rest of the project.
sample_metadata_full <- as(sample_data(ps_bact), "data.frame")
sample_name_mapping <- setNames(sample_metadata_full$id, rownames(sample_metadata_full))
picrust_sample_names <- rownames(ko_functions_t)
# Restore dashes if PICRUSt2 converted them to dots
if (any(grepl("\\.", picrust_sample_names)) && any(grepl("-", names(sample_name_mapping)))) {
picrust_names_fixed <- gsub("\\.", "-", picrust_sample_names)
} else {
picrust_names_fixed <- picrust_sample_names
}
hi_ids_mapped <- sample_name_mapping[picrust_names_fixed]
successful_mappings <- !is.na(hi_ids_mapped)
cat("Successful mappings:", sum(successful_mappings), "out of", length(hi_ids_mapped), "\n")
## Successful mappings: 67 out of 67
# Diagnostic: show a few mappings to confirm they look right
data.frame(
picrust_original = picrust_sample_names[1:5],
picrust_fixed = picrust_names_fixed[1:5],
hi_id_mapped = hi_ids_mapped[1:5]
)
## picrust_original picrust_fixed hi_id_mapped
## AA-A-01 AA.A.01 AA-A-01 HI_58
## AA-A-02 AA.A.02 AA-A-02 HI_36
## AA-A-03 AA.A.03 AA-A-03 HI_10
## AA-A-04 AA.A.04 AA-A-04 HI_32
## AA-A-05 AA.A.05 AA-A-05 HI_70
if (sum(successful_mappings) == 0) {
stop("No successful sample name mappings. Check sample naming conventions.")
}
ko_mapped <- ko_functions_t[successful_mappings, ]
pathways_mapped <- pathways_t[successful_mappings, ]
hi_ids_clean <- hi_ids_mapped[successful_mappings]
rownames(ko_mapped) <- hi_ids_clean
rownames(pathways_mapped) <- hi_ids_clean
common_samples_final <- intersect(rownames(ko_mapped), meta_cc$id)
cat("Final common samples:", length(common_samples_final), "\n")
## Final common samples: 66
ko_matched <- ko_mapped[common_samples_final, ]
pathways_matched <- pathways_mapped[common_samples_final, ]
meta_matched <- meta_cc[meta_cc$id %in% common_samples_final, ]
func_diversity <- data.frame(
sample_id = rownames(ko_matched),
ko_richness = specnumber(ko_matched),
ko_shannon = diversity(ko_matched),
pathway_richness = specnumber(pathways_matched),
pathway_shannon = diversity(pathways_matched),
stringsAsFactors = FALSE
)
# Align rows and merge with metadata
meta_matched_ord <- meta_matched[order(meta_matched$id), ]
ko_matched_ord <- ko_matched[order(rownames(ko_matched)), ]
func_div_ord <- func_diversity[order(func_diversity$sample_id), ]
func_data <- cbind(func_div_ord, meta_matched_ord)
cat("Samples:", nrow(func_data), "\n")
## Samples: 66
cat("KO functions:", ncol(ko_matched), "\n")
## KO functions: 8969
cat("Pathways:", ncol(pathways_matched), "\n")
## Pathways: 556
head(func_diversity)
## sample_id ko_richness ko_shannon pathway_richness pathway_shannon
## HI_58 HI_58 7643 7.971415 497 5.689721
## HI_36 HI_36 7482 7.871598 487 5.618232
## HI_10 HI_10 7165 7.909284 477 5.640280
## HI_32 HI_32 7438 7.848995 491 5.612405
## HI_70 HI_70 7857 7.942741 511 5.678624
## HI_46 HI_46 7488 7.933188 497 5.676335
func_ko_mod <- glmmTMB(
ko_richness ~ sitetype +
scale(ph) + scale(c) + scale(n) + (1 | site_initials),
data = func_data,
family = nbinom2(link = "log"))
summary(func_ko_mod)
## Family: nbinom2 ( log )
## Formula:
## ko_richness ~ sitetype + scale(ph) + scale(c) + scale(n) + (1 |
## site_initials)
## Data: func_data
##
## AIC BIC logLik -2*log(L) df.resid
## 913.3 935.2 -446.7 893.3 56
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## site_initials (Intercept) 7.74e-05 0.008798
## Number of obs: 66, groups: site_initials, 22
##
## Dispersion parameter for nbinom2 family (): 1.71e+03
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 8.929403 0.010420 857.0 <2e-16 ***
## sitetypediversified -0.007109 0.012933 -0.5 0.5825
## sitetypeagroforestry -0.003119 0.013100 -0.2 0.8118
## sitetypeforest -0.027486 0.014034 -2.0 0.0502 .
## sitetypeKipuka -0.004156 0.019873 -0.2 0.8344
## scale(ph) -0.001569 0.004308 -0.4 0.7157
## scale(c) -0.017582 0.008466 -2.1 0.0378 *
## scale(n) 0.010400 0.009163 1.1 0.2564
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Anova(func_ko_mod)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: ko_richness
## Chisq Df Pr(>Chisq)
## sitetype 5.1254 4 0.27467
## scale(ph) 0.1327 1 0.71570
## scale(c) 4.3131 1 0.03782 *
## scale(n) 1.2881 1 0.25639
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ko_dist <- vegdist(ko_matched, method = "bray")
(func_permanova <- adonis2(
ko_dist ~ sitetype + ph + c + n,
data = func_data,
permutations = 999))
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = ko_dist ~ sitetype + ph + c + n, data = func_data, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## Model 7 0.09776 0.10167 0.9378 0.491
## Residual 58 0.86370 0.89833
## Total 65 0.96145 1.00000
ko_pcoa <- cmdscale(ko_dist)
ko_pcoa_df <- data.frame(
sample_id = rownames(ko_pcoa),
PCoA1 = ko_pcoa[, 1],
PCoA2 = ko_pcoa[, 2]
) %>% merge(func_data, by = "sample_id")
ggplot(ko_pcoa_df, aes(x = PCoA1, y = PCoA2)) +
stat_ellipse(aes(color = sitetype, fill = sitetype),
alpha = 0.2, level = 0.95, geom = "polygon") +
geom_point(aes(color = sitetype), size = 2) +
scale_color_manual(values = site_colors) +
scale_fill_manual(values = site_colors) +
labs(x = "PCoA 1", y = "PCoA 2",
color = "site type", fill = "site type") +
theme_bw()
PCoA of functional community profiles (KO abundances)
functional_categories <- list(
# Nitrogen cycling
nitrogen_fixation = c("K02588", "K02586", "K02591"),
nitrification = c("K10944", "K10945", "K10946"),
denitrification = c("K00370", "K00371", "K00374", "K00376"),
nitrate_reduction = c("K00362", "K00363"),
# Carbon cycling
carbon_fixation = c("K01601", "K01602", "K00855"),
methane_oxidation = c("K10944", "K14127"),
cellulose_degradation = c("K01225", "K01179", "K01181"),
# Phosphorus cycling
phosphatase = c("K01113", "K03787", "K09474"),
phosphonate = c("K06193", "K05306"),
# Stress resistance
osmotic_stress = c("K02168", "K02169", "K02170"),
oxidative_stress = c("K00428", "K04564", "K00833"),
heavy_metals = c("K07787", "K07788"),
# Plant interactions
plant_hormones = c("K00128", "K00129"),
biofilm_formation = c("K02403", "K02404", "K02405"),
# Antibiotic resistance
beta_lactamase = c("K01467", "K17836", "K17837"),
multidrug_resistance = c("K03296", "K07799")
)
analyze_functional_category <- function(ko_data, func_metadata, category_name, ko_list) {
ko_names <- paste0("ko:", ko_list)
present_kos <- ko_names[ko_names %in% colnames(ko_data)]
cat("\n\n**", category_name, "** — KOs searched:", length(ko_list),
"| present:", length(present_kos), "\n\n")
if (length(present_kos) == 0) {
cat("*No KOs from this category found in dataset.*\n")
return(NULL)
}
category_abundance <- rowSums(ko_data[, present_kos, drop = FALSE])
if (var(category_abundance) == 0) {
cat("*No variation in abundance — all samples have the same value.*\n")
return(NULL)
}
category_data <- data.frame(
sample_id = names(category_abundance),
abundance = category_abundance,
sitetype = func_metadata$sitetype[match(names(category_abundance), func_metadata$sample_id)],
ph = func_metadata$ph[match(names(category_abundance), func_metadata$sample_id)],
c = func_metadata$c[match(names(category_abundance), func_metadata$sample_id)],
n = func_metadata$n[match(names(category_abundance), func_metadata$sample_id)],
site_initials = func_metadata$site_initials[match(names(category_abundance), func_metadata$sample_id)]
)
if (sum(category_data$abundance > 0) > 10) {
category_mod <- glmmTMB(
abundance ~ sitetype + scale(ph) + scale(c) + scale(n) + (1 | site_initials),
data = category_data,
family = nbinom2(link = "log"))
print(Anova(category_mod))
sitetype_p <- Anova(category_mod)$`Pr(>Chisq)`[1]
if (sitetype_p < 0.1) {
cat("Site-type effect detected (p =", round(sitetype_p, 4), ")\n")
}
} else {
cat("*Too few non-zero values for statistical analysis.*\n")
}
category_plot <- ggplot(category_data, aes(x = sitetype, y = abundance)) +
geom_boxplot(aes(fill = sitetype), alpha = 0.7) +
geom_jitter(width = 0.2, alpha = 0.6) +
scale_fill_manual(values = site_colors, guide = "none") +
scale_x_discrete(limits = sitetype_levels) +
labs(title = paste(category_name, "potential"),
subtitle = paste("Based on", length(present_kos), "KO functions"),
x = "site type", y = "relative abundance") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
list(data = category_data, plot = category_plot, present_kos = present_kos,
model = if (exists("category_mod")) category_mod else NULL)
}
category_results <- list()
for (i in seq_along(functional_categories)) {
category_name <- names(functional_categories)[i]
result <- analyze_functional_category(
ko_data = ko_matched,
func_metadata = func_data,
category_name = category_name,
ko_list = functional_categories[[i]])
if (!is.null(result)) category_results[[category_name]] <- result
}
** nitrogen_fixation ** — KOs searched: 3 | present: 3
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: abundance Chisq Df Pr(>Chisq)
sitetype 2.4690 4 0.65019
scale(ph) 4.3694 1 0.03659 scale(c) 4.0072 1 0.04531 scale(n)
0.5069 1 0.47648
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05
‘.’ 0.1 ’ ’ 1
** nitrification ** — KOs searched: 3 | present: 3
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: abundance Chisq Df Pr(>Chisq)
sitetype 7.8841 4 0.0959161 .
scale(ph) 1.9214 1 0.1657059
scale(c) 13.3661 1 0.0002562 * scale(n) 10.2740 1 0.0013492
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01
’’ 0.05 ‘.’ 0.1 ’ ’ 1 Site-type effect detected (p = 0.0959 )
** denitrification ** — KOs searched: 4 | present: 4
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: abundance Chisq Df Pr(>Chisq)
sitetype 8.1961 4 0.08465 . scale(ph) 0.0899 1 0.76435
scale(c) 3.0565 1 0.08042 . scale(n) 3.0656 1 0.07997 . — Signif. codes:
0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1
Site-type effect detected (p = 0.0847 )
** nitrate_reduction ** — KOs searched: 2 | present: 2
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: abundance Chisq Df Pr(>Chisq)
sitetype 9.1043 4 0.0585444 .
scale(ph) 0.9888 1 0.3200320
scale(c) 10.8778 1 0.0009732 * scale(n) 8.4206 1 0.0037100
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01
’’ 0.05 ‘.’ 0.1 ’ ’ 1 Site-type effect detected (p = 0.0585 )
** carbon_fixation ** — KOs searched: 3 | present: 3
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: abundance Chisq Df Pr(>Chisq)
sitetype 0.9920 4 0.911011
scale(ph) 9.1443 1 0.002495 ** scale(c) 1.6875 1 0.193926
scale(n) 0.1756 1 0.675162
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05
‘.’ 0.1 ’ ’ 1
** methane_oxidation ** — KOs searched: 2 | present: 2
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: abundance Chisq Df Pr(>Chisq)
sitetype 10.3128 4 0.0354758 *
scale(ph) 1.5623 1 0.2113305
scale(c) 14.2311 1 0.0001617 * scale(n) 9.3650 1 0.0022117
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01
’’ 0.05 ‘.’ 0.1 ’ ’ 1 Site-type effect detected (p = 0.0355 )
** cellulose_degradation ** — KOs searched: 3 | present: 2
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: abundance Chisq Df Pr(>Chisq)
sitetype 3.0218 4 0.5541805
scale(ph) 12.3598 1 0.0004387 *** scale(c) 1.8500 1 0.1737821
scale(n) 1.0513 1 0.3052148
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05
‘.’ 0.1 ’ ’ 1
** phosphatase ** — KOs searched: 3 | present: 3
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: abundance Chisq Df Pr(>Chisq)
sitetype 3.0583 4 0.548116
scale(ph) 8.6283 1 0.003310 scale(c) 7.1351 1 0.007559
scale(n) 3.8209 1 0.050618 . — Signif. codes: 0 ‘’
0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1
** phosphonate ** — KOs searched: 2 | present: 2
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: abundance Chisq Df Pr(>Chisq)
sitetype 2.7768 4 0.595837
scale(ph) 9.5126 1 0.002041 ** scale(c) 0.4824 1 0.487345
scale(n) 0.4515 1 0.501643
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05
‘.’ 0.1 ’ ’ 1
** osmotic_stress ** — KOs searched: 3 | present: 3
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: abundance Chisq Df Pr(>Chisq)
sitetype 8.7355 4 0.06806 . scale(ph) 7.5837 1 0.00589 ** scale(c)
2.1923 1 0.13870
scale(n) 1.6427 1 0.19996
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05
‘.’ 0.1 ’ ’ 1 Site-type effect detected (p = 0.0681 )
** oxidative_stress ** — KOs searched: 3 | present: 3
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: abundance Chisq Df Pr(>Chisq)
sitetype 5.2410 4 0.26345
scale(ph) 6.3346 1 0.01184 scale(c) 5.3221 1 0.02106 scale(n)
2.0440 1 0.15281
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05
‘.’ 0.1 ’ ’ 1
** heavy_metals ** — KOs searched: 2 | present: 2
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: abundance Chisq Df Pr(>Chisq)
sitetype 2.0539 4 0.72584
scale(ph) 36.5947 1 1.454e-09 *** scale(c) 2.8606 1 0.09077 .
scale(n) 0.1909 1 0.66218
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05
‘.’ 0.1 ’ ’ 1
** plant_hormones ** — KOs searched: 2 | present: 2
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: abundance Chisq Df Pr(>Chisq)
sitetype 2.7476 4 0.600904
scale(ph) 9.9844 1 0.001579 ** scale(c) 4.1729 1 0.041074 * scale(n)
2.9577 1 0.085467 . — Signif. codes: 0 ‘’ 0.001 ’’
0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1
** biofilm_formation ** — KOs searched: 3 | present: 3
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: abundance Chisq Df Pr(>Chisq)
sitetype 3.8380 4 0.428375
scale(ph) 10.9149 1 0.000954 *** scale(c) 1.0087 1 0.315223
scale(n) 1.8528 1 0.173454
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05
‘.’ 0.1 ’ ’ 1
** beta_lactamase ** — KOs searched: 3 | present: 3
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: abundance Chisq Df Pr(>Chisq)
sitetype 10.4406 4 0.033624 * scale(ph) 7.6185 1 0.005777 ** scale(c)
3.6930 1 0.054643 . scale(n) 5.0097 1 0.025205 * — Signif. codes: 0
‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1
Site-type effect detected (p = 0.0336 )
** multidrug_resistance ** — KOs searched: 2 | present: 2
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: abundance Chisq Df Pr(>Chisq)
sitetype 2.0122 4 0.73352
scale(ph) 23.4132 1 1.307e-06 *** scale(c) 2.7958 1 0.09451 .
scale(n) 0.2922 1 0.58879
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05
‘.’ 0.1 ’ ’ 1
for (cat_name in names(category_results)) {
print(category_results[[cat_name]]$plot)
}
Functional category abundances by site type
Functional category abundances by site type
Functional category abundances by site type
Functional category abundances by site type
Functional category abundances by site type
Functional category abundances by site type
Functional category abundances by site type
Functional category abundances by site type
Functional category abundances by site type
Functional category abundances by site type
Functional category abundances by site type
Functional category abundances by site type
Functional category abundances by site type
Functional category abundances by site type
Functional category abundances by site type
Functional category abundances by site type
key_functions <- c("methane_oxidation", "beta_lactamase", "nitrification", "denitrification")
for (func_name in key_functions) {
if (!func_name %in% names(category_results)) next
result <- category_results[[func_name]]
p <- ggplot(result$data, aes(x = sitetype, y = abundance)) +
geom_boxplot(aes(fill = sitetype), alpha = 0.7, outlier.shape = NA) +
geom_jitter(width = 0.2, alpha = 0.6, size = 2) +
scale_fill_manual(values = site_colors, guide = "none") +
scale_x_discrete(limits = sitetype_levels) +
labs(title = paste(str_to_title(gsub("_", " ", func_name)), "potential"),
subtitle = paste("Based on", length(result$present_kos), "KO functions"),
x = "site type", y = "relative abundance (RPKM)") +
theme_bw(base_size = 12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(size = 14, face = "bold"),
panel.grid.major.x = element_blank())
print(p)
}
carbon_functions <- c("carbon_fixation", "methane_oxidation", "cellulose_degradation")
nitrogen_functions <- c("nitrogen_fixation", "nitrification", "denitrification", "nitrate_reduction")
carbon_data <- data.frame()
nitrogen_data <- data.frame()
for (func in carbon_functions) {
if (!func %in% names(category_results)) next
d <- category_results[[func]]$data[, c("sample_id", "abundance", "sitetype", "c", "n", "ph")]
if (nrow(carbon_data) == 0) {
carbon_data <- d; colnames(carbon_data)[2] <- func
} else {
carbon_data <- merge(carbon_data,
category_results[[func]]$data[, c("sample_id", "abundance")], by = "sample_id")
colnames(carbon_data)[ncol(carbon_data)] <- func
}
}
for (func in nitrogen_functions) {
if (!func %in% names(category_results)) next
d <- category_results[[func]]$data[, c("sample_id", "abundance", "sitetype", "c", "n", "ph")]
if (nrow(nitrogen_data) == 0) {
nitrogen_data <- d; colnames(nitrogen_data)[2] <- func
} else {
nitrogen_data <- merge(nitrogen_data,
category_results[[func]]$data[, c("sample_id", "abundance")], by = "sample_id")
colnames(nitrogen_data)[ncol(nitrogen_data)] <- func
}
}
c_func_cols <- intersect(carbon_functions, colnames(carbon_data))
n_func_cols <- intersect(nitrogen_functions, colnames(nitrogen_data))
if (nrow(carbon_data) > 0) carbon_data$total_carbon_functions <- rowSums(carbon_data[, c_func_cols, drop = FALSE], na.rm = TRUE)
if (nrow(nitrogen_data) > 0) nitrogen_data$total_nitrogen_functions <- rowSums(nitrogen_data[, n_func_cols, drop = FALSE], na.rm = TRUE)
if (nrow(carbon_data) > 0) {
for (func in c_func_cols) {
ct <- cor.test(carbon_data[[func]], carbon_data$c)
cat(func, ": r =", round(ct$estimate, 3), ", p =", round(ct$p.value, 4), "\n")
}
total_c_cor <- cor.test(carbon_data$total_carbon_functions, carbon_data$c)
cat("TOTAL: r =", round(total_c_cor$estimate, 3),
", p =", round(total_c_cor$p.value, 4), "\n")
ggplot(carbon_data, aes(x = c, y = total_carbon_functions)) +
geom_point(aes(color = sitetype), size = 3) +
geom_smooth(method = "lm", se = TRUE, color = "black") +
scale_color_manual(values = site_colors) +
labs(x = "soil carbon (%)", y = "total carbon cycling potential",
subtitle = paste("r =", round(total_c_cor$estimate, 3),
", p =", round(total_c_cor$p.value, 4))) +
theme_bw()
}
## carbon_fixation : r = -0.172 , p = 0.1661
## methane_oxidation : r = -0.145 , p = 0.2456
## cellulose_degradation : r = -0.108 , p = 0.3895
## TOTAL: r = -0.16 , p = 0.2004
Total carbon cycling potential vs soil carbon
if (nrow(nitrogen_data) > 0) {
for (func in n_func_cols) {
ct <- cor.test(nitrogen_data[[func]], nitrogen_data$n)
cat(func, ": r =", round(ct$estimate, 3), ", p =", round(ct$p.value, 4), "\n")
}
total_n_cor <- cor.test(nitrogen_data$total_nitrogen_functions, nitrogen_data$n)
cat("TOTAL: r =", round(total_n_cor$estimate, 3),
", p =", round(total_n_cor$p.value, 4), "\n")
ggplot(nitrogen_data, aes(x = n, y = total_nitrogen_functions)) +
geom_point(aes(color = sitetype), size = 3) +
geom_smooth(method = "lm", se = TRUE, color = "black") +
scale_color_manual(values = site_colors) +
labs(x = "soil nitrogen (%)", y = "total nitrogen cycling potential",
subtitle = paste("r =", round(total_n_cor$estimate, 3),
", p =", round(total_n_cor$p.value, 4))) +
theme_bw()
}
## nitrogen_fixation : r = -0.217 , p = 0.0801
## nitrification : r = -0.007 , p = 0.9566
## denitrification : r = -0.104 , p = 0.407
## nitrate_reduction : r = -0.142 , p = 0.2554
## TOTAL: r = -0.138 , p = 0.2691
Total nitrogen cycling potential vs soil nitrogen
if (nrow(carbon_data) > 0 && nrow(nitrogen_data) > 0) {
cn_data <- merge(
carbon_data[, c("sample_id", "total_carbon_functions", "c", "n", "sitetype")],
nitrogen_data[, c("sample_id", "total_nitrogen_functions")],
by = "sample_id") %>%
mutate(soil_cn_ratio = c / n,
functional_cn_ratio = total_carbon_functions / total_nitrogen_functions)
cn_cor <- cor.test(cn_data$soil_cn_ratio, cn_data$functional_cn_ratio)
cat("Soil C:N vs functional C:N: r =", round(cn_cor$estimate, 3),
", p =", round(cn_cor$p.value, 4), "\n\n")
ggplot(cn_data, aes(x = soil_cn_ratio, y = functional_cn_ratio)) +
geom_point(aes(color = sitetype), size = 3) +
geom_smooth(method = "lm", se = TRUE, color = "black") +
scale_color_manual(values = site_colors) +
labs(x = "soil C:N ratio", y = "functional C:N ratio",
subtitle = paste("r =", round(cn_cor$estimate, 3),
", p =", round(cn_cor$p.value, 4))) +
theme_bw()
}
## Soil C:N vs functional C:N: r = 0.407 , p = 7e-04
Soil C:N ratio vs functional C:N ratio
if (exists("cn_data")) {
cn_data %>%
group_by(sitetype) %>%
summarise(
soil_cn = mean(soil_cn_ratio, na.rm = TRUE),
functional_cn = mean(functional_cn_ratio, na.rm = TRUE),
.groups = "drop"
) %>%
arrange(soil_cn)
}
## # A tibble: 5 × 3
## sitetype soil_cn functional_cn
## <fct> <dbl> <dbl>
## 1 diversified 12.8 1.01
## 2 Kipuka 13.1 1.23
## 3 agroforestry 13.4 1.00
## 4 monoculture 13.8 1.05
## 5 forest 15.9 1.24
functions_to_test <- list(
carbon_fixation = "c",
cellulose_degradation = "c",
nitrogen_fixation = "n",
nitrification = "n",
denitrification = "n"
)
for (func_name in names(functions_to_test)) {
soil_var <- functions_to_test[[func_name]]
if (!func_name %in% names(category_results)) next
fd <- category_results[[func_name]]$data
ct <- cor.test(fd$abundance, fd[[soil_var]])
cat(func_name, "vs soil", soil_var,
": r =", round(ct$estimate, 3),
", p =", round(ct$p.value, 4), "\n")
p <- ggplot(fd, aes_string(x = soil_var, y = "abundance")) +
geom_point(aes(color = sitetype), size = 3) +
geom_smooth(method = "lm", se = TRUE, color = "black") +
scale_color_manual(values = site_colors) +
labs(x = paste("soil", toupper(soil_var), "(%)"),
y = paste(str_to_title(gsub("_", " ", func_name)), "potential"),
subtitle = paste("r =", round(ct$estimate, 3),
", p =", round(ct$p.value, 4))) +
theme_bw()
print(p)
}
## carbon_fixation vs soil c : r = -0.172 , p = 0.1661
## cellulose_degradation vs soil c : r = -0.108 , p = 0.3895
## nitrogen_fixation vs soil n : r = -0.217 , p = 0.0801
## nitrification vs soil n : r = -0.007 , p = 0.9566
## denitrification vs soil n : r = -0.104 , p = 0.407
summary_tbl <- map_dfr(names(category_results), function(cat_name) {
result <- category_results[[cat_name]]
mean_by_site <- result$data %>%
group_by(sitetype) %>%
summarise(mean_abundance = mean(abundance, na.rm = TRUE), .groups = "drop") %>%
arrange(desc(mean_abundance))
tibble(
category = cat_name,
n_kos_present = length(result$present_kos),
highest_sitetype = as.character(mean_by_site$sitetype[1]),
lowest_sitetype = as.character(mean_by_site$sitetype[nrow(mean_by_site)])
)
})
summary_tbl
## # A tibble: 16 × 4
## category n_kos_present highest_sitetype lowest_sitetype
## <chr> <int> <chr> <chr>
## 1 nitrogen_fixation 3 diversified Kipuka
## 2 nitrification 3 agroforestry forest
## 3 denitrification 4 monoculture forest
## 4 nitrate_reduction 2 agroforestry forest
## 5 carbon_fixation 3 monoculture agroforestry
## 6 methane_oxidation 2 agroforestry Kipuka
## 7 cellulose_degradation 2 monoculture forest
## 8 phosphatase 3 monoculture forest
## 9 phosphonate 2 monoculture forest
## 10 osmotic_stress 3 monoculture forest
## 11 oxidative_stress 3 agroforestry Kipuka
## 12 heavy_metals 2 monoculture Kipuka
## 13 plant_hormones 2 monoculture forest
## 14 biofilm_formation 3 Kipuka forest
## 15 beta_lactamase 3 monoculture forest
## 16 multidrug_resistance 2 monoculture Kipuka