Setup

Packages

library(tidyverse)
library(vegan)
library(phyloseq)
library(ggplot2)
library(glmmTMB)
library(car)
library(permute)

setwd("~/Desktop/hi22/analyses/bacteria")

Colors and factor levels

site_colors <- c(
  monoculture  = "#F7C1BB",
  diversified  = "#885A5A",
  agroforestry = "#84B082",
  forest       = "#353A47",
  Kipuka       = "#DC136C",
  Monoculture  = "#F7C1BB",
  Diversified  = "#885A5A",
  Agroforestry = "#84B082",
  Forest       = "#353A47"
)

sitetype_levels <- c("monoculture", "diversified", "agroforestry", "forest", "Kipuka")

Note: This document depends on ps_bact and meta_cc from the bacterial analysis.
Run hi22_bacteria_analysis.Rmd first, or load the necessary objects below before knitting.

ps_bact <- readRDS("Intermediate_data/phyloseq_b_clean.RDS")
meta_cc <- readRDS("Intermediate_data/meta_cc.RDS")
# meta_cc is built during the bacteria analysis; if knitting independently,
# source the bacteria Rmd or rebuild it here as needed.

Load PICRUSt2 Data

ko_functions <- read.table(
  "picrust2_output/KO_metagenome_out/pred_metagenome_unstrat.tsv",
  header = TRUE, row.names = 1, sep = "\t")

pathways <- read.table(
  "picrust2_output/pathways_out/path_abun_unstrat.tsv",
  header = TRUE, row.names = 1, sep = "\t")

cat("KO functions:", dim(ko_functions), "\n")
## KO functions: 8969 67
cat("Pathways:",     dim(pathways), "\n")
## Pathways: 556 67
# Transpose so samples are rows
ko_functions_t <- t(ko_functions)
pathways_t     <- t(pathways)

Sample Name Mapping

PICRUSt2 converts - to . in sample names. This section maps them back to HI_ IDs so they align with the rest of the project.

sample_metadata_full <- as(sample_data(ps_bact), "data.frame")
sample_name_mapping  <- setNames(sample_metadata_full$id, rownames(sample_metadata_full))

picrust_sample_names <- rownames(ko_functions_t)

# Restore dashes if PICRUSt2 converted them to dots
if (any(grepl("\\.", picrust_sample_names)) && any(grepl("-", names(sample_name_mapping)))) {
  picrust_names_fixed <- gsub("\\.", "-", picrust_sample_names)
} else {
  picrust_names_fixed <- picrust_sample_names
}

hi_ids_mapped      <- sample_name_mapping[picrust_names_fixed]
successful_mappings <- !is.na(hi_ids_mapped)

cat("Successful mappings:", sum(successful_mappings), "out of", length(hi_ids_mapped), "\n")
## Successful mappings: 67 out of 67
# Diagnostic: show a few mappings to confirm they look right
data.frame(
  picrust_original = picrust_sample_names[1:5],
  picrust_fixed    = picrust_names_fixed[1:5],
  hi_id_mapped     = hi_ids_mapped[1:5]
)
##         picrust_original picrust_fixed hi_id_mapped
## AA-A-01          AA.A.01       AA-A-01        HI_58
## AA-A-02          AA.A.02       AA-A-02        HI_36
## AA-A-03          AA.A.03       AA-A-03        HI_10
## AA-A-04          AA.A.04       AA-A-04        HI_32
## AA-A-05          AA.A.05       AA-A-05        HI_70
if (sum(successful_mappings) == 0) {
  stop("No successful sample name mappings. Check sample naming conventions.")
}

ko_mapped       <- ko_functions_t[successful_mappings, ]
pathways_mapped <- pathways_t[successful_mappings, ]
hi_ids_clean    <- hi_ids_mapped[successful_mappings]

rownames(ko_mapped)       <- hi_ids_clean
rownames(pathways_mapped) <- hi_ids_clean

common_samples_final <- intersect(rownames(ko_mapped), meta_cc$id)
cat("Final common samples:", length(common_samples_final), "\n")
## Final common samples: 66
ko_matched       <- ko_mapped[common_samples_final, ]
pathways_matched <- pathways_mapped[common_samples_final, ]
meta_matched     <- meta_cc[meta_cc$id %in% common_samples_final, ]

Functional Diversity

func_diversity <- data.frame(
  sample_id        = rownames(ko_matched),
  ko_richness      = specnumber(ko_matched),
  ko_shannon       = diversity(ko_matched),
  pathway_richness = specnumber(pathways_matched),
  pathway_shannon  = diversity(pathways_matched),
  stringsAsFactors = FALSE
)

# Align rows and merge with metadata
meta_matched_ord   <- meta_matched[order(meta_matched$id), ]
ko_matched_ord     <- ko_matched[order(rownames(ko_matched)), ]
func_div_ord       <- func_diversity[order(func_diversity$sample_id), ]

func_data <- cbind(func_div_ord, meta_matched_ord)

cat("Samples:", nrow(func_data), "\n")
## Samples: 66
cat("KO functions:", ncol(ko_matched), "\n")
## KO functions: 8969
cat("Pathways:", ncol(pathways_matched), "\n")
## Pathways: 556
head(func_diversity)
##       sample_id ko_richness ko_shannon pathway_richness pathway_shannon
## HI_58     HI_58        7643   7.971415              497        5.689721
## HI_36     HI_36        7482   7.871598              487        5.618232
## HI_10     HI_10        7165   7.909284              477        5.640280
## HI_32     HI_32        7438   7.848995              491        5.612405
## HI_70     HI_70        7857   7.942741              511        5.678624
## HI_46     HI_46        7488   7.933188              497        5.676335

KO richness model

func_ko_mod <- glmmTMB(
  ko_richness ~ sitetype +
    scale(ph) + scale(c) + scale(n) + (1 | site_initials),
  data   = func_data,
  family = nbinom2(link = "log"))

summary(func_ko_mod)
##  Family: nbinom2  ( log )
## Formula:          
## ko_richness ~ sitetype + scale(ph) + scale(c) + scale(n) + (1 |  
##     site_initials)
## Data: func_data
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     913.3     935.2    -446.7     893.3        56 
## 
## Random effects:
## 
## Conditional model:
##  Groups        Name        Variance Std.Dev.
##  site_initials (Intercept) 7.74e-05 0.008798
## Number of obs: 66, groups:  site_initials, 22
## 
## Dispersion parameter for nbinom2 family (): 1.71e+03 
## 
## Conditional model:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           8.929403   0.010420   857.0   <2e-16 ***
## sitetypediversified  -0.007109   0.012933    -0.5   0.5825    
## sitetypeagroforestry -0.003119   0.013100    -0.2   0.8118    
## sitetypeforest       -0.027486   0.014034    -2.0   0.0502 .  
## sitetypeKipuka       -0.004156   0.019873    -0.2   0.8344    
## scale(ph)            -0.001569   0.004308    -0.4   0.7157    
## scale(c)             -0.017582   0.008466    -2.1   0.0378 *  
## scale(n)              0.010400   0.009163     1.1   0.2564    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Anova(func_ko_mod)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: ko_richness
##            Chisq Df Pr(>Chisq)  
## sitetype  5.1254  4    0.27467  
## scale(ph) 0.1327  1    0.71570  
## scale(c)  4.3131  1    0.03782 *
## scale(n)  1.2881  1    0.25639  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Functional Community Composition

PERMANOVA

ko_dist <- vegdist(ko_matched, method = "bray")

(func_permanova <- adonis2(
  ko_dist ~ sitetype + ph + c + n,
  data         = func_data,
  permutations = 999))
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = ko_dist ~ sitetype + ph + c + n, data = func_data, permutations = 999)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     7  0.09776 0.10167 0.9378  0.491
## Residual 58  0.86370 0.89833              
## Total    65  0.96145 1.00000

PCoA

ko_pcoa    <- cmdscale(ko_dist)
ko_pcoa_df <- data.frame(
  sample_id = rownames(ko_pcoa),
  PCoA1     = ko_pcoa[, 1],
  PCoA2     = ko_pcoa[, 2]
) %>% merge(func_data, by = "sample_id")

ggplot(ko_pcoa_df, aes(x = PCoA1, y = PCoA2)) +
  stat_ellipse(aes(color = sitetype, fill = sitetype),
    alpha = 0.2, level = 0.95, geom = "polygon") +
  geom_point(aes(color = sitetype), size = 2) +
  scale_color_manual(values = site_colors) +
  scale_fill_manual(values = site_colors) +
  labs(x = "PCoA 1", y = "PCoA 2",
       color = "site type", fill = "site type") +
  theme_bw()
PCoA of functional community profiles (KO abundances)

PCoA of functional community profiles (KO abundances)


Functional Categories

Category definitions

functional_categories <- list(
  # Nitrogen cycling
  nitrogen_fixation  = c("K02588", "K02586", "K02591"),
  nitrification      = c("K10944", "K10945", "K10946"),
  denitrification    = c("K00370", "K00371", "K00374", "K00376"),
  nitrate_reduction  = c("K00362", "K00363"),
  # Carbon cycling
  carbon_fixation    = c("K01601", "K01602", "K00855"),
  methane_oxidation  = c("K10944", "K14127"),
  cellulose_degradation = c("K01225", "K01179", "K01181"),
  # Phosphorus cycling
  phosphatase        = c("K01113", "K03787", "K09474"),
  phosphonate        = c("K06193", "K05306"),
  # Stress resistance
  osmotic_stress     = c("K02168", "K02169", "K02170"),
  oxidative_stress   = c("K00428", "K04564", "K00833"),
  heavy_metals       = c("K07787", "K07788"),
  # Plant interactions
  plant_hormones     = c("K00128", "K00129"),
  biofilm_formation  = c("K02403", "K02404", "K02405"),
  # Antibiotic resistance
  beta_lactamase     = c("K01467", "K17836", "K17837"),
  multidrug_resistance = c("K03296", "K07799")
)

Run category analysis

analyze_functional_category <- function(ko_data, func_metadata, category_name, ko_list) {
  ko_names    <- paste0("ko:", ko_list)
  present_kos <- ko_names[ko_names %in% colnames(ko_data)]

  cat("\n\n**", category_name, "** — KOs searched:", length(ko_list),
      "| present:", length(present_kos), "\n\n")

  if (length(present_kos) == 0) {
    cat("*No KOs from this category found in dataset.*\n")
    return(NULL)
  }

  category_abundance <- rowSums(ko_data[, present_kos, drop = FALSE])

  if (var(category_abundance) == 0) {
    cat("*No variation in abundance — all samples have the same value.*\n")
    return(NULL)
  }

  category_data <- data.frame(
    sample_id     = names(category_abundance),
    abundance     = category_abundance,
    sitetype      = func_metadata$sitetype[match(names(category_abundance), func_metadata$sample_id)],
    ph            = func_metadata$ph[match(names(category_abundance), func_metadata$sample_id)],
    c             = func_metadata$c[match(names(category_abundance), func_metadata$sample_id)],
    n             = func_metadata$n[match(names(category_abundance), func_metadata$sample_id)],
    site_initials = func_metadata$site_initials[match(names(category_abundance), func_metadata$sample_id)]
  )

  if (sum(category_data$abundance > 0) > 10) {
    category_mod <- glmmTMB(
      abundance ~ sitetype + scale(ph) + scale(c) + scale(n) + (1 | site_initials),
      data   = category_data,
      family = nbinom2(link = "log"))

    print(Anova(category_mod))

    sitetype_p <- Anova(category_mod)$`Pr(>Chisq)`[1]
    if (sitetype_p < 0.1) {
      cat("Site-type effect detected (p =", round(sitetype_p, 4), ")\n")
    }
  } else {
    cat("*Too few non-zero values for statistical analysis.*\n")
  }

  category_plot <- ggplot(category_data, aes(x = sitetype, y = abundance)) +
    geom_boxplot(aes(fill = sitetype), alpha = 0.7) +
    geom_jitter(width = 0.2, alpha = 0.6) +
    scale_fill_manual(values = site_colors, guide = "none") +
    scale_x_discrete(limits = sitetype_levels) +
    labs(title    = paste(category_name, "potential"),
         subtitle = paste("Based on", length(present_kos), "KO functions"),
         x = "site type", y = "relative abundance") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

  list(data = category_data, plot = category_plot, present_kos = present_kos,
       model = if (exists("category_mod")) category_mod else NULL)
}

category_results <- list()
for (i in seq_along(functional_categories)) {
  category_name <- names(functional_categories)[i]
  result <- analyze_functional_category(
    ko_data       = ko_matched,
    func_metadata = func_data,
    category_name = category_name,
    ko_list       = functional_categories[[i]])
  if (!is.null(result)) category_results[[category_name]] <- result
}

** nitrogen_fixation ** — KOs searched: 3 | present: 3

Analysis of Deviance Table (Type II Wald chisquare tests)

Response: abundance Chisq Df Pr(>Chisq)
sitetype 2.4690 4 0.65019
scale(ph) 4.3694 1 0.03659 scale(c) 4.0072 1 0.04531 scale(n) 0.5069 1 0.47648
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1

** nitrification ** — KOs searched: 3 | present: 3

Analysis of Deviance Table (Type II Wald chisquare tests)

Response: abundance Chisq Df Pr(>Chisq)
sitetype 7.8841 4 0.0959161 .
scale(ph) 1.9214 1 0.1657059
scale(c) 13.3661 1 0.0002562 * scale(n) 10.2740 1 0.0013492 — Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1 Site-type effect detected (p = 0.0959 )

** denitrification ** — KOs searched: 4 | present: 4

Analysis of Deviance Table (Type II Wald chisquare tests)

Response: abundance Chisq Df Pr(>Chisq)
sitetype 8.1961 4 0.08465 . scale(ph) 0.0899 1 0.76435
scale(c) 3.0565 1 0.08042 . scale(n) 3.0656 1 0.07997 . — Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1 Site-type effect detected (p = 0.0847 )

** nitrate_reduction ** — KOs searched: 2 | present: 2

Analysis of Deviance Table (Type II Wald chisquare tests)

Response: abundance Chisq Df Pr(>Chisq)
sitetype 9.1043 4 0.0585444 .
scale(ph) 0.9888 1 0.3200320
scale(c) 10.8778 1 0.0009732 * scale(n) 8.4206 1 0.0037100 — Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1 Site-type effect detected (p = 0.0585 )

** carbon_fixation ** — KOs searched: 3 | present: 3

Analysis of Deviance Table (Type II Wald chisquare tests)

Response: abundance Chisq Df Pr(>Chisq)
sitetype 0.9920 4 0.911011
scale(ph) 9.1443 1 0.002495 ** scale(c) 1.6875 1 0.193926
scale(n) 0.1756 1 0.675162
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1

** methane_oxidation ** — KOs searched: 2 | present: 2

Analysis of Deviance Table (Type II Wald chisquare tests)

Response: abundance Chisq Df Pr(>Chisq)
sitetype 10.3128 4 0.0354758 *
scale(ph) 1.5623 1 0.2113305
scale(c) 14.2311 1 0.0001617 * scale(n) 9.3650 1 0.0022117 — Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1 Site-type effect detected (p = 0.0355 )

** cellulose_degradation ** — KOs searched: 3 | present: 2

Analysis of Deviance Table (Type II Wald chisquare tests)

Response: abundance Chisq Df Pr(>Chisq)
sitetype 3.0218 4 0.5541805
scale(ph) 12.3598 1 0.0004387 *** scale(c) 1.8500 1 0.1737821
scale(n) 1.0513 1 0.3052148
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1

** phosphatase ** — KOs searched: 3 | present: 3

Analysis of Deviance Table (Type II Wald chisquare tests)

Response: abundance Chisq Df Pr(>Chisq)
sitetype 3.0583 4 0.548116
scale(ph) 8.6283 1 0.003310 scale(c) 7.1351 1 0.007559 scale(n) 3.8209 1 0.050618 . — Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1

** phosphonate ** — KOs searched: 2 | present: 2

Analysis of Deviance Table (Type II Wald chisquare tests)

Response: abundance Chisq Df Pr(>Chisq)
sitetype 2.7768 4 0.595837
scale(ph) 9.5126 1 0.002041 ** scale(c) 0.4824 1 0.487345
scale(n) 0.4515 1 0.501643
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1

** osmotic_stress ** — KOs searched: 3 | present: 3

Analysis of Deviance Table (Type II Wald chisquare tests)

Response: abundance Chisq Df Pr(>Chisq)
sitetype 8.7355 4 0.06806 . scale(ph) 7.5837 1 0.00589 ** scale(c) 2.1923 1 0.13870
scale(n) 1.6427 1 0.19996
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1 Site-type effect detected (p = 0.0681 )

** oxidative_stress ** — KOs searched: 3 | present: 3

Analysis of Deviance Table (Type II Wald chisquare tests)

Response: abundance Chisq Df Pr(>Chisq)
sitetype 5.2410 4 0.26345
scale(ph) 6.3346 1 0.01184 scale(c) 5.3221 1 0.02106 scale(n) 2.0440 1 0.15281
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1

** heavy_metals ** — KOs searched: 2 | present: 2

Analysis of Deviance Table (Type II Wald chisquare tests)

Response: abundance Chisq Df Pr(>Chisq)
sitetype 2.0539 4 0.72584
scale(ph) 36.5947 1 1.454e-09 *** scale(c) 2.8606 1 0.09077 .
scale(n) 0.1909 1 0.66218
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1

** plant_hormones ** — KOs searched: 2 | present: 2

Analysis of Deviance Table (Type II Wald chisquare tests)

Response: abundance Chisq Df Pr(>Chisq)
sitetype 2.7476 4 0.600904
scale(ph) 9.9844 1 0.001579 ** scale(c) 4.1729 1 0.041074 * scale(n) 2.9577 1 0.085467 . — Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1

** biofilm_formation ** — KOs searched: 3 | present: 3

Analysis of Deviance Table (Type II Wald chisquare tests)

Response: abundance Chisq Df Pr(>Chisq)
sitetype 3.8380 4 0.428375
scale(ph) 10.9149 1 0.000954 *** scale(c) 1.0087 1 0.315223
scale(n) 1.8528 1 0.173454
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1

** beta_lactamase ** — KOs searched: 3 | present: 3

Analysis of Deviance Table (Type II Wald chisquare tests)

Response: abundance Chisq Df Pr(>Chisq)
sitetype 10.4406 4 0.033624 * scale(ph) 7.6185 1 0.005777 ** scale(c) 3.6930 1 0.054643 . scale(n) 5.0097 1 0.025205 * — Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1 Site-type effect detected (p = 0.0336 )

** multidrug_resistance ** — KOs searched: 2 | present: 2

Analysis of Deviance Table (Type II Wald chisquare tests)

Response: abundance Chisq Df Pr(>Chisq)
sitetype 2.0122 4 0.73352
scale(ph) 23.4132 1 1.307e-06 *** scale(c) 2.7958 1 0.09451 .
scale(n) 0.2922 1 0.58879
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1

Category plots

for (cat_name in names(category_results)) {
  print(category_results[[cat_name]]$plot)
}
Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type

Functional category abundances by site type


Key Functions — Enhanced Plots

key_functions <- c("methane_oxidation", "beta_lactamase", "nitrification", "denitrification")

for (func_name in key_functions) {
  if (!func_name %in% names(category_results)) next
  result <- category_results[[func_name]]

  p <- ggplot(result$data, aes(x = sitetype, y = abundance)) +
    geom_boxplot(aes(fill = sitetype), alpha = 0.7, outlier.shape = NA) +
    geom_jitter(width = 0.2, alpha = 0.6, size = 2) +
    scale_fill_manual(values = site_colors, guide = "none") +
    scale_x_discrete(limits = sitetype_levels) +
    labs(title    = paste(str_to_title(gsub("_", " ", func_name)), "potential"),
         subtitle = paste("Based on", length(result$present_kos), "KO functions"),
         x = "site type", y = "relative abundance (RPKM)") +
    theme_bw(base_size = 12) +
    theme(axis.text.x          = element_text(angle = 45, hjust = 1),
          plot.title            = element_text(size = 14, face = "bold"),
          panel.grid.major.x   = element_blank())

  print(p)
}


C/N Functional Potential vs. Soil Chemistry

Assemble C/N data

carbon_functions   <- c("carbon_fixation", "methane_oxidation", "cellulose_degradation")
nitrogen_functions <- c("nitrogen_fixation", "nitrification", "denitrification", "nitrate_reduction")

carbon_data   <- data.frame()
nitrogen_data <- data.frame()

for (func in carbon_functions) {
  if (!func %in% names(category_results)) next
  d <- category_results[[func]]$data[, c("sample_id", "abundance", "sitetype", "c", "n", "ph")]
  if (nrow(carbon_data) == 0) {
    carbon_data <- d; colnames(carbon_data)[2] <- func
  } else {
    carbon_data <- merge(carbon_data,
      category_results[[func]]$data[, c("sample_id", "abundance")], by = "sample_id")
    colnames(carbon_data)[ncol(carbon_data)] <- func
  }
}

for (func in nitrogen_functions) {
  if (!func %in% names(category_results)) next
  d <- category_results[[func]]$data[, c("sample_id", "abundance", "sitetype", "c", "n", "ph")]
  if (nrow(nitrogen_data) == 0) {
    nitrogen_data <- d; colnames(nitrogen_data)[2] <- func
  } else {
    nitrogen_data <- merge(nitrogen_data,
      category_results[[func]]$data[, c("sample_id", "abundance")], by = "sample_id")
    colnames(nitrogen_data)[ncol(nitrogen_data)] <- func
  }
}

c_func_cols <- intersect(carbon_functions,   colnames(carbon_data))
n_func_cols <- intersect(nitrogen_functions, colnames(nitrogen_data))

if (nrow(carbon_data)   > 0) carbon_data$total_carbon_functions     <- rowSums(carbon_data[,   c_func_cols, drop = FALSE], na.rm = TRUE)
if (nrow(nitrogen_data) > 0) nitrogen_data$total_nitrogen_functions <- rowSums(nitrogen_data[, n_func_cols, drop = FALSE], na.rm = TRUE)

Carbon functions vs. soil C

if (nrow(carbon_data) > 0) {
  for (func in c_func_cols) {
    ct <- cor.test(carbon_data[[func]], carbon_data$c)
    cat(func, ": r =", round(ct$estimate, 3), ", p =", round(ct$p.value, 4), "\n")
  }

  total_c_cor <- cor.test(carbon_data$total_carbon_functions, carbon_data$c)
  cat("TOTAL: r =", round(total_c_cor$estimate, 3),
      ", p =", round(total_c_cor$p.value, 4), "\n")

  ggplot(carbon_data, aes(x = c, y = total_carbon_functions)) +
    geom_point(aes(color = sitetype), size = 3) +
    geom_smooth(method = "lm", se = TRUE, color = "black") +
    scale_color_manual(values = site_colors) +
    labs(x = "soil carbon (%)", y = "total carbon cycling potential",
         subtitle = paste("r =", round(total_c_cor$estimate, 3),
                          ", p =", round(total_c_cor$p.value, 4))) +
    theme_bw()
}
## carbon_fixation : r = -0.172 , p = 0.1661 
## methane_oxidation : r = -0.145 , p = 0.2456 
## cellulose_degradation : r = -0.108 , p = 0.3895 
## TOTAL: r = -0.16 , p = 0.2004
Total carbon cycling potential vs soil carbon

Total carbon cycling potential vs soil carbon

Nitrogen functions vs. soil N

if (nrow(nitrogen_data) > 0) {
  for (func in n_func_cols) {
    ct <- cor.test(nitrogen_data[[func]], nitrogen_data$n)
    cat(func, ": r =", round(ct$estimate, 3), ", p =", round(ct$p.value, 4), "\n")
  }

  total_n_cor <- cor.test(nitrogen_data$total_nitrogen_functions, nitrogen_data$n)
  cat("TOTAL: r =", round(total_n_cor$estimate, 3),
      ", p =", round(total_n_cor$p.value, 4), "\n")

  ggplot(nitrogen_data, aes(x = n, y = total_nitrogen_functions)) +
    geom_point(aes(color = sitetype), size = 3) +
    geom_smooth(method = "lm", se = TRUE, color = "black") +
    scale_color_manual(values = site_colors) +
    labs(x = "soil nitrogen (%)", y = "total nitrogen cycling potential",
         subtitle = paste("r =", round(total_n_cor$estimate, 3),
                          ", p =", round(total_n_cor$p.value, 4))) +
    theme_bw()
}
## nitrogen_fixation : r = -0.217 , p = 0.0801 
## nitrification : r = -0.007 , p = 0.9566 
## denitrification : r = -0.104 , p = 0.407 
## nitrate_reduction : r = -0.142 , p = 0.2554 
## TOTAL: r = -0.138 , p = 0.2691
Total nitrogen cycling potential vs soil nitrogen

Total nitrogen cycling potential vs soil nitrogen


C:N Ratio Analysis

if (nrow(carbon_data) > 0 && nrow(nitrogen_data) > 0) {
  cn_data <- merge(
    carbon_data[,   c("sample_id", "total_carbon_functions",   "c", "n", "sitetype")],
    nitrogen_data[, c("sample_id", "total_nitrogen_functions")],
    by = "sample_id") %>%
    mutate(soil_cn_ratio       = c / n,
           functional_cn_ratio = total_carbon_functions / total_nitrogen_functions)

  cn_cor <- cor.test(cn_data$soil_cn_ratio, cn_data$functional_cn_ratio)
  cat("Soil C:N vs functional C:N: r =", round(cn_cor$estimate, 3),
      ", p =", round(cn_cor$p.value, 4), "\n\n")

  ggplot(cn_data, aes(x = soil_cn_ratio, y = functional_cn_ratio)) +
    geom_point(aes(color = sitetype), size = 3) +
    geom_smooth(method = "lm", se = TRUE, color = "black") +
    scale_color_manual(values = site_colors) +
    labs(x = "soil C:N ratio", y = "functional C:N ratio",
         subtitle = paste("r =", round(cn_cor$estimate, 3),
                          ", p =", round(cn_cor$p.value, 4))) +
    theme_bw()
}
## Soil C:N vs functional C:N: r = 0.407 , p = 7e-04
Soil C:N ratio vs functional C:N ratio

Soil C:N ratio vs functional C:N ratio

if (exists("cn_data")) {
  cn_data %>%
    group_by(sitetype) %>%
    summarise(
      soil_cn       = mean(soil_cn_ratio,       na.rm = TRUE),
      functional_cn = mean(functional_cn_ratio, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    arrange(soil_cn)
}
## # A tibble: 5 × 3
##   sitetype     soil_cn functional_cn
##   <fct>          <dbl>         <dbl>
## 1 diversified     12.8          1.01
## 2 Kipuka          13.1          1.23
## 3 agroforestry    13.4          1.00
## 4 monoculture     13.8          1.05
## 5 forest          15.9          1.24

Individual Function Relationships

functions_to_test <- list(
  carbon_fixation       = "c",
  cellulose_degradation = "c",
  nitrogen_fixation     = "n",
  nitrification         = "n",
  denitrification       = "n"
)

for (func_name in names(functions_to_test)) {
  soil_var <- functions_to_test[[func_name]]
  if (!func_name %in% names(category_results)) next

  fd       <- category_results[[func_name]]$data
  ct       <- cor.test(fd$abundance, fd[[soil_var]])

  cat(func_name, "vs soil", soil_var,
      ": r =", round(ct$estimate, 3),
      ", p =", round(ct$p.value, 4), "\n")

  p <- ggplot(fd, aes_string(x = soil_var, y = "abundance")) +
    geom_point(aes(color = sitetype), size = 3) +
    geom_smooth(method = "lm", se = TRUE, color = "black") +
    scale_color_manual(values = site_colors) +
    labs(x     = paste("soil", toupper(soil_var), "(%)"),
         y     = paste(str_to_title(gsub("_", " ", func_name)), "potential"),
         subtitle = paste("r =", round(ct$estimate, 3),
                          ", p =", round(ct$p.value, 4))) +
    theme_bw()

  print(p)
}
## carbon_fixation vs soil c : r = -0.172 , p = 0.1661

## cellulose_degradation vs soil c : r = -0.108 , p = 0.3895

## nitrogen_fixation vs soil n : r = -0.217 , p = 0.0801

## nitrification vs soil n : r = -0.007 , p = 0.9566

## denitrification vs soil n : r = -0.104 , p = 0.407


Functional Category Summary

summary_tbl <- map_dfr(names(category_results), function(cat_name) {
  result       <- category_results[[cat_name]]
  mean_by_site <- result$data %>%
    group_by(sitetype) %>%
    summarise(mean_abundance = mean(abundance, na.rm = TRUE), .groups = "drop") %>%
    arrange(desc(mean_abundance))

  tibble(
    category        = cat_name,
    n_kos_present   = length(result$present_kos),
    highest_sitetype = as.character(mean_by_site$sitetype[1]),
    lowest_sitetype  = as.character(mean_by_site$sitetype[nrow(mean_by_site)])
  )
})

summary_tbl
## # A tibble: 16 × 4
##    category              n_kos_present highest_sitetype lowest_sitetype
##    <chr>                         <int> <chr>            <chr>          
##  1 nitrogen_fixation                 3 diversified      Kipuka         
##  2 nitrification                     3 agroforestry     forest         
##  3 denitrification                   4 monoculture      forest         
##  4 nitrate_reduction                 2 agroforestry     forest         
##  5 carbon_fixation                   3 monoculture      agroforestry   
##  6 methane_oxidation                 2 agroforestry     Kipuka         
##  7 cellulose_degradation             2 monoculture      forest         
##  8 phosphatase                       3 monoculture      forest         
##  9 phosphonate                       2 monoculture      forest         
## 10 osmotic_stress                    3 monoculture      forest         
## 11 oxidative_stress                  3 agroforestry     Kipuka         
## 12 heavy_metals                      2 monoculture      Kipuka         
## 13 plant_hormones                    2 monoculture      forest         
## 14 biofilm_formation                 3 Kipuka           forest         
## 15 beta_lactamase                    3 monoculture      forest         
## 16 multidrug_resistance              2 monoculture      Kipuka