Setup

Packages

library(tidyverse)
library(vegan)
library(phyloseq)
library(ggplot2)
library(glmmTMB)
library(car)
library(permute)

# Anchor the session in the bacteria-analysis folder: every relative path used
# below (Intermediate_data/..., picrust2_output/...) is resolved against it.
# NOTE(review): this is a machine-specific path — confirm it exists before knitting.
setwd("~/Desktop/hi22/analyses/bacteria")

Colors and factor levels

# Plot palette keyed by site-type label. The four land-use classes appear in
# lower case and again capitalised (same hex codes), so either spelling of a
# label resolves to the same colour; Kipuka only has a capitalised entry.
site_colors <- local({
  land_use <- c(
    monoculture  = "#F7C1BB",
    diversified  = "#885A5A",
    agroforestry = "#84B082",
    forest       = "#353A47"
  )

  # Same colours, names with the first letter upper-cased.
  capitalised <- land_use
  names(capitalised) <- paste0(
    toupper(substr(names(land_use), 1, 1)),
    substring(names(land_use), 2)
  )

  c(land_use, Kipuka = "#DC136C", capitalised)
})

# Ordering of the site-type labels, intended for use as factor levels.
sitetype_levels <- c(
  "monoculture",
  "diversified",
  "agroforestry",
  "forest",
  "Kipuka"
)

Note: This document depends on ps_bact and meta_cc from the bacterial analysis.
Run hi22_bacteria_analysis.Rmd first, or load the necessary objects below before knitting.

# Inputs saved by the bacterial analysis:
#   meta_cc — metadata table built during that analysis
#   ps_bact — phyloseq object (its sample_data() is read further down)
# When knitting this document on its own, source the bacteria Rmd first or
# rebuild meta_cc here as needed.
meta_cc <- readRDS(file = "Intermediate_data/meta_cc.RDS")
ps_bact <- readRDS(file = "Intermediate_data/phyloseq_b_clean.RDS")

Load PICRUSt2 Data

# PICRUSt2 unstratified predictions, read as tab-separated tables. The header
# line supplies the column names and the first column supplies the row names.
pathways <- read.table(
  file      = "picrust2_output/pathways_out/path_abun_unstrat.tsv",
  sep       = "\t",
  header    = TRUE,
  row.names = 1
)

ko_functions <- read.table(
  file      = "picrust2_output/KO_metagenome_out/pred_metagenome_unstrat.tsv",
  sep       = "\t",
  header    = TRUE,
  row.names = 1
)

cat("KO functions:", nrow(ko_functions), ncol(ko_functions), "\n")

## KO functions: 8969 67

cat("Pathways:", nrow(pathways), ncol(pathways), "\n")

## Pathways: 556 67

# Flip both tables so that each sample becomes a row. as.matrix() is the same
# coercion t() applies to a data frame before transposing.
pathways_t     <- t(as.matrix(pathways))
ko_functions_t <- t(as.matrix(ko_functions))

Sample Name Mapping

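Sample names in the imported PICRUSt2 tables contain . where the original names have - (read.table() sanitises column names by default, check.names = TRUE). This section restores the dashes and then maps each sample to its HI_ ID so the tables align with the rest of the project.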

# Lookup from phyloseq sample name (row name of the sample data) to the
# project-wide sample ID stored in the `id` column.
sample_metadata_full <- as(sample_data(ps_bact), "data.frame")
sample_name_mapping <- sample_metadata_full$id
names(sample_name_mapping) <- rownames(sample_metadata_full)

picrust_sample_names <- rownames(ko_functions_t)

# Start from the names as imported; swap dots back to dashes only when the
# imported names contain dots AND the phyloseq names contain dashes.
picrust_names_fixed <- picrust_sample_names
has_dots   <- any(grepl("\\.", picrust_sample_names))
has_dashes <- any(grepl("-", names(sample_name_mapping)))
if (has_dots && has_dashes) {
  picrust_names_fixed <- gsub("\\.", "-", picrust_sample_names)
}

# Names without a counterpart in the lookup come back as NA.
hi_ids_mapped <- sample_name_mapping[picrust_names_fixed]
successful_mappings <- !is.na(hi_ids_mapped)

cat(
  "Successful mappings:", sum(successful_mappings),
  "out of", length(hi_ids_mapped), "\n"
)

## Successful mappings: 67 out of 67

# Diagnostic: show the first few mappings to confirm they look right.
# head() is used rather than [1:5] so that a run with fewer than five samples
# lists only the real entries instead of padding the table with NA rows.
data.frame(
  picrust_original = head(picrust_sample_names, 5),
  picrust_fixed    = head(picrust_names_fixed, 5),
  hi_id_mapped     = head(hi_ids_mapped, 5)
)

##         picrust_original picrust_fixed hi_id_mapped
## AA-A-01          AA.A.01       AA-A-01        HI_58
## AA-A-02          AA.A.02       AA-A-02        HI_36
## AA-A-03          AA.A.03       AA-A-03        HI_10
## AA-A-04          AA.A.04       AA-A-04        HI_32
## AA-A-05          AA.A.05       AA-A-05        HI_70

# Abort when nothing could be mapped; the downstream tables would be empty.
if (sum(successful_mappings) == 0) {
  stop("No successful sample name mappings. Check sample naming conventions.")
}

# Partial failures used to be dropped silently; report which samples are lost.
if (!all(successful_mappings)) {
  warning(
    sum(!successful_mappings),
    " PICRUSt2 sample(s) could not be mapped and are dropped: ",
    paste(picrust_sample_names[!successful_mappings], collapse = ", "),
    call. = FALSE
  )
}

# Keep only the mapped samples. drop = FALSE keeps the result a matrix even
# when a single sample maps, so the rownames<- calls below stay valid.
ko_mapped       <- ko_functions_t[successful_mappings, , drop = FALSE]
pathways_mapped <- pathways_t[successful_mappings, , drop = FALSE]
hi_ids_clean    <- hi_ids_mapped[successful_mappings]

# Re-label the rows with the project-wide sample IDs.
rownames(ko_mapped)       <- hi_ids_clean
rownames(pathways_mapped) <- hi_ids_clean

# Samples present in both the mapped PICRUSt2 tables and the metadata; the
# result follows the PICRUSt2 row order (intersect()'s first argument).
common_samples_final <- intersect(rownames(ko_mapped), meta_cc$id)
cat("Final common samples:", length(common_samples_final), "\n")

## Final common samples: 66

# Restrict all three objects to the shared samples. The abundance matrices are
# indexed by name and so follow common_samples_final, whereas the metadata
# keeps its own row order.
meta_matched     <- meta_cc[is.element(meta_cc$id, common_samples_final), ]
pathways_matched <- pathways_mapped[common_samples_final, ]
ko_matched       <- ko_mapped[common_samples_final, ]

Functional Diversity

# Per-sample functional alpha diversity from the matched abundance tables:
# richness via specnumber() and diversity() with its default index, computed
# separately for KO functions and for pathways.
func_diversity <- local({
  ko_rich      <- specnumber(ko_matched)
  ko_shan      <- diversity(ko_matched)
  pathway_rich <- specnumber(pathways_matched)
  pathway_shan <- diversity(pathways_matched)

  data.frame(
    sample_id        = rownames(ko_matched),
    ko_richness      = ko_rich,
    ko_shannon       = ko_shan,
    pathway_richness = pathway_rich,
    pathway_shannon  = pathway_shan,
    stringsAsFactors = FALSE
  )
})

# Align rows and merge with metadata.
# The diversity table and KO matrix are sorted by sample ID. The metadata is
# then aligned to that order by matching on the ID itself rather than by an
# independent sort: order() on a factor id sorts by level codes, which need not
# agree with the character sort used for sample_id.
func_div_ord   <- func_diversity[order(func_diversity$sample_id), ]
ko_matched_ord <- ko_matched[order(rownames(ko_matched)), , drop = FALSE]

stopifnot(anyDuplicated(as.character(meta_matched$id)) == 0L)
meta_matched_ord <- meta_matched[
  match(func_div_ord$sample_id, as.character(meta_matched$id)), ]

# cbind() pairs rows purely by position, so verify the alignment explicitly.
stopifnot(
  identical(as.character(meta_matched_ord$id), func_div_ord$sample_id),
  identical(rownames(ko_matched_ord), func_div_ord$sample_id)
)

func_data <- cbind(func_div_ord, meta_matched_ord)

# Quick size check of the merged table and the matched abundance matrices.
cat("Samples:", dim(func_data)[1], "\n")

## Samples: 66

cat("KO functions:", dim(ko_matched)[2], "\n")

## KO functions: 8969

cat("Pathways:", dim(pathways_matched)[2], "\n")

## Pathways: 556

# First rows of the (unsorted) diversity table.
head(func_diversity, n = 6)

##       sample_id ko_richness ko_shannon pathway_richness pathway_shannon
## HI_58     HI_58        7643   7.971415              497        5.689721
## HI_36     HI_36        7482   7.871598              487        5.618232
## HI_10     HI_10        7165   7.909284              477        5.640280
## HI_32     HI_32        7438   7.848995              491        5.612405
## HI_70     HI_70        7857   7.942741              511        5.678624
## HI_46     HI_46        7488   7.933188              497        5.676335

KO richness model

# Negative-binomial (nbinom2, log link) mixed model of per-sample KO richness.
# Fixed effects: site type plus ph, c and n, each centred and scaled with
# scale(); random effect: an intercept for each site_initials level.
# NOTE(review): ph, c and n are presumably soil pH, carbon and nitrogen —
# confirm against the metadata.
func_ko_mod <- glmmTMB(
  ko_richness ~ sitetype +
    scale(ph) + scale(c) + scale(n) + (1 | site_initials),
  data   = func_data,
  family = nbinom2(link = "log"))

# Coefficient table, variance components and fit statistics.
summary(func_ko_mod)

##  Family: nbinom2  ( log )
## Formula:          
## ko_richness ~ sitetype + scale(ph) + scale(c) + scale(n) + (1 |  
##     site_initials)
## Data: func_data
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     913.3     935.2    -446.7     893.3        56 
## 
## Random effects:
## 
## Conditional model:
##  Groups        Name        Variance Std.Dev.
##  site_initials (Intercept) 7.74e-05 0.008798
## Number of obs: 66, groups:  site_initials, 22
## 
## Dispersion parameter for nbinom2 family (): 1.71e+03 
## 
## Conditional model:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           8.929403   0.010420   857.0   <2e-16 ***
## sitetypediversified  -0.007109   0.012933    -0.5   0.5825    
## sitetypeagroforestry -0.003119   0.013100    -0.2   0.8118    
## sitetypeforest       -0.027486   0.014034    -2.0   0.0502 .  
## sitetypeKipuka       -0.004156   0.019873    -0.2   0.8344    
## scale(ph)            -0.001569   0.004308    -0.4   0.7157    
## scale(c)             -0.017582   0.008466    -2.1   0.0378 *  
## scale(n)              0.010400   0.009163     1.1   0.2564    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Term-wise Wald chi-square tests for the fixed effects of the richness model.
Anova(func_ko_mod)

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: ko_richness
##            Chisq Df Pr(>Chisq)  
## sitetype  5.1254  4    0.27467  
## scale(ph) 0.1327  1    0.71570  
## scale(c)  4.3131  1    0.03782 *
## scale(n)  1.2881  1    0.25639  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Functional Community Composition

PERMANOVA

# Bray-Curtis dissimilarities between the samples' KO profiles.
# adonis2() pairs the rows of `data` with the dissimilarity matrix by
# position. func_data is sorted by sample ID while ko_matched keeps the
# PICRUSt2 row order, so the KO matrix is first put in func_data's row order;
# otherwise the predictors would be attached to the wrong samples.
ko_dist <- vegdist(
  ko_matched[as.character(func_data$sample_id), , drop = FALSE],
  method = "bray")

# Guard against any future misalignment between distances and predictors.
stopifnot(identical(attr(ko_dist, "Labels"), as.character(func_data$sample_id)))

# PERMANOVA of functional composition on site type and ph, c, n.
(func_permanova <- adonis2(
  ko_dist ~ sitetype + ph + c + n,
  data         = func_data,
  permutations = 999))

## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = ko_dist ~ sitetype + ph + c + n, data = func_data, permutations = 999)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     7  0.09776 0.10167 0.9378  0.491
## Residual 58  0.86370 0.89833              
## Total    65  0.96145 1.00000

PCoA

# Classical multidimensional scaling (PCoA) of the KO dissimilarities; the
# first two columns of the result are used as plotting axes.
ko_pcoa <- cmdscale(ko_dist)

# Ordination scores joined to the diversity metrics and metadata by sample ID.
ko_pcoa_df <- merge(
  data.frame(
    sample_id = rownames(ko_pcoa),
    PCoA1     = ko_pcoa[, 1],
    PCoA2     = ko_pcoa[, 2]
  ),
  func_data,
  by = "sample_id"
)

# Scatter of samples coloured by site type, with a filled 95% ellipse per
# site type drawn underneath the points.
ggplot(data = ko_pcoa_df, mapping = aes(x = PCoA1, y = PCoA2)) +
  stat_ellipse(
    mapping = aes(color = sitetype, fill = sitetype),
    geom    = "polygon",
    level   = 0.95,
    alpha   = 0.2
  ) +
  geom_point(mapping = aes(color = sitetype), size = 2) +
  scale_fill_manual(values = site_colors) +
  scale_color_manual(values = site_colors) +
  labs(
    x     = "PCoA 1",
    y     = "PCoA 2",
    color = "site type",
    fill  = "site type"
  ) +
  theme_bw()

PCoA of functional community profiles (KO abundances)

Functional Categories

Category definitions

# Named list mapping each functional category to the KO identifiers assigned
# to it. The categories are assembled theme by theme and concatenated, which
# keeps the element names and their order.
functional_categories <- local({
  nitrogen_cycling <- list(
    nitrogen_fixation = c("K02588", "K02586", "K02591"),
    nitrification     = c("K10944", "K10945", "K10946"),
    denitrification   = c("K00370", "K00371", "K00374", "K00376"),
    nitrate_reduction = c("K00362", "K00363")
  )

  carbon_cycling <- list(
    carbon_fixation       = c("K01601", "K01602", "K00855"),
    methane_oxidation     = c("K10944", "K14127"),
    cellulose_degradation = c("K01225", "K01179", "K01181")
  )

  phosphorus_cycling <- list(
    phosphatase = c("K01113", "K03787", "K09474"),
    phosphonate = c("K06193", "K05306")
  )

  stress_resistance <- list(
    osmotic_stress   = c("K02168", "K02169", "K02170"),
    oxidative_stress = c("K00428", "K04564", "K00833"),
    heavy_metals     = c("K07787", "K07788")
  )

  plant_interactions <- list(
    plant_hormones    = c("K00128", "K00129"),
    biofilm_formation = c("K02403", "K02404", "K02405")
  )

  antibiotic_resistance <- list(
    beta_lactamase       = c("K01467", "K17836", "K17837"),
    multidrug_resistance = c("K03296", "K07799")
  )

  c(
    nitrogen_cycling,
    carbon_cycling,
    phosphorus_cycling,
    stress_resistance,
    plant_interactions,
    antibiotic_resistance
  )
})