Setup

Packages

library(tidyverse)
library(phyloseq)
library(vegan)
library(adespatial)
library(hillR)
library(geosphere)
library(car)
library(glmmTMB)
library(emmeans)
library(multcomp)
library(multcompView)
library(broom)
library(DHARMa)
library(UpSetR)
library(ggplot2)
library(patchwork)
library(permute)

setwd("~/Desktop/hi22/analyses/bacteria")

Colors and factor levels

site_colors <- c(
  monoculture  = "#F7C1BB",
  diversified  = "#885A5A",
  agroforestry = "#84B082",
  forest       = "#353A47",
  kipuka       = "#DC136C",
  Monoculture  = "#F7C1BB",
  Diversified  = "#885A5A",
  Agroforestry = "#84B082",
  Forest       = "#353A47",
  Kipuka       = "#DC136C"
)

sitetype_levels <- c("monoculture", "diversified", "agroforestry", "forest", "Kipuka")

Load data

ps_bact     <- readRDS("Intermediate_data/phyloseq_b_clean.RDS")
ps_bact_clr <- readRDS("Intermediate_data/phyloseq_b_clean_clr.RDS")

bacteria_otus <- taxa_names(ps_bact)

bact_otu <- bind_cols(
  as(sample_data(ps_bact), "data.frame"),
  as(otu_table(ps_bact, taxa_are_rows = FALSE), "matrix") %>% as.data.frame()
)

Helper functions

get_pcs <- function(df, vars, k, prefix) {
  mat  <- df[, vars]
  keep <- complete.cases(mat)
  p    <- prcomp(mat[keep, ], center = TRUE, scale. = TRUE)
  out  <- as.data.frame(matrix(NA_real_, nrow = nrow(df), ncol = k))
  colnames(out) <- paste0(prefix, "_PC", 1:k)
  out[keep, ] <- p$x[, 1:k, drop = FALSE]
  out
}

get_pc_composite <- function(df, vars, k, prefix) {
  mat  <- df[, vars]
  keep <- complete.cases(mat)
  p    <- prcomp(mat[keep, ], center = TRUE, scale. = TRUE)
  pcs  <- as.data.frame(matrix(NA_real_, nrow = nrow(df), ncol = k))
  colnames(pcs) <- paste0(prefix, "_PC", 1:k)
  pcs[keep, ] <- p$x[, 1:k, drop = FALSE]
  w         <- summary(p)$importance[2, 1:k]
  comp_vals <- as.numeric(as.matrix(pcs[keep, , drop = FALSE]) %*% w)
  out       <- rep(NA_real_, nrow(df))
  out[keep] <- comp_vals
  setNames(data.frame(out), paste0(prefix, "_comp"))
}

posthoc_em <- function(mod, response_col, data) {
  cld(emmeans(mod, pairwise ~ sitetype, adjust = "none", type = "response")$emmeans,
    alpha = 0.05, Letters = letters, adjust = "none") %>%
    tidy() %>%
    mutate(.group = trimws(.group)) %>%
    left_join(
      data %>% group_by(sitetype) %>%
        summarise(ymax = max(.data[[response_col]], na.rm = TRUE)),
      by = "sitetype"
    )
}

Diversity Indices

bact_diversity <- bact_otu %>%
  mutate(
    hillq0_bacteria   = hill_taxa(across(any_of(bacteria_otus)), q = 0),
    hillq1_bacteria   = hill_taxa(across(any_of(bacteria_otus)), q = 1),
    hillq2_bacteria   = hill_taxa(across(any_of(bacteria_otus)), q = 2),
    chao_bacteria     = estimateR(across(any_of(bacteria_otus)))["S.chao1", ],
    shannon_bacteria  = vegan::diversity(across(any_of(bacteria_otus))),
    richness_bacteria = specnumber(across(any_of(bacteria_otus))),
    evenness_bacteria = shannon_bacteria / log(richness_bacteria)
  ) %>%
  dplyr::select(-starts_with("bASV"))

Environmental Predictors

coords    <- bact_diversity %>% dplyr::select(longitude, latitude)
geo_dist  <- geosphere::distm(coords)
pcnm_obj  <- pcnm(geo_dist)
space_pcs <- as.data.frame(pcnm_obj$vectors)[1:2]

land_vars  <- c("agriculture", "open", "development", "vegetation")
soil_vars  <- c("c", "n", "ph", "ec")
micro_vars <- c("temp_flux_mean", "evapotranspiration_mm")

land_pcs  <- get_pc_composite(bact_diversity, land_vars,  2, "land")  %>% scale()
soil_pcs  <- get_pcs(bact_diversity,          soil_vars,  2, "soil")  %>% scale()
micro_pcs <- get_pc_composite(bact_diversity, micro_vars, 2, "micro") %>% scale()

mod_df <- bind_cols(bact_diversity, space_pcs, land_pcs, micro_pcs) %>%
  as.data.frame() %>%
  mutate(sitetype = factor(sitetype, levels = sitetype_levels)) %>%
  filter(!is.na(sitetype), id != "HI_69")

rownames(mod_df) <- mod_df$id

CLR distance matrix

if (taxa_are_rows(ps_bact_clr)) {
  bact_clr_mat <- t(as(otu_table(ps_bact_clr), "matrix"))
} else {
  bact_clr_mat <- as(otu_table(ps_bact_clr), "matrix")
}

rownames(bact_clr_mat) <- as(sample_data(ps_bact_clr), "data.frame")$id

common_samples  <- intersect(rownames(bact_clr_mat), mod_df$id)
bact_clr_cc     <- bact_clr_mat[common_samples, ]
meta_cc         <- mod_df[common_samples, ]

bact_clr_cc <- bact_clr_cc[order(rownames(bact_clr_cc)), ]
meta_cc     <- meta_cc[order(meta_cc$id), ]

asv_var     <- apply(bact_clr_cc, 2, var, na.rm = TRUE)
bact_clr_cc <- bact_clr_cc[, !is.na(asv_var) & asv_var > 0, drop = FALSE]

bact_clr_dist <- dist(bact_clr_cc, method = "euclidean")

Alpha Diversity Models

Hill q0 — Observed richness

bact_q0.mod <- glmmTMB(
  hillq0_bacteria ~ scale(management_score_rescaled) +
    scale(ph) + scale(c) + scale(n) + scale(land_comp) +
    scale(longitude) + scale(latitude) + (1 | site_initials),
  data = mod_df, family = nbinom2(link = "log"))

cat("AIC:", AIC(bact_q0.mod), "\n")
## AIC: 779.9942
Anova(bact_q0.mod, type = "II")
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: hillq0_bacteria
##                                   Chisq Df Pr(>Chisq)   
## scale(management_score_rescaled) 0.2616  1   0.609026   
## scale(ph)                        4.8970  1   0.026904 * 
## scale(c)                         8.7063  1   0.003171 **
## scale(n)                         2.9749  1   0.084563 . 
## scale(land_comp)                 1.1660  1   0.280222   
## scale(longitude)                 2.3809  1   0.122827   
## scale(latitude)                  3.3786  1   0.066049 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(simulateResiduals(bact_q0.mod))
DHARMa residuals — q0 management model

DHARMa residuals — q0 management model

bact_q0.mod.st <- glmmTMB(
  hillq0_bacteria ~ sitetype +
    scale(ph) + scale(c) + scale(n) + scale(land_comp) + scale(micro_comp) +
    scale(longitude) + scale(latitude) + (1 | site_initials),
  data = mod_df, family = nbinom2(link = "log"))

cat("AIC:", AIC(bact_q0.mod.st), "\n")
## AIC: 778.402
Anova(bact_q0.mod.st, type = "II")
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: hillq0_bacteria
##                     Chisq Df Pr(>Chisq)    
## sitetype          10.3057  4  0.0355808 *  
## scale(ph)          7.2195  1  0.0072117 ** 
## scale(c)          11.7379  1  0.0006124 ***
## scale(n)           3.6694  1  0.0554200 .  
## scale(land_comp)   0.6060  1  0.4363142    
## scale(micro_comp)  4.0713  1  0.0436171 *  
## scale(longitude)   5.7844  1  0.0161694 *  
## scale(latitude)    8.3312  1  0.0038969 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(simulateResiduals(bact_q0.mod.st))
DHARMa residuals — q0 site-type model

DHARMa residuals — q0 site-type model

(bact_q0.em <- posthoc_em(bact_q0.mod.st, "hillq0_bacteria", mod_df))
## # A tibble: 5 × 8
##   sitetype     response std.error    df conf.low conf.high .group  ymax
##   <chr>           <dbl>     <dbl> <dbl>    <dbl>     <dbl> <chr>  <int>
## 1 monoculture      386.      27.3   Inf     336.      443. a        589
## 2 Kipuka           387.      44.7   Inf     309.      485. ab       477
## 3 diversified      430.      19.2   Inf     394.      469. ab       584
## 4 forest           461.      30.4   Inf     405.      525. ab       561
## 5 agroforestry     481.      23.4   Inf     437.      529. b        581

Hill q1 — Exp(Shannon)

bact_q1.mod <- glmmTMB(
  hillq1_bacteria ~ scale(management_score_rescaled) +
    scale(ph) + scale(c) + scale(n) + scale(land_comp) +
    scale(longitude) + scale(latitude) + (1 | site_initials),
  data = mod_df, family = Gamma(link = "log"))

cat("AIC:", AIC(bact_q1.mod), "\n")
## AIC: 751.4844
Anova(bact_q1.mod, type = "II")
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: hillq1_bacteria
##                                   Chisq Df Pr(>Chisq)
## scale(management_score_rescaled) 0.1468  1     0.7016
## scale(ph)                        1.9373  1     0.1640
## scale(c)                         1.0275  1     0.3108
## scale(n)                         0.2565  1     0.6126
## scale(land_comp)                 0.5407  1     0.4621
## scale(longitude)                 1.4798  1     0.2238
## scale(latitude)                  2.0279  1     0.1544
plot(simulateResiduals(bact_q1.mod))
DHARMa residuals — q1 management model

DHARMa residuals — q1 management model

bact_q1.mod.st <- glmmTMB(
  hillq1_bacteria ~ sitetype +
    scale(ph) + scale(c) + scale(n) + scale(land_comp) + scale(micro_comp) +
    scale(longitude) + scale(latitude) + (1 | site_initials),
  data = mod_df, family = Gamma(link = "log"))

cat("AIC:", AIC(bact_q1.mod.st), "\n")
## AIC: 755.6595
Anova(bact_q1.mod.st, type = "II")
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: hillq1_bacteria
##                    Chisq Df Pr(>Chisq)  
## sitetype          3.4542  4    0.48487  
## scale(ph)         2.5952  1    0.10719  
## scale(c)          2.6207  1    0.10548  
## scale(n)          1.2222  1    0.26892  
## scale(land_comp)  0.2842  1    0.59397  
## scale(micro_comp) 2.0015  1    0.15714  
## scale(longitude)  3.2929  1    0.06958 .
## scale(latitude)   5.8699  1    0.01540 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(bact_q1.em <- posthoc_em(bact_q1.mod.st, "hillq1_bacteria", mod_df))
## # A tibble: 5 × 8
##   sitetype     response std.error    df conf.low conf.high .group  ymax
##   <chr>           <dbl>     <dbl> <dbl>    <dbl>     <dbl> <chr>  <dbl>
## 1 Kipuka           194.      36.8   Inf     133.      281. a       296.
## 2 monoculture      209.      25.2   Inf     166.      265. a       341.
## 3 diversified      236.      18.1   Inf     203.      274. a       322.
## 4 agroforestry     243.      20.0   Inf     207.      286. a       344.
## 5 forest           246.      28.3   Inf     196.      308. a       313.

Hill q2 — Inverse Simpson

bact_q2.mod <- glmmTMB(
  hillq2_bacteria ~ scale(management_score_rescaled) +
    scale(ph) + scale(c) + scale(n) + scale(land_comp) +
    scale(longitude) + scale(latitude) + (1 | site_initials),
  data = mod_df, family = Gamma(link = "log"))

cat("AIC:", AIC(bact_q2.mod), "\n")
## AIC: 706.1222
Anova(bact_q2.mod, type = "II")
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: hillq2_bacteria
##                                   Chisq Df Pr(>Chisq)
## scale(management_score_rescaled) 0.5921  1     0.4416
## scale(ph)                        0.7271  1     0.3938
## scale(c)                         0.0547  1     0.8150
## scale(n)                         0.0199  1     0.8878
## scale(land_comp)                 1.9417  1     0.1635
## scale(longitude)                 0.7178  1     0.3969
## scale(latitude)                  0.8694  1     0.3511
bact_q2.mod.st <- glmmTMB(
  hillq2_bacteria ~ sitetype +
    scale(ph) + scale(c) + scale(n) + scale(land_comp) + scale(micro_comp) +
    scale(longitude) + scale(latitude) + (1 | site_initials),
  data = mod_df, family = Gamma(link = "log"))

cat("AIC:", AIC(bact_q2.mod.st), "\n")
## AIC: 712.9291
Anova(bact_q2.mod.st, type = "II")
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: hillq2_bacteria
##                    Chisq Df Pr(>Chisq)  
## sitetype          1.2993  4    0.86149  
## scale(ph)         0.6714  1    0.41256  
## scale(c)          0.0767  1    0.78183  
## scale(n)          0.2990  1    0.58452  
## scale(land_comp)  1.6105  1    0.20442  
## scale(micro_comp) 0.6455  1    0.42174  
## scale(longitude)  2.1781  1    0.13998  
## scale(latitude)   2.9564  1    0.08554 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(bact_q2.em <- posthoc_em(bact_q2.mod.st, "hillq2_bacteria", mod_df))
## # A tibble: 5 × 8
##   sitetype     response std.error    df conf.low conf.high .group  ymax
##   <chr>           <dbl>     <dbl> <dbl>    <dbl>     <dbl> <chr>  <dbl>
## 1 Kipuka           89.5      25.4   Inf     51.3      156. a       178.
## 2 monoculture     121.       21.5   Inf     85.0      171. a       190.
## 3 forest          121.       20.3   Inf     86.8      168. a       180.
## 4 agroforestry    121.       16.5   Inf     93.1      158. a       205.
## 5 diversified     125.       15.4   Inf     97.8      159. a       200.

Community Composition

dbRDA

bact_dbrda <- capscale(
  bact_clr_dist ~
    scale(land_comp) + scale(longitude) + scale(latitude) +
    scale(management_score_rescaled) + scale(ph) + scale(c) + scale(n),
  data = meta_cc)

sort(vif.cca(bact_dbrda))
##                        scale(ph) scale(management_score_rescaled) 
##                         1.557038                         1.718431 
##                 scale(land_comp)                  scale(latitude) 
##                         2.193442                         2.678406 
##                 scale(longitude)                         scale(n) 
##                         3.150693                         4.533302 
##                         scale(c) 
##                         5.297940
perm <- how(nperm = 999)
setBlocks(perm) <- meta_cc$site_initials
set.seed(5)
(bact_dbrda_anova <- anova(bact_dbrda, by = "terms", permutations = perm))
## Permutation test for capscale under reduced model
## Terms added sequentially (first to last)
## Blocks:  meta_cc$site_initials 
## Permutation: free
## Number of permutations: 999
## 
## Model: capscale(formula = bact_clr_dist ~ scale(land_comp) + scale(longitude) + scale(latitude) + scale(management_score_rescaled) + scale(ph) + scale(c) + scale(n), data = meta_cc)
##                                  Df Variance      F Pr(>F)    
## scale(land_comp)                  1    975.8 4.1319  0.005 ** 
## scale(longitude)                  1    325.5 1.3785  0.669    
## scale(latitude)                   1    477.7 2.0225  0.507    
## scale(management_score_rescaled)  1    406.0 1.7191  0.013 *  
## scale(ph)                         1    714.5 3.0255  0.001 ***
## scale(c)                          1    684.6 2.8989  0.001 ***
## scale(n)                          1    421.8 1.7859  0.319    
## Residual                         58  13697.5                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
bact_site_scores <- vegan::scores(bact_dbrda, choices = 1:2,
  display = c("sites", "bp"))

bact_dbrda_df <- as.data.frame(cbind(bact_site_scores$sites, meta_cc))

bact_dbrda_env <- as.data.frame(bact_site_scores$biplot) %>%
  rownames_to_column("variable") %>%
  mutate(variable = case_when(
    variable == "scale(land_comp)"                 ~ "landscape context",
    variable == "scale(management_score_rescaled)" ~ "management intensity",
    variable == "scale(c)"                         ~ "soil carbon",
    variable == "scale(ph)"                        ~ "soil pH",
    variable == "scale(n)"                         ~ "soil nitrogen",
    variable == "scale(longitude)"                 ~ "longitude",
    variable == "scale(latitude)"                  ~ "latitude",
    TRUE ~ variable
  )) %>%
  filter(!variable %in% c("latitude", "longitude", "management intensity"))

Variation partitioning

mgmt_cc_vp  <- meta_cc %>% dplyr::select(management_score_rescaled) %>% scale() %>% as.data.frame()
soil_cc_vp  <- meta_cc %>% dplyr::select(c, n, ph, ec)               %>% scale() %>% as.data.frame()
land_cc_vp  <- meta_cc %>% dplyr::select(agriculture, open, development, vegetation) %>% scale() %>% as.data.frame()
space_cc_vp <- meta_cc %>% dplyr::select(longitude, latitude)         %>% scale() %>% as.data.frame()

(bact_vp <- varpart(bact_clr_dist, mgmt_cc_vp, soil_cc_vp, land_cc_vp, space_cc_vp,
  scale = FALSE, permutations = 999))
## 
## Partition of squared Euclidean distance in dbRDA 
## 
## Call: varpart(Y = bact_clr_dist, X = mgmt_cc_vp, soil_cc_vp,
## land_cc_vp, space_cc_vp, scale = FALSE, permutations = 999)
## 
## Explanatory tables:
## X1:  mgmt_cc_vp
## X2:  soil_cc_vp
## X3:  land_cc_vp
## X4:  space_cc_vp 
## 
## No. of explanatory tables: 4 
## Total variation (SS): 1150723 
## No. of observations: 66 
## 
## Partition table:
##                             Df R.square Adj.R.square Testable
## [aeghklno] = X1              1  0.02565      0.01042     TRUE
## [befiklmo] = X2              4  0.15496      0.09954     TRUE
## [cfgjlmno] = X3              3  0.10677      0.06355     TRUE
## [dhijkmno] = X4              2  0.06713      0.03752     TRUE
## [abefghiklmno] = X1+X2       5  0.17722      0.10865     TRUE
## [acefghjklmno] = X1+X3       4  0.13030      0.07327     TRUE
## [adeghijklmno] = X1+X4       3  0.09020      0.04618     TRUE
## [bcefgijklmno] = X2+X3       7  0.22855      0.13544     TRUE
## [bdefhijklmno] = X2+X4       6  0.20736      0.12675     TRUE
## [cdfghijklmno] = X3+X4       5  0.14898      0.07806     TRUE
## [abcefghijklmno] = X1+X2+X3  8  0.24475      0.13874     TRUE
## [abdefghijklmno] = X1+X2+X4  7  0.22754      0.13431     TRUE
## [acdefghijklmno] = X1+X3+X4  6  0.17767      0.09405     TRUE
## [bcdefghijklmno] = X2+X3+X4  9  0.26898      0.15149     TRUE
## [abcdefghijklmno] = All     10  0.28625      0.15648     TRUE
## Individual fractions                                         
## [a] = X1 | X2+X3+X4          1               0.00499     TRUE
## [b] = X2 | X1+X3+X4          4               0.06243     TRUE
## [c] = X3 | X1+X2+X4          3               0.02217     TRUE
## [d] = X4 | X1+X2+X3          2               0.01773     TRUE
## [e]                          0               0.01100    FALSE
## [f]                          0               0.02570    FALSE
## [g]                          0               0.00258    FALSE
## [h]                          0              -0.00168    FALSE
## [i]                          0               0.00304    FALSE
## [j]                          0               0.00793    FALSE
## [k]                          0              -0.00458    FALSE
## [l]                          0              -0.00990    FALSE
## [m]                          0               0.00706    FALSE
## [n]                          0               0.00322    FALSE
## [o]                          0               0.00479    FALSE
## [p] = Residuals              0               0.84352    FALSE
## Controlling 2 tables X                                       
## [ae] = X1 | X3+X4            1               0.01598     TRUE
## [ag] = X1 | X2+X4            1               0.00756     TRUE
## [ah] = X1 | X2+X3            1               0.00331     TRUE
## [be] = X2 | X3+X4            4               0.07343     TRUE
## [bf] = X2 | X1+X4            4               0.08813     TRUE
## [bi] = X2 | X1+X3            4               0.06547     TRUE
## [cf] = X3 | X1+X4            3               0.04786     TRUE
## [cg] = X3 | X2+X4            3               0.02474     TRUE
## [cj] = X3 | X1+X2            3               0.03009     TRUE
## [dh] = X4 | X2+X3            2               0.01605     TRUE
## [di] = X4 | X1+X3            2               0.02077     TRUE
## [dj] = X4 | X1+X2            2               0.02566     TRUE
## Controlling 1 table X                                        
## [aghn] = X1 | X2             1               0.00911     TRUE
## [aehk] = X1 | X3             1               0.00973     TRUE
## [aegl] = X1 | X4             1               0.00866     TRUE
## [bfim] = X2 | X1             4               0.09823     TRUE
## [beik] = X2 | X3             4               0.07189     TRUE
## [befl] = X2 | X4             4               0.08923     TRUE
## [cfjm] = X3 | X1             3               0.06285     TRUE
## [cgjn] = X3 | X2             3               0.03590     TRUE
## [cfgl] = X3 | X4             3               0.04054     TRUE
## [dijm] = X4 | X1             2               0.03576     TRUE
## [dhjn] = X4 | X2             2               0.02721     TRUE
## [dhik] = X4 | X3             2               0.01451     TRUE
## ---
## Use function 'dbrda' to test significance of fractions of interest
plot(bact_vp, digits = 2, Xnames = c("management", "soil", "landscape", "space"))
Variation partitioning: management, soil, landscape, space

Variation partitioning: management, soil, landscape, space

PERMANOVA

set.seed(5)
(bact_permanova <- adonis2(
  bact_clr_dist ~ management_score_rescaled + ph + c + n +
    land_comp + longitude + latitude,
  data = meta_cc, permutations = perm, by = "terms"))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks:  meta_cc$site_initials 
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = bact_clr_dist ~ management_score_rescaled + ph + c + n + land_comp + longitude + latitude, data = meta_cc, permutations = perm, by = "terms")
##                           Df SumOfSqs      R2      F Pr(>F)    
## management_score_rescaled  1    29510 0.02565 1.9224  0.344    
## ph                         1    69121 0.06007 4.5028  0.001 ***
## c                          1    50969 0.04429 3.3203  0.001 ***
## n                          1    27195 0.02363 1.7716  0.286    
## land_comp                  1    34489 0.02997 2.2468  0.728    
## longitude                  1    22778 0.01979 1.4838  0.352    
## latitude                   1    26321 0.02287 1.7147  0.376    
## Residual                  58   890339 0.77372                  
## Total                     65  1150723 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

UpSet Plot — ASV Sharing

unique_asvs <- function(df, site) {
  df %>% filter(sitetype == site) %>%
    dplyr::select(any_of(bacteria_otus)) %>%
    dplyr::select(where(~ sum(.x, na.rm = TRUE) > 0)) %>%
    colnames()
}

asv_sets <- list(
  Monoculture  = unique_asvs(bact_otu, "monoculture"),
  Diversified  = unique_asvs(bact_otu, "diversified"),
  Agroforestry = unique_asvs(bact_otu, "agroforestry"),
  Forest       = unique_asvs(bact_otu, "forest"),
  Kipuka       = unique_asvs(bact_otu, "Kipuka")
)
non_empty_sets <- asv_sets[sapply(asv_sets, length) > 0]

if (length(non_empty_sets) >= 2) {
  UpSetR::upset(
    fromList(non_empty_sets), order.by = "freq",
    nsets          = length(non_empty_sets),
    sets.bar.color = site_colors[names(non_empty_sets)],
    main.bar.color = "gray60",
    matrix.color   = "gray40",
    text.scale     = 1.2, point.size = 2.8, line.size = 1
  )
}
ASV overlap across site types

ASV overlap across site types


Plots

Alpha diversity — management gradient

make_scatter <- function(df, x, y, ylabel) {
  ggplot(df %>% filter(!sitetype %in% c("Mainland", "mainland")),
    aes_string(x = x, y = y)) +
    geom_point(aes(color = sitetype, shape = sitetype)) +
    geom_smooth(method = "glm", color = "black", se = TRUE) +
    scale_color_manual(values = site_colors) +
    scale_shape_manual(values = c(16, 16, 16, 16, 16)) +
    labs(y = ylabel, x = "management intensity",
         color = "site type", shape = "site type") +
    theme_bw()
}

make_scatter(mod_df, "management_score_rescaled", "hillq0_bacteria", "observed richness (q0)")
Bacterial richness (q0) vs management intensity

Bacterial richness (q0) vs management intensity

make_scatter(mod_df, "management_score_rescaled", "hillq1_bacteria", "Exp(Shannon) (q1)")
Exp(Shannon) (q1) vs management intensity

Exp(Shannon) (q1) vs management intensity

make_scatter(mod_df, "management_score_rescaled", "hillq2_bacteria", "inverse Simpson (q2)")
Inverse Simpson (q2) vs management intensity

Inverse Simpson (q2) vs management intensity

Alpha diversity — site type boxplots

make_boxplot <- function(df, y, ylabel, em_df) {
  ggplot(df, aes_string(x = "sitetype", y = y, fill = "sitetype")) +
    geom_boxplot() +
    scale_fill_manual(values = site_colors, guide = "none") +
    scale_x_discrete(limits = sitetype_levels) +
    geom_text(data = em_df,
      aes(x = sitetype, y = ymax * 1.1, label = .group),
      inherit.aes = FALSE) +
    labs(y = ylabel, x = "site type") +
    theme_bw()
}

make_boxplot(mod_df, "hillq0_bacteria", "observed richness (q0)", bact_q0.em)
Bacterial richness (q0) by site type

Bacterial richness (q0) by site type

make_boxplot(mod_df, "hillq1_bacteria", "Exp(Shannon) (q1)", bact_q1.em)
Exp(Shannon) (q1) by site type

Exp(Shannon) (q1) by site type

make_boxplot(mod_df, "hillq2_bacteria", "inverse Simpson (q2)", bact_q2.em)
Inverse Simpson (q2) by site type

Inverse Simpson (q2) by site type

Alpha diversity panel

bact_q0_scatter <- make_scatter(mod_df, "management_score_rescaled", "hillq0_bacteria", "observed richness (q0)")
bact_q0_boxplot <- make_boxplot(mod_df, "hillq0_bacteria", "observed richness (q0)", bact_q0.em)
bact_q1_scatter <- make_scatter(mod_df, "management_score_rescaled", "hillq1_bacteria", "Exp(Shannon) (q1)")
bact_q1_boxplot <- make_boxplot(mod_df, "hillq1_bacteria", "Exp(Shannon) (q1)", bact_q1.em)
bact_q2_scatter <- make_scatter(mod_df, "management_score_rescaled", "hillq2_bacteria", "inverse Simpson (q2)")
bact_q2_boxplot <- make_boxplot(mod_df, "hillq2_bacteria", "inverse Simpson (q2)", bact_q2.em)

(bact_q0_scatter | bact_q0_boxplot) /
(bact_q1_scatter | bact_q1_boxplot) /
(bact_q2_scatter | bact_q2_boxplot)
Alpha diversity — all Hill numbers

Alpha diversity — all Hill numbers

PCoA ordination

pcoa_bact    <- cmdscale(bact_clr_dist)
pcoa_bact_df <- bind_cols(
  meta_cc %>% dplyr::select(-starts_with("bASV")),
  as.data.frame(pcoa_bact) %>% rename(Axis.1 = 1, Axis.2 = 2))

ggplot(pcoa_bact_df, aes(x = Axis.1, y = Axis.2)) +
  stat_ellipse(aes(color = sitetype, fill = sitetype),
    alpha = 0.2, level = 0.95, geom = "polygon") +
  geom_point(aes(color = sitetype), size = 2, stroke = 0.75) +
  scale_color_manual(values = site_colors) +
  scale_fill_manual(values = site_colors) +
  labs(x = "PCoA 1", y = "PCoA 2",
       color = "site type", fill = "site type") +
  theme_bw() +
  theme(legend.background = element_blank(), legend.key = element_blank())
PCoA of bacterial communities (Aitchison distance)

PCoA of bacterial communities (Aitchison distance)

dbRDA ordination

ggplot(bact_dbrda_df, aes(x = CAP1, y = CAP2)) +
  stat_ellipse(aes(color = sitetype, fill = sitetype),
    alpha = 0.2, level = 0.95, geom = "polygon") +
  geom_point(aes(color = sitetype, fill = sitetype), size = 3, shape = 16) +
  scale_color_manual(values = site_colors) +
  scale_fill_manual(values = site_colors) +
  geom_segment(data = bact_dbrda_env,
    aes(x = 0, y = 0, xend = CAP1, yend = CAP2),
    arrow = arrow(length = unit(0.2, "cm")), color = "black", alpha = 0.75) +
  geom_label(data = bact_dbrda_env,
    aes(x = CAP1 + 0.05, y = CAP2 + 0.05, label = variable),
    color = "black", size = 3) +
  labs(x = "CAP1", y = "CAP2", color = "site type", fill = "site type") +
  theme_bw() +
  theme(legend.background = element_blank(), legend.key = element_blank())
dbRDA of bacterial community composition

dbRDA of bacterial community composition