Setup

Packages

library(tidyverse)
library(readxl)
library(phyloseq)
library(vegan)
library(adespatial)
library(hillR)
library(geosphere)
library(car)
library(glmmTMB)
library(emmeans)
library(multcomp)
library(multcompView)
library(broom)
library(DHARMa)
library(UpSetR)
library(ggplot2)
library(patchwork)
library(permute)

setwd("~/Desktop/hi22/analyses/fungi")

Colors and factor levels

site_colors <- c(
  monoculture  = "#F7C1BB",
  diversified  = "#885A5A",
  agroforestry = "#84B082",
  forest       = "#353A47",
  Kipuka       = "#DC136C",
  Monoculture  = "#F7C1BB",
  Diversified  = "#885A5A",
  Agroforestry = "#84B082",
  Forest       = "#353A47"
)

sitetype_levels <- c("monoculture", "diversified", "agroforestry", "forest", "Kipuka")

Load data

ps_fung     <- readRDS("Intermediate_data/phyloseq_f_clean.RDS")
ps_fung_clr <- readRDS("Intermediate_data/phyloseq_f_clean_clr.RDS")

fungi_otus <- taxa_names(ps_fung)

fung_otu <- bind_cols(
  as(sample_data(ps_fung), "data.frame"),
  as(otu_table(ps_fung, taxa_are_rows = FALSE), "matrix") %>% as.data.frame()
)

Helper functions

get_pcs <- function(df, vars, k, prefix) {
  mat  <- df[, vars]
  keep <- complete.cases(mat)
  p    <- prcomp(mat[keep, ], center = TRUE, scale. = TRUE)
  out  <- as.data.frame(matrix(NA_real_, nrow = nrow(df), ncol = k))
  colnames(out) <- paste0(prefix, "_PC", 1:k)
  out[keep, ] <- p$x[, 1:k, drop = FALSE]
  out
}

get_pc_composite <- function(df, vars, k, prefix) {
  mat  <- df[, vars]
  keep <- complete.cases(mat)
  p    <- prcomp(mat[keep, ], center = TRUE, scale. = TRUE)
  pcs  <- as.data.frame(matrix(NA_real_, nrow = nrow(df), ncol = k))
  colnames(pcs) <- paste0(prefix, "_PC", 1:k)
  pcs[keep, ] <- p$x[, 1:k, drop = FALSE]
  w         <- summary(p)$importance[2, 1:k]
  comp_vals <- as.numeric(as.matrix(pcs[keep, , drop = FALSE]) %*% w)
  out       <- rep(NA_real_, nrow(df))
  out[keep] <- comp_vals
  setNames(data.frame(out), paste0(prefix, "_comp"))
}

posthoc_em <- function(mod, response_col, data) {
  cld(emmeans(mod, pairwise ~ sitetype, adjust = "none", type = "response")$emmeans,
    alpha = 0.05, Letters = letters, adjust = "none") %>%
    tidy() %>%
    mutate(.group = trimws(.group)) %>%
    left_join(
      data %>% group_by(sitetype) %>%
        summarise(ymax = max(.data[[response_col]], na.rm = TRUE)),
      by = "sitetype"
    )
}

Diversity Indices

fung_diversity <- fung_otu %>%
  mutate(
    hillq0_fungi   = hill_taxa(across(any_of(fungi_otus)), q = 0),
    hillq1_fungi   = hill_taxa(across(any_of(fungi_otus)), q = 1),
    hillq2_fungi   = hill_taxa(across(any_of(fungi_otus)), q = 2),
    chao_fungi     = estimateR(across(any_of(fungi_otus)))["S.chao1", ],
    shannon_fungi  = vegan::diversity(across(any_of(fungi_otus))),
    richness_fungi = specnumber(across(any_of(fungi_otus))),
    evenness_fungi = shannon_fungi / log(richness_fungi)
  ) %>%
  dplyr::select(-starts_with("fASV"))

Environmental Predictors

coords    <- fung_diversity %>% dplyr::select(longitude, latitude)
geo_dist  <- geosphere::distm(coords)
pcnm_obj  <- pcnm(geo_dist)
space_pcs <- as.data.frame(pcnm_obj$vectors)[1:2]

land_vars  <- c("agriculture", "open", "development", "vegetation")
soil_vars  <- c("c", "n", "ph", "ec")
micro_vars <- c("temp_flux_mean", "evapotranspiration_mm")

land_pcs  <- get_pc_composite(fung_diversity, land_vars,  2, "land")  %>% scale()
soil_pcs  <- get_pcs(fung_diversity,          soil_vars,  2, "soil")  %>% scale()
micro_pcs <- get_pc_composite(fung_diversity, micro_vars, 2, "micro") %>% scale()

mod_df <- bind_cols(fung_diversity, space_pcs, land_pcs, micro_pcs) %>%
  as.data.frame() %>%
  mutate(sitetype = factor(sitetype, levels = sitetype_levels)) %>%
  filter(!is.na(sitetype))

rownames(mod_df) <- mod_df$id

CLR distance matrix

if (taxa_are_rows(ps_fung_clr)) {
  fung_clr_mat <- t(as(otu_table(ps_fung_clr), "matrix"))
} else {
  fung_clr_mat <- as(otu_table(ps_fung_clr), "matrix")
}

rownames(fung_clr_mat) <- as(sample_data(ps_fung_clr), "data.frame")$id

common_samples <- intersect(rownames(fung_clr_mat), mod_df$id)
fung_clr_cc    <- fung_clr_mat[common_samples, ]
meta_cc        <- mod_df[common_samples, ]

fung_clr_cc <- fung_clr_cc[order(rownames(fung_clr_cc)), ]
meta_cc     <- meta_cc[order(meta_cc$id), ]

asv_var     <- apply(fung_clr_cc, 2, var, na.rm = TRUE)
fung_clr_cc <- fung_clr_cc[, !is.na(asv_var) & asv_var > 0, drop = FALSE]

fung_clr_dist <- dist(fung_clr_cc, method = "euclidean")

Alpha Diversity Models

Hill q0 — Observed richness

fung_q0.mod <- glmmTMB(
  hillq0_fungi ~ scale(management_score_rescaled) +
    scale(ph) + scale(c) + scale(n) + scale(land_comp) +
    scale(longitude) + scale(latitude) + (1 | site_initials),
  data = mod_df, family = nbinom2(link = "log"))

cat("AIC:", AIC(fung_q0.mod), "\n")

## AIC: 723.998

Anova(fung_q0.mod, type = "II")

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: hillq0_fungi
##                                   Chisq Df Pr(>Chisq)
## scale(management_score_rescaled) 0.4337  1     0.5102
## scale(ph)                        0.0113  1     0.9153
## scale(c)                         0.5884  1     0.4431
## scale(n)                         2.4084  1     0.1207
## scale(land_comp)                 0.2646  1     0.6070
## scale(longitude)                 0.3257  1     0.5682
## scale(latitude)                  1.0951  1     0.2954

plot(simulateResiduals(fung_q0.mod))

DHARMa residuals — q0 management model

fung_q0.mod.st <- glmmTMB(
  hillq0_fungi ~ sitetype +
    scale(ph) + scale(c) + scale(n) + scale(land_comp) + scale(micro_comp) +
    scale(longitude) + scale(latitude) + (1 | site_initials),
  data = mod_df, family = nbinom2(link = "log"))

cat("AIC:", AIC(fung_q0.mod.st), "\n")

## AIC: 728.634

Anova(fung_q0.mod.st, type = "II")

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: hillq0_fungi
##                    Chisq Df Pr(>Chisq)  
## sitetype          3.9335  4    0.41508  
## scale(ph)         0.0044  1    0.94727  
## scale(c)          0.1348  1    0.71347  
## scale(n)          0.3449  1    0.55701  
## scale(land_comp)  0.0068  1    0.93409  
## scale(micro_comp) 0.9952  1    0.31847  
## scale(longitude)  0.9317  1    0.33443  
## scale(latitude)   2.9063  1    0.08823 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(fung_q0.em <- posthoc_em(fung_q0.mod.st, "hillq0_fungi", mod_df))

## # A tibble: 5 × 8
##   sitetype     response std.error    df conf.low conf.high .group  ymax
##   <chr>           <dbl>     <dbl> <dbl>    <dbl>     <dbl> <chr>  <int>
## 1 Kipuka           147.      30.0   Inf     98.2      219. a        175
## 2 monoculture      171.      21.9   Inf    133.       220. a        223
## 3 diversified      181.      14.1   Inf    156.       211. a        261
## 4 forest           200.      23.1   Inf    160.       251. a        248
## 5 agroforestry     201.      17.2   Inf    170.       238. a        273

Hill q1 — Exp(Shannon)

fung_q1.mod <- glmmTMB(
  hillq1_fungi ~ scale(management_score_rescaled) +
    scale(ph) + scale(c) + scale(n) + scale(land_comp) +
    scale(longitude) + scale(latitude) + (1 | site_initials),
  data = mod_df, family = Gamma(link = "log"))

cat("AIC:", AIC(fung_q1.mod), "\n")

## AIC: 605.0253

Anova(fung_q1.mod, type = "II")

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: hillq1_fungi
##                                   Chisq Df Pr(>Chisq)  
## scale(management_score_rescaled) 1.5538  1    0.21257  
## scale(ph)                        0.9367  1    0.33314  
## scale(c)                         2.2046  1    0.13760  
## scale(n)                         2.9626  1    0.08521 .
## scale(land_comp)                 0.9260  1    0.33589  
## scale(longitude)                 0.2692  1    0.60390  
## scale(latitude)                  1.9165  1    0.16625  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

plot(simulateResiduals(fung_q1.mod))

DHARMa residuals — q1 management model

fung_q1.mod.st <- glmmTMB(
  hillq1_fungi ~ sitetype +
    scale(ph) + scale(c) + scale(n) + scale(land_comp) + scale(micro_comp) +
    scale(longitude) + scale(latitude) + (1 | site_initials),
  data = mod_df, family = Gamma(link = "log"))

cat("AIC:", AIC(fung_q1.mod.st), "\n")

## AIC: 609.929

Anova(fung_q1.mod.st, type = "II")

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: hillq1_fungi
##                    Chisq Df Pr(>Chisq)  
## sitetype          4.5881  4    0.33222  
## scale(ph)         2.4642  1    0.11647  
## scale(c)          0.5680  1    0.45106  
## scale(n)          0.0030  1    0.95627  
## scale(land_comp)  0.0052  1    0.94229  
## scale(micro_comp) 0.9449  1    0.33103  
## scale(longitude)  0.4883  1    0.48469  
## scale(latitude)   3.8233  1    0.05054 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(fung_q1.em <- posthoc_em(fung_q1.mod.st, "hillq1_fungi", mod_df))

## # A tibble: 5 × 8
##   sitetype     response std.error    df conf.low conf.high .group  ymax
##   <chr>           <dbl>     <dbl> <dbl>    <dbl>     <dbl> <chr>  <dbl>
## 1 Kipuka           26.4      9.53   Inf     13.0      53.6 a       55.3
## 2 diversified      48.4      6.76   Inf     36.8      63.7 ab     103. 
## 3 agroforestry     52.7      8.29   Inf     38.7      71.7 ab     123. 
## 4 monoculture      52.9     11.9    Inf     34.1      82.3 ab      74.7
## 5 forest           58.8     12.4    Inf     38.9      88.8 b       84.5

Hill q2 — Inverse Simpson

fung_q2.mod <- glmmTMB(
  hillq2_fungi ~ scale(management_score_rescaled) +
    scale(ph) + scale(c) + scale(n) + scale(land_comp) +
    scale(longitude) + scale(latitude) + (1 | site_initials),
  data = mod_df, family = Gamma(link = "log"))

cat("AIC:", AIC(fung_q2.mod), "\n")

## AIC: 511.2699

Anova(fung_q2.mod, type = "II")

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: hillq2_fungi
##                                   Chisq Df Pr(>Chisq)  
## scale(management_score_rescaled) 3.2638  1    0.07083 .
## scale(ph)                        1.7569  1    0.18502  
## scale(c)                         3.7338  1    0.05332 .
## scale(n)                         4.0578  1    0.04397 *
## scale(land_comp)                 0.9484  1    0.33014  
## scale(longitude)                 0.6894  1    0.40635  
## scale(latitude)                  2.9386  1    0.08648 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

fung_q2.mod.st <- glmmTMB(
  hillq2_fungi ~ sitetype +
    scale(ph) + scale(c) + scale(n) + scale(land_comp) + scale(micro_comp) +
    scale(longitude) + scale(latitude) + (1 | site_initials),
  data = mod_df, family = Gamma(link = "log"))

cat("AIC:", AIC(fung_q2.mod.st), "\n")

## AIC: 518.7592

Anova(fung_q2.mod.st, type = "II")

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: hillq2_fungi
##                    Chisq Df Pr(>Chisq)  
## sitetype          3.3696  4    0.49798  
## scale(ph)         3.9337  1    0.04733 *
## scale(c)          1.3923  1    0.23802  
## scale(n)          0.1222  1    0.72666  
## scale(land_comp)  0.0039  1    0.95016  
## scale(micro_comp) 0.5276  1    0.46762  
## scale(longitude)  0.3447  1    0.55711  
## scale(latitude)   3.2950  1    0.06949 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(fung_q2.em <- posthoc_em(fung_q2.mod.st, "hillq2_fungi", mod_df))

## # A tibble: 5 × 8
##   sitetype     response std.error    df conf.low conf.high .group  ymax
##   <chr>           <dbl>     <dbl> <dbl>    <dbl>     <dbl> <chr>  <dbl>
## 1 Kipuka           11.4      4.82   Inf     5.00      26.1 a       28.9
## 2 diversified      20.7      3.44   Inf    15.0       28.7 a       50.8
## 3 agroforestry     22.0      4.13   Inf    15.2       31.8 a       69.0
## 4 monoculture      24.2      6.42   Inf    14.4       40.7 a       35.6
## 5 forest           26.3      6.65   Inf    16.0       43.2 a       38.7

Community Composition

dbRDA

fung_dbrda <- capscale(
  fung_clr_dist ~
    scale(land_comp) + scale(longitude) + scale(latitude) +
    scale(management_score_rescaled) + scale(ph) + scale(c) + scale(n),
  data = meta_cc)

sort(vif.cca(fung_dbrda))

##                        scale(ph) scale(management_score_rescaled) 
##                         1.558111                         1.711913 
##                 scale(land_comp)                  scale(latitude) 
##                         2.211470                         2.669049 
##                 scale(longitude)                         scale(n) 
##                         3.126044                         4.466392 
##                         scale(c) 
##                         5.201031

perm <- how(nperm = 999)
setBlocks(perm) <- meta_cc$site_initials
set.seed(5)
(fung_dbrda_anova <- anova(fung_dbrda, by = "terms", permutations = perm))

## Permutation test for capscale under reduced model
## Terms added sequentially (first to last)
## Blocks:  meta_cc$site_initials 
## Permutation: free
## Number of permutations: 999
## 
## Model: capscale(formula = fung_clr_dist ~ scale(land_comp) + scale(longitude) + scale(latitude) + scale(management_score_rescaled) + scale(ph) + scale(c) + scale(n), data = meta_cc)
##                                  Df Variance      F Pr(>F)    
## scale(land_comp)                  1    549.5 4.3001  0.087 .  
## scale(longitude)                  1    208.9 1.6350  0.287    
## scale(latitude)                   1    285.7 2.2360  0.514    
## scale(management_score_rescaled)  1    215.9 1.6893  0.104    
## scale(ph)                         1    195.9 1.5330  0.039 *  
## scale(c)                          1    303.2 2.3728  0.001 ***
## scale(n)                          1    268.2 2.0986  0.135    
## Residual                         57   7283.4                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

fung_site_scores <- vegan::scores(fung_dbrda, choices = 1:2,
  display = c("sites", "bp"))

fung_dbrda_df <- as.data.frame(cbind(fung_site_scores$sites, meta_cc))

fung_dbrda_env <- as.data.frame(fung_site_scores$biplot) %>%
  rownames_to_column("variable") %>%
  mutate(variable = case_when(
    variable == "scale(land_comp)"                 ~ "landscape context",
    variable == "scale(management_score_rescaled)" ~ "management intensity",
    variable == "scale(c)"                         ~ "soil carbon",
    variable == "scale(ph)"                        ~ "soil pH",
    variable == "scale(n)"                         ~ "soil nitrogen",
    variable == "scale(longitude)"                 ~ "longitude",
    variable == "scale(latitude)"                  ~ "latitude",
    TRUE ~ variable
  )) %>%
  filter(!variable %in% c("latitude", "longitude", "management intensity"))

Variation partitioning

mgmt_cc_vp  <- meta_cc %>% dplyr::select(management_score_rescaled)              %>% scale() %>% as.data.frame()
soil_cc_vp  <- meta_cc %>% dplyr::select(c, n, ph, ec)                           %>% scale() %>% as.data.frame()
land_cc_vp  <- meta_cc %>% dplyr::select(agriculture, open, development, vegetation) %>% scale() %>% as.data.frame()
space_cc_vp <- meta_cc %>% dplyr::select(longitude, latitude)                    %>% scale() %>% as.data.frame()

(fung_vp <- varpart(fung_clr_dist, mgmt_cc_vp, soil_cc_vp, land_cc_vp, space_cc_vp,
  scale = FALSE, permutations = 999))

## 
## Partition of squared Euclidean distance in dbRDA 
## 
## Call: varpart(Y = fung_clr_dist, X = mgmt_cc_vp, soil_cc_vp,
## land_cc_vp, space_cc_vp, scale = FALSE, permutations = 999)
## 
## Explanatory tables:
## X1:  mgmt_cc_vp
## X2:  soil_cc_vp
## X3:  land_cc_vp
## X4:  space_cc_vp 
## 
## No. of explanatory tables: 4 
## Total variation (SS): 595878 
## No. of observations: 65 
## 
## Partition table:
##                             Df R.square Adj.R.square Testable
## [aeghklno] = X1              1  0.03197      0.01660     TRUE
## [befiklmo] = X2              4  0.12735      0.06917     TRUE
## [cfgjlmno] = X3              3  0.12381      0.08072     TRUE
## [dhijkmno] = X4              2  0.07475      0.04490     TRUE
## [abefghiklmno] = X1+X2       5  0.15462      0.08298     TRUE
## [acefghjklmno] = X1+X3       4  0.14753      0.09070     TRUE
## [adeghijklmno] = X1+X4       3  0.10101      0.05680     TRUE
## [bcefgijklmno] = X2+X3       7  0.22128      0.12564     TRUE
## [bdefhijklmno] = X2+X4       6  0.18819      0.10421     TRUE
## [cdfghijklmno] = X3+X4       5  0.17424      0.10426     TRUE
## [abcefghijklmno] = X1+X2+X3  8  0.24164      0.13330     TRUE
## [abdefghijklmno] = X1+X2+X4  7  0.21024      0.11325     TRUE
## [acdefghijklmno] = X1+X3+X4  6  0.19876      0.11587     TRUE
## [bcdefghijklmno] = X2+X3+X4  9  0.26908      0.14947     TRUE
## [abcdefghijklmno] = All     10  0.28615      0.15396     TRUE
## Individual fractions                                         
## [a] = X1 | X2+X3+X4          1               0.00449     TRUE
## [b] = X2 | X1+X3+X4          4               0.03809     TRUE
## [c] = X3 | X1+X2+X4          3               0.04071     TRUE
## [d] = X4 | X1+X2+X3          2               0.02066     TRUE
## [e]                          0               0.00713    FALSE
## [f]                          0               0.01836    FALSE
## [g]                          0               0.00455    FALSE
## [h]                          0               0.00317    FALSE
## [i]                          0               0.00451    FALSE
## [j]                          0               0.00961    FALSE
## [k]                          0              -0.00480    FALSE
## [l]                          0              -0.00427    FALSE
## [m]                          0               0.00542    FALSE
## [n]                          0               0.00160    FALSE
## [o]                          0               0.00473    FALSE
## [p] = Residuals              0               0.84604    FALSE
## Controlling 2 tables X                                       
## [ae] = X1 | X3+X4            1               0.01162     TRUE
## [ag] = X1 | X2+X4            1               0.00904     TRUE
## [ah] = X1 | X2+X3            1               0.00765     TRUE
## [be] = X2 | X3+X4            4               0.04522     TRUE
## [bf] = X2 | X1+X4            4               0.05645     TRUE
## [bi] = X2 | X1+X3            4               0.04259     TRUE
## [cf] = X3 | X1+X4            3               0.05907     TRUE
## [cg] = X3 | X2+X4            3               0.04526     TRUE
## [cj] = X3 | X1+X2            3               0.05032     TRUE
## [dh] = X4 | X2+X3            2               0.02383     TRUE
## [di] = X4 | X1+X3            2               0.02517     TRUE
## [dj] = X4 | X1+X2            2               0.03027     TRUE
## Controlling 1 table X                                        
## [aghn] = X1 | X2             1               0.01381     TRUE
## [aehk] = X1 | X3             1               0.00999     TRUE
## [aegl] = X1 | X4             1               0.01190     TRUE
## [bfim] = X2 | X1             4               0.06638     TRUE
## [beik] = X2 | X3             4               0.04493     TRUE
## [befl] = X2 | X4             4               0.05931     TRUE
## [cfjm] = X3 | X1             3               0.07410     TRUE
## [cgjn] = X3 | X2             3               0.05647     TRUE
## [cfgl] = X3 | X4             3               0.05936     TRUE
## [dijm] = X4 | X1             2               0.04020     TRUE
## [dhjn] = X4 | X2             2               0.03504     TRUE
## [dhik] = X4 | X3             2               0.02354     TRUE
## ---
## Use function 'dbrda' to test significance of fractions of interest

plot(fung_vp, digits = 2, Xnames = c("management", "soil", "landscape", "space"))

Variation partitioning: management, soil, landscape, space

PERMANOVA

set.seed(5)
(fung_permanova <- adonis2(
  fung_clr_dist ~ management_score_rescaled + ph + c + n +
    land_comp + longitude + latitude,
  data = meta_cc, permutations = perm, by = "terms"))

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks:  meta_cc$site_initials 
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = fung_clr_dist ~ management_score_rescaled + ph + c + n + land_comp + longitude + latitude, data = meta_cc, permutations = perm, by = "terms")
##                           Df SumOfSqs      R2      F Pr(>F)    
## management_score_rescaled  1    19049 0.03197 2.3293  0.254    
## ph                         1    19979 0.03353 2.4431  0.033 *  
## c                          1    22853 0.03835 2.7946  0.001 ***
## n                          1    17233 0.02892 2.1073  0.112    
## land_comp                  1    22069 0.03704 2.6986  0.365    
## longitude                  1    13768 0.02310 1.6835  0.001 ***
## latitude                   1    14790 0.02482 1.8085  0.742    
## Residual                  57   466137 0.78227                  
## Total                     64   595878 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

UpSet Plot — ASV Sharing

unique_asvs <- function(df, site) {
  df %>% filter(sitetype == site) %>%
    dplyr::select(any_of(fungi_otus)) %>%
    dplyr::select(where(~ sum(.x, na.rm = TRUE) > 0)) %>%
    colnames()
}

asv_sets <- list(
  Monoculture  = unique_asvs(fung_otu, "monoculture"),
  Diversified  = unique_asvs(fung_otu, "diversified"),
  Agroforestry = unique_asvs(fung_otu, "agroforestry"),
  Forest       = unique_asvs(fung_otu, "forest"),
  Kipuka       = unique_asvs(fung_otu, "Kipuka")
)
non_empty_sets <- asv_sets[sapply(asv_sets, length) > 0]

if (length(non_empty_sets) >= 2) {
  UpSetR::upset(
    fromList(non_empty_sets), order.by = "freq",
    nsets          = length(non_empty_sets),
    sets.bar.color = site_colors[names(non_empty_sets)],
    main.bar.color = "gray60",
    matrix.color   = "gray40",
    text.scale     = 1.2, point.size = 2.8, line.size = 1
  )
}

ASV overlap across site types

Plots

Alpha diversity — management gradient

make_scatter(mod_df, "management_score_rescaled", "hillq0_fungi", "observed richness (q0)")

Fungal richness (q0) vs management intensity

make_scatter(mod_df, "management_score_rescaled", "hillq1_fungi", "Exp(Shannon) (q1)")

Exp(Shannon) (q1) vs management intensity

make_scatter(mod_df, "management_score_rescaled", "hillq2_fungi", "inverse Simpson (q2)")

Inverse Simpson (q2) vs management intensity

Alpha diversity — site type boxplots

make_boxplot(mod_df, "hillq0_fungi", "observed richness (q0)", fung_q0.em)

Fungal richness (q0) by site type

make_boxplot(mod_df, "hillq1_fungi", "Exp(Shannon) (q1)", fung_q1.em)

Exp(Shannon) (q1) by site type

make_boxplot(mod_df, "hillq2_fungi", "inverse Simpson (q2)", fung_q2.em)

Inverse Simpson (q2) by site type

Alpha diversity panel

(make_scatter(mod_df, "management_score_rescaled", "hillq0_fungi", "observed richness (q0)") |
 make_boxplot(mod_df, "hillq0_fungi", "observed richness (q0)", fung_q0.em)) /
(make_scatter(mod_df, "management_score_rescaled", "hillq1_fungi", "Exp(Shannon) (q1)") |
 make_boxplot(mod_df, "hillq1_fungi", "Exp(Shannon) (q1)", fung_q1.em)) /
(make_scatter(mod_df, "management_score_rescaled", "hillq2_fungi", "inverse Simpson (q2)") |
 make_boxplot(mod_df, "hillq2_fungi", "inverse Simpson (q2)", fung_q2.em))

Alpha diversity — all Hill numbers

PCoA ordination

pcoa_fung    <- cmdscale(fung_clr_dist)
pcoa_fung_df <- bind_cols(
  meta_cc %>% dplyr::select(-starts_with("fASV")),
  as.data.frame(pcoa_fung) %>% rename(Axis.1 = 1, Axis.2 = 2))

ggplot(pcoa_fung_df, aes(x = Axis.1, y = Axis.2)) +
  stat_ellipse(aes(color = sitetype, fill = sitetype),
    alpha = 0.2, level = 0.95, geom = "polygon") +
  geom_point(aes(color = sitetype), size = 2, stroke = 0.75) +
  scale_color_manual(values = site_colors) +
  scale_fill_manual(values = site_colors) +
  labs(x = "PCoA 1", y = "PCoA 2",
       color = "site type", fill = "site type") +
  theme_bw() +
  theme(legend.background = element_blank(), legend.key = element_blank())

PCoA of fungal communities (Aitchison distance)

dbRDA ordination

ggplot(fung_dbrda_df, aes(x = CAP1, y = CAP2)) +
  stat_ellipse(aes(color = sitetype, fill = sitetype),
    alpha = 0.2, level = 0.95, geom = "polygon") +
  geom_point(aes(color = sitetype, fill = sitetype), size = 3, shape = 16) +
  scale_color_manual(values = site_colors) +
  scale_fill_manual(values = site_colors) +
  geom_segment(data = fung_dbrda_env,
    aes(x = 0, y = 0, xend = CAP1, yend = CAP2),
    arrow = arrow(length = unit(0.2, "cm")), color = "black", alpha = 0.75) +
  geom_label(data = fung_dbrda_env,
    aes(x = CAP1 + 0.05, y = CAP2 + 0.05, label = variable),
    color = "black", size = 3) +
  labs(x = "CAP1", y = "CAP2", color = "site type", fill = "site type") +
  theme_bw() +
  theme(legend.background = element_blank(), legend.key = element_blank())

dbRDA of fungal community composition

FUNGuild Functional Analysis

Data preparation

fungal_traits <- read_excel("FungalTraits.xlsx")

taxa_tbl <- as.data.frame(tax_table(ps_fung)) %>%
  rownames_to_column("ASV_ID") %>%
  left_join(fungal_traits, by = "Genus")

asv_long <- as(otu_table(ps_fung, taxa_are_rows = FALSE), "matrix") %>%
  as.data.frame() %>%
  rownames_to_column("sample_names") %>%
  pivot_longer(-sample_names, names_to = "ASV_ID", values_to = "abundance")

meta_fung <- as(sample_data(ps_fung), "data.frame") %>%
  rownames_to_column("sample_id") %>% 
  filter(!sitetype %in% c("mainland", "Mainland"))

functional_abund <- asv_long %>%
  left_join(taxa_tbl, by = "ASV_ID") %>%
  filter(!is.na(primary_lifestyle)) %>%
  left_join(meta_fung %>% dplyr::select(sample_names, sitetype,
    management_score, management_score_rescaled, ph, c, n,
    longitude, latitude, site_initials),
    by = "sample_names") %>%
  group_by(sample_names, primary_lifestyle) %>%
  summarise(abundance = sum(abundance, na.rm = TRUE), .groups = "drop") %>%
  group_by(sample_names) %>%
  mutate(relative_abundance = abundance / sum(abundance, na.rm = TRUE)) %>%
  ungroup() %>%
  left_join(meta_fung %>% dplyr::select(sample_names, sitetype,
    management_score, management_score_rescaled, ph, c, n,
    longitude, latitude, site_initials),
    by = "sample_names") %>%
  mutate(sitetype = factor(sitetype, levels = sitetype_levels))

ANOVA by functional group

(fg_anova <- functional_abund %>%
  group_by(primary_lifestyle) %>%
  summarise(
    p_value = summary(aov(relative_abundance ~ sitetype))[[1]][["Pr(>F)"]][1],
    .groups = "drop"
  ) %>%
  arrange(p_value))

## # A tibble: 21 × 2
##    primary_lifestyle      p_value
##    <chr>                    <dbl>
##  1 unspecified           0.000176
##  2 pollen_saprotroph     0.00168 
##  3 dung_saprotroph       0.00434 
##  4 animal_parasite       0.00960 
##  5 protistan_parasite    0.0102  
##  6 plant_pathogen        0.0219  
##  7 foliar_endophyte      0.0363  
##  8 root_endophyte        0.0513  
##  9 algal_parasite        0.0703  
## 10 nectar/tap_saprotroph 0.0783  
## # ℹ 11 more rows

Top 5 functional groups

top5_groups <- functional_abund %>%
  group_by(primary_lifestyle) %>%
  summarise(mean_abund = mean(relative_abundance, na.rm = TRUE)) %>%
  slice_max(mean_abund, n = 5) %>%
  pull(primary_lifestyle)

top5_summary <- functional_abund %>%
  filter(primary_lifestyle %in% top5_groups) %>%
  group_by(primary_lifestyle, sitetype) %>%
  summarise(mean_rel_abund = mean(relative_abundance, na.rm = TRUE), .groups = "drop")

ggplot(top5_summary, aes(x = sitetype, y = mean_rel_abund, fill = sitetype)) +
  geom_col() +
  facet_wrap(~ primary_lifestyle, scales = "free_y") +
  scale_fill_manual(values = site_colors, guide = "none") +
  scale_x_discrete(limits = sitetype_levels) +
  labs(x = "site type", y = "mean relative abundance") +
  theme_bw() +
  theme(axis.text.x  = element_text(angle = 30, hjust = 1),
        strip.text   = element_text(face = "bold"))

Mean relative abundance of top 5 functional groups by site type

Plant pathogen model

pathogen_df <- functional_abund %>%
  filter(primary_lifestyle == "plant_pathogen", relative_abundance > 0)

pathogen.mod <- glmmTMB(
  relative_abundance ~ scale(management_score_rescaled) +
    scale(ph) + scale(c) + scale(n) +
    scale(longitude) + scale(latitude) + (1 | site_initials),
  data = pathogen_df, family = Gamma(link = "log"))

cat("AIC:", AIC(pathogen.mod), "\n")

## AIC: -116.8082

Anova(pathogen.mod, type = "II")

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: relative_abundance
##                                   Chisq Df Pr(>Chisq)  
## scale(management_score_rescaled) 1.0986  1    0.29458  
## scale(ph)                        0.0603  1    0.80598  
## scale(c)                         0.6013  1    0.43807  
## scale(n)                         0.1399  1    0.70835  
## scale(longitude)                 0.7699  1    0.38024  
## scale(latitude)                  5.8891  1    0.01523 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

plot(simulateResiduals(pathogen.mod))

DHARMa residuals — plant pathogen model

pathogen.mod.st <- glmmTMB(
  relative_abundance ~ sitetype +
    scale(ph) + scale(c) + scale(n) +
    scale(longitude) + scale(latitude) + (1 | site_initials),
  data = pathogen_df, family = Gamma(link = "log"))

cat("AIC:", AIC(pathogen.mod.st), "\n")

## AIC: -113.7107

Anova(pathogen.mod.st, type = "II")

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: relative_abundance
##                   Chisq Df Pr(>Chisq)
## sitetype         4.0869  4     0.3944
## scale(ph)        0.0082  1     0.9280
## scale(c)         1.2356  1     0.2663
## scale(n)         0.2625  1     0.6084
## scale(longitude) 0.2853  1     0.5932
## scale(latitude)  1.8069  1     0.1789

(pathogen.em <- posthoc_em(pathogen.mod.st, "relative_abundance", pathogen_df))

## # A tibble: 5 × 8
##   sitetype     response std.error    df conf.low conf.high .group  ymax
##   <chr>           <dbl>     <dbl> <dbl>    <dbl>     <dbl> <chr>  <dbl>
## 1 Kipuka         0.0821    0.0323   Inf   0.0379     0.178 a      0.133
## 2 agroforestry   0.129     0.0235   Inf   0.0898     0.184 a      0.542
## 3 forest         0.140     0.0335   Inf   0.0876     0.224 a      0.257
## 4 diversified    0.176     0.0289   Inf   0.128      0.243 a      0.5  
## 5 monoculture    0.196     0.0519   Inf   0.117      0.329 a      0.393

p1 <- ggplot(pathogen_df, aes(x = management_score_rescaled, y = relative_abundance)) +
  geom_point(aes(color = sitetype), size = 3, alpha = 0.8) +
  geom_smooth(method = "glm", color = "black", se = TRUE) +
  scale_color_manual(values = site_colors) +
  labs(x = "management intensity", y = "plant pathogen relative abundance",
       color = "site type") +
  theme_bw()

p2 <- ggplot(pathogen_df, aes(x = sitetype, y = relative_abundance, fill = sitetype)) +
  geom_boxplot() +
  scale_fill_manual(values = site_colors, guide = "none") +
  scale_x_discrete(limits = sitetype_levels) +
  geom_text(data = pathogen.em,
    aes(x = sitetype, y = ymax * 1.1, label = .group),
    inherit.aes = FALSE) +
  labs(y = "plant pathogen relative abundance", x = "site type") +
  theme_bw()

p1 | p2

Plant pathogen relative abundance

Hawaii 2022 — Fungal Community Analysis

2026-06-09