library(tidyverse)
library(readxl)
library(phyloseq)
library(vegan)
library(adespatial)
library(hillR)
library(geosphere)
library(car)
library(glmmTMB)
library(emmeans)
library(multcomp)
library(multcompView)
library(broom)
library(DHARMa)
library(UpSetR)
library(ggplot2)
library(patchwork)
library(permute)
setwd("~/Desktop/hi22/analyses/fungi")
site_colors <- c(
monoculture = "#F7C1BB",
diversified = "#885A5A",
agroforestry = "#84B082",
forest = "#353A47",
Kipuka = "#DC136C",
Monoculture = "#F7C1BB",
Diversified = "#885A5A",
Agroforestry = "#84B082",
Forest = "#353A47"
)
sitetype_levels <- c("monoculture", "diversified", "agroforestry", "forest", "Kipuka")
ps_fung <- readRDS("Intermediate_data/phyloseq_f_clean.RDS")
ps_fung_clr <- readRDS("Intermediate_data/phyloseq_f_clean_clr.RDS")
fungi_otus <- taxa_names(ps_fung)
fung_otu <- bind_cols(
as(sample_data(ps_fung), "data.frame"),
as(otu_table(ps_fung, taxa_are_rows = FALSE), "matrix") %>% as.data.frame()
)
get_pcs <- function(df, vars, k, prefix) {
mat <- df[, vars]
keep <- complete.cases(mat)
p <- prcomp(mat[keep, ], center = TRUE, scale. = TRUE)
out <- as.data.frame(matrix(NA_real_, nrow = nrow(df), ncol = k))
colnames(out) <- paste0(prefix, "_PC", 1:k)
out[keep, ] <- p$x[, 1:k, drop = FALSE]
out
}
get_pc_composite <- function(df, vars, k, prefix) {
mat <- df[, vars]
keep <- complete.cases(mat)
p <- prcomp(mat[keep, ], center = TRUE, scale. = TRUE)
pcs <- as.data.frame(matrix(NA_real_, nrow = nrow(df), ncol = k))
colnames(pcs) <- paste0(prefix, "_PC", 1:k)
pcs[keep, ] <- p$x[, 1:k, drop = FALSE]
w <- summary(p)$importance[2, 1:k]
comp_vals <- as.numeric(as.matrix(pcs[keep, , drop = FALSE]) %*% w)
out <- rep(NA_real_, nrow(df))
out[keep] <- comp_vals
setNames(data.frame(out), paste0(prefix, "_comp"))
}
posthoc_em <- function(mod, response_col, data) {
cld(emmeans(mod, pairwise ~ sitetype, adjust = "none", type = "response")$emmeans,
alpha = 0.05, Letters = letters, adjust = "none") %>%
tidy() %>%
mutate(.group = trimws(.group)) %>%
left_join(
data %>% group_by(sitetype) %>%
summarise(ymax = max(.data[[response_col]], na.rm = TRUE)),
by = "sitetype"
)
}
fung_diversity <- fung_otu %>%
mutate(
hillq0_fungi = hill_taxa(across(any_of(fungi_otus)), q = 0),
hillq1_fungi = hill_taxa(across(any_of(fungi_otus)), q = 1),
hillq2_fungi = hill_taxa(across(any_of(fungi_otus)), q = 2),
chao_fungi = estimateR(across(any_of(fungi_otus)))["S.chao1", ],
shannon_fungi = vegan::diversity(across(any_of(fungi_otus))),
richness_fungi = specnumber(across(any_of(fungi_otus))),
evenness_fungi = shannon_fungi / log(richness_fungi)
) %>%
dplyr::select(-starts_with("fASV"))
coords <- fung_diversity %>% dplyr::select(longitude, latitude)
geo_dist <- geosphere::distm(coords)
pcnm_obj <- pcnm(geo_dist)
space_pcs <- as.data.frame(pcnm_obj$vectors)[1:2]
land_vars <- c("agriculture", "open", "development", "vegetation")
soil_vars <- c("c", "n", "ph", "ec")
micro_vars <- c("temp_flux_mean", "evapotranspiration_mm")
land_pcs <- get_pc_composite(fung_diversity, land_vars, 2, "land") %>% scale()
soil_pcs <- get_pcs(fung_diversity, soil_vars, 2, "soil") %>% scale()
micro_pcs <- get_pc_composite(fung_diversity, micro_vars, 2, "micro") %>% scale()
mod_df <- bind_cols(fung_diversity, space_pcs, land_pcs, micro_pcs) %>%
as.data.frame() %>%
mutate(sitetype = factor(sitetype, levels = sitetype_levels)) %>%
filter(!is.na(sitetype))
rownames(mod_df) <- mod_df$id
if (taxa_are_rows(ps_fung_clr)) {
fung_clr_mat <- t(as(otu_table(ps_fung_clr), "matrix"))
} else {
fung_clr_mat <- as(otu_table(ps_fung_clr), "matrix")
}
rownames(fung_clr_mat) <- as(sample_data(ps_fung_clr), "data.frame")$id
common_samples <- intersect(rownames(fung_clr_mat), mod_df$id)
fung_clr_cc <- fung_clr_mat[common_samples, ]
meta_cc <- mod_df[common_samples, ]
fung_clr_cc <- fung_clr_cc[order(rownames(fung_clr_cc)), ]
meta_cc <- meta_cc[order(meta_cc$id), ]
asv_var <- apply(fung_clr_cc, 2, var, na.rm = TRUE)
fung_clr_cc <- fung_clr_cc[, !is.na(asv_var) & asv_var > 0, drop = FALSE]
fung_clr_dist <- dist(fung_clr_cc, method = "euclidean")
fung_q0.mod <- glmmTMB(
hillq0_fungi ~ scale(management_score_rescaled) +
scale(ph) + scale(c) + scale(n) + scale(land_comp) +
scale(longitude) + scale(latitude) + (1 | site_initials),
data = mod_df, family = nbinom2(link = "log"))
cat("AIC:", AIC(fung_q0.mod), "\n")
## AIC: 723.998
Anova(fung_q0.mod, type = "II")
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: hillq0_fungi
## Chisq Df Pr(>Chisq)
## scale(management_score_rescaled) 0.4337 1 0.5102
## scale(ph) 0.0113 1 0.9153
## scale(c) 0.5884 1 0.4431
## scale(n) 2.4084 1 0.1207
## scale(land_comp) 0.2646 1 0.6070
## scale(longitude) 0.3257 1 0.5682
## scale(latitude) 1.0951 1 0.2954
plot(simulateResiduals(fung_q0.mod))
DHARMa residuals — q0 management model
fung_q0.mod.st <- glmmTMB(
hillq0_fungi ~ sitetype +
scale(ph) + scale(c) + scale(n) + scale(land_comp) + scale(micro_comp) +
scale(longitude) + scale(latitude) + (1 | site_initials),
data = mod_df, family = nbinom2(link = "log"))
cat("AIC:", AIC(fung_q0.mod.st), "\n")
## AIC: 728.634
Anova(fung_q0.mod.st, type = "II")
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: hillq0_fungi
## Chisq Df Pr(>Chisq)
## sitetype 3.9335 4 0.41508
## scale(ph) 0.0044 1 0.94727
## scale(c) 0.1348 1 0.71347
## scale(n) 0.3449 1 0.55701
## scale(land_comp) 0.0068 1 0.93409
## scale(micro_comp) 0.9952 1 0.31847
## scale(longitude) 0.9317 1 0.33443
## scale(latitude) 2.9063 1 0.08823 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(fung_q0.em <- posthoc_em(fung_q0.mod.st, "hillq0_fungi", mod_df))
## # A tibble: 5 × 8
## sitetype response std.error df conf.low conf.high .group ymax
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <int>
## 1 Kipuka 147. 30.0 Inf 98.2 219. a 175
## 2 monoculture 171. 21.9 Inf 133. 220. a 223
## 3 diversified 181. 14.1 Inf 156. 211. a 261
## 4 forest 200. 23.1 Inf 160. 251. a 248
## 5 agroforestry 201. 17.2 Inf 170. 238. a 273
fung_q1.mod <- glmmTMB(
hillq1_fungi ~ scale(management_score_rescaled) +
scale(ph) + scale(c) + scale(n) + scale(land_comp) +
scale(longitude) + scale(latitude) + (1 | site_initials),
data = mod_df, family = Gamma(link = "log"))
cat("AIC:", AIC(fung_q1.mod), "\n")
## AIC: 605.0253
Anova(fung_q1.mod, type = "II")
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: hillq1_fungi
## Chisq Df Pr(>Chisq)
## scale(management_score_rescaled) 1.5538 1 0.21257
## scale(ph) 0.9367 1 0.33314
## scale(c) 2.2046 1 0.13760
## scale(n) 2.9626 1 0.08521 .
## scale(land_comp) 0.9260 1 0.33589
## scale(longitude) 0.2692 1 0.60390
## scale(latitude) 1.9165 1 0.16625
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(simulateResiduals(fung_q1.mod))
DHARMa residuals — q1 management model
fung_q1.mod.st <- glmmTMB(
hillq1_fungi ~ sitetype +
scale(ph) + scale(c) + scale(n) + scale(land_comp) + scale(micro_comp) +
scale(longitude) + scale(latitude) + (1 | site_initials),
data = mod_df, family = Gamma(link = "log"))
cat("AIC:", AIC(fung_q1.mod.st), "\n")
## AIC: 609.929
Anova(fung_q1.mod.st, type = "II")
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: hillq1_fungi
## Chisq Df Pr(>Chisq)
## sitetype 4.5881 4 0.33222
## scale(ph) 2.4642 1 0.11647
## scale(c) 0.5680 1 0.45106
## scale(n) 0.0030 1 0.95627
## scale(land_comp) 0.0052 1 0.94229
## scale(micro_comp) 0.9449 1 0.33103
## scale(longitude) 0.4883 1 0.48469
## scale(latitude) 3.8233 1 0.05054 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(fung_q1.em <- posthoc_em(fung_q1.mod.st, "hillq1_fungi", mod_df))
## # A tibble: 5 × 8
## sitetype response std.error df conf.low conf.high .group ymax
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 Kipuka 26.4 9.53 Inf 13.0 53.6 a 55.3
## 2 diversified 48.4 6.76 Inf 36.8 63.7 ab 103.
## 3 agroforestry 52.7 8.29 Inf 38.7 71.7 ab 123.
## 4 monoculture 52.9 11.9 Inf 34.1 82.3 ab 74.7
## 5 forest 58.8 12.4 Inf 38.9 88.8 b 84.5
fung_q2.mod <- glmmTMB(
hillq2_fungi ~ scale(management_score_rescaled) +
scale(ph) + scale(c) + scale(n) + scale(land_comp) +
scale(longitude) + scale(latitude) + (1 | site_initials),
data = mod_df, family = Gamma(link = "log"))
cat("AIC:", AIC(fung_q2.mod), "\n")
## AIC: 511.2699
Anova(fung_q2.mod, type = "II")
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: hillq2_fungi
## Chisq Df Pr(>Chisq)
## scale(management_score_rescaled) 3.2638 1 0.07083 .
## scale(ph) 1.7569 1 0.18502
## scale(c) 3.7338 1 0.05332 .
## scale(n) 4.0578 1 0.04397 *
## scale(land_comp) 0.9484 1 0.33014
## scale(longitude) 0.6894 1 0.40635
## scale(latitude) 2.9386 1 0.08648 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fung_q2.mod.st <- glmmTMB(
hillq2_fungi ~ sitetype +
scale(ph) + scale(c) + scale(n) + scale(land_comp) + scale(micro_comp) +
scale(longitude) + scale(latitude) + (1 | site_initials),
data = mod_df, family = Gamma(link = "log"))
cat("AIC:", AIC(fung_q2.mod.st), "\n")
## AIC: 518.7592
Anova(fung_q2.mod.st, type = "II")
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: hillq2_fungi
## Chisq Df Pr(>Chisq)
## sitetype 3.3696 4 0.49798
## scale(ph) 3.9337 1 0.04733 *
## scale(c) 1.3923 1 0.23802
## scale(n) 0.1222 1 0.72666
## scale(land_comp) 0.0039 1 0.95016
## scale(micro_comp) 0.5276 1 0.46762
## scale(longitude) 0.3447 1 0.55711
## scale(latitude) 3.2950 1 0.06949 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(fung_q2.em <- posthoc_em(fung_q2.mod.st, "hillq2_fungi", mod_df))
## # A tibble: 5 × 8
## sitetype response std.error df conf.low conf.high .group ymax
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 Kipuka 11.4 4.82 Inf 5.00 26.1 a 28.9
## 2 diversified 20.7 3.44 Inf 15.0 28.7 a 50.8
## 3 agroforestry 22.0 4.13 Inf 15.2 31.8 a 69.0
## 4 monoculture 24.2 6.42 Inf 14.4 40.7 a 35.6
## 5 forest 26.3 6.65 Inf 16.0 43.2 a 38.7
fung_dbrda <- capscale(
fung_clr_dist ~
scale(land_comp) + scale(longitude) + scale(latitude) +
scale(management_score_rescaled) + scale(ph) + scale(c) + scale(n),
data = meta_cc)
sort(vif.cca(fung_dbrda))
## scale(ph) scale(management_score_rescaled)
## 1.558111 1.711913
## scale(land_comp) scale(latitude)
## 2.211470 2.669049
## scale(longitude) scale(n)
## 3.126044 4.466392
## scale(c)
## 5.201031
perm <- how(nperm = 999)
setBlocks(perm) <- meta_cc$site_initials
set.seed(5)
(fung_dbrda_anova <- anova(fung_dbrda, by = "terms", permutations = perm))
## Permutation test for capscale under reduced model
## Terms added sequentially (first to last)
## Blocks: meta_cc$site_initials
## Permutation: free
## Number of permutations: 999
##
## Model: capscale(formula = fung_clr_dist ~ scale(land_comp) + scale(longitude) + scale(latitude) + scale(management_score_rescaled) + scale(ph) + scale(c) + scale(n), data = meta_cc)
## Df Variance F Pr(>F)
## scale(land_comp) 1 549.5 4.3001 0.087 .
## scale(longitude) 1 208.9 1.6350 0.287
## scale(latitude) 1 285.7 2.2360 0.514
## scale(management_score_rescaled) 1 215.9 1.6893 0.104
## scale(ph) 1 195.9 1.5330 0.039 *
## scale(c) 1 303.2 2.3728 0.001 ***
## scale(n) 1 268.2 2.0986 0.135
## Residual 57 7283.4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fung_site_scores <- vegan::scores(fung_dbrda, choices = 1:2,
display = c("sites", "bp"))
fung_dbrda_df <- as.data.frame(cbind(fung_site_scores$sites, meta_cc))
fung_dbrda_env <- as.data.frame(fung_site_scores$biplot) %>%
rownames_to_column("variable") %>%
mutate(variable = case_when(
variable == "scale(land_comp)" ~ "landscape context",
variable == "scale(management_score_rescaled)" ~ "management intensity",
variable == "scale(c)" ~ "soil carbon",
variable == "scale(ph)" ~ "soil pH",
variable == "scale(n)" ~ "soil nitrogen",
variable == "scale(longitude)" ~ "longitude",
variable == "scale(latitude)" ~ "latitude",
TRUE ~ variable
)) %>%
filter(!variable %in% c("latitude", "longitude", "management intensity"))
mgmt_cc_vp <- meta_cc %>% dplyr::select(management_score_rescaled) %>% scale() %>% as.data.frame()
soil_cc_vp <- meta_cc %>% dplyr::select(c, n, ph, ec) %>% scale() %>% as.data.frame()
land_cc_vp <- meta_cc %>% dplyr::select(agriculture, open, development, vegetation) %>% scale() %>% as.data.frame()
space_cc_vp <- meta_cc %>% dplyr::select(longitude, latitude) %>% scale() %>% as.data.frame()
(fung_vp <- varpart(fung_clr_dist, mgmt_cc_vp, soil_cc_vp, land_cc_vp, space_cc_vp,
scale = FALSE, permutations = 999))
##
## Partition of squared Euclidean distance in dbRDA
##
## Call: varpart(Y = fung_clr_dist, X = mgmt_cc_vp, soil_cc_vp,
## land_cc_vp, space_cc_vp, scale = FALSE, permutations = 999)
##
## Explanatory tables:
## X1: mgmt_cc_vp
## X2: soil_cc_vp
## X3: land_cc_vp
## X4: space_cc_vp
##
## No. of explanatory tables: 4
## Total variation (SS): 595878
## No. of observations: 65
##
## Partition table:
## Df R.square Adj.R.square Testable
## [aeghklno] = X1 1 0.03197 0.01660 TRUE
## [befiklmo] = X2 4 0.12735 0.06917 TRUE
## [cfgjlmno] = X3 3 0.12381 0.08072 TRUE
## [dhijkmno] = X4 2 0.07475 0.04490 TRUE
## [abefghiklmno] = X1+X2 5 0.15462 0.08298 TRUE
## [acefghjklmno] = X1+X3 4 0.14753 0.09070 TRUE
## [adeghijklmno] = X1+X4 3 0.10101 0.05680 TRUE
## [bcefgijklmno] = X2+X3 7 0.22128 0.12564 TRUE
## [bdefhijklmno] = X2+X4 6 0.18819 0.10421 TRUE
## [cdfghijklmno] = X3+X4 5 0.17424 0.10426 TRUE
## [abcefghijklmno] = X1+X2+X3 8 0.24164 0.13330 TRUE
## [abdefghijklmno] = X1+X2+X4 7 0.21024 0.11325 TRUE
## [acdefghijklmno] = X1+X3+X4 6 0.19876 0.11587 TRUE
## [bcdefghijklmno] = X2+X3+X4 9 0.26908 0.14947 TRUE
## [abcdefghijklmno] = All 10 0.28615 0.15396 TRUE
## Individual fractions
## [a] = X1 | X2+X3+X4 1 0.00449 TRUE
## [b] = X2 | X1+X3+X4 4 0.03809 TRUE
## [c] = X3 | X1+X2+X4 3 0.04071 TRUE
## [d] = X4 | X1+X2+X3 2 0.02066 TRUE
## [e] 0 0.00713 FALSE
## [f] 0 0.01836 FALSE
## [g] 0 0.00455 FALSE
## [h] 0 0.00317 FALSE
## [i] 0 0.00451 FALSE
## [j] 0 0.00961 FALSE
## [k] 0 -0.00480 FALSE
## [l] 0 -0.00427 FALSE
## [m] 0 0.00542 FALSE
## [n] 0 0.00160 FALSE
## [o] 0 0.00473 FALSE
## [p] = Residuals 0 0.84604 FALSE
## Controlling 2 tables X
## [ae] = X1 | X3+X4 1 0.01162 TRUE
## [ag] = X1 | X2+X4 1 0.00904 TRUE
## [ah] = X1 | X2+X3 1 0.00765 TRUE
## [be] = X2 | X3+X4 4 0.04522 TRUE
## [bf] = X2 | X1+X4 4 0.05645 TRUE
## [bi] = X2 | X1+X3 4 0.04259 TRUE
## [cf] = X3 | X1+X4 3 0.05907 TRUE
## [cg] = X3 | X2+X4 3 0.04526 TRUE
## [cj] = X3 | X1+X2 3 0.05032 TRUE
## [dh] = X4 | X2+X3 2 0.02383 TRUE
## [di] = X4 | X1+X3 2 0.02517 TRUE
## [dj] = X4 | X1+X2 2 0.03027 TRUE
## Controlling 1 table X
## [aghn] = X1 | X2 1 0.01381 TRUE
## [aehk] = X1 | X3 1 0.00999 TRUE
## [aegl] = X1 | X4 1 0.01190 TRUE
## [bfim] = X2 | X1 4 0.06638 TRUE
## [beik] = X2 | X3 4 0.04493 TRUE
## [befl] = X2 | X4 4 0.05931 TRUE
## [cfjm] = X3 | X1 3 0.07410 TRUE
## [cgjn] = X3 | X2 3 0.05647 TRUE
## [cfgl] = X3 | X4 3 0.05936 TRUE
## [dijm] = X4 | X1 2 0.04020 TRUE
## [dhjn] = X4 | X2 2 0.03504 TRUE
## [dhik] = X4 | X3 2 0.02354 TRUE
## ---
## Use function 'dbrda' to test significance of fractions of interest
plot(fung_vp, digits = 2, Xnames = c("management", "soil", "landscape", "space"))
Variation partitioning: management, soil, landscape, space
set.seed(5)
(fung_permanova <- adonis2(
fung_clr_dist ~ management_score_rescaled + ph + c + n +
land_comp + longitude + latitude,
data = meta_cc, permutations = perm, by = "terms"))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks: meta_cc$site_initials
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = fung_clr_dist ~ management_score_rescaled + ph + c + n + land_comp + longitude + latitude, data = meta_cc, permutations = perm, by = "terms")
## Df SumOfSqs R2 F Pr(>F)
## management_score_rescaled 1 19049 0.03197 2.3293 0.254
## ph 1 19979 0.03353 2.4431 0.033 *
## c 1 22853 0.03835 2.7946 0.001 ***
## n 1 17233 0.02892 2.1073 0.112
## land_comp 1 22069 0.03704 2.6986 0.365
## longitude 1 13768 0.02310 1.6835 0.001 ***
## latitude 1 14790 0.02482 1.8085 0.742
## Residual 57 466137 0.78227
## Total 64 595878 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
unique_asvs <- function(df, site) {
df %>% filter(sitetype == site) %>%
dplyr::select(any_of(fungi_otus)) %>%
dplyr::select(where(~ sum(.x, na.rm = TRUE) > 0)) %>%
colnames()
}
asv_sets <- list(
Monoculture = unique_asvs(fung_otu, "monoculture"),
Diversified = unique_asvs(fung_otu, "diversified"),
Agroforestry = unique_asvs(fung_otu, "agroforestry"),
Forest = unique_asvs(fung_otu, "forest"),
Kipuka = unique_asvs(fung_otu, "Kipuka")
)
non_empty_sets <- asv_sets[sapply(asv_sets, length) > 0]
if (length(non_empty_sets) >= 2) {
UpSetR::upset(
fromList(non_empty_sets), order.by = "freq",
nsets = length(non_empty_sets),
sets.bar.color = site_colors[names(non_empty_sets)],
main.bar.color = "gray60",
matrix.color = "gray40",
text.scale = 1.2, point.size = 2.8, line.size = 1
)
}
ASV overlap across site types
make_scatter(mod_df, "management_score_rescaled", "hillq0_fungi", "observed richness (q0)")
Fungal richness (q0) vs management intensity
make_scatter(mod_df, "management_score_rescaled", "hillq1_fungi", "Exp(Shannon) (q1)")
Exp(Shannon) (q1) vs management intensity
make_scatter(mod_df, "management_score_rescaled", "hillq2_fungi", "inverse Simpson (q2)")
Inverse Simpson (q2) vs management intensity
make_boxplot(mod_df, "hillq0_fungi", "observed richness (q0)", fung_q0.em)
Fungal richness (q0) by site type
make_boxplot(mod_df, "hillq1_fungi", "Exp(Shannon) (q1)", fung_q1.em)
Exp(Shannon) (q1) by site type
make_boxplot(mod_df, "hillq2_fungi", "inverse Simpson (q2)", fung_q2.em)
Inverse Simpson (q2) by site type
(make_scatter(mod_df, "management_score_rescaled", "hillq0_fungi", "observed richness (q0)") |
make_boxplot(mod_df, "hillq0_fungi", "observed richness (q0)", fung_q0.em)) /
(make_scatter(mod_df, "management_score_rescaled", "hillq1_fungi", "Exp(Shannon) (q1)") |
make_boxplot(mod_df, "hillq1_fungi", "Exp(Shannon) (q1)", fung_q1.em)) /
(make_scatter(mod_df, "management_score_rescaled", "hillq2_fungi", "inverse Simpson (q2)") |
make_boxplot(mod_df, "hillq2_fungi", "inverse Simpson (q2)", fung_q2.em))
Alpha diversity — all Hill numbers
pcoa_fung <- cmdscale(fung_clr_dist)
pcoa_fung_df <- bind_cols(
meta_cc %>% dplyr::select(-starts_with("fASV")),
as.data.frame(pcoa_fung) %>% rename(Axis.1 = 1, Axis.2 = 2))
ggplot(pcoa_fung_df, aes(x = Axis.1, y = Axis.2)) +
stat_ellipse(aes(color = sitetype, fill = sitetype),
alpha = 0.2, level = 0.95, geom = "polygon") +
geom_point(aes(color = sitetype), size = 2, stroke = 0.75) +
scale_color_manual(values = site_colors) +
scale_fill_manual(values = site_colors) +
labs(x = "PCoA 1", y = "PCoA 2",
color = "site type", fill = "site type") +
theme_bw() +
theme(legend.background = element_blank(), legend.key = element_blank())
PCoA of fungal communities (Aitchison distance)
ggplot(fung_dbrda_df, aes(x = CAP1, y = CAP2)) +
stat_ellipse(aes(color = sitetype, fill = sitetype),
alpha = 0.2, level = 0.95, geom = "polygon") +
geom_point(aes(color = sitetype, fill = sitetype), size = 3, shape = 16) +
scale_color_manual(values = site_colors) +
scale_fill_manual(values = site_colors) +
geom_segment(data = fung_dbrda_env,
aes(x = 0, y = 0, xend = CAP1, yend = CAP2),
arrow = arrow(length = unit(0.2, "cm")), color = "black", alpha = 0.75) +
geom_label(data = fung_dbrda_env,
aes(x = CAP1 + 0.05, y = CAP2 + 0.05, label = variable),
color = "black", size = 3) +
labs(x = "CAP1", y = "CAP2", color = "site type", fill = "site type") +
theme_bw() +
theme(legend.background = element_blank(), legend.key = element_blank())
dbRDA of fungal community composition
fungal_traits <- read_excel("FungalTraits.xlsx")
taxa_tbl <- as.data.frame(tax_table(ps_fung)) %>%
rownames_to_column("ASV_ID") %>%
left_join(fungal_traits, by = "Genus")
asv_long <- as(otu_table(ps_fung, taxa_are_rows = FALSE), "matrix") %>%
as.data.frame() %>%
rownames_to_column("sample_names") %>%
pivot_longer(-sample_names, names_to = "ASV_ID", values_to = "abundance")
meta_fung <- as(sample_data(ps_fung), "data.frame") %>%
rownames_to_column("sample_id") %>%
filter(!sitetype %in% c("mainland", "Mainland"))
functional_abund <- asv_long %>%
left_join(taxa_tbl, by = "ASV_ID") %>%
filter(!is.na(primary_lifestyle)) %>%
left_join(meta_fung %>% dplyr::select(sample_names, sitetype,
management_score, management_score_rescaled, ph, c, n,
longitude, latitude, site_initials),
by = "sample_names") %>%
group_by(sample_names, primary_lifestyle) %>%
summarise(abundance = sum(abundance, na.rm = TRUE), .groups = "drop") %>%
group_by(sample_names) %>%
mutate(relative_abundance = abundance / sum(abundance, na.rm = TRUE)) %>%
ungroup() %>%
left_join(meta_fung %>% dplyr::select(sample_names, sitetype,
management_score, management_score_rescaled, ph, c, n,
longitude, latitude, site_initials),
by = "sample_names") %>%
mutate(sitetype = factor(sitetype, levels = sitetype_levels))
(fg_anova <- functional_abund %>%
group_by(primary_lifestyle) %>%
summarise(
p_value = summary(aov(relative_abundance ~ sitetype))[[1]][["Pr(>F)"]][1],
.groups = "drop"
) %>%
arrange(p_value))
## # A tibble: 21 × 2
## primary_lifestyle p_value
## <chr> <dbl>
## 1 unspecified 0.000176
## 2 pollen_saprotroph 0.00168
## 3 dung_saprotroph 0.00434
## 4 animal_parasite 0.00960
## 5 protistan_parasite 0.0102
## 6 plant_pathogen 0.0219
## 7 foliar_endophyte 0.0363
## 8 root_endophyte 0.0513
## 9 algal_parasite 0.0703
## 10 nectar/tap_saprotroph 0.0783
## # ℹ 11 more rows
top5_groups <- functional_abund %>%
group_by(primary_lifestyle) %>%
summarise(mean_abund = mean(relative_abundance, na.rm = TRUE)) %>%
slice_max(mean_abund, n = 5) %>%
pull(primary_lifestyle)
top5_summary <- functional_abund %>%
filter(primary_lifestyle %in% top5_groups) %>%
group_by(primary_lifestyle, sitetype) %>%
summarise(mean_rel_abund = mean(relative_abundance, na.rm = TRUE), .groups = "drop")
ggplot(top5_summary, aes(x = sitetype, y = mean_rel_abund, fill = sitetype)) +
geom_col() +
facet_wrap(~ primary_lifestyle, scales = "free_y") +
scale_fill_manual(values = site_colors, guide = "none") +
scale_x_discrete(limits = sitetype_levels) +
labs(x = "site type", y = "mean relative abundance") +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1),
strip.text = element_text(face = "bold"))
Mean relative abundance of top 5 functional groups by site type
pathogen_df <- functional_abund %>%
filter(primary_lifestyle == "plant_pathogen", relative_abundance > 0)
pathogen.mod <- glmmTMB(
relative_abundance ~ scale(management_score_rescaled) +
scale(ph) + scale(c) + scale(n) +
scale(longitude) + scale(latitude) + (1 | site_initials),
data = pathogen_df, family = Gamma(link = "log"))
cat("AIC:", AIC(pathogen.mod), "\n")
## AIC: -116.8082
Anova(pathogen.mod, type = "II")
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: relative_abundance
## Chisq Df Pr(>Chisq)
## scale(management_score_rescaled) 1.0986 1 0.29458
## scale(ph) 0.0603 1 0.80598
## scale(c) 0.6013 1 0.43807
## scale(n) 0.1399 1 0.70835
## scale(longitude) 0.7699 1 0.38024
## scale(latitude) 5.8891 1 0.01523 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(simulateResiduals(pathogen.mod))
DHARMa residuals — plant pathogen model
pathogen.mod.st <- glmmTMB(
relative_abundance ~ sitetype +
scale(ph) + scale(c) + scale(n) +
scale(longitude) + scale(latitude) + (1 | site_initials),
data = pathogen_df, family = Gamma(link = "log"))
cat("AIC:", AIC(pathogen.mod.st), "\n")
## AIC: -113.7107
Anova(pathogen.mod.st, type = "II")
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: relative_abundance
## Chisq Df Pr(>Chisq)
## sitetype 4.0869 4 0.3944
## scale(ph) 0.0082 1 0.9280
## scale(c) 1.2356 1 0.2663
## scale(n) 0.2625 1 0.6084
## scale(longitude) 0.2853 1 0.5932
## scale(latitude) 1.8069 1 0.1789
(pathogen.em <- posthoc_em(pathogen.mod.st, "relative_abundance", pathogen_df))
## # A tibble: 5 × 8
## sitetype response std.error df conf.low conf.high .group ymax
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 Kipuka 0.0821 0.0323 Inf 0.0379 0.178 a 0.133
## 2 agroforestry 0.129 0.0235 Inf 0.0898 0.184 a 0.542
## 3 forest 0.140 0.0335 Inf 0.0876 0.224 a 0.257
## 4 diversified 0.176 0.0289 Inf 0.128 0.243 a 0.5
## 5 monoculture 0.196 0.0519 Inf 0.117 0.329 a 0.393
p1 <- ggplot(pathogen_df, aes(x = management_score_rescaled, y = relative_abundance)) +
geom_point(aes(color = sitetype), size = 3, alpha = 0.8) +
geom_smooth(method = "glm", color = "black", se = TRUE) +
scale_color_manual(values = site_colors) +
labs(x = "management intensity", y = "plant pathogen relative abundance",
color = "site type") +
theme_bw()
p2 <- ggplot(pathogen_df, aes(x = sitetype, y = relative_abundance, fill = sitetype)) +
geom_boxplot() +
scale_fill_manual(values = site_colors, guide = "none") +
scale_x_discrete(limits = sitetype_levels) +
geom_text(data = pathogen.em,
aes(x = sitetype, y = ymax * 1.1, label = .group),
inherit.aes = FALSE) +
labs(y = "plant pathogen relative abundance", x = "site type") +
theme_bw()
p1 | p2
Plant pathogen relative abundance