bioc.pkg <- c("phyloseq", "BiocManager", "MicrobiotaProcess", "decontam")
cran.pkg <- c("tidyverse", "readxl", "ape", "pacman", "glue", "vegan",
"devtools", "ggrepel", "reshape2", "BiocManager",
"pheatmap", "patchwork", "rstatix", "ggpubr")
inst.pkg <- cran.pkg %in% installed.packages()
if (any(!inst.pkg)) { install.packages(cran.pkg[!inst.pkg], repos = "http://cran.rstudio.com/") }
inst.pkg <- bioc.pkg %in% installed.packages()
if (any(!inst.pkg)) { BiocManager::install(bioc.pkg[!inst.pkg]) }
pkg <- c("tidyverse", "phyloseq", "gridExtra", "BiocManager",
"vegan", "devtools", "ggrepel", "reshape2",
"pheatmap", "glue", "ape", "readxl")
for (i in 1:length(pkg)) {
library(pkg[i], character.only = TRUE, verbose = FALSE, attach.required = FALSE)
}
library(patchwork, verbose = FALSE)
library(ggpubr, verbose = FALSE)
library(rstatix, verbose = FALSE)
library(decontam)
library(phyloseq)
library(ggtext)
tryCatch(devtools::unload("tidytree", quiet = TRUE), error = function(e) invisible(NULL))
getwd()
## [1] "/Users/josefinehaageneklund/Downloads"
setwd("~/OneDrive - Københavns Universitet/Speciale/ON UMI 16S")
# Reading abundance/feature table
ft_raw <- read_excel("~/OneDrive - Københavns Universitet/Speciale/ON UMI 16S/IMMU_WILD_merged_table_ONT_Josefine_16s.xlsx") %>%
as.data.frame()
dim(ft_raw)
## [1] 482 134
head(ft_raw[, 1:5])
## #OTU ID
## 1 K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__
## 2 K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040
## 3 K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis strain JCM 1190
## 4 K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis subsp. lactis strain YIT 4121
## 5 K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium bifidum strain KCTC 3202
## 6 K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium breve DSM 20213 = JCM 1192
## BRK97_P1 BRK98_P1 BRK99_P1 BRK100_P1
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
tail(ft_raw[, 1:5])
## #OTU ID
## 477 K__Bacteria;P__Verrucomicrobiota;C__Verrucomicrobiia;O__Verrucomicrobiales;F__Akkermansiaceae;G__Akkermansia;__
## 478 K__Bacteria;P__Verrucomicrobiota;__;__;__;__;__
## 479 K__Bacteria;__;__;__;__;__;__
## 480 Unassigned;__;__;__;__;__;__
## 481 Reads per sample/condition for >10000
## 482 BRK/Plate
## BRK97_P1 BRK98_P1 BRK99_P1 BRK100_P1
## 477 0 0 1 1
## 478 0 0 0 0
## 479 70 48 33 56
## 480 0 0 0 0
## 481 12648 16308 18279 25356
## 482 BRK97_P1 BRK98_P1 BRK99_P1 BRK100_P1
# Save strings before separating — these become rownames for both ft and tx
otu_ids <- ft_raw[["#OTU ID"]]
# Parse taxonomy from #OTU ID column
ft_sep <- tidyr::separate(data = ft_raw,
col = `#OTU ID`,
into = c("Kingdom", "Phylum",
"Class", "Order", "Family",
"Genus", "Species"),
sep = ";")
# Remove taxon tags (K__, P__, etc.)
ft_sep <- apply(ft_sep, 2, function(x) { gsub("^.__", "", x) })
# Split into taxonomy matrix (tx) and count matrix (ft)
taxonomy_cols <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
# Taxonomy matrix
tx <- ft_sep[, taxonomy_cols, drop = FALSE]
rownames(tx) <- otu_ids # restore rownames
# Count matrix — convert to numeric
ft_numeric <- ft_sep[, !colnames(ft_sep) %in% taxonomy_cols, drop = FALSE]
ft_numeric <- apply(ft_numeric, 2, function(x) as.numeric(as.character(x)))
ft_numeric <- as.matrix(ft_numeric)
rownames(ft_numeric) <- otu_ids # restore rownames
ft <- ft_numeric
# Verify
cat("ft rownames (first 2):\n"); print(head(rownames(ft), 2))
## ft rownames (first 2):
## [1] "K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__"
## [2] "K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040"
cat("tx rownames (first 2):\n"); print(head(rownames(tx), 2))
## tx rownames (first 2):
## [1] "K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__"
## [2] "K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040"
dim(ft)
## [1] 482 133
colSums(ft) %>% sort()
## named numeric(0)
sum(colSums(ft) == 0)
## [1] NA
range(colSums(ft))
## [1] NA NA
mt <- read_excel("~/OneDrive - Københavns Universitet/Speciale/ON UMI 16S/Metadata_for_feature_table_Final.xlsx",
na = "NA")
head(mt)
## # A tibble: 6 × 16
## Seq_ID Study `Mouse ID` Strain Sex `Mother ID` Cage Isolater Eartag
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 IMU 78 BALB/c Female 6 6 1 3
## 2 2 IMU 79 BALB/c Female 6 6 1 30
## 3 3 IMU 80 BALB/c Female 6 6 1 5
## 4 4 IMU 81 BALB/c Male 6 7 4 3
## 5 5 IMU 82 BALB/c Male 6 7 4 30
## 6 6 IMU 83 BALB/c Male 6 7 4 5
## # ℹ 7 more variables: `Pre-immunized` <chr>, `Pre-mix` <chr>, FMT <chr>,
## # Round <dbl>, Trial <chr>, Sample <chr>, Barcode <chr>
mt <- as.data.frame(mt) # tibble rownames virker ikke altid
rownames(mt) <- mt$Barcode
mt <- mt[colnames(ft), ] # align til feature table
# Create Groups
# Ensures group names are consistent across microbiome and flow data analyses
mt <- mt %>%
mutate(
Group = case_when(
grepl("Donor_inoculum_SPF", Study) ~ "Donor_SPF",
grepl("Donor_inoculum_Wild", Study) ~ "Donor_Wild",
grepl("Negative", Study) ~ "Negative_Control",
grepl("Mock", Study) ~ "Mock_Positive",
Strain == "BALB/c" & `Pre-immunized` == "No" & FMT == "SPF" & Sample == "weaning" ~ "BALB/c_No_SPF_weaning",
Strain == "BALB/c" & `Pre-immunized` == "Yes" & FMT == "SPF" & Sample == "Term" ~ "BALB/c_Yes_SPF_Term",
Strain == "BALB/c" & `Pre-immunized` == "No" & FMT == "Wild" & Sample == "weaning" ~ "BALB/c_No_Wild_weaning",
Strain == "BALB/c" & `Pre-immunized` == "No" & FMT == "Wild" & Sample == "Term, colon content" ~ "BALB/c_No_Wild_colon",
Strain == "Wildling" ~ "Wildling",
Strain == "Pet Shop" ~ "Pet Shop",
Strain == "Wild" ~ "Wild",
Study == "IMU_mother" & FMT == "SPF" & Sample == "Pre_ABX" ~ "Mother_SPF_PreABX",
Study == "IMU_mother" & FMT == "SPF" & Sample == "post_1.FMT" ~ "Mother_SPF_post1FMT",
Study == "IMU_mother" & FMT == "SPF" & Sample == "Term" ~ "Mother_SPF_Term",
Study == "IMU_mother" & FMT == "Wild" & Sample == "Pre_ABX" ~ "Mother_Wild_PreABX",
TRUE ~ NA_character_
),
Group = factor(Group, levels = c(
"BALB/c_No_SPF_weaning", "BALB/c_No_Wild_weaning",
"BALB/c_Yes_SPF_Term", "BALB/c_No_Wild_colon",
"Wildling", "Pet Shop", "Wild",
"Mother_SPF_PreABX", "Mother_SPF_post1FMT", "Mother_SPF_Term",
"Mother_Wild_PreABX",
"Donor_SPF", "Donor_Wild",
"Negative_Control", "Mock_Positive"
))
)
dim(mt)
## [1] 133 17
table(mt$Group, useNA = "ifany")
##
## BALB/c_No_SPF_weaning BALB/c_No_Wild_weaning BALB/c_Yes_SPF_Term
## 19 20 14
## BALB/c_No_Wild_colon Wildling Pet Shop
## 14 11 14
## Wild Mother_SPF_PreABX Mother_SPF_post1FMT
## 4 3 3
## Mother_SPF_Term Mother_Wild_PreABX Donor_SPF
## 3 6 3
## Donor_Wild Negative_Control Mock_Positive
## 3 13 3
colnames(ft)
## [1] "BRK97_P1" "BRK98_P1" "BRK99_P1" "BRK100_P1" "BRK101_P1" "BRK102_P1"
## [7] "BRK103_P1" "BRK104_P1" "BRK105_P1" "BRK106_P1" "BRK107_P1" "BRK108_P1"
## [13] "BRK109_P1" "BRK110_P1" "BRK111_P1" "BRK112_P1" "BRK113_P1" "BRK114_P1"
## [19] "BRK115_P1" "BRK116_P1" "BRK117_P1" "BRK118_P1" "BRK119_P1" "BRK120_P1"
## [25] "BRK121_P1" "BRK122_P1" "BRK123_P1" "BRK124_P1" "BRK125_P1" "BRK126_P1"
## [31] "BRK127_P1" "BRK128_P1" "BRK129_P1" "BRK130_P1" "BRK131_P1" "BRK132_P1"
## [37] "BRK133_P1" "BRK134_P1" "BRK135_P1" "BRK136_P1" "BRK137_P1" "BRK138_P1"
## [43] "BRK139_P1" "BRK141_P1" "BRK142_P1" "BRK143_P1" "BRK144_P1" "BRK145_P1"
## [49] "BRK146_P1" "BRK147_P1" "BRK148_P1" "BRK149_P1" "BRK150_P1" "BRK151_P1"
## [55] "BRK152_P1" "BRK153_P1" "BRK154_P1" "BRK155_P1" "BRK156_P1" "BRK157_P1"
## [61] "BRK158_P1" "BRK159_P1" "BRK160_P1" "BRK161_P1" "BRK162_P1" "BRK163_P1"
## [67] "BRK164_P1" "BRK165_P1" "BRK166_P1" "BRK167_P1" "BRK168_P1" "BRK169_P1"
## [73] "BRK77_P2" "BRK78_P2" "BRK80_P2" "BRK81_P2" "BRK82_P2" "BRK83_P2"
## [79] "BRK84_P2" "BRK85_P2" "BRK86_P2" "BRK87_P2" "BRK88_P2" "BRK89_P2"
## [85] "BRK90_P2" "BRK91_P2" "BRK92_P2" "BRK93_P2" "BRK94_P2" "BRK95_P2"
## [91] "BRK96_P2" "BRK01_P3" "BRK02_P3" "BRK03_P3" "BRK04_P3" "BRK05_P3"
## [97] "BRK06_P3" "BRK07_P3" "BRK08_P3" "BRK09_P3" "BRK10_P3" "BRK11_P3"
## [103] "BRK12_P3" "BRK13_P3" "BRK14_P3" "BRK15_P3" "BRK16_P3" "BRK17_P3"
## [109] "BRK18_P3" "BRK19_P3" "BRK20_P3" "BRK21_P3" "BRK22_P3" "BRK23_P3"
## [115] "BRK24_P3" "BRK25_P3" "BRK26_P3" "BRK65_P3" "BRK66_P3" "BRK67_P3"
## [121] "BRK68_P3" "BRK69_P3" "BRK70_P3" "BRK71_P3" "BRK72_P3" "BRK73_P3"
## [127] "BRK74_P3" "BRK75_P3" "BRK76_P3" "BRK77_P3" "BRK78_P3" "BRK79_P3"
## [133] "BRK80_P3"
rownames(mt)
## [1] "BRK97_P1" "BRK98_P1" "BRK99_P1" "BRK100_P1" "BRK101_P1" "BRK102_P1"
## [7] "BRK103_P1" "BRK104_P1" "BRK105_P1" "BRK106_P1" "BRK107_P1" "BRK108_P1"
## [13] "BRK109_P1" "BRK110_P1" "BRK111_P1" "BRK112_P1" "BRK113_P1" "BRK114_P1"
## [19] "BRK115_P1" "BRK116_P1" "BRK117_P1" "BRK118_P1" "BRK119_P1" "BRK120_P1"
## [25] "BRK121_P1" "BRK122_P1" "BRK123_P1" "BRK124_P1" "BRK125_P1" "BRK126_P1"
## [31] "BRK127_P1" "BRK128_P1" "BRK129_P1" "BRK130_P1" "BRK131_P1" "BRK132_P1"
## [37] "BRK133_P1" "BRK134_P1" "BRK135_P1" "BRK136_P1" "BRK137_P1" "BRK138_P1"
## [43] "BRK139_P1" "BRK141_P1" "BRK142_P1" "BRK143_P1" "BRK144_P1" "BRK145_P1"
## [49] "BRK146_P1" "BRK147_P1" "BRK148_P1" "BRK149_P1" "BRK150_P1" "BRK151_P1"
## [55] "BRK152_P1" "BRK153_P1" "BRK154_P1" "BRK155_P1" "BRK156_P1" "BRK157_P1"
## [61] "BRK158_P1" "BRK159_P1" "BRK160_P1" "BRK161_P1" "BRK162_P1" "BRK163_P1"
## [67] "BRK164_P1" "BRK165_P1" "BRK166_P1" "BRK167_P1" "BRK168_P1" "BRK169_P1"
## [73] "BRK77_P2" "BRK78_P2" "BRK80_P2" "BRK81_P2" "BRK82_P2" "BRK83_P2"
## [79] "BRK84_P2" "BRK85_P2" "BRK86_P2" "BRK87_P2" "BRK88_P2" "BRK89_P2"
## [85] "BRK90_P2" "BRK91_P2" "BRK92_P2" "BRK93_P2" "BRK94_P2" "BRK95_P2"
## [91] "BRK96_P2" "BRK01_P3" "BRK02_P3" "BRK03_P3" "BRK04_P3" "BRK05_P3"
## [97] "BRK06_P3" "BRK07_P3" "BRK08_P3" "BRK09_P3" "BRK10_P3" "BRK11_P3"
## [103] "BRK12_P3" "BRK13_P3" "BRK14_P3" "BRK15_P3" "BRK16_P3" "BRK17_P3"
## [109] "BRK18_P3" "BRK19_P3" "BRK20_P3" "BRK21_P3" "BRK22_P3" "BRK23_P3"
## [115] "BRK24_P3" "BRK25_P3" "BRK26_P3" "BRK65_P3" "BRK66_P3" "BRK67_P3"
## [121] "BRK68_P3" "BRK69_P3" "BRK70_P3" "BRK71_P3" "BRK72_P3" "BRK73_P3"
## [127] "BRK74_P3" "BRK75_P3" "BRK76_P3" "BRK77_P3" "BRK78_P3" "BRK79_P3"
## [133] "BRK80_P3"
# Verify rownames match before building
stopifnot(rownames(ft) == rownames(tx))
stopifnot(colnames(ft) == rownames(mt)) # sample names must match
pst = phyloseq(
otu_table(ft, taxa_are_rows = TRUE),
tax_table(as.matrix(tx)),
sample_data(mt)
)
pst
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 482 taxa and 133 samples ]
## sample_data() Sample Data: [ 133 samples by 17 sample variables ]
## tax_table() Taxonomy Table: [ 482 taxa by 7 taxonomic ranks ]
sample_data(pst)
## Seq_ID Study Mouse ID Strain Sex
## BRK97_P1 1 IMU 78 BALB/c Female
## BRK98_P1 2 IMU 79 BALB/c Female
## BRK99_P1 3 IMU 80 BALB/c Female
## BRK100_P1 4 IMU 81 BALB/c Male
## BRK101_P1 5 IMU 82 BALB/c Male
## BRK102_P1 6 IMU 83 BALB/c Male
## BRK103_P1 7 IMU 84 BALB/c Male
## BRK104_P1 8 IMU 85 BALB/c Male
## BRK105_P1 9 IMU 86 BALB/c Male
## BRK106_P1 10 IMU 87 BALB/c Male
## BRK107_P1 11 IMU 88 BALB/c Female
## BRK108_P1 12 IMU 89 BALB/c Female
## BRK109_P1 13 IMU 90 BALB/c Female
## BRK110_P1 14 IMU 91 BALB/c Female
## BRK111_P1 15 IMU 18 BALB/c Female
## BRK112_P1 16 IMU 20 BALB/c Female
## BRK113_P1 17 IMU 21 BALB/c Female
## BRK114_P1 18 IMU 23 BALB/c Female
## BRK115_P1 19 IMU 26 BALB/c Female
## BRK116_P1 20 IMU 28 BALB/c Female
## BRK117_P1 21 IMU 29 BALB/c Female
## BRK118_P1 22 IMU 31 BALB/c Female
## BRK119_P1 23 IMU 34 BALB/c Male
## BRK120_P1 24 IMU 35 BALB/c Male
## BRK121_P1 25 IMU 37 BALB/c Male
## BRK122_P1 26 IMU 39 BALB/c Male
## BRK123_P1 27 IMU 40 BALB/c Male
## BRK124_P1 28 IMU 44 BALB/c Male
## BRK125_P1 29 IMU_mother 6 BALB/c Female
## BRK126_P1 30 IMU_mother 12 BALB/c Female
## BRK127_P1 31 IMU_mother 16 BALB/c Female
## BRK128_P1 32 IMU_mother 154 BALB/c Female
## BRK129_P1 33 IMU_mother 19 BALB/c Female
## BRK130_P1 34 IMU_mother 3 BALB/c Female
## BRK131_P1 35 IMU_mother 13 BALB/c Female
## BRK132_P1 36 IMU_mother 20 BALB/c Female
## BRK133_P1 37 IMU_mother 5 BALB/c Female
## BRK134_P1 38 IMU_mother 6 BALB/c Female
## BRK135_P1 39 IMU_mother 12 BALB/c Female
## BRK136_P1 40 IMU_mother 16 BALB/c Female
## BRK137_P1 47 IMU_mother 6 BALB/c Female
## BRK138_P1 48 IMU_mother 12 BALB/c Female
## BRK139_P1 49 IMU_mother 16 BALB/c Female
## BRK141_P1 51 WILD 2 Wildling Female
## BRK142_P1 52 WILD 3 Wildling Female
## BRK143_P1 53 WILD 4 Wildling Female
## BRK144_P1 54 WILD 5 Wildling Female
## BRK145_P1 55 WILD 6 Wildling Female
## BRK146_P1 56 WILD 8 Wildling Male
## BRK147_P1 57 WILD 9 Wildling Male
## BRK148_P1 58 WILD 10 Wildling Male
## BRK149_P1 59 WILD 11 Wildling Male
## BRK150_P1 60 WILD 12 Wildling Male
## BRK151_P1 61 WILD 13 Wildling Male
## BRK152_P1 62 WILD 14 Pet Shop Male
## BRK153_P1 63 WILD 15 Pet Shop Male
## BRK154_P1 64 WILD 16 Pet Shop Male
## BRK155_P1 65 WILD 17 Pet Shop Male
## BRK156_P1 66 WILD 18 Pet Shop Male
## BRK157_P1 67 WILD 19 Pet Shop Male
## BRK158_P1 68 WILD 20 Pet Shop Male
## BRK159_P1 69 WILD 21 Pet Shop Female
## BRK160_P1 70 WILD 22 Pet Shop Female
## BRK161_P1 71 WILD 23 Pet Shop Female
## BRK162_P1 72 WILD 24 Pet Shop Female
## BRK163_P1 73 WILD 25 Pet Shop Female
## BRK164_P1 74 WILD 26 Pet Shop Female
## BRK165_P1 75 WILD 27 Pet Shop Female
## BRK166_P1 76 WILD 28 Wild Female
## BRK167_P1 77 WILD 29 Wild Male
## BRK168_P1 78 WILD 30 Wild Female
## BRK169_P1 79 WILD 31 Wild Female
## BRK77_P2 184 IMU <NA> BALB/c Male
## BRK78_P2 185 IMU <NA> BALB/c Male
## BRK80_P2 187 IMU <NA> BALB/c Male
## BRK81_P2 188 IMU <NA> BALB/c Male
## BRK82_P2 189 IMU <NA> BALB/c Male
## BRK83_P2 190 IMU <NA> BALB/c Male
## BRK84_P2 191 IMU <NA> BALB/c Male
## BRK85_P2 192 IMU <NA> BALB/c Male
## BRK86_P2 193 IMU <NA> BALB/c Male
## BRK87_P2 194 IMU <NA> BALB/c Female
## BRK88_P2 195 IMU <NA> BALB/c Female
## BRK89_P2 196 IMU <NA> BALB/c Female
## BRK90_P2 197 IMU <NA> BALB/c Female
## BRK91_P2 198 IMU <NA> BALB/c Female
## BRK92_P2 199 IMU <NA> BALB/c Female
## BRK93_P2 200 IMU <NA> BALB/c Female
## BRK94_P2 201 IMU <NA> BALB/c Female
## BRK95_P2 202 IMU <NA> BALB/c Female
## BRK96_P2 203 IMU <NA> BALB/c Female
## BRK01_P3 204 IMU <NA> BALB/c Male
## BRK02_P3 205 IMU <NA> BALB/c Male
## BRK03_P3 206 IMU <NA> BALB/c Male
## BRK04_P3 207 IMU <NA> BALB/c Male
## BRK05_P3 208 IMU <NA> BALB/c Male
## BRK06_P3 209 IMU <NA> BALB/c Male
## BRK07_P3 210 IMU <NA> BALB/c Male
## BRK08_P3 211 IMU <NA> BALB/c Male
## BRK09_P3 212 IMU <NA> BALB/c Male
## BRK10_P3 213 IMU <NA> BALB/c Male
## BRK11_P3 214 IMU <NA> BALB/c Female
## BRK12_P3 215 IMU <NA> BALB/c Female
## BRK13_P3 216 IMU <NA> BALB/c Female
## BRK14_P3 217 IMU <NA> BALB/c Female
## BRK15_P3 218 IMU <NA> BALB/c Female
## BRK16_P3 219 IMU <NA> BALB/c Female
## BRK17_P3 220 IMU <NA> BALB/c Female
## BRK18_P3 221 IMU <NA> BALB/c Female
## BRK19_P3 222 IMU <NA> BALB/c Female
## BRK20_P3 223 IMU <NA> BALB/c Female
## BRK21_P3 224 Donor_inoculum_Wild(Zahir) DW Wild <NA>
## BRK22_P3 225 Donor_inoculum_Wild_resampling_1 DWr1 Wild <NA>
## BRK23_P3 226 Donor_inoculum_Wild_resampling_2 DWr2 Wild <NA>
## BRK24_P3 227 Donor_inoculum_SPF(WP1Amix) DS BALB/c <NA>
## BRK25_P3 228 Donor_inoculum_SPF_resampling_1 DSr1 BALB/c <NA>
## BRK26_P3 229 Donor_inoculum_SPF_resampling_2 DSr2 BALB/c <NA>
## BRK65_P3 268 Negative Control <NA> <NA> <NA>
## BRK66_P3 269 Negative Control <NA> <NA> <NA>
## BRK67_P3 270 Negative Control <NA> <NA> <NA>
## BRK68_P3 271 Negative Control <NA> <NA> <NA>
## BRK69_P3 272 Negative Control <NA> <NA> <NA>
## BRK70_P3 273 Negative Control <NA> <NA> <NA>
## BRK71_P3 274 Negative Control <NA> <NA> <NA>
## BRK72_P3 275 Negative Control <NA> <NA> <NA>
## BRK73_P3 276 Negative Control <NA> <NA> <NA>
## BRK74_P3 277 Negative Control <NA> <NA> <NA>
## BRK75_P3 278 Negative Control (NC1) <NA> <NA> <NA>
## BRK76_P3 279 Negative Control (NC2) <NA> <NA> <NA>
## BRK77_P3 280 Negative Control (NC3) <NA> <NA> <NA>
## BRK78_P3 281 Mock Positive (PC1) <NA> <NA> <NA>
## BRK79_P3 282 Mock Positive (PC2) <NA> <NA> <NA>
## BRK80_P3 283 Mock Positive (PC3) <NA> <NA> <NA>
## Mother ID Cage Isolater Eartag Pre-immunized Pre-mix FMT
## BRK97_P1 6 6 1 3 Yes Falken_and_new SPF
## BRK98_P1 6 6 1 30 Yes Falken_and_new SPF
## BRK99_P1 6 6 1 5 Yes Falken_and_new SPF
## BRK100_P1 6 7 4 3 Yes Falken_and_new SPF
## BRK101_P1 6 7 4 30 Yes Falken_and_new SPF
## BRK102_P1 6 7 4 5 Yes Falken_and_new SPF
## BRK103_P1 112 8 4 3 Yes Falken_and_new SPF
## BRK104_P1 112 8 4 30 Yes Falken_and_new SPF
## BRK105_P1 112 8 4 5 Yes Falken_and_new SPF
## BRK106_P1 112 8 4 50 Yes Falken_and_new SPF
## BRK107_P1 112 9 1 3 Yes Falken_and_new SPF
## BRK108_P1 112 9 1 30 Yes Falken_and_new SPF
## BRK109_P1 112 9 1 5 Yes Falken_and_new SPF
## BRK110_P1 16 9 1 50 Yes Falken_and_new SPF
## BRK111_P1 45 1 2 3 No No Wild
## BRK112_P1 45 1 2 5 No No Wild
## BRK113_P1 46 2 2 3 No No Wild
## BRK114_P1 46 2 2 5 No No Wild
## BRK115_P1 40 3 2 3 No No Wild
## BRK116_P1 40 3 2 5 No No Wild
## BRK117_P1 46 4 2 3 No No Wild
## BRK118_P1 38 4 2 5 No No Wild
## BRK119_P1 15 1 3 3 No No Wild
## BRK120_P1 15 1 3 5 No No Wild
## BRK121_P1 46 2 3 3 No No Wild
## BRK122_P1 1 3 3 3 No No Wild
## BRK123_P1 1 3 3 5 No No Wild
## BRK124_P1 6 4 3 5 No No Wild
## BRK125_P1 NA NA NA NA <NA> <NA> SPF
## BRK126_P1 NA NA NA NA <NA> <NA> SPF
## BRK127_P1 NA NA NA NA <NA> <NA> SPF
## BRK128_P1 NA NA NA NA <NA> <NA> Wild
## BRK129_P1 NA NA NA NA <NA> <NA> Wild
## BRK130_P1 NA NA NA NA <NA> <NA> Wild
## BRK131_P1 NA NA NA NA <NA> <NA> Wild
## BRK132_P1 NA NA NA NA <NA> <NA> Wild
## BRK133_P1 NA NA NA NA <NA> <NA> Wild
## BRK134_P1 NA NA NA NA <NA> <NA> SPF
## BRK135_P1 NA NA NA NA <NA> <NA> SPF
## BRK136_P1 NA NA NA NA <NA> <NA> SPF
## BRK137_P1 NA NA NA NA <NA> <NA> SPF
## BRK138_P1 NA NA NA NA <NA> <NA> SPF
## BRK139_P1 NA NA NA NA <NA> <NA> SPF
## BRK141_P1 NA NA NA NA <NA> <NA> <NA>
## BRK142_P1 NA NA NA NA <NA> <NA> <NA>
## BRK143_P1 NA NA NA NA <NA> <NA> <NA>
## BRK144_P1 NA NA NA NA <NA> <NA> <NA>
## BRK145_P1 NA NA NA NA <NA> <NA> <NA>
## BRK146_P1 NA NA NA NA <NA> <NA> <NA>
## BRK147_P1 NA NA NA NA <NA> <NA> <NA>
## BRK148_P1 NA NA NA NA <NA> <NA> <NA>
## BRK149_P1 NA NA NA NA <NA> <NA> <NA>
## BRK150_P1 NA NA NA NA <NA> <NA> <NA>
## BRK151_P1 NA NA NA NA <NA> <NA> <NA>
## BRK152_P1 NA NA NA NA <NA> <NA> <NA>
## BRK153_P1 NA NA NA NA <NA> <NA> <NA>
## BRK154_P1 NA NA NA NA <NA> <NA> <NA>
## BRK155_P1 NA NA NA NA <NA> <NA> <NA>
## BRK156_P1 NA NA NA NA <NA> <NA> <NA>
## BRK157_P1 NA NA NA NA <NA> <NA> <NA>
## BRK158_P1 NA NA NA NA <NA> <NA> <NA>
## BRK159_P1 NA NA NA NA <NA> <NA> <NA>
## BRK160_P1 NA NA NA NA <NA> <NA> <NA>
## BRK161_P1 NA NA NA NA <NA> <NA> <NA>
## BRK162_P1 NA NA NA NA <NA> <NA> <NA>
## BRK163_P1 NA NA NA NA <NA> <NA> <NA>
## BRK164_P1 NA NA NA NA <NA> <NA> <NA>
## BRK165_P1 NA NA NA NA <NA> <NA> <NA>
## BRK166_P1 NA NA NA NA <NA> <NA> <NA>
## BRK167_P1 NA NA NA NA <NA> <NA> <NA>
## BRK168_P1 NA NA NA NA <NA> <NA> <NA>
## BRK169_P1 NA NA NA NA <NA> <NA> <NA>
## BRK77_P2 NA NA NA NA No No SPF
## BRK78_P2 NA NA NA NA No No SPF
## BRK80_P2 NA NA NA NA No No SPF
## BRK81_P2 NA NA NA NA No No SPF
## BRK82_P2 NA NA NA NA No No SPF
## BRK83_P2 NA NA NA NA No No SPF
## BRK84_P2 NA NA NA NA No No SPF
## BRK85_P2 NA NA NA NA No No SPF
## BRK86_P2 NA NA NA NA No No SPF
## BRK87_P2 NA NA NA NA No No SPF
## BRK88_P2 NA NA NA NA No No SPF
## BRK89_P2 NA NA NA NA No No SPF
## BRK90_P2 NA NA NA NA No No SPF
## BRK91_P2 NA NA NA NA No No SPF
## BRK92_P2 NA NA NA NA No No SPF
## BRK93_P2 NA NA NA NA No No SPF
## BRK94_P2 NA NA NA NA No No SPF
## BRK95_P2 NA NA NA NA No No SPF
## BRK96_P2 NA NA NA NA No No SPF
## BRK01_P3 NA NA NA NA No No Wild
## BRK02_P3 NA NA NA NA No No Wild
## BRK03_P3 NA NA NA NA No No Wild
## BRK04_P3 NA NA NA NA No No Wild
## BRK05_P3 NA NA NA NA No No Wild
## BRK06_P3 NA NA NA NA No No Wild
## BRK07_P3 NA NA NA NA No No Wild
## BRK08_P3 NA NA NA NA No No Wild
## BRK09_P3 NA NA NA NA No No Wild
## BRK10_P3 NA NA NA NA No No Wild
## BRK11_P3 NA NA NA NA No No Wild
## BRK12_P3 NA NA NA NA No No Wild
## BRK13_P3 NA NA NA NA No No Wild
## BRK14_P3 NA NA NA NA No No Wild
## BRK15_P3 NA NA NA NA No No Wild
## BRK16_P3 NA NA NA NA No No Wild
## BRK17_P3 NA NA NA NA No No Wild
## BRK18_P3 NA NA NA NA No No Wild
## BRK19_P3 NA NA NA NA No No Wild
## BRK20_P3 NA NA NA NA No No Wild
## BRK21_P3 NA NA NA NA <NA> <NA> <NA>
## BRK22_P3 NA NA NA NA <NA> <NA> <NA>
## BRK23_P3 NA NA NA NA <NA> <NA> <NA>
## BRK24_P3 NA NA NA NA <NA> <NA> <NA>
## BRK25_P3 NA NA NA NA <NA> <NA> <NA>
## BRK26_P3 NA NA NA NA <NA> <NA> <NA>
## BRK65_P3 NA NA NA NA No No <NA>
## BRK66_P3 NA NA NA NA No No <NA>
## BRK67_P3 NA NA NA NA No No <NA>
## BRK68_P3 NA NA NA NA No No <NA>
## BRK69_P3 NA NA NA NA No No <NA>
## BRK70_P3 NA NA NA NA No No <NA>
## BRK71_P3 NA NA NA NA No No <NA>
## BRK72_P3 NA NA NA NA No No <NA>
## BRK73_P3 NA NA NA NA No No <NA>
## BRK74_P3 NA NA NA NA No No <NA>
## BRK75_P3 NA NA NA NA No No <NA>
## BRK76_P3 NA NA NA NA No No <NA>
## BRK77_P3 NA NA NA NA No No <NA>
## BRK78_P3 NA NA NA NA No No <NA>
## BRK79_P3 NA NA NA NA No No <NA>
## BRK80_P3 NA NA NA NA No No <NA>
## Round Trial Sample Barcode
## BRK97_P1 3 Rikke Term BRK97_P1
## BRK98_P1 3 Rikke Term BRK98_P1
## BRK99_P1 3 Rikke Term BRK99_P1
## BRK100_P1 3 Rikke Term BRK100_P1
## BRK101_P1 3 Rikke Term BRK101_P1
## BRK102_P1 3 Rikke Term BRK102_P1
## BRK103_P1 3 Rikke Term BRK103_P1
## BRK104_P1 3 Rikke Term BRK104_P1
## BRK105_P1 3 Rikke Term BRK105_P1
## BRK106_P1 3 Rikke Term BRK106_P1
## BRK107_P1 3 Rikke Term BRK107_P1
## BRK108_P1 3 Rikke Term BRK108_P1
## BRK109_P1 3 Rikke Term BRK109_P1
## BRK110_P1 3 Rikke Term BRK110_P1
## BRK111_P1 1 Therese Term, colon content BRK111_P1
## BRK112_P1 1 Therese Term, colon content BRK112_P1
## BRK113_P1 1 Therese Term, colon content BRK113_P1
## BRK114_P1 1 Therese Term, colon content BRK114_P1
## BRK115_P1 1 Therese Term, colon content BRK115_P1
## BRK116_P1 1 Therese Term, colon content BRK116_P1
## BRK117_P1 1 Therese Term, colon content BRK117_P1
## BRK118_P1 1 Therese Term, colon content BRK118_P1
## BRK119_P1 1 Therese Term, colon content BRK119_P1
## BRK120_P1 1 Therese Term, colon content BRK120_P1
## BRK121_P1 1 Therese Term, colon content BRK121_P1
## BRK122_P1 1 Therese Term, colon content BRK122_P1
## BRK123_P1 1 Therese Term, colon content BRK123_P1
## BRK124_P1 1 Therese Term, colon content BRK124_P1
## BRK125_P1 NA Rikke Pre_ABX BRK125_P1
## BRK126_P1 NA Rikke Pre_ABX BRK126_P1
## BRK127_P1 NA Rikke Pre_ABX BRK127_P1
## BRK128_P1 NA Therese Pre_ABX BRK128_P1
## BRK129_P1 NA Therese Pre_ABX BRK129_P1
## BRK130_P1 NA Therese Pre_ABX BRK130_P1
## BRK131_P1 NA Therese Pre_ABX BRK131_P1
## BRK132_P1 NA Therese Pre_ABX BRK132_P1
## BRK133_P1 NA Therese Pre_ABX BRK133_P1
## BRK134_P1 NA Rikke post_1.FMT BRK134_P1
## BRK135_P1 NA Rikke post_1.FMT BRK135_P1
## BRK136_P1 NA Rikke post_1.FMT BRK136_P1
## BRK137_P1 NA Rikke Term BRK137_P1
## BRK138_P1 NA Rikke Term BRK138_P1
## BRK139_P1 NA Rikke Term BRK139_P1
## BRK141_P1 NA Ida Term BRK141_P1
## BRK142_P1 NA Ida Term BRK142_P1
## BRK143_P1 NA Ida Term BRK143_P1
## BRK144_P1 NA Ida Term BRK144_P1
## BRK145_P1 NA Ida Term BRK145_P1
## BRK146_P1 NA Ida Term BRK146_P1
## BRK147_P1 NA Ida Term BRK147_P1
## BRK148_P1 NA Ida Term BRK148_P1
## BRK149_P1 NA Ida Term BRK149_P1
## BRK150_P1 NA Ida Term BRK150_P1
## BRK151_P1 NA Ida Term BRK151_P1
## BRK152_P1 NA Ida Term BRK152_P1
## BRK153_P1 NA Ida Term BRK153_P1
## BRK154_P1 NA Ida Term BRK154_P1
## BRK155_P1 NA Ida Term BRK155_P1
## BRK156_P1 NA Ida Term BRK156_P1
## BRK157_P1 NA Ida Term BRK157_P1
## BRK158_P1 NA Ida Term BRK158_P1
## BRK159_P1 NA Ida Term BRK159_P1
## BRK160_P1 NA Ida Term BRK160_P1
## BRK161_P1 NA Ida Term BRK161_P1
## BRK162_P1 NA Ida Term BRK162_P1
## BRK163_P1 NA Ida Term BRK163_P1
## BRK164_P1 NA Ida Term BRK164_P1
## BRK165_P1 NA Ida Term BRK165_P1
## BRK166_P1 NA Ida Term BRK166_P1
## BRK167_P1 NA Ida Term BRK167_P1
## BRK168_P1 NA Ida Term BRK168_P1
## BRK169_P1 NA Ida Term BRK169_P1
## BRK77_P2 1 Therese weaning BRK77_P2
## BRK78_P2 1 Therese weaning BRK78_P2
## BRK80_P2 1 Therese weaning BRK80_P2
## BRK81_P2 1 Therese weaning BRK81_P2
## BRK82_P2 1 Therese weaning BRK82_P2
## BRK83_P2 1 Therese weaning BRK83_P2
## BRK84_P2 1 Therese weaning BRK84_P2
## BRK85_P2 1 Therese weaning BRK85_P2
## BRK86_P2 1 Therese weaning BRK86_P2
## BRK87_P2 1 Therese weaning BRK87_P2
## BRK88_P2 1 Therese weaning BRK88_P2
## BRK89_P2 1 Therese weaning BRK89_P2
## BRK90_P2 1 Therese weaning BRK90_P2
## BRK91_P2 1 Therese weaning BRK91_P2
## BRK92_P2 1 Therese weaning BRK92_P2
## BRK93_P2 1 Therese weaning BRK93_P2
## BRK94_P2 1 Therese weaning BRK94_P2
## BRK95_P2 1 Therese weaning BRK95_P2
## BRK96_P2 1 Therese weaning BRK96_P2
## BRK01_P3 1 Therese weaning BRK01_P3
## BRK02_P3 1 Therese weaning BRK02_P3
## BRK03_P3 1 Therese weaning BRK03_P3
## BRK04_P3 1 Therese weaning BRK04_P3
## BRK05_P3 1 Therese weaning BRK05_P3
## BRK06_P3 1 Therese weaning BRK06_P3
## BRK07_P3 1 Therese weaning BRK07_P3
## BRK08_P3 1 Therese weaning BRK08_P3
## BRK09_P3 1 Therese weaning BRK09_P3
## BRK10_P3 1 Therese weaning BRK10_P3
## BRK11_P3 1 Therese weaning BRK11_P3
## BRK12_P3 1 Therese weaning BRK12_P3
## BRK13_P3 1 Therese weaning BRK13_P3
## BRK14_P3 1 Therese weaning BRK14_P3
## BRK15_P3 1 Therese weaning BRK15_P3
## BRK16_P3 1 Therese weaning BRK16_P3
## BRK17_P3 1 Therese weaning BRK17_P3
## BRK18_P3 1 Therese weaning BRK18_P3
## BRK19_P3 1 Therese weaning BRK19_P3
## BRK20_P3 1 Therese weaning BRK20_P3
## BRK21_P3 NA <NA> donor, only liquid no feces BRK21_P3
## BRK22_P3 NA <NA> donor, only liquid no feces BRK22_P3
## BRK23_P3 NA <NA> donor, only liquid no feces BRK23_P3
## BRK24_P3 NA <NA> donor, only liquid no feces BRK24_P3
## BRK25_P3 NA <NA> donor, only liquid no feces BRK25_P3
## BRK26_P3 NA <NA> donor, only liquid no feces BRK26_P3
## BRK65_P3 NA <NA> <NA> BRK65_P3
## BRK66_P3 NA <NA> <NA> BRK66_P3
## BRK67_P3 NA <NA> <NA> BRK67_P3
## BRK68_P3 NA <NA> <NA> BRK68_P3
## BRK69_P3 NA <NA> <NA> BRK69_P3
## BRK70_P3 NA <NA> <NA> BRK70_P3
## BRK71_P3 NA <NA> <NA> BRK71_P3
## BRK72_P3 NA <NA> <NA> BRK72_P3
## BRK73_P3 NA <NA> <NA> BRK73_P3
## BRK74_P3 NA <NA> <NA> BRK74_P3
## BRK75_P3 NA <NA> <NA> BRK75_P3
## BRK76_P3 NA <NA> <NA> BRK76_P3
## BRK77_P3 NA <NA> <NA> BRK77_P3
## BRK78_P3 NA <NA> <NA> BRK78_P3
## BRK79_P3 NA <NA> <NA> BRK79_P3
## BRK80_P3 NA <NA> <NA> BRK80_P3
## Group
## BRK97_P1 BALB/c_Yes_SPF_Term
## BRK98_P1 BALB/c_Yes_SPF_Term
## BRK99_P1 BALB/c_Yes_SPF_Term
## BRK100_P1 BALB/c_Yes_SPF_Term
## BRK101_P1 BALB/c_Yes_SPF_Term
## BRK102_P1 BALB/c_Yes_SPF_Term
## BRK103_P1 BALB/c_Yes_SPF_Term
## BRK104_P1 BALB/c_Yes_SPF_Term
## BRK105_P1 BALB/c_Yes_SPF_Term
## BRK106_P1 BALB/c_Yes_SPF_Term
## BRK107_P1 BALB/c_Yes_SPF_Term
## BRK108_P1 BALB/c_Yes_SPF_Term
## BRK109_P1 BALB/c_Yes_SPF_Term
## BRK110_P1 BALB/c_Yes_SPF_Term
## BRK111_P1 BALB/c_No_Wild_colon
## BRK112_P1 BALB/c_No_Wild_colon
## BRK113_P1 BALB/c_No_Wild_colon
## BRK114_P1 BALB/c_No_Wild_colon
## BRK115_P1 BALB/c_No_Wild_colon
## BRK116_P1 BALB/c_No_Wild_colon
## BRK117_P1 BALB/c_No_Wild_colon
## BRK118_P1 BALB/c_No_Wild_colon
## BRK119_P1 BALB/c_No_Wild_colon
## BRK120_P1 BALB/c_No_Wild_colon
## BRK121_P1 BALB/c_No_Wild_colon
## BRK122_P1 BALB/c_No_Wild_colon
## BRK123_P1 BALB/c_No_Wild_colon
## BRK124_P1 BALB/c_No_Wild_colon
## BRK125_P1 Mother_SPF_PreABX
## BRK126_P1 Mother_SPF_PreABX
## BRK127_P1 Mother_SPF_PreABX
## BRK128_P1 Mother_Wild_PreABX
## BRK129_P1 Mother_Wild_PreABX
## BRK130_P1 Mother_Wild_PreABX
## BRK131_P1 Mother_Wild_PreABX
## BRK132_P1 Mother_Wild_PreABX
## BRK133_P1 Mother_Wild_PreABX
## BRK134_P1 Mother_SPF_post1FMT
## BRK135_P1 Mother_SPF_post1FMT
## BRK136_P1 Mother_SPF_post1FMT
## BRK137_P1 Mother_SPF_Term
## BRK138_P1 Mother_SPF_Term
## BRK139_P1 Mother_SPF_Term
## BRK141_P1 Wildling
## BRK142_P1 Wildling
## BRK143_P1 Wildling
## BRK144_P1 Wildling
## BRK145_P1 Wildling
## BRK146_P1 Wildling
## BRK147_P1 Wildling
## BRK148_P1 Wildling
## BRK149_P1 Wildling
## BRK150_P1 Wildling
## BRK151_P1 Wildling
## BRK152_P1 Pet Shop
## BRK153_P1 Pet Shop
## BRK154_P1 Pet Shop
## BRK155_P1 Pet Shop
## BRK156_P1 Pet Shop
## BRK157_P1 Pet Shop
## BRK158_P1 Pet Shop
## BRK159_P1 Pet Shop
## BRK160_P1 Pet Shop
## BRK161_P1 Pet Shop
## BRK162_P1 Pet Shop
## BRK163_P1 Pet Shop
## BRK164_P1 Pet Shop
## BRK165_P1 Pet Shop
## BRK166_P1 Wild
## BRK167_P1 Wild
## BRK168_P1 Wild
## BRK169_P1 Wild
## BRK77_P2 BALB/c_No_SPF_weaning
## BRK78_P2 BALB/c_No_SPF_weaning
## BRK80_P2 BALB/c_No_SPF_weaning
## BRK81_P2 BALB/c_No_SPF_weaning
## BRK82_P2 BALB/c_No_SPF_weaning
## BRK83_P2 BALB/c_No_SPF_weaning
## BRK84_P2 BALB/c_No_SPF_weaning
## BRK85_P2 BALB/c_No_SPF_weaning
## BRK86_P2 BALB/c_No_SPF_weaning
## BRK87_P2 BALB/c_No_SPF_weaning
## BRK88_P2 BALB/c_No_SPF_weaning
## BRK89_P2 BALB/c_No_SPF_weaning
## BRK90_P2 BALB/c_No_SPF_weaning
## BRK91_P2 BALB/c_No_SPF_weaning
## BRK92_P2 BALB/c_No_SPF_weaning
## BRK93_P2 BALB/c_No_SPF_weaning
## BRK94_P2 BALB/c_No_SPF_weaning
## BRK95_P2 BALB/c_No_SPF_weaning
## BRK96_P2 BALB/c_No_SPF_weaning
## BRK01_P3 BALB/c_No_Wild_weaning
## BRK02_P3 BALB/c_No_Wild_weaning
## BRK03_P3 BALB/c_No_Wild_weaning
## BRK04_P3 BALB/c_No_Wild_weaning
## BRK05_P3 BALB/c_No_Wild_weaning
## BRK06_P3 BALB/c_No_Wild_weaning
## BRK07_P3 BALB/c_No_Wild_weaning
## BRK08_P3 BALB/c_No_Wild_weaning
## BRK09_P3 BALB/c_No_Wild_weaning
## BRK10_P3 BALB/c_No_Wild_weaning
## BRK11_P3 BALB/c_No_Wild_weaning
## BRK12_P3 BALB/c_No_Wild_weaning
## BRK13_P3 BALB/c_No_Wild_weaning
## BRK14_P3 BALB/c_No_Wild_weaning
## BRK15_P3 BALB/c_No_Wild_weaning
## BRK16_P3 BALB/c_No_Wild_weaning
## BRK17_P3 BALB/c_No_Wild_weaning
## BRK18_P3 BALB/c_No_Wild_weaning
## BRK19_P3 BALB/c_No_Wild_weaning
## BRK20_P3 BALB/c_No_Wild_weaning
## BRK21_P3 Donor_Wild
## BRK22_P3 Donor_Wild
## BRK23_P3 Donor_Wild
## BRK24_P3 Donor_SPF
## BRK25_P3 Donor_SPF
## BRK26_P3 Donor_SPF
## BRK65_P3 Negative_Control
## BRK66_P3 Negative_Control
## BRK67_P3 Negative_Control
## BRK68_P3 Negative_Control
## BRK69_P3 Negative_Control
## BRK70_P3 Negative_Control
## BRK71_P3 Negative_Control
## BRK72_P3 Negative_Control
## BRK73_P3 Negative_Control
## BRK74_P3 Negative_Control
## BRK75_P3 Negative_Control
## BRK76_P3 Negative_Control
## BRK77_P3 Negative_Control
## BRK78_P3 Mock_Positive
## BRK79_P3 Mock_Positive
## BRK80_P3 Mock_Positive
sum(rowSums(pst@otu_table) == 0) # not present in any sample
## [1] NA
# Remove unassigned and NA phylum taxa
pst <- subset_taxa(pst, !is.na(Phylum) & !Phylum %in% c("", "uncharacterized", "unassigned"))
# Check kingdoms and keep only Bacteria
# Check whether there are archea (eucaryota) in the kingdom, and if so remove it and (keeping only bacterial kingdom)
pst@tax_table[,c(1)] %>% unique() # Viser den første kolonne af tax_table, altsp kingdom kolonnen. unique betyder at vi kun får en af hver slags.
## Taxonomy Table: [2 taxa by 1 taxonomic ranks]:
## Kingdom
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__ "Bacteria"
## Unassigned;__;__;__;__;__;__ "Unassigned"
pst <- subset_taxa(pst, Kingdom %in% "Bacteria") # her fjerner vi alt det andet så vi sørger for at vi kun har "bacteria".
pst
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 479 taxa and 133 samples ]
## sample_data() Sample Data: [ 133 samples by 17 sample variables ]
## tax_table() Taxonomy Table: [ 479 taxa by 7 taxonomic ranks ]
#### Taxonomic filtering based on (low) prevalence: supervsed
#monitoring the number of the samples in which the prevalence of a taxon is at least one
# Monitor number of samples in which the prevalence of a taxon is at least one
prevdf <- apply(otu_table(pst), ifelse(taxa_are_rows(pst), 1, 2), function(x){sum(x > 0)})
weight_df <- data.frame(
ASVprev = prevdf,
TaxaAbund = taxa_sums(pst),
phyloseq::tax_table(pst)
)
head(weight_df)
## ASVprev
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__ 2
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040 1
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis strain JCM 1190 12
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis subsp. lactis strain YIT 4121 0
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium bifidum strain KCTC 3202 0
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium breve DSM 20213 = JCM 1192 16
## TaxaAbund
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__ 3
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040 3
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis strain JCM 1190 365
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis subsp. lactis strain YIT 4121 0
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium bifidum strain KCTC 3202 0
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium breve DSM 20213 = JCM 1192 18
## Kingdom
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__ Bacteria
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040 Bacteria
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis strain JCM 1190 Bacteria
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis subsp. lactis strain YIT 4121 Bacteria
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium bifidum strain KCTC 3202 Bacteria
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium breve DSM 20213 = JCM 1192 Bacteria
## Phylum
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__ Actinomycetota
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040 Actinomycetota
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis strain JCM 1190 Actinomycetota
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis subsp. lactis strain YIT 4121 Actinomycetota
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium bifidum strain KCTC 3202 Actinomycetota
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium breve DSM 20213 = JCM 1192 Actinomycetota
## Class
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__ Actinomycetes
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040 Actinomycetes
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis strain JCM 1190 Actinomycetes
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis subsp. lactis strain YIT 4121 Actinomycetes
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium bifidum strain KCTC 3202 Actinomycetes
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium breve DSM 20213 = JCM 1192 Actinomycetes
## Order
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__ Actinomycetales
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040 Actinomycetales
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis strain JCM 1190 Bifidobacteriales
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis subsp. lactis strain YIT 4121 Bifidobacteriales
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium bifidum strain KCTC 3202 Bifidobacteriales
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium breve DSM 20213 = JCM 1192 Bifidobacteriales
## Family
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__ Actinomycetaceae
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040 Actinomycetaceae
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis strain JCM 1190 Bifidobacteriaceae
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis subsp. lactis strain YIT 4121 Bifidobacteriaceae
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium bifidum strain KCTC 3202 Bifidobacteriaceae
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium breve DSM 20213 = JCM 1192 Bifidobacteriaceae
## Genus
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__ Actinomyces
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040 Changpingibacter
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis strain JCM 1190 Bifidobacterium
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis subsp. lactis strain YIT 4121 Bifidobacterium
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium bifidum strain KCTC 3202 Bifidobacterium
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium breve DSM 20213 = JCM 1192 Bifidobacterium
## Species
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__ __
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040 Changpingibacter yushuensis strain JY-X040
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis strain JCM 1190 Bifidobacterium animalis strain JCM 1190
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis subsp. lactis strain YIT 4121 Bifidobacterium animalis subsp. lactis strain YIT 4121
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium bifidum strain KCTC 3202 Bifidobacterium bifidum strain KCTC 3202
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium breve DSM 20213 = JCM 1192 Bifidobacterium breve DSM 20213 = JCM 1192
# Find phyla that are of mostly low-prevalence
plyr::ddply(weight_df, "Phylum", function(x){
cbind(means = round(mean(x$ASVprev), 2), sums = round(sum(x$ASVprev), 2))
}) %>% mutate(sanity = ifelse(means == sums, "TRUE", "FALSE"))
## Phylum means sums sanity
## 1 __ 121.00 121 TRUE
## 2 Actinomycetota 16.66 783 FALSE
## 3 Bacillota 24.46 6702 FALSE
## 4 Bacteroidota 43.18 3066 FALSE
## 5 Campylobacterota 23.45 258 FALSE
## 6 Candidatus Saccharibacteria 83.00 83 TRUE
## 7 Deferribacterota 32.50 65 FALSE
## 8 Elusimicrobiota 12.00 12 TRUE
## 9 Fusobacteriota 6.67 20 FALSE
## 10 Mycoplasmatota 14.64 161 FALSE
## 11 Pseudomonadota 17.14 634 FALSE
## 12 Spirochaetota 13.67 41 FALSE
## 13 Synergistota 1.50 3 FALSE
## 14 Thermodesulfobacteriota 45.89 413 FALSE
## 15 Verrucomicrobiota 19.83 119 FALSE
# A function to find singletones — inspired by Nutella2.R script from Ida Wang Henriksen
out.ASV = function(phyloseq, threshold = 1, binwidth = 0.01) {
pacman::p_load(glue, tidyverse, reshape2, ggrepel, S4Vectors)
if (sum(colSums(otu_table(phyloseq))) / ncol(otu_table(phyloseq)) == 100) {
rel_abund = as(t(otu_table(phyloseq)), "matrix")
} else if (sum(colSums(otu_table(phyloseq))) / ncol(otu_table(phyloseq)) == 1) {
rel_abund = as(t(otu_table(phyloseq)), "matrix")
} else {
rel_abund = as(t(apply(otu_table(phyloseq),
ifelse(taxa_are_rows(phyloseq), 1, 2),
function(x) x / sum(x))), "matrix")
}
names.single = apply(rel_abund, 1, function(x){
ifelse(x == threshold, TRUE, ifelse(x == sum(x), TRUE, FALSE))
}) %>% reshape2::melt() %>%
filter(value == TRUE) %>%
dplyr::select(2) %>%
pull %>% as.vector()
if (length(names.single) == 0) {
print(glue("WOW! {length(names.single)} singletones detected in this dataset"))
qplot.noSing = qplot(rel_abund, geom = "histogram", binwidth = binwidth,
show.legend = F,
main = "Frequency count of relative abundance, no singletones detected") +
xlab("Relative abundance in samples") + ylab("Frequency") + theme_bw()
return(structure(list(qplot.noSing)))
} else {
single.ASV = rel_abund[rownames(rel_abund) %in% names.single, ]
single.ASV[single.ASV == 0] <- NA
qplot.withSing = qplot(rel_abund, geom = "histogram", binwidth = binwidth,
main = "Frequency count of relative abundance with singletones") +
geom_bar(aes(single.ASV), fill = "red", color = NA, width = binwidth) +
xlab("Relative abundance in samples") + ylab("Frequency") +
geom_label_repel(aes(x = 1, y = length(rel_abund) / 5),
label.padding = unit(0.55, "lines"),
label = glue("{length(names.single)}\n Singletones"), color = "black") +
theme_bw()
qplot.rmSing = qplot(rel_abund[!rownames(rel_abund) %in% names.single, ],
geom = "histogram", binwidth = binwidth,
main = "Frequency count of relative abundance without singletones") +
xlab("Relative abundance in samples") + ylab("Frequency") + theme_bw()
print(glue("Oh no..! {length(names.single)} singletones detected in the dataset"))
return(structure(list(qplot.withSing, qplot.rmSing, unlist(names.single))))
}
}
single.test = out.ASV(phyloseq = pst, threshold = 1, binwidth = 0.01) # threshold = 1 betyder relativ abundans = 1 (100%) i ét sample
## Oh no..! 61 singletones detected in the dataset
singletones = single.test[[3]] # extract names of singletones
single.test[[2]] # plot without singletones
# removing singletones from
pst = subset_taxa(pst, !taxa_names(pst) %in% singletones)
rm(single.test)
pst
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 418 taxa and 133 samples ]
## sample_data() Sample Data: [ 133 samples by 17 sample variables ]
## tax_table() Taxonomy Table: [ 418 taxa by 7 taxonomic ranks ]
# Flag negative controls using Group — only Negative_Control, not Mock_Positive
sample_data(pst)$is.neg <- ifelse(
sample_data(pst)$Group == "Negative_Control", TRUE, FALSE
)
table(sample_data(pst)$is.neg)
##
## FALSE TRUE
## 120 13
# Run decontam using prevalence method
contam_df <- isContaminant(pst, method = "prevalence", neg = "is.neg", threshold = 0.1)
contam_df %>% filter(contaminant == TRUE)
## freq
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;__ 0.0303451318
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Propionibacteriales;F__Propionibacteriaceae;G__Cutibacterium;__ 0.0007522273
## K__Bacteria;P__Actinomycetota;C__Coriobacteriia;O__Coriobacteriales;F__Coriobacteriaceae;G__Collinsella;S__Collinsella aerofaciens strain JCM 10188 0.0053235593
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Lapidilactobacillus;__ 0.0002455918
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Leuconostoc;__ 0.0016770835
## K__Bacteria;P__Bacillota;C__Clostridia;O__Eubacteriales;F__;G__Gemmiger;S__Gemmiger qucibialis 0.0002432113
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia faecis strain M25 0.0020541771
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia wexlerae DSM 19850 0.0033323479
## prev
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;__ 73
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Propionibacteriales;F__Propionibacteriaceae;G__Cutibacterium;__ 2
## K__Bacteria;P__Actinomycetota;C__Coriobacteriia;O__Coriobacteriales;F__Coriobacteriaceae;G__Collinsella;S__Collinsella aerofaciens strain JCM 10188 16
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Lapidilactobacillus;__ 2
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Leuconostoc;__ 4
## K__Bacteria;P__Bacillota;C__Clostridia;O__Eubacteriales;F__;G__Gemmiger;S__Gemmiger qucibialis 2
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia faecis strain M25 4
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia wexlerae DSM 19850 16
## p.freq
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;__ NA
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Propionibacteriales;F__Propionibacteriaceae;G__Cutibacterium;__ NA
## K__Bacteria;P__Actinomycetota;C__Coriobacteriia;O__Coriobacteriales;F__Coriobacteriaceae;G__Collinsella;S__Collinsella aerofaciens strain JCM 10188 NA
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Lapidilactobacillus;__ NA
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Leuconostoc;__ NA
## K__Bacteria;P__Bacillota;C__Clostridia;O__Eubacteriales;F__;G__Gemmiger;S__Gemmiger qucibialis NA
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia faecis strain M25 NA
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia wexlerae DSM 19850 NA
## p.prev
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;__ 0.0052168751
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Propionibacteriales;F__Propionibacteriaceae;G__Cutibacterium;__ 0.0977443609
## K__Bacteria;P__Actinomycetota;C__Coriobacteriia;O__Coriobacteriales;F__Coriobacteriaceae;G__Collinsella;S__Collinsella aerofaciens strain JCM 10188 0.0006626731
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Lapidilactobacillus;__ 0.0977443609
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Leuconostoc;__ 0.0251652727
## K__Bacteria;P__Bacillota;C__Clostridia;O__Eubacteriales;F__;G__Gemmiger;S__Gemmiger qucibialis 0.0977443609
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia faecis strain M25 0.0014348849
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia wexlerae DSM 19850 0.0306113033
## p
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;__ 0.0052168751
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Propionibacteriales;F__Propionibacteriaceae;G__Cutibacterium;__ 0.0977443609
## K__Bacteria;P__Actinomycetota;C__Coriobacteriia;O__Coriobacteriales;F__Coriobacteriaceae;G__Collinsella;S__Collinsella aerofaciens strain JCM 10188 0.0006626731
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Lapidilactobacillus;__ 0.0977443609
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Leuconostoc;__ 0.0251652727
## K__Bacteria;P__Bacillota;C__Clostridia;O__Eubacteriales;F__;G__Gemmiger;S__Gemmiger qucibialis 0.0977443609
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia faecis strain M25 0.0014348849
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia wexlerae DSM 19850 0.0306113033
## contaminant
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;__ TRUE
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Propionibacteriales;F__Propionibacteriaceae;G__Cutibacterium;__ TRUE
## K__Bacteria;P__Actinomycetota;C__Coriobacteriia;O__Coriobacteriales;F__Coriobacteriaceae;G__Collinsella;S__Collinsella aerofaciens strain JCM 10188 TRUE
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Lapidilactobacillus;__ TRUE
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Leuconostoc;__ TRUE
## K__Bacteria;P__Bacillota;C__Clostridia;O__Eubacteriales;F__;G__Gemmiger;S__Gemmiger qucibialis TRUE
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia faecis strain M25 TRUE
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia wexlerae DSM 19850 TRUE
# Remove contaminants
pst <- subset_taxa(pst,
!taxa_names(pst) %in% rownames(contam_df)[contam_df$contaminant == TRUE])
# Remove both negative controls and mock positives from data set so they won't be part of analysis
pst <- subset_samples(pst,
!sample_data(pst)$Group %in% c("Negative_Control", "Mock_Positive"))
sample_data(pst)$is.neg <- NULL
cat("After decontam and control removal:", ntaxa(pst), "OTUs,",
nsamples(pst), "samples\n")
## After decontam and control removal: 410 OTUs, 117 samples
# **`"After decontam and control removal:"`** — fast tekst der beskriver hvad der netop er sket
# **`ntaxa(pst)`** — tæller antal taxa tilbage i phyloseq objektet efter fjernelse af kontaminanter
# **`"OTUs,"`** — fast tekst
# **`nsamples(pst)`** — tæller antal samples tilbage efter fjernelse af negative kontroller og mock positives
# **`"samples\n"`** — fast tekst, `\n` laver et linjeskift efter beskeden
pst
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 410 taxa and 117 samples ]
## sample_data() Sample Data: [ 117 samples by 17 sample variables ]
## tax_table() Taxonomy Table: [ 410 taxa by 7 taxonomic ranks ]
# create rarefraction curve
ps_rar_curve <- MicrobiotaProcess::ggrarecurve(
obj = pst,
indexNames = c("Observe", "Shannon"),
chunks = 400,
theme(legend.spacing.y = unit(0.02, "cm"),
legend.text = element_text(size = 6)),
show.legend = FALSE
)
ps_rar_curve + theme_bw() +
geom_vline(xintercept = 15000, lty = 2, color = alpha("red", 0.5)) +
ggtitle("Rarefaction curves based on Richness and evenness",
"Each line is a sample")
# ggsave("./outputs/rarefaction_curve.jpeg", device = "jpeg", dpi = 300, height = 6, width = 9)
colSums(otu_table(pst)) %>% sort()
## BRK163_P1 BRK146_P1 BRK24_P3 BRK109_P1 BRK112_P1 BRK104_P1 BRK23_P3 BRK108_P1
## 1715 7515 8408 8904 10848 12493 12589 12639
## BRK97_P1 BRK152_P1 BRK168_P1 BRK101_P1 BRK103_P1 BRK107_P1 BRK142_P1 BRK156_P1
## 12647 13528 14397 15096 15837 15925 15985 16003
## BRK102_P1 BRK98_P1 BRK158_P1 BRK160_P1 BRK123_P1 BRK144_P1 BRK110_P1 BRK128_P1
## 16069 16308 16434 16708 17039 17262 17279 17591
## BRK05_P3 BRK145_P1 BRK111_P1 BRK150_P1 BRK07_P3 BRK22_P3 BRK99_P1 BRK169_P1
## 17613 17769 17899 18018 18086 18141 18279 18363
## BRK114_P1 BRK118_P1 BRK113_P1 BRK159_P1 BRK161_P1 BRK119_P1 BRK149_P1 BRK162_P1
## 18376 18548 18604 18679 18876 19028 19224 19225
## BRK148_P1 BRK115_P1 BRK153_P1 BRK131_P1 BRK143_P1 BRK164_P1 BRK21_P3 BRK154_P1
## 19397 19427 19458 19506 19604 19634 19722 19999
## BRK139_P1 BRK165_P1 BRK138_P1 BRK88_P2 BRK96_P2 BRK77_P2 BRK122_P1 BRK116_P1
## 20089 20099 20178 20536 20614 20763 20780 20787
## BRK105_P1 BRK17_P3 BRK85_P2 BRK126_P1 BRK117_P1 BRK127_P1 BRK04_P3 BRK151_P1
## 20832 20897 20957 21018 21055 21098 21148 21321
## BRK06_P3 BRK94_P2 BRK134_P1 BRK19_P3 BRK01_P3 BRK20_P3 BRK120_P1 BRK92_P2
## 21326 21401 21482 21578 21621 21665 21898 21936
## BRK80_P2 BRK18_P3 BRK93_P2 BRK02_P3 BRK15_P3 BRK87_P2 BRK147_P1 BRK08_P3
## 22053 22148 22278 22293 22355 22460 22463 22467
## BRK121_P1 BRK03_P3 BRK124_P1 BRK95_P2 BRK125_P1 BRK155_P1 BRK84_P2 BRK167_P1
## 22502 22624 22686 22698 22764 22766 23101 23315
## BRK129_P1 BRK16_P3 BRK82_P2 BRK137_P1 BRK86_P2 BRK135_P1 BRK90_P2 BRK81_P2
## 23337 23482 23613 23709 23879 23916 23931 23963
## BRK91_P2 BRK157_P1 BRK12_P3 BRK133_P1 BRK106_P1 BRK09_P3 BRK78_P2 BRK83_P2
## 23963 23991 24147 24345 24348 24565 24606 24634
## BRK89_P2 BRK13_P3 BRK11_P3 BRK166_P1 BRK100_P1 BRK130_P1 BRK14_P3 BRK25_P3
## 24641 24879 24926 25215 25355 25426 25555 25557
## BRK132_P1 BRK10_P3 BRK26_P3 BRK136_P1 BRK141_P1
## 25737 25894 27318 27537 27751
range(colSums(otu_table(pst)))
## [1] 1715 27751
set.seed(123)
# Adjust rarefy_depth based on rarefaction curve above
rarefy_depth <- 8000 # <-- adjust as needed, chosen to avoid loosing too important samples
# Rarefaction (putting all samples to the same level of sequencing depth)
ps_rar = rarefy_even_depth(pst, sample.size = rarefy_depth, replace = FALSE)
sample_data(ps_rar)
## Seq_ID Study Mouse.ID Strain Sex
## BRK97_P1 1 IMU 78 BALB/c Female
## BRK98_P1 2 IMU 79 BALB/c Female
## BRK99_P1 3 IMU 80 BALB/c Female
## BRK100_P1 4 IMU 81 BALB/c Male
## BRK101_P1 5 IMU 82 BALB/c Male
## BRK102_P1 6 IMU 83 BALB/c Male
## BRK103_P1 7 IMU 84 BALB/c Male
## BRK104_P1 8 IMU 85 BALB/c Male
## BRK105_P1 9 IMU 86 BALB/c Male
## BRK106_P1 10 IMU 87 BALB/c Male
## BRK107_P1 11 IMU 88 BALB/c Female
## BRK108_P1 12 IMU 89 BALB/c Female
## BRK109_P1 13 IMU 90 BALB/c Female
## BRK110_P1 14 IMU 91 BALB/c Female
## BRK111_P1 15 IMU 18 BALB/c Female
## BRK112_P1 16 IMU 20 BALB/c Female
## BRK113_P1 17 IMU 21 BALB/c Female
## BRK114_P1 18 IMU 23 BALB/c Female
## BRK115_P1 19 IMU 26 BALB/c Female
## BRK116_P1 20 IMU 28 BALB/c Female
## BRK117_P1 21 IMU 29 BALB/c Female
## BRK118_P1 22 IMU 31 BALB/c Female
## BRK119_P1 23 IMU 34 BALB/c Male
## BRK120_P1 24 IMU 35 BALB/c Male
## BRK121_P1 25 IMU 37 BALB/c Male
## BRK122_P1 26 IMU 39 BALB/c Male
## BRK123_P1 27 IMU 40 BALB/c Male
## BRK124_P1 28 IMU 44 BALB/c Male
## BRK125_P1 29 IMU_mother 6 BALB/c Female
## BRK126_P1 30 IMU_mother 12 BALB/c Female
## BRK127_P1 31 IMU_mother 16 BALB/c Female
## BRK128_P1 32 IMU_mother 154 BALB/c Female
## BRK129_P1 33 IMU_mother 19 BALB/c Female
## BRK130_P1 34 IMU_mother 3 BALB/c Female
## BRK131_P1 35 IMU_mother 13 BALB/c Female
## BRK132_P1 36 IMU_mother 20 BALB/c Female
## BRK133_P1 37 IMU_mother 5 BALB/c Female
## BRK134_P1 38 IMU_mother 6 BALB/c Female
## BRK135_P1 39 IMU_mother 12 BALB/c Female
## BRK136_P1 40 IMU_mother 16 BALB/c Female
## BRK137_P1 47 IMU_mother 6 BALB/c Female
## BRK138_P1 48 IMU_mother 12 BALB/c Female
## BRK139_P1 49 IMU_mother 16 BALB/c Female
## BRK141_P1 51 WILD 2 Wildling Female
## BRK142_P1 52 WILD 3 Wildling Female
## BRK143_P1 53 WILD 4 Wildling Female
## BRK144_P1 54 WILD 5 Wildling Female
## BRK145_P1 55 WILD 6 Wildling Female
## BRK147_P1 57 WILD 9 Wildling Male
## BRK148_P1 58 WILD 10 Wildling Male
## BRK149_P1 59 WILD 11 Wildling Male
## BRK150_P1 60 WILD 12 Wildling Male
## BRK151_P1 61 WILD 13 Wildling Male
## BRK152_P1 62 WILD 14 Pet Shop Male
## BRK153_P1 63 WILD 15 Pet Shop Male
## BRK154_P1 64 WILD 16 Pet Shop Male
## BRK155_P1 65 WILD 17 Pet Shop Male
## BRK156_P1 66 WILD 18 Pet Shop Male
## BRK157_P1 67 WILD 19 Pet Shop Male
## BRK158_P1 68 WILD 20 Pet Shop Male
## BRK159_P1 69 WILD 21 Pet Shop Female
## BRK160_P1 70 WILD 22 Pet Shop Female
## BRK161_P1 71 WILD 23 Pet Shop Female
## BRK162_P1 72 WILD 24 Pet Shop Female
## BRK164_P1 74 WILD 26 Pet Shop Female
## BRK165_P1 75 WILD 27 Pet Shop Female
## BRK166_P1 76 WILD 28 Wild Female
## BRK167_P1 77 WILD 29 Wild Male
## BRK168_P1 78 WILD 30 Wild Female
## BRK169_P1 79 WILD 31 Wild Female
## BRK77_P2 184 IMU <NA> BALB/c Male
## BRK78_P2 185 IMU <NA> BALB/c Male
## BRK80_P2 187 IMU <NA> BALB/c Male
## BRK81_P2 188 IMU <NA> BALB/c Male
## BRK82_P2 189 IMU <NA> BALB/c Male
## BRK83_P2 190 IMU <NA> BALB/c Male
## BRK84_P2 191 IMU <NA> BALB/c Male
## BRK85_P2 192 IMU <NA> BALB/c Male
## BRK86_P2 193 IMU <NA> BALB/c Male
## BRK87_P2 194 IMU <NA> BALB/c Female
## BRK88_P2 195 IMU <NA> BALB/c Female
## BRK89_P2 196 IMU <NA> BALB/c Female
## BRK90_P2 197 IMU <NA> BALB/c Female
## BRK91_P2 198 IMU <NA> BALB/c Female
## BRK92_P2 199 IMU <NA> BALB/c Female
## BRK93_P2 200 IMU <NA> BALB/c Female
## BRK94_P2 201 IMU <NA> BALB/c Female
## BRK95_P2 202 IMU <NA> BALB/c Female
## BRK96_P2 203 IMU <NA> BALB/c Female
## BRK01_P3 204 IMU <NA> BALB/c Male
## BRK02_P3 205 IMU <NA> BALB/c Male
## BRK03_P3 206 IMU <NA> BALB/c Male
## BRK04_P3 207 IMU <NA> BALB/c Male
## BRK05_P3 208 IMU <NA> BALB/c Male
## BRK06_P3 209 IMU <NA> BALB/c Male
## BRK07_P3 210 IMU <NA> BALB/c Male
## BRK08_P3 211 IMU <NA> BALB/c Male
## BRK09_P3 212 IMU <NA> BALB/c Male
## BRK10_P3 213 IMU <NA> BALB/c Male
## BRK11_P3 214 IMU <NA> BALB/c Female
## BRK12_P3 215 IMU <NA> BALB/c Female
## BRK13_P3 216 IMU <NA> BALB/c Female
## BRK14_P3 217 IMU <NA> BALB/c Female
## BRK15_P3 218 IMU <NA> BALB/c Female
## BRK16_P3 219 IMU <NA> BALB/c Female
## BRK17_P3 220 IMU <NA> BALB/c Female
## BRK18_P3 221 IMU <NA> BALB/c Female
## BRK19_P3 222 IMU <NA> BALB/c Female
## BRK20_P3 223 IMU <NA> BALB/c Female
## BRK21_P3 224 Donor_inoculum_Wild(Zahir) DW Wild <NA>
## BRK22_P3 225 Donor_inoculum_Wild_resampling_1 DWr1 Wild <NA>
## BRK23_P3 226 Donor_inoculum_Wild_resampling_2 DWr2 Wild <NA>
## BRK24_P3 227 Donor_inoculum_SPF(WP1Amix) DS BALB/c <NA>
## BRK25_P3 228 Donor_inoculum_SPF_resampling_1 DSr1 BALB/c <NA>
## BRK26_P3 229 Donor_inoculum_SPF_resampling_2 DSr2 BALB/c <NA>
## Mother.ID Cage Isolater Eartag Pre.immunized Pre.mix FMT
## BRK97_P1 6 6 1 3 Yes Falken_and_new SPF
## BRK98_P1 6 6 1 30 Yes Falken_and_new SPF
## BRK99_P1 6 6 1 5 Yes Falken_and_new SPF
## BRK100_P1 6 7 4 3 Yes Falken_and_new SPF
## BRK101_P1 6 7 4 30 Yes Falken_and_new SPF
## BRK102_P1 6 7 4 5 Yes Falken_and_new SPF
## BRK103_P1 112 8 4 3 Yes Falken_and_new SPF
## BRK104_P1 112 8 4 30 Yes Falken_and_new SPF
## BRK105_P1 112 8 4 5 Yes Falken_and_new SPF
## BRK106_P1 112 8 4 50 Yes Falken_and_new SPF
## BRK107_P1 112 9 1 3 Yes Falken_and_new SPF
## BRK108_P1 112 9 1 30 Yes Falken_and_new SPF
## BRK109_P1 112 9 1 5 Yes Falken_and_new SPF
## BRK110_P1 16 9 1 50 Yes Falken_and_new SPF
## BRK111_P1 45 1 2 3 No No Wild
## BRK112_P1 45 1 2 5 No No Wild
## BRK113_P1 46 2 2 3 No No Wild
## BRK114_P1 46 2 2 5 No No Wild
## BRK115_P1 40 3 2 3 No No Wild
## BRK116_P1 40 3 2 5 No No Wild
## BRK117_P1 46 4 2 3 No No Wild
## BRK118_P1 38 4 2 5 No No Wild
## BRK119_P1 15 1 3 3 No No Wild
## BRK120_P1 15 1 3 5 No No Wild
## BRK121_P1 46 2 3 3 No No Wild
## BRK122_P1 1 3 3 3 No No Wild
## BRK123_P1 1 3 3 5 No No Wild
## BRK124_P1 6 4 3 5 No No Wild
## BRK125_P1 NA NA NA NA <NA> <NA> SPF
## BRK126_P1 NA NA NA NA <NA> <NA> SPF
## BRK127_P1 NA NA NA NA <NA> <NA> SPF
## BRK128_P1 NA NA NA NA <NA> <NA> Wild
## BRK129_P1 NA NA NA NA <NA> <NA> Wild
## BRK130_P1 NA NA NA NA <NA> <NA> Wild
## BRK131_P1 NA NA NA NA <NA> <NA> Wild
## BRK132_P1 NA NA NA NA <NA> <NA> Wild
## BRK133_P1 NA NA NA NA <NA> <NA> Wild
## BRK134_P1 NA NA NA NA <NA> <NA> SPF
## BRK135_P1 NA NA NA NA <NA> <NA> SPF
## BRK136_P1 NA NA NA NA <NA> <NA> SPF
## BRK137_P1 NA NA NA NA <NA> <NA> SPF
## BRK138_P1 NA NA NA NA <NA> <NA> SPF
## BRK139_P1 NA NA NA NA <NA> <NA> SPF
## BRK141_P1 NA NA NA NA <NA> <NA> <NA>
## BRK142_P1 NA NA NA NA <NA> <NA> <NA>
## BRK143_P1 NA NA NA NA <NA> <NA> <NA>
## BRK144_P1 NA NA NA NA <NA> <NA> <NA>
## BRK145_P1 NA NA NA NA <NA> <NA> <NA>
## BRK147_P1 NA NA NA NA <NA> <NA> <NA>
## BRK148_P1 NA NA NA NA <NA> <NA> <NA>
## BRK149_P1 NA NA NA NA <NA> <NA> <NA>
## BRK150_P1 NA NA NA NA <NA> <NA> <NA>
## BRK151_P1 NA NA NA NA <NA> <NA> <NA>
## BRK152_P1 NA NA NA NA <NA> <NA> <NA>
## BRK153_P1 NA NA NA NA <NA> <NA> <NA>
## BRK154_P1 NA NA NA NA <NA> <NA> <NA>
## BRK155_P1 NA NA NA NA <NA> <NA> <NA>
## BRK156_P1 NA NA NA NA <NA> <NA> <NA>
## BRK157_P1 NA NA NA NA <NA> <NA> <NA>
## BRK158_P1 NA NA NA NA <NA> <NA> <NA>
## BRK159_P1 NA NA NA NA <NA> <NA> <NA>
## BRK160_P1 NA NA NA NA <NA> <NA> <NA>
## BRK161_P1 NA NA NA NA <NA> <NA> <NA>
## BRK162_P1 NA NA NA NA <NA> <NA> <NA>
## BRK164_P1 NA NA NA NA <NA> <NA> <NA>
## BRK165_P1 NA NA NA NA <NA> <NA> <NA>
## BRK166_P1 NA NA NA NA <NA> <NA> <NA>
## BRK167_P1 NA NA NA NA <NA> <NA> <NA>
## BRK168_P1 NA NA NA NA <NA> <NA> <NA>
## BRK169_P1 NA NA NA NA <NA> <NA> <NA>
## BRK77_P2 NA NA NA NA No No SPF
## BRK78_P2 NA NA NA NA No No SPF
## BRK80_P2 NA NA NA NA No No SPF
## BRK81_P2 NA NA NA NA No No SPF
## BRK82_P2 NA NA NA NA No No SPF
## BRK83_P2 NA NA NA NA No No SPF
## BRK84_P2 NA NA NA NA No No SPF
## BRK85_P2 NA NA NA NA No No SPF
## BRK86_P2 NA NA NA NA No No SPF
## BRK87_P2 NA NA NA NA No No SPF
## BRK88_P2 NA NA NA NA No No SPF
## BRK89_P2 NA NA NA NA No No SPF
## BRK90_P2 NA NA NA NA No No SPF
## BRK91_P2 NA NA NA NA No No SPF
## BRK92_P2 NA NA NA NA No No SPF
## BRK93_P2 NA NA NA NA No No SPF
## BRK94_P2 NA NA NA NA No No SPF
## BRK95_P2 NA NA NA NA No No SPF
## BRK96_P2 NA NA NA NA No No SPF
## BRK01_P3 NA NA NA NA No No Wild
## BRK02_P3 NA NA NA NA No No Wild
## BRK03_P3 NA NA NA NA No No Wild
## BRK04_P3 NA NA NA NA No No Wild
## BRK05_P3 NA NA NA NA No No Wild
## BRK06_P3 NA NA NA NA No No Wild
## BRK07_P3 NA NA NA NA No No Wild
## BRK08_P3 NA NA NA NA No No Wild
## BRK09_P3 NA NA NA NA No No Wild
## BRK10_P3 NA NA NA NA No No Wild
## BRK11_P3 NA NA NA NA No No Wild
## BRK12_P3 NA NA NA NA No No Wild
## BRK13_P3 NA NA NA NA No No Wild
## BRK14_P3 NA NA NA NA No No Wild
## BRK15_P3 NA NA NA NA No No Wild
## BRK16_P3 NA NA NA NA No No Wild
## BRK17_P3 NA NA NA NA No No Wild
## BRK18_P3 NA NA NA NA No No Wild
## BRK19_P3 NA NA NA NA No No Wild
## BRK20_P3 NA NA NA NA No No Wild
## BRK21_P3 NA NA NA NA <NA> <NA> <NA>
## BRK22_P3 NA NA NA NA <NA> <NA> <NA>
## BRK23_P3 NA NA NA NA <NA> <NA> <NA>
## BRK24_P3 NA NA NA NA <NA> <NA> <NA>
## BRK25_P3 NA NA NA NA <NA> <NA> <NA>
## BRK26_P3 NA NA NA NA <NA> <NA> <NA>
## Round Trial Sample Barcode
## BRK97_P1 3 Rikke Term BRK97_P1
## BRK98_P1 3 Rikke Term BRK98_P1
## BRK99_P1 3 Rikke Term BRK99_P1
## BRK100_P1 3 Rikke Term BRK100_P1
## BRK101_P1 3 Rikke Term BRK101_P1
## BRK102_P1 3 Rikke Term BRK102_P1
## BRK103_P1 3 Rikke Term BRK103_P1
## BRK104_P1 3 Rikke Term BRK104_P1
## BRK105_P1 3 Rikke Term BRK105_P1
## BRK106_P1 3 Rikke Term BRK106_P1
## BRK107_P1 3 Rikke Term BRK107_P1
## BRK108_P1 3 Rikke Term BRK108_P1
## BRK109_P1 3 Rikke Term BRK109_P1
## BRK110_P1 3 Rikke Term BRK110_P1
## BRK111_P1 1 Therese Term, colon content BRK111_P1
## BRK112_P1 1 Therese Term, colon content BRK112_P1
## BRK113_P1 1 Therese Term, colon content BRK113_P1
## BRK114_P1 1 Therese Term, colon content BRK114_P1
## BRK115_P1 1 Therese Term, colon content BRK115_P1
## BRK116_P1 1 Therese Term, colon content BRK116_P1
## BRK117_P1 1 Therese Term, colon content BRK117_P1
## BRK118_P1 1 Therese Term, colon content BRK118_P1
## BRK119_P1 1 Therese Term, colon content BRK119_P1
## BRK120_P1 1 Therese Term, colon content BRK120_P1
## BRK121_P1 1 Therese Term, colon content BRK121_P1
## BRK122_P1 1 Therese Term, colon content BRK122_P1
## BRK123_P1 1 Therese Term, colon content BRK123_P1
## BRK124_P1 1 Therese Term, colon content BRK124_P1
## BRK125_P1 NA Rikke Pre_ABX BRK125_P1
## BRK126_P1 NA Rikke Pre_ABX BRK126_P1
## BRK127_P1 NA Rikke Pre_ABX BRK127_P1
## BRK128_P1 NA Therese Pre_ABX BRK128_P1
## BRK129_P1 NA Therese Pre_ABX BRK129_P1
## BRK130_P1 NA Therese Pre_ABX BRK130_P1
## BRK131_P1 NA Therese Pre_ABX BRK131_P1
## BRK132_P1 NA Therese Pre_ABX BRK132_P1
## BRK133_P1 NA Therese Pre_ABX BRK133_P1
## BRK134_P1 NA Rikke post_1.FMT BRK134_P1
## BRK135_P1 NA Rikke post_1.FMT BRK135_P1
## BRK136_P1 NA Rikke post_1.FMT BRK136_P1
## BRK137_P1 NA Rikke Term BRK137_P1
## BRK138_P1 NA Rikke Term BRK138_P1
## BRK139_P1 NA Rikke Term BRK139_P1
## BRK141_P1 NA Ida Term BRK141_P1
## BRK142_P1 NA Ida Term BRK142_P1
## BRK143_P1 NA Ida Term BRK143_P1
## BRK144_P1 NA Ida Term BRK144_P1
## BRK145_P1 NA Ida Term BRK145_P1
## BRK147_P1 NA Ida Term BRK147_P1
## BRK148_P1 NA Ida Term BRK148_P1
## BRK149_P1 NA Ida Term BRK149_P1
## BRK150_P1 NA Ida Term BRK150_P1
## BRK151_P1 NA Ida Term BRK151_P1
## BRK152_P1 NA Ida Term BRK152_P1
## BRK153_P1 NA Ida Term BRK153_P1
## BRK154_P1 NA Ida Term BRK154_P1
## BRK155_P1 NA Ida Term BRK155_P1
## BRK156_P1 NA Ida Term BRK156_P1
## BRK157_P1 NA Ida Term BRK157_P1
## BRK158_P1 NA Ida Term BRK158_P1
## BRK159_P1 NA Ida Term BRK159_P1
## BRK160_P1 NA Ida Term BRK160_P1
## BRK161_P1 NA Ida Term BRK161_P1
## BRK162_P1 NA Ida Term BRK162_P1
## BRK164_P1 NA Ida Term BRK164_P1
## BRK165_P1 NA Ida Term BRK165_P1
## BRK166_P1 NA Ida Term BRK166_P1
## BRK167_P1 NA Ida Term BRK167_P1
## BRK168_P1 NA Ida Term BRK168_P1
## BRK169_P1 NA Ida Term BRK169_P1
## BRK77_P2 1 Therese weaning BRK77_P2
## BRK78_P2 1 Therese weaning BRK78_P2
## BRK80_P2 1 Therese weaning BRK80_P2
## BRK81_P2 1 Therese weaning BRK81_P2
## BRK82_P2 1 Therese weaning BRK82_P2
## BRK83_P2 1 Therese weaning BRK83_P2
## BRK84_P2 1 Therese weaning BRK84_P2
## BRK85_P2 1 Therese weaning BRK85_P2
## BRK86_P2 1 Therese weaning BRK86_P2
## BRK87_P2 1 Therese weaning BRK87_P2
## BRK88_P2 1 Therese weaning BRK88_P2
## BRK89_P2 1 Therese weaning BRK89_P2
## BRK90_P2 1 Therese weaning BRK90_P2
## BRK91_P2 1 Therese weaning BRK91_P2
## BRK92_P2 1 Therese weaning BRK92_P2
## BRK93_P2 1 Therese weaning BRK93_P2
## BRK94_P2 1 Therese weaning BRK94_P2
## BRK95_P2 1 Therese weaning BRK95_P2
## BRK96_P2 1 Therese weaning BRK96_P2
## BRK01_P3 1 Therese weaning BRK01_P3
## BRK02_P3 1 Therese weaning BRK02_P3
## BRK03_P3 1 Therese weaning BRK03_P3
## BRK04_P3 1 Therese weaning BRK04_P3
## BRK05_P3 1 Therese weaning BRK05_P3
## BRK06_P3 1 Therese weaning BRK06_P3
## BRK07_P3 1 Therese weaning BRK07_P3
## BRK08_P3 1 Therese weaning BRK08_P3
## BRK09_P3 1 Therese weaning BRK09_P3
## BRK10_P3 1 Therese weaning BRK10_P3
## BRK11_P3 1 Therese weaning BRK11_P3
## BRK12_P3 1 Therese weaning BRK12_P3
## BRK13_P3 1 Therese weaning BRK13_P3
## BRK14_P3 1 Therese weaning BRK14_P3
## BRK15_P3 1 Therese weaning BRK15_P3
## BRK16_P3 1 Therese weaning BRK16_P3
## BRK17_P3 1 Therese weaning BRK17_P3
## BRK18_P3 1 Therese weaning BRK18_P3
## BRK19_P3 1 Therese weaning BRK19_P3
## BRK20_P3 1 Therese weaning BRK20_P3
## BRK21_P3 NA <NA> donor, only liquid no feces BRK21_P3
## BRK22_P3 NA <NA> donor, only liquid no feces BRK22_P3
## BRK23_P3 NA <NA> donor, only liquid no feces BRK23_P3
## BRK24_P3 NA <NA> donor, only liquid no feces BRK24_P3
## BRK25_P3 NA <NA> donor, only liquid no feces BRK25_P3
## BRK26_P3 NA <NA> donor, only liquid no feces BRK26_P3
## Group
## BRK97_P1 BALB/c_Yes_SPF_Term
## BRK98_P1 BALB/c_Yes_SPF_Term
## BRK99_P1 BALB/c_Yes_SPF_Term
## BRK100_P1 BALB/c_Yes_SPF_Term
## BRK101_P1 BALB/c_Yes_SPF_Term
## BRK102_P1 BALB/c_Yes_SPF_Term
## BRK103_P1 BALB/c_Yes_SPF_Term
## BRK104_P1 BALB/c_Yes_SPF_Term
## BRK105_P1 BALB/c_Yes_SPF_Term
## BRK106_P1 BALB/c_Yes_SPF_Term
## BRK107_P1 BALB/c_Yes_SPF_Term
## BRK108_P1 BALB/c_Yes_SPF_Term
## BRK109_P1 BALB/c_Yes_SPF_Term
## BRK110_P1 BALB/c_Yes_SPF_Term
## BRK111_P1 BALB/c_No_Wild_colon
## BRK112_P1 BALB/c_No_Wild_colon
## BRK113_P1 BALB/c_No_Wild_colon
## BRK114_P1 BALB/c_No_Wild_colon
## BRK115_P1 BALB/c_No_Wild_colon
## BRK116_P1 BALB/c_No_Wild_colon
## BRK117_P1 BALB/c_No_Wild_colon
## BRK118_P1 BALB/c_No_Wild_colon
## BRK119_P1 BALB/c_No_Wild_colon
## BRK120_P1 BALB/c_No_Wild_colon
## BRK121_P1 BALB/c_No_Wild_colon
## BRK122_P1 BALB/c_No_Wild_colon
## BRK123_P1 BALB/c_No_Wild_colon
## BRK124_P1 BALB/c_No_Wild_colon
## BRK125_P1 Mother_SPF_PreABX
## BRK126_P1 Mother_SPF_PreABX
## BRK127_P1 Mother_SPF_PreABX
## BRK128_P1 Mother_Wild_PreABX
## BRK129_P1 Mother_Wild_PreABX
## BRK130_P1 Mother_Wild_PreABX
## BRK131_P1 Mother_Wild_PreABX
## BRK132_P1 Mother_Wild_PreABX
## BRK133_P1 Mother_Wild_PreABX
## BRK134_P1 Mother_SPF_post1FMT
## BRK135_P1 Mother_SPF_post1FMT
## BRK136_P1 Mother_SPF_post1FMT
## BRK137_P1 Mother_SPF_Term
## BRK138_P1 Mother_SPF_Term
## BRK139_P1 Mother_SPF_Term
## BRK141_P1 Wildling
## BRK142_P1 Wildling
## BRK143_P1 Wildling
## BRK144_P1 Wildling
## BRK145_P1 Wildling
## BRK147_P1 Wildling
## BRK148_P1 Wildling
## BRK149_P1 Wildling
## BRK150_P1 Wildling
## BRK151_P1 Wildling
## BRK152_P1 Pet Shop
## BRK153_P1 Pet Shop
## BRK154_P1 Pet Shop
## BRK155_P1 Pet Shop
## BRK156_P1 Pet Shop
## BRK157_P1 Pet Shop
## BRK158_P1 Pet Shop
## BRK159_P1 Pet Shop
## BRK160_P1 Pet Shop
## BRK161_P1 Pet Shop
## BRK162_P1 Pet Shop
## BRK164_P1 Pet Shop
## BRK165_P1 Pet Shop
## BRK166_P1 Wild
## BRK167_P1 Wild
## BRK168_P1 Wild
## BRK169_P1 Wild
## BRK77_P2 BALB/c_No_SPF_weaning
## BRK78_P2 BALB/c_No_SPF_weaning
## BRK80_P2 BALB/c_No_SPF_weaning
## BRK81_P2 BALB/c_No_SPF_weaning
## BRK82_P2 BALB/c_No_SPF_weaning
## BRK83_P2 BALB/c_No_SPF_weaning
## BRK84_P2 BALB/c_No_SPF_weaning
## BRK85_P2 BALB/c_No_SPF_weaning
## BRK86_P2 BALB/c_No_SPF_weaning
## BRK87_P2 BALB/c_No_SPF_weaning
## BRK88_P2 BALB/c_No_SPF_weaning
## BRK89_P2 BALB/c_No_SPF_weaning
## BRK90_P2 BALB/c_No_SPF_weaning
## BRK91_P2 BALB/c_No_SPF_weaning
## BRK92_P2 BALB/c_No_SPF_weaning
## BRK93_P2 BALB/c_No_SPF_weaning
## BRK94_P2 BALB/c_No_SPF_weaning
## BRK95_P2 BALB/c_No_SPF_weaning
## BRK96_P2 BALB/c_No_SPF_weaning
## BRK01_P3 BALB/c_No_Wild_weaning
## BRK02_P3 BALB/c_No_Wild_weaning
## BRK03_P3 BALB/c_No_Wild_weaning
## BRK04_P3 BALB/c_No_Wild_weaning
## BRK05_P3 BALB/c_No_Wild_weaning
## BRK06_P3 BALB/c_No_Wild_weaning
## BRK07_P3 BALB/c_No_Wild_weaning
## BRK08_P3 BALB/c_No_Wild_weaning
## BRK09_P3 BALB/c_No_Wild_weaning
## BRK10_P3 BALB/c_No_Wild_weaning
## BRK11_P3 BALB/c_No_Wild_weaning
## BRK12_P3 BALB/c_No_Wild_weaning
## BRK13_P3 BALB/c_No_Wild_weaning
## BRK14_P3 BALB/c_No_Wild_weaning
## BRK15_P3 BALB/c_No_Wild_weaning
## BRK16_P3 BALB/c_No_Wild_weaning
## BRK17_P3 BALB/c_No_Wild_weaning
## BRK18_P3 BALB/c_No_Wild_weaning
## BRK19_P3 BALB/c_No_Wild_weaning
## BRK20_P3 BALB/c_No_Wild_weaning
## BRK21_P3 Donor_Wild
## BRK22_P3 Donor_Wild
## BRK23_P3 Donor_Wild
## BRK24_P3 Donor_SPF
## BRK25_P3 Donor_SPF
## BRK26_P3 Donor_SPF
cat("After rarefaction:", ntaxa(ps_rar), "OTUs,", nsamples(ps_rar), "samples\n")
## After rarefaction: 357 OTUs, 115 samples
cat("Samples removed due to low depth:", nsamples(pst) - nsamples(ps_rar), "\n")
## Samples removed due to low depth: 2
# Remove taxa with abundance < 0.0001% of total (as in Nutella2 script by Ida Wang Henriksen)
# Taxonomic filtering based on abundance for rarefied data: supervsed
## Abumdance: > 0.0001% overall abundance across all samples
# We remove all the taxa with abundance less than 0.0001 %.
total.depth <- sum(otu_table(ps_rar))
totAbuThreshold <- 1e-4 * total.depth
ps_rar <- prune_taxa(taxa_sums(ps_rar) > totAbuThreshold, ps_rar)
ps_rar
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 152 taxa and 115 samples ]
## sample_data() Sample Data: [ 115 samples by 17 sample variables ]
## tax_table() Taxonomy Table: [ 152 taxa by 7 taxonomic ranks ]
sum(rowSums(ps_rar@otu_table) == 0)
## [1] 0
colSums(ps_rar@otu_table) # det er summen i hvert sample
## BRK97_P1 BRK98_P1 BRK99_P1 BRK100_P1 BRK101_P1 BRK102_P1 BRK103_P1 BRK104_P1
## 7911 7941 7979 7969 7980 7997 7970 7994
## BRK105_P1 BRK106_P1 BRK107_P1 BRK108_P1 BRK109_P1 BRK110_P1 BRK111_P1 BRK112_P1
## 7997 7993 7994 7974 7919 7971 7994 7994
## BRK113_P1 BRK114_P1 BRK115_P1 BRK116_P1 BRK117_P1 BRK118_P1 BRK119_P1 BRK120_P1
## 7984 7986 7986 7977 7992 7997 7976 7992
## BRK121_P1 BRK122_P1 BRK123_P1 BRK124_P1 BRK125_P1 BRK126_P1 BRK127_P1 BRK128_P1
## 7955 7990 7985 7983 7980 7959 7961 7989
## BRK129_P1 BRK130_P1 BRK131_P1 BRK132_P1 BRK133_P1 BRK134_P1 BRK135_P1 BRK136_P1
## 7978 7989 7957 7985 7951 7994 7992 7991
## BRK137_P1 BRK138_P1 BRK139_P1 BRK141_P1 BRK142_P1 BRK143_P1 BRK144_P1 BRK145_P1
## 7986 7941 7977 7949 7907 7944 7985 7957
## BRK147_P1 BRK148_P1 BRK149_P1 BRK150_P1 BRK151_P1 BRK152_P1 BRK153_P1 BRK154_P1
## 7944 7983 7958 7939 7916 7969 7984 7846
## BRK155_P1 BRK156_P1 BRK157_P1 BRK158_P1 BRK159_P1 BRK160_P1 BRK161_P1 BRK162_P1
## 7958 7881 7975 7972 7949 7984 7869 7927
## BRK164_P1 BRK165_P1 BRK166_P1 BRK167_P1 BRK168_P1 BRK169_P1 BRK77_P2 BRK78_P2
## 7920 7930 7829 7971 7891 7839 7967 7966
## BRK80_P2 BRK81_P2 BRK82_P2 BRK83_P2 BRK84_P2 BRK85_P2 BRK86_P2 BRK87_P2
## 7981 7984 7978 7982 7987 7987 7980 7982
## BRK88_P2 BRK89_P2 BRK90_P2 BRK91_P2 BRK92_P2 BRK93_P2 BRK94_P2 BRK95_P2
## 7980 7977 7981 7978 7991 7987 7982 7986
## BRK96_P2 BRK01_P3 BRK02_P3 BRK03_P3 BRK04_P3 BRK05_P3 BRK06_P3 BRK07_P3
## 7995 7979 7980 7967 7959 7982 7985 7977
## BRK08_P3 BRK09_P3 BRK10_P3 BRK11_P3 BRK12_P3 BRK13_P3 BRK14_P3 BRK15_P3
## 7980 7981 7965 7970 7968 7988 7976 7983
## BRK16_P3 BRK17_P3 BRK18_P3 BRK19_P3 BRK20_P3 BRK21_P3 BRK22_P3 BRK23_P3
## 7975 7968 7979 7976 7975 7962 7996 7983
## BRK24_P3 BRK25_P3 BRK26_P3
## 7911 7994 7993
pst_original = pst
ps_rar_original = ps_rar
# Relative abundance as percentage (as in Nutella2)
ps_rel = transform_sample_counts(ps_rar, function(x) { x / sum(x) * 100 })
# Natural log transformation for beta diversity (as in Nutella2)
ps_log = transform_sample_counts(ps_rar, function(x) { log(1 + x) })
# Richness: Chao1 — use non-rarefied table (as in Nutella2)
Chao1 = estimate_richness(pst, split = TRUE, measures = "Chao1") # for richness, we don't use rarefied table
# Evenness: Shannon — use rarefied table (as in Nutella2)
Shannon = estimate_richness(ps_rar, split = TRUE, measures = "Shannon")
# Add indexes to sample data
sample_data(ps_rar) <- data.frame(
sample_data(ps_rar),
Chao1 = Chao1[rownames(Chao1) %in% rownames(sample_data(ps_rar)), ][[1]],
Shannon = Shannon$Shannon
)
alpha_df = sample_data(ps_rar) %>% data.frame()
head(alpha_df[, c("Group", "Strain", "Sex", "FMT", "Chao1", "Shannon")])
## Group Strain Sex FMT Chao1 Shannon
## BRK97_P1 BALB/c_Yes_SPF_Term BALB/c Female SPF 108.3529 2.039375
## BRK98_P1 BALB/c_Yes_SPF_Term BALB/c Female SPF 143.3333 2.098244
## BRK99_P1 BALB/c_Yes_SPF_Term BALB/c Female SPF 126.3636 1.401313
## BRK100_P1 BALB/c_Yes_SPF_Term BALB/c Male SPF 148.0000 1.337422
## BRK101_P1 BALB/c_Yes_SPF_Term BALB/c Male SPF 143.0000 1.925153
## BRK102_P1 BALB/c_Yes_SPF_Term BALB/c Male SPF 111.4286 2.108371
# alpha diversitet per gruppe
alpha_df %>%
group_by(Group) %>%
summarise(
n = n(),
mean_Chao1 = round(mean(Chao1, na.rm = TRUE), 2),
mean_Shannon = round(mean(Shannon, na.rm = TRUE), 2)
) %>%
arrange(Group)
## # A tibble: 13 × 4
## Group n mean_Chao1 mean_Shannon
## <fct> <int> <dbl> <dbl>
## 1 BALB/c_No_SPF_weaning 19 122. 2.65
## 2 BALB/c_No_Wild_weaning 20 122. 2.51
## 3 BALB/c_Yes_SPF_Term 14 118. 1.64
## 4 BALB/c_No_Wild_colon 14 139. 2.4
## 5 Wildling 10 165. 2.5
## 6 Pet Shop 13 169. 2.21
## 7 Wild 4 154. 2.04
## 8 Mother_SPF_PreABX 3 172. 2.62
## 9 Mother_SPF_post1FMT 3 109. 0.53
## 10 Mother_SPF_Term 3 93.2 1.78
## 11 Mother_Wild_PreABX 6 147. 2.04
## 12 Donor_SPF 3 84.2 1.15
## 13 Donor_Wild 3 90.4 1.65
# Melt for plotting (as in Nutella2)
long_mtdat <- melt(alpha_df)
long_mtdat <- long_mtdat[long_mtdat$variable %in% c("Chao1", "Shannon"), ]
long_mtdat$variable <- factor(long_mtdat$variable, levels = c("Chao1", "Shannon"))
head(long_mtdat)
## Study Mouse.ID Strain Sex Pre.immunized Pre.mix FMT Trial Sample
## 691 IMU 78 BALB/c Female Yes Falken_and_new SPF Rikke Term
## 692 IMU 79 BALB/c Female Yes Falken_and_new SPF Rikke Term
## 693 IMU 80 BALB/c Female Yes Falken_and_new SPF Rikke Term
## 694 IMU 81 BALB/c Male Yes Falken_and_new SPF Rikke Term
## 695 IMU 82 BALB/c Male Yes Falken_and_new SPF Rikke Term
## 696 IMU 83 BALB/c Male Yes Falken_and_new SPF Rikke Term
## Barcode Group variable value
## 691 BRK97_P1 BALB/c_Yes_SPF_Term Chao1 108.3529
## 692 BRK98_P1 BALB/c_Yes_SPF_Term Chao1 143.3333
## 693 BRK99_P1 BALB/c_Yes_SPF_Term Chao1 126.3636
## 694 BRK100_P1 BALB/c_Yes_SPF_Term Chao1 148.0000
## 695 BRK101_P1 BALB/c_Yes_SPF_Term Chao1 143.0000
## 696 BRK102_P1 BALB/c_Yes_SPF_Term Chao1 111.4286
# Gloomer: wraps tax_glom() and adds unique names to taxa
# Directly from Nutella2.R / Farhad Panah's pipeline
gloomer = function(ps = data, taxa_level = taxa_level, NArm = "TRUE") {
rank.names = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
if (sum(loadedNamespaces() == "MicrobiotaProcess") > 0) {
devtools::unload("MicrobiotaProcess", FALSE)
}
# Remove uncultured Family
ps = subset_taxa(ps, !Family %in% c("uncultured", "NA", "uncategorized", "unassigend", "", " "))
if (taxa_level == "Species") {
ps = subset_taxa(ps, !Genus %in% NA)
tax_table(ps)[, taxa_level] <- ifelse(is.na(tax_table(ps)[, taxa_level]),
"unknown", paste(tax_table(ps)[, taxa_level]))
physeq = tax_glom(physeq = ps, taxrank = taxa_level, NArm = NArm)
taxdat = tax_table(physeq)[, seq_along(rank.names[1:which(rank.names == taxa_level)])]
taxdat = taxdat[complete.cases(taxdat), ] %>% as.data.frame
otudat = otu_table(physeq)
taxdat[, 6] = ifelse(taxdat[, 6] %in% c("uncategorized", NA, "uncultured", "unassigend", "", " "),
paste0("[", taxdat[, length(rank.names[1:which(rank.names == "Genus")]) - 1], "]_", taxdat[, 6]),
taxdat[, 6])
spec1 = taxdat[, taxa_level] %>% as.vector
spec2 = taxdat[, taxa_level] %>% as.vector
uni = matrix(NA, ncol = length(spec2), nrow = length(spec1))
for (i in seq_along(spec1)) { for (j in seq_along(spec2)) {
uni[i, j] = ifelse(spec1[i] == spec2[j], "TRUE", "FALSE") } }
rownames(uni) <- spec1; colnames(uni) <- spec2
uni[upper.tri(uni, diag = TRUE)] = 0
duplis = uni %>% reshape2::melt() %>% filter(value == "TRUE")
if (dim(duplis)[[1]] > 0) {
duplis = uni %>% reshape2::melt() %>% filter(value == "TRUE") %>%
dplyr::select(1) %>% unique() %>% unlist %>% as.vector
taxdat = taxdat %>% mutate(uni = ifelse(taxdat[, taxa_level] %in% duplis,
paste0("[", taxdat[, length(rank.names[1:which(rank.names == taxa_level)]) - 1], "]_", taxdat[, taxa_level]),
taxdat[, taxa_level]))
dupies <- taxdat[duplicated(taxdat[, "uni"]), "uni"]
if (length(dupies) > 0) {
taxdat = taxdat %>% data.frame %>%
mutate(uni2 = ifelse(taxdat[, "uni"] %in% dupies,
paste0("[", taxdat[, length(rank.names[1:which(rank.names == taxa_level)]) - 2], "]_", taxdat[, "uni"]),
taxdat[, "uni"]))
taxdat[, taxa_level] = taxdat[, "uni2"]
taxdat[, "uni"] <- NULL; taxdat[, "uni2"] <- NULL
} else {
taxdat[, taxa_level] = taxdat[, "uni"]; taxdat[, "uni"] <- NULL
}
}
taxdat <- as(taxdat, "matrix")
rownames(otudat) <- taxdat[rownames(taxdat) %in% rownames(otudat), taxa_level]
rownames(taxdat) <- taxdat[, taxa_level]; taxdat <- tax_table(taxdat)
taxa_names(physeq) <- taxa_names(taxdat); tax_table(physeq) <- taxdat; otu_table(physeq) <- otudat
} else if (taxa_level == "Genus") {
physeq = tax_glom(physeq = ps, taxrank = taxa_level, NArm = NArm)
taxdat = tax_table(physeq)[, seq_along(rank.names[1:which(rank.names == taxa_level)])]
taxdat = taxdat[complete.cases(taxdat), ] %>% as.data.frame
otudat = otu_table(physeq)
taxdat[, 6] = ifelse(taxdat[, 6] %in% c("uncategorized", NA, "uncultured", "unassigend", "", " "),
paste0("[", taxdat[, length(rank.names[1:which(rank.names == taxa_level)]) - 1], "]_", taxdat[, taxa_level]),
taxdat[, taxa_level])
gen1 = taxdat[, taxa_level] %>% as.vector
gen2 = taxdat[, taxa_level] %>% as.vector
uni = matrix(NA, ncol = length(gen2), nrow = length(gen1))
for (i in seq_along(gen1)) { for (j in seq_along(gen2)) {
uni[i, j] = ifelse(gen1[i] == gen2[j], "TRUE", "FALSE") } }
rownames(uni) <- gen1; colnames(uni) <- gen2
uni[upper.tri(uni, diag = TRUE)] = 0
duplis = uni %>% reshape2::melt() %>% filter(value == "TRUE")
if (dim(duplis)[[1]] > 0) {
duplis = uni %>% reshape2::melt() %>% filter(value == "TRUE") %>%
dplyr::select(1) %>% unique() %>% unlist %>% as.vector
taxdat = taxdat %>% mutate(uni = ifelse(taxdat[, taxa_level] %in% duplis,
paste0("[", taxdat[, length(rank.names[1:which(rank.names == taxa_level)]) - 1], "]_", taxdat[, taxa_level]),
taxdat[, taxa_level]))
taxdat[, taxa_level] = taxdat[, "uni"]; taxdat[, "uni"] <- NULL
taxdat <- as(taxdat, "matrix")
rownames(otudat) <- taxdat[rownames(taxdat) %in% rownames(otudat), taxa_level]
rownames(taxdat) <- taxdat[taxdat[, taxa_level] %in% rownames(otudat), taxa_level]
taxdat <- as.matrix(taxdat); taxdat <- tax_table(taxdat)
} else {
taxdat <- as.matrix(taxdat); taxdat <- tax_table(taxdat)
rownames(otudat) <- taxdat[rownames(taxdat) %in% rownames(otudat), taxa_level]
rownames(taxdat) <- taxdat[, taxa_level]; taxdat <- tax_table(taxdat)
}
taxa_names(physeq) <- taxa_names(taxdat); tax_table(physeq) <- taxdat; otu_table(physeq) <- otudat
} else {
physeq = tax_glom(physeq = ps, taxrank = taxa_level, NArm = TRUE)
taxdat = tax_table(physeq)[, seq_along(rank.names[1:which(rank.names == taxa_level)])]
taxdat = taxdat[complete.cases(taxdat), ] %>% as.data.frame
otudat = otu_table(physeq)
spec1 = taxdat[, taxa_level] %>% as.vector
spec2 = taxdat[, taxa_level] %>% as.vector
uni = matrix(NA, ncol = length(spec2), nrow = length(spec1))
for (i in seq_along(spec1)) { for (j in seq_along(spec2)) {
uni[i, j] = ifelse(spec1[i] == spec2[j], "TRUE", "FALSE") } }
rownames(uni) <- spec1; colnames(uni) <- spec2
uni[upper.tri(uni, diag = TRUE)] = 0
duplis = uni %>% reshape2::melt() %>% filter(value == "TRUE")
if (dim(duplis)[[1]] > 0) {
duplis = uni %>% reshape2::melt() %>% filter(value == "TRUE") %>%
dplyr::select(1) %>% unique() %>% unlist %>% as.vector
taxdat = taxdat %>% mutate(uni = ifelse(taxdat[, taxa_level] %in% duplis,
paste(taxdat[, length(rank.names[1:which(rank.names == taxa_level)]) - 1], "_", taxdat[, taxa_level]),
taxdat[, taxa_level]))
taxdat[, taxa_level] = taxdat[, "uni"]; taxdat[, "uni"] <- NULL
taxdat <- as.matrix(taxdat)
rownames(otudat) <- taxdat[rownames(taxdat) %in% rownames(otudat), taxa_level]
rownames(taxdat) <- taxdat[, taxa_level]; taxdat <- tax_table(taxdat)
} else {
taxdat <- as.matrix(taxdat); taxdat <- tax_table(taxdat)
rownames(otudat) <- taxdat[rownames(taxdat) %in% rownames(otudat), taxa_level]
rownames(taxdat) <- taxdat[, taxa_level]; taxdat <- tax_table(taxdat)
}
taxa_names(physeq) <- taxa_names(taxdat); tax_table(physeq) <- taxdat; otu_table(physeq) <- otudat
}
return(physeq)
}
# Color palettes (as in Nutella2)
speccol = c("darkmagenta", "antiquewhite4", "cornflowerblue", "plum4",
"purple", "cadetblue2", "pink", "steelblue2", "lightblue", "lightgreen",
"violet", "lightpink", "skyblue", "red", "yellow", "limegreen",
"magenta", "green", "coral", "blue")
cols = colors()[c(190, 20, 30, 15, 8, 90, 35, 120, 42)]
comp1_groups <- c("BALB/c_No_SPF_weaning", "BALB/c_No_Wild_weaning")
comp1_donors <- c("Donor_SPF", "Donor_Wild")
# Subset til kun originale donorer — ikke resampling
alpha_df_donors_orig <- alpha_df %>%
filter(Study %in% c("Donor_inoculum_Wild(Zahir)",
"Donor_inoculum_SPF(WP1Amix)"))
ps_c1 <- subset_samples(ps_rar,
sample_data(ps_rar)$Group %in% c(comp1_groups, comp1_donors))
alpha_c1_stat <- alpha_df %>%
filter(Group %in% comp1_groups) %>%
mutate(FMT = factor(FMT), Sex = factor(Sex))
# Chao1
lm_chao1 <- lm(Chao1 ~ FMT, data = alpha_c1_stat)
par(mfrow = c(1, 2))
qqnorm(rstandard(lm_chao1), main = "Q-Q plot — Chao1")
abline(0, 1, lty = 2)
plot(predict(lm_chao1), rstandard(lm_chao1),
main = "Residuals vs Fitted — Chao1",
xlab = "Fitted values", ylab = "Standardized residuals")
abline(h = 0, lty = 2)
# Shannon
lm_shannon <- lm(Shannon ~ FMT, data = alpha_c1_stat)
par(mfrow = c(1, 2))
qqnorm(rstandard(lm_shannon), main = "Q-Q plot — Shannon")
abline(0, 1, lty = 2)
plot(predict(lm_shannon), rstandard(lm_shannon),
main = "Residuals vs Fitted — Shannon",
xlab = "Fitted values", ylab = "Standardized residuals")
abline(h = 0, lty = 2)
par(mfrow = c(1, 1))
# Rank-transformer data
alpha_c1_stat$Chao1_rank <- rank(alpha_c1_stat$Chao1)
alpha_c1_stat$Shannon_rank <- rank(alpha_c1_stat$Shannon)
lm_chao1_rank <- lm(Chao1_rank ~ FMT, data = alpha_c1_stat)
lm_shannon_rank <- lm(Shannon_rank ~ FMT, data = alpha_c1_stat)
par(mfrow = c(1, 2))
qqnorm(rstandard(lm_chao1_rank), main = "Q-Q plot — Chao1 ranked"); abline(0, 1, lty = 2)
plot(predict(lm_chao1_rank), rstandard(lm_chao1_rank), main = "Residuals vs Fitted — Chao1 ranked", xlab = "Fitted values", ylab = "Standardized residuals"); abline(h = 0, lty = 2)
par(mfrow = c(1, 2))
qqnorm(rstandard(lm_shannon_rank), main = "Q-Q plot — Shannon ranked"); abline(0, 1, lty = 2)
plot(predict(lm_shannon_rank), rstandard(lm_shannon_rank), main = "Residuals vs Fitted — Shannon ranked", xlab = "Fitted values", ylab = "Standardized residuals"); abline(h = 0, lty = 2)
par(mfrow = c(1, 1))
# ANOVA with FMT, since no FMT * Sex interaction
## chao1
anova_chao1_rank <- aov(Chao1_rank ~ FMT, data = alpha_c1_stat)
summary(anova_chao1_rank)
## Df Sum Sq Mean Sq F value Pr(>F)
## FMT 1 0 0.23 0.002 0.967
## Residuals 37 4939 133.48
TukeyHSD(anova_chao1_rank)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Chao1_rank ~ FMT, data = alpha_c1_stat)
##
## $FMT
## diff lwr upr p adj
## Wild-SPF 0.1539474 -7.345508 7.653402 0.9670466
## shannon
anova_shannon_rank <- aov(Shannon_rank ~ FMT, data = alpha_c1_stat)
summary(anova_shannon_rank)
## Df Sum Sq Mean Sq F value Pr(>F)
## FMT 1 357 357.3 2.884 0.0978 .
## Residuals 37 4583 123.9
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(anova_shannon_rank)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Shannon_rank ~ FMT, data = alpha_c1_stat)
##
## $FMT
## diff lwr upr p adj
## Wild-SPF -6.055263 -13.27935 1.168823 0.0978349
wilcox.test(Chao1 ~ FMT, data = alpha_c1_stat)
##
## Wilcoxon rank sum test with continuity correction
##
## data: Chao1 by FMT
## W = 188.5, p-value = 0.9776
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(Shannon ~ FMT, data = alpha_c1_stat)
##
## Wilcoxon rank sum exact test
##
## data: Shannon by FMT
## W = 249, p-value = 0.1006
## alternative hypothesis: true location shift is not equal to 0
# checking for sex effect
wilcox.test(Chao1 ~ Sex, data = alpha_c1_stat)
##
## Wilcoxon rank sum test with continuity correction
##
## data: Chao1 by Sex
## W = 248.5, p-value = 0.1031
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(Shannon ~ Sex, data = alpha_c1_stat)
##
## Wilcoxon rank sum exact test
##
## data: Shannon by Sex
## W = 177, p-value = 0.7284
## alternative hypothesis: true location shift is not equal to 0
library(ggpubr)
library(ggplot2)
long_c1 <- alpha_df %>%
filter(Group %in% comp1_groups) %>%
mutate(Group = factor(Group, levels = comp1_groups)) %>%
melt()
long_c1 <- long_c1[long_c1$variable %in% c("Chao1", "Shannon"), ]
alpha_p_c1 <- ggplot(long_c1, aes(x = Group, y = value)) +
geom_violin(aes(fill = Group), trim = FALSE, alpha = 0.7) +
geom_boxplot(width = 0.10, outlier.alpha = 0) +
geom_jitter(aes(fill = Group, shape = Sex), alpha = 1, width = 0.2,
stroke = 1, size = 2) +
facet_wrap(~variable, scales = "free_y") +
theme_bw() +
scale_fill_manual(values = c("BALB/c_No_SPF_weaning" = "#8FBF8F",
"BALB/c_No_Wild_weaning" = "#3E5871")) +
scale_x_discrete(labels = c("BALB/c_No_SPF_weaning" = "BALB/c\n+ SPF FMT",
"BALB/c_No_Wild_weaning" = "BALB/c\n+ Wild FMT")) +
theme(legend.position = "right",
axis.title.x = element_text(size = 13),
axis.text.x = element_text(size = 13),
axis.title.y = element_text(size = 13),
strip.text.x = element_text(size = 13),
plot.title = element_text(size = 15, hjust = 0),
plot.subtitle = element_markdown(size = 13)) +
guides(fill = "none") +
labs(x = NULL, y = "Alpha diversity",
title = "Alpha diversity at weaning", subtitle = "Shannon FMT <i>p</i> = 0.087 (.)")
alpha_p_c1
# ggsave("./outputs/Comp1_weaning_alpha.jpeg", height = 7, width = 13, dpi = 300)
# Recipients at weaning together with respective donors
long_c1_donors <- bind_rows(
alpha_df_donors_orig,
alpha_df %>% filter(Group %in% comp1_groups)
) %>%
mutate(
Group_plot = case_when(
Study == "Donor_inoculum_SPF(WP1Amix)" ~ "Donor_SPF",
Study == "Donor_inoculum_Wild(Zahir)" ~ "Donor_Wild",
TRUE ~ as.character(Group)
),
Group_plot = factor(Group_plot, levels = c(
"BALB/c_No_SPF_weaning", "Donor_SPF",
"BALB/c_No_Wild_weaning", "Donor_Wild"
)),
FMT_type = case_when(
Group_plot %in% c("Donor_SPF", "BALB/c_No_SPF_weaning") ~ "SPF",
Group_plot %in% c("Donor_Wild", "BALB/c_No_Wild_weaning") ~ "Wild"
)
) %>%
melt()
long_c1_donors <- long_c1_donors[long_c1_donors$variable %in% c("Chao1", "Shannon"), ]
alpha_p_donors_c1 <- ggplot(long_c1_donors, aes(x = Group_plot, y = value)) +
geom_violin(data = long_c1_donors %>% filter(!Group_plot %in% c("Donor_SPF", "Donor_Wild")),
aes(fill = Group_plot), trim = FALSE, alpha = 0.7) +
geom_boxplot(data = long_c1_donors %>% filter(!Group_plot %in% c("Donor_SPF", "Donor_Wild")),
width = 0.10, outlier.alpha = 0) +
geom_jitter(data = long_c1_donors %>% filter(!Group_plot %in% c("Donor_SPF", "Donor_Wild")),
aes(fill = Group_plot, shape = Sex), alpha = 1, width = 0.2,
stroke = 1, size = 2) +
# Donorer som enkelt diamond punkt — samme farve som recipient
geom_point(data = long_c1_donors %>% filter(Group_plot %in% c("Donor_SPF", "Donor_Wild")),
aes(color = FMT_type),
shape = 18, size = 5, alpha = 1) +
facet_wrap(~variable, scales = "free_y") +
theme_bw() +
scale_fill_manual(values = c("BALB/c_No_SPF_weaning" = "#8FBF8F",
"BALB/c_No_Wild_weaning" = "#3E5871"),
name = "Group") +
scale_color_manual(values = c("SPF" = "#8FBF8F",
"Wild" = "#3E5871"), guide = "none") +
scale_shape_manual(values = c("Female" = 21, "Male" = 24), name = "Sex") +
scale_x_discrete(
limits = c("BALB/c_No_SPF_weaning", "Donor_SPF",
"BALB/c_No_Wild_weaning", "Donor_Wild"),
labels = c(
"BALB/c_No_SPF_weaning" = "BALB/c\n+ SPF FMT",
"Donor_SPF" = "Donor\nSPF",
"BALB/c_No_Wild_weaning" = "BALB/c\n+ Wild FMT",
"Donor_Wild" = "Donor\nWild"
)) +
guides(fill = "none",
shape = guide_legend(override.aes = list(size = 4))) +
theme(axis.title.x = element_text(size = 13),
axis.text.x = element_text(size = 11),
axis.title.y = element_text(size = 13),
strip.text.x = element_text(size = 13),
plot.title = element_text(size = 15, hjust = 0),
plot.subtitle = element_markdown(size = 13)) +
labs(x = NULL, y = "Alpha diversity",
title = "Alpha diversity at Weaning", subtitle = "Shannon FMT <i>p</i> = 0.087 (.)")
alpha_p_donors_c1
# ggsave("./outputs/Comp1_weaning_donors_alpha.jpeg", height = 7, width = 13, dpi = 300)
# Histogrammer for de tre mest relevante transformationer
p_raw <- ggplot() +
geom_histogram(aes(x = rowSums(otu_table(ps_rar))),
fill = "#4a8d78", color = "grey", binwidth = 2000) +
theme_bw() + ggtitle("Raw counts") +
xlab("Taxa sum") + ylab("Count")
p_log <- ggplot() +
geom_histogram(aes(x = rowSums(otu_table(ps_log))),
fill = "#4a8d78", color = "grey", binwidth = 5) +
theme_bw() + ggtitle("Natural log — log(1+x)") +
xlab("Taxa sum") + ylab("Count")
p_rel <- ggplot() +
geom_histogram(aes(x = rowSums(otu_table(ps_rel))),
fill = "#4a8d78", color = "grey", binwidth = 5) +
theme_bw() + ggtitle("Relative abundance (%)") +
xlab("Taxa sum") + ylab("Count")
p_raw + p_log + p_rel + plot_layout(ncol = 3)
ggsave("./outputs/Transformation_BetaDiversity.png",
plot = p_raw + p_log + p_rel + plot_layout(ncol = 3),
width = 12, height = 5, dpi = 300)
Conclusions - Natural logarithm concluded as the best - Naturlig logaritme har betydeligt bedre fordeling, de fleste taxa-summer ligger mellem 0-200 med en mere jævn fordeling. Stadig skæv men langt mindre ekstrem end de andre to.
# Behold kun originale donorer — ikke resampling
ps_c1 <- subset_samples(ps_c1,
!Study %in% c("Donor_inoculum_SPF_resampling_1",
"Donor_inoculum_SPF_resampling_2",
"Donor_inoculum_Wild_resampling_1",
"Donor_inoculum_Wild_resampling_2"))
# Log-transformed rarefied data
ps_c1_log <- transform_sample_counts(ps_c1, function(x){log(1 + x)})
bray_pcoa_c1 <- ordinate(ps_c1_log, method = "PCoA", distance = "bray")
evals_c1 <- bray_pcoa_c1$values$Eigenvalues
bray_p_c1 <- plot_ordination(
ps_c1_log, bray_pcoa_c1, color = "Group", shape = "Sex"
) +
stat_ellipse(data = . %>% filter(Group %in% comp1_groups),
aes(group = Group, fill = Group, color = Group),
alpha = 0.15, geom = "polygon", linetype = 2) +
# Weaning samples
geom_point(data = . %>% filter(Group %in% comp1_groups),
aes(color = Group, shape = Sex), size = 3, alpha = 0.9) +
# Donor samples — eksplicit plottet som stjerner
geom_point(data = . %>% filter(Group %in% comp1_donors),
aes(color = Group), shape = 18, size = 5, alpha = 1) +
scale_color_manual(
values = c(
"BALB/c_No_SPF_weaning" = "#8FBF8F",
"BALB/c_No_Wild_weaning" = "#3E5871",
"Donor_SPF" = "#4a7a4a",
"Donor_Wild" = "#1a2f40"
),
labels = c(
"BALB/c_No_SPF_weaning" = "BALB/c + SPF FMT",
"BALB/c_No_Wild_weaning" = "BALB/c + Wild FMT",
"Donor_SPF" = "Donor SPF",
"Donor_Wild" = "Donor Wild"
)
) +
scale_fill_manual(
values = c(
"BALB/c_No_SPF_weaning" = "#8FBF8F",
"BALB/c_No_Wild_weaning" = "#3E5871"
),
labels = c(
"BALB/c_No_SPF_weaning" = "BALB/c + SPF FMT",
"BALB/c_No_Wild_weaning" = "BALB/c + Wild FMT"
)
) + scale_shape_manual(
values = c("Female" = 16, "Male" = 17),
na.translate = FALSE,
na.value = NA
) +
guides(
color = guide_legend(title = "Group"),
fill = "none",
shape = guide_legend(title = "Sex", na.translate = FALSE)
) +
labs(
x = sprintf("PCo1 [%s%%]", round(evals_c1 / sum(evals_c1) * 100, 1)[1]),
y = sprintf("PCo2 [%s%%]", round(evals_c1 / sum(evals_c1) * 100, 1)[2]),
color = "Group", shape = "Sex",
title = "Bray-Curtis PCoA",
subtitle = "Beta Diversity at Weaning Following FMT"
) +
geom_vline(xintercept = 0, lty = 2, alpha = 0.5, color = "blue") +
geom_hline(yintercept = 0, lty = 2, alpha = 0.5, color = "blue") +
theme_bw() +
theme(axis.title = element_text(),
legend.title = element_text(size = 10),
legend.text = element_text(),
axis.text = element_text(size = 13),
plot.title = element_text(size = 15),
plot.subtitle = element_text(size = 13))
library(ggtext)
bray_p_c1 <- bray_p_c1 +
annotate("richtext",
x = Inf, y = Inf,
label = "PERMANOVA: FMT<br>*R*² = 0.48, *p* < 0.001",
hjust = 1.1, vjust = 1.5,
size = 4, color = "black",
fill = NA, label.color = NA)
bray_p_c1
# ggsave("./outputs/comp1_bray_pcoa.jpeg", dpi = 300, height = 7, width = 10)
# PERMANOVA (statistics for beta diversity)
# PERMANOVA — test if groups are significantly different
ps_c1_stat_log <- transform_sample_counts(
subset_samples(ps_rar, sample_data(ps_rar)$Group %in% comp1_groups),
function(x){log(1 + x)})
df_c1_stat <- data.frame(sample_data(ps_c1_stat_log))
bray_dist_c1 <- phyloseq::distance(ps_c1_stat_log, method = "bray")
set.seed(123)
vegan::adonis2(bray_dist_c1 ~ Group * Sex, data = df_c1_stat, permutations = 999)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## vegan::adonis2(formula = bray_dist_c1 ~ Group * Sex, data = df_c1_stat, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## Model 3 1.2598 0.5364 13.499 0.001 ***
## Residual 35 1.0888 0.4636
## Total 38 2.3486 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
set.seed(123)
vegan::adonis2(bray_dist_c1 ~ Group, data = df_c1_stat, permutations = 999)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## vegan::adonis2(formula = bray_dist_c1 ~ Group, data = df_c1_stat, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 1.1430 0.48669 35.081 0.001 ***
## Residual 37 1.2056 0.51331
## Total 38 2.3486 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Test Sex alene
set.seed(123)
vegan::adonis2(bray_dist_c1 ~ Sex, data = df_c1_stat, permutations = 999)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## vegan::adonis2(formula = bray_dist_c1 ~ Sex, data = df_c1_stat, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.04972 0.02117 0.8003 0.493
## Residual 37 2.29885 0.97883
## Total 38 2.34857 1.00000
# Unload MicrobiotaProcess hvis loaded
if ("MicrobiotaProcess" %in% loadedNamespaces()) {
devtools::unload("MicrobiotaProcess")
}
# Subset
ps_rel_c1 <- subset_samples(ps_rel,
sample_data(ps_rel)$Group %in% c(comp1_groups, comp1_donors) &
!sample_data(ps_rel)$Study %in% c(
"Donor_inoculum_SPF_resampling_1",
"Donor_inoculum_SPF_resampling_2",
"Donor_inoculum_Wild_resampling_1",
"Donor_inoculum_Wild_resampling_2"
))
# Fjern uassignerede Orders og nul-reads taxa
ps_rel_c1 <- subset_taxa(ps_rel_c1, !is.na(Order) & Order != "__" & Order != "")
ps_rel_c1 <- prune_taxa(taxa_sums(ps_rel_c1) > 0, ps_rel_c1)
# Kør gloomer
glom_c1 <- gloomer(
ps = ps_rel_c1,
taxa_level = "Order",
NArm = TRUE)
ntaxa(glom_c1)
## [1] 21
rel_c1_df <- glom_c1 %>%
psmelt() %>%
group_by(Sample) %>%
mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
ungroup()
# ============================================================
# PHYLUM NIVEAU
# ============================================================
ps_rel_c1_phylum <- ps_rel_c1
ps_rel_c1_phylum <- subset_taxa(ps_rel_c1_phylum,
!is.na(Phylum) & Phylum != "__" & Phylum != "")
ps_rel_c1_phylum <- prune_taxa(taxa_sums(ps_rel_c1_phylum) > 0, ps_rel_c1_phylum)
glom_c1_phylum <- gloomer(ps = ps_rel_c1_phylum, taxa_level = "Phylum", NArm = TRUE)
unique(tax_table(glom_c1_phylum)[, "Phylum"])
## Taxonomy Table: [12 taxa by 1 taxonomic ranks]:
## Phylum
## Actinomycetota "Actinomycetota"
## Bacillota "Bacillota"
## Bacteroidota "Bacteroidota"
## Campylobacterota "Campylobacterota"
## Candidatus Saccharibacteria "Candidatus Saccharibacteria"
## Deferribacterota "Deferribacterota"
## Elusimicrobiota "Elusimicrobiota"
## Fusobacteriota "Fusobacteriota"
## Mycoplasmatota "Mycoplasmatota"
## Pseudomonadota "Pseudomonadota"
## Thermodesulfobacteriota "Thermodesulfobacteriota"
## Verrucomicrobiota "Verrucomicrobiota"
phylum_col <- c(
"Bacillota" = "#C47F7F",
"Bacteroidota" = "#6B9BA4",
"Actinomycetota" = "#8FAF7E",
"Campylobacterota" = "#C4936A",
"Verrucomicrobiota" = "#9B7BAD",
"Fusobacteriota" = "#7B9E87",
"Deferribacterota" = "#B8977E",
"Mycoplasmatota" = "#7B8FA6",
"Pseudomonadota" = "#A67B9B",
"Thermodesulfobacteriota" = "#6B8E7B",
"Elusimicrobiota" = "#C4A882",
"Candidatus Saccharibacteria" = "#8FA6A6"
)
rel_c1_phylum <- glom_c1_phylum %>%
psmelt() %>%
group_by(Sample) %>%
mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
ungroup()
top_phylum_c1 <- rel_c1_phylum %>%
group_by(OTU) %>% summarise(m = mean(RelAbund)) %>%
arrange(desc(m)) %>% pull(OTU)
rel_c1_phylum_avg <- rel_c1_phylum %>%
mutate(Order_plot = OTU,
Group = factor(Group, levels = c(
"BALB/c_No_SPF_weaning", "Donor_SPF",
"BALB/c_No_Wild_weaning", "Donor_Wild"
))) %>%
group_by(Group, Order_plot) %>%
summarise(RelAbund = mean(RelAbund), .groups = "drop") %>%
mutate(Order_plot = factor(Order_plot, levels = top_phylum_c1))
p_phylum_c1 <- ggplot(rel_c1_phylum_avg,
aes(x = Group, y = RelAbund, fill = Order_plot)) +
geom_bar(stat = "identity", color = "black", linewidth = 0.3) +
scale_fill_manual(values = phylum_col,
breaks = top_phylum_c1) +
scale_x_discrete(
limits = c("BALB/c_No_SPF_weaning", "Donor_SPF",
"BALB/c_No_Wild_weaning", "Donor_Wild"),
labels = c(
"BALB/c_No_SPF_weaning" = "BALB/c\n+ SPF FMT",
"Donor_SPF" = "Donor\nSPF",
"BALB/c_No_Wild_weaning" = "BALB/c\n+ Wild FMT",
"Donor_Wild" = "Donor\nWild"
)) +
theme_bw() +
theme(axis.text.x = element_text(size = 11),
axis.title.y = element_text(size = 13),
legend.text = element_text(size = 9),
legend.title = element_text(size = 10),
plot.title = element_text(size = 15),
plot.subtitle = element_text(size = 13)) +
labs(x = NULL, y = "Relative abundance (%)", fill = "Phylum",
title = "Relative Abundance at Phylum Level", subtitle = "BALB/c Mice at Weaning by FMT source, including donors")
p_phylum_c1
# ggsave("./outputs/comp1_relabund_phylum.jpeg", dpi = 300, height = 7, width = 10)
# ============================================================
# FAMILY NIVEAU
# ============================================================
ps_rel_c1_family <- ps_rel_c1
ps_rel_c1_family <- subset_taxa(ps_rel_c1_family,
!is.na(Family) & Family != "__" & Family != "")
ps_rel_c1_family <- prune_taxa(taxa_sums(ps_rel_c1_family) > 0, ps_rel_c1_family)
glom_c1_family <- gloomer(ps = ps_rel_c1_family, taxa_level = "Family", NArm = TRUE)
unique(tax_table(glom_c1_family)[, "Family"])
## Taxonomy Table: [35 taxa by 1 taxonomic ranks]:
## Family
## Eggerthellaceae "Eggerthellaceae"
## Staphylococcaceae "Staphylococcaceae"
## Enterococcaceae "Enterococcaceae"
## Lactobacillaceae "Lactobacillaceae"
## Streptococcaceae "Streptococcaceae"
## Christensenellaceae "Christensenellaceae"
## Acutalibacteraceae "Acutalibacteraceae"
## Butyricicoccaceae "Butyricicoccaceae"
## Clostridiaceae "Clostridiaceae"
## Eubacteriaceae "Eubacteriaceae"
## Eubacteriales Family XIII. Incertae Sedis "Eubacteriales Family XIII. Incertae Sedis"
## Oscillospiraceae "Oscillospiraceae"
## Peptococcaceae "Peptococcaceae"
## Pumilibacteraceae "Pumilibacteraceae"
## Anaerotignaceae "Anaerotignaceae"
## Lachnospiraceae "Lachnospiraceae"
## Peptostreptococcaceae "Peptostreptococcaceae"
## Coprobacillaceae "Coprobacillaceae"
## Erysipelotrichaceae "Erysipelotrichaceae"
## Bacteroidaceae "Bacteroidaceae"
## Muribaculaceae "Muribaculaceae"
## Odoribacteraceae "Odoribacteraceae"
## Prevotellaceae "Prevotellaceae"
## Rikenellaceae "Rikenellaceae"
## Tannerellaceae "Tannerellaceae"
## Helicobacteraceae "Helicobacteraceae"
## Candidatus Nanosyncoccacae "Candidatus Nanosyncoccacae"
## Mucispirillaceae "Mucispirillaceae"
## Elusimicrobiaceae "Elusimicrobiaceae"
## Fusobacteriaceae "Fusobacteriaceae"
## Anaeroplasmataceae "Anaeroplasmataceae"
## Sutterellaceae "Sutterellaceae"
## Enterobacteriaceae "Enterobacteriaceae"
## Desulfovibrionaceae "Desulfovibrionaceae"
## Akkermansiaceae "Akkermansiaceae"
rel_c1_family <- glom_c1_family %>%
psmelt() %>%
group_by(Sample) %>%
mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
ungroup()
top_family_c1 <- rel_c1_family %>%
group_by(OTU) %>% summarise(m = mean(RelAbund)) %>%
arrange(desc(m)) %>% pull(OTU)
top_family_c1_filt <- rel_c1_family %>%
group_by(OTU, Group) %>%
summarise(m = mean(RelAbund), .groups = "drop") %>%
group_by(OTU) %>%
summarise(max_m = max(m)) %>%
filter(max_m >= 1) %>%
arrange(desc(max_m)) %>%
pull(OTU)
rel_c1_family_avg <- rel_c1_family %>%
mutate(Order_plot = ifelse(OTU %in% top_family_c1_filt, OTU, "Other"),
Group = factor(Group, levels = c(
"BALB/c_No_SPF_weaning", "Donor_SPF",
"BALB/c_No_Wild_weaning", "Donor_Wild"
))) %>%
group_by(Sample, Group, Order_plot) %>%
summarise(RelAbund = sum(RelAbund), .groups = "drop") %>%
group_by(Group, Order_plot) %>%
summarise(RelAbund = mean(RelAbund), .groups = "drop") %>%
mutate(Order_plot = factor(Order_plot,
levels = c(top_family_c1_filt, "Other")))
family_col <- c(
"Lachnospiraceae" = "#9B7BAD", # lilla
"Muribaculaceae" = "#6B9BA4", # blågrøn
"Bacteroidaceae" = "#C47F7F", # støvet rød
"Lactobacillaceae" = "#C4936A", # varm brun
"Rikenellaceae" = "#3D7A8A", # mørk petrol
"Prevotellaceae" = "#E9C46A", # gul
"Oscillospiraceae" = "#6B4226", # chokolade
"Odoribacteraceae" = "#A23B72", # pink-lilla
"Helicobacteraceae" = "#80B918", # lime grøn
"Other" = "grey80"
)
p_family_c1 <- ggplot(rel_c1_family_avg,
aes(x = Group, y = RelAbund, fill = Order_plot)) +
geom_bar(stat = "identity", color = "black", linewidth = 0.3) +
scale_fill_manual(values = family_col,
breaks = c(top_family_c1_filt, "Other")) +
scale_x_discrete(
limits = c("BALB/c_No_SPF_weaning", "Donor_SPF",
"BALB/c_No_Wild_weaning", "Donor_Wild"),
labels = c(
"BALB/c_No_SPF_weaning" = "BALB/c\n+ SPF FMT",
"Donor_SPF" = "Donor\nSPF",
"BALB/c_No_Wild_weaning" = "BALB/c\n+ Wild FMT",
"Donor_Wild" = "Donor\nWild"
)) +
theme_bw() +
theme(axis.text.x = element_text(size = 11),
axis.title.y = element_text(size = 13),
legend.text = element_text(size = 9),
legend.title = element_text(size = 10),
plot.title = element_text(size = 15),
plot.subtitle = element_text(size = 13)) +
labs(x = NULL, y = "Relative abundance (%)", fill = "Family",
title = "Relative Abundance at Family Level", subtitle = "BALB/c Mice at Weaning by FMT source, including donors")
p_family_c1
# ggsave("./outputs/comp1_relabund_family.jpeg", dpi = 300, height = 7, width = 10)
# Ranger alle 35 familier fra flest til mindst
family_ranked_all <- rel_c1_family %>%
group_by(OTU) %>%
summarise(m = mean(RelAbund)) %>%
arrange(desc(m)) %>%
pull(OTU)
# Farvepalette til alle 35 familier
family_col_all <- c(
# Samme farver som family_col
"Lachnospiraceae" = "#9B7BAD",
"Muribaculaceae" = "#6B9BA4",
"Bacteroidaceae" = "#C47F7F",
"Lactobacillaceae" = "#C4936A",
"Rikenellaceae" = "#3D7A8A",
"Prevotellaceae" = "#E9C46A",
"Oscillospiraceae" = "#6B4226",
"Odoribacteraceae" = "#A23B72",
"Helicobacteraceae" = "#80B918",
"Eggerthellaceae" = "#2A9D8F",
"Staphylococcaceae" = "#E84855",
"Enterococcaceae" = "#3D5A80",
"Streptococcaceae" = "#F18F01",
"Christensenellaceae" = "#264653",
"Acutalibacteraceae" = "#E76F51",
"Butyricicoccaceae" = "#6D6875",
"Clostridiaceae" = "#1D3557",
"Eubacteriaceae" = "#F4A261",
"Eubacteriales Family XIII. Incertae Sedis"= "#457B9D",
"Peptococcaceae" = "#B5838D",
"Pumilibacteraceae" = "#A8DADC",
"Anaerotignaceae" = "#C77B3A",
"Peptostreptococcaceae" = "#5C8A4A",
"Coprobacillaceae" = "#7B4F8A",
"Erysipelotrichaceae" = "#D4A853",
"Tannerellaceae" = "#4A7B8A",
"Candidatus Nanosyncoccacae" = "#8A4A5C",
"Mucispirillaceae" = "#5C7B3A",
"Elusimicrobiaceae" = "#8A7B4A",
"Fusobacteriaceae" = "#3A5C7B",
"Anaeroplasmataceae" = "#7B8A4A",
"Sutterellaceae" = "#C45C8A",
"Enterobacteriaceae" = "#4A8A7B",
"Desulfovibrionaceae" = "#8A5C3A",
"Akkermansiaceae" = "#5C3A8A"
)
# Plot data
rel_c1_family_all <- rel_c1_family %>%
mutate(Order_plot = OTU,
Group = factor(Group, levels = c(
"BALB/c_No_SPF_weaning", "Donor_SPF",
"BALB/c_No_Wild_weaning", "Donor_Wild"
))) %>%
group_by(Sample, Group, Order_plot) %>%
summarise(RelAbund = sum(RelAbund), .groups = "drop") %>%
group_by(Group, Order_plot) %>%
summarise(RelAbund = mean(RelAbund), .groups = "drop") %>%
filter(RelAbund >= 0.01) %>%
mutate(Order_plot = factor(Order_plot, levels = family_ranked_all))
plot_c1_family_all <- ggplot(rel_c1_family_all, aes(x = Group, y = RelAbund, fill = Order_plot)) +
geom_bar(stat = "identity", color = "black", linewidth = 0.3) +
scale_fill_manual(values = family_col_all,
breaks = family_ranked_all) +
scale_x_discrete(
limits = c("BALB/c_No_SPF_weaning", "Donor_SPF",
"BALB/c_No_Wild_weaning", "Donor_Wild"),
labels = c(
"BALB/c_No_SPF_weaning" = "BALB/c\n+ SPF FMT",
"Donor_SPF" = "Donor\nSPF",
"BALB/c_No_Wild_weaning" = "BALB/c\n+ Wild FMT",
"Donor_Wild" = "Donor\nWild"
)) +
theme_bw() +
theme(axis.text.x = element_text(size = 11),
axis.title.y = element_text(size = 13),
legend.text = element_text(size = 9),
legend.title = element_text(size = 10),
plot.title = element_text(size = 15),
plot.subtitle = element_text(size = 13)) +
labs(x = NULL, y = "Relative abundance (%)", fill = "Family",
title = "Relative Abundance at Family Level", subtitle = "BALB/c Mice at Weaning by FMT source, including donors")
plot_c1_family_all
# ggsave("./outputs/comp1_relabund_family_all.jpeg", dpi = 300, height = 7, width = 14)
# ============================================================
# GENUS NIVEAU
# ============================================================
ps_rel_c1_genus <- ps_rel_c1
ps_rel_c1_genus <- subset_taxa(ps_rel_c1_genus,
!is.na(Genus) & Genus != "__" & Genus != "")
ps_rel_c1_genus <- prune_taxa(taxa_sums(ps_rel_c1_genus) > 0, ps_rel_c1_genus)
glom_c1_genus <- gloomer(ps = ps_rel_c1_genus, taxa_level = "Genus", NArm = TRUE)
unique(tax_table(glom_c1_genus)[, "Genus"])
## Taxonomy Table: [70 taxa by 1 taxonomic ranks]:
## Genus
## Adlercreutzia "Adlercreutzia"
## Staphylococcus "Staphylococcus"
## Enterococcus "Enterococcus"
## Lactobacillus "Lactobacillus"
## Ligilactobacillus "Ligilactobacillus"
## Limosilactobacillus "Limosilactobacillus"
## Lactococcus "Lactococcus"
## Streptococcus "Streptococcus"
## Candidatus Borkfalkia "Candidatus Borkfalkia"
## Christensenella "Christensenella"
## Acutalibacter "Acutalibacter"
## Clostridium "Clostridium"
## Eubacterium "Eubacterium"
## Zhenpiania "Zhenpiania"
## Anaerotruncus "Anaerotruncus"
## Angelakisella "Angelakisella"
## Hominilimicola "Hominilimicola"
## Neglectibacter "Neglectibacter"
## Vermiculatibacterium "Vermiculatibacterium"
## Dehalobacterium "Dehalobacterium"
## Pumilibacter "Pumilibacter"
## Anaerotignum "Anaerotignum"
## Acetatifactor "Acetatifactor"
## Anaerostipes "Anaerostipes"
## Blautia "Blautia"
## Enterocloster "Enterocloster"
## Fusimonas "Fusimonas"
## Jutongia "Jutongia"
## Lachnospira "Lachnospira"
## Mediterraneibacter "Mediterraneibacter"
## Parablautia "Parablautia"
## Petralouisia "Petralouisia"
## Roseburia "Roseburia"
## Schaedlerella "Schaedlerella"
## Sporofaciens "Sporofaciens"
## Tyzzerella "Tyzzerella"
## Clostridioides "Clostridioides"
## Coprobacillus "Coprobacillus"
## Thomasclavelia "Thomasclavelia"
## Amedibacillus "Amedibacillus"
## Faecalibaculum "Faecalibaculum"
## Stoquefichus "Stoquefichus"
## Bacteroides "Bacteroides"
## Phocaeicola "Phocaeicola"
## Duncaniella "Duncaniella"
## Heminiphilus "Heminiphilus"
## Muribaculum "Muribaculum"
## Paramuribaculum "Paramuribaculum"
## Sangeribacter "Sangeribacter"
## Odoribacter "Odoribacter"
## Palleniella "Palleniella"
## Paraprevotella "Paraprevotella"
## Segatella "Segatella"
## Xylanibacter "Xylanibacter"
## Alistipes "Alistipes"
## Rikenella "Rikenella"
## Parabacteroides "Parabacteroides"
## Helicobacter "Helicobacter"
## Nanosyncoccus "Nanosyncoccus"
## Mucispirillum "Mucispirillum"
## Elusimicrobium "Elusimicrobium"
## Fusobacterium "Fusobacterium"
## Anaeroplasma "Anaeroplasma"
## Parasutterella "Parasutterella"
## Escherichia "Escherichia"
## Klebsiella "Klebsiella"
## Bilophila "Bilophila"
## Desulfovibrio "Desulfovibrio"
## Taurinivorans "Taurinivorans"
## Akkermansia "Akkermansia"
rel_c1_genus <- glom_c1_genus %>%
psmelt() %>%
group_by(Sample) %>%
mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
ungroup()
top_genus_c1_filt <- rel_c1_genus %>%
group_by(OTU, Group) %>%
summarise(m = mean(RelAbund), .groups = "drop") %>%
group_by(OTU) %>%
summarise(max_m = max(m)) %>%
filter(max_m >= 1) %>%
arrange(desc(max_m)) %>%
pull(OTU)
genus_col <- c(
"Bacteroides" = "#6B9BA4",
"Alistipes" = "#9B7BAD",
"Lactobacillus" = "#C4936A",
"Schaedlerella" = "#8FAF7E",
"Limosilactobacillus" = "#C47F7F",
"Duncaniella" = "#3D7A8A",
"Xylanibacter" = "#E9C46A",
"Muribaculum" = "#6B4226",
"Ligilactobacillus" = "#A23B72",
"Odoribacter" = "#80B918",
"Helicobacter" = "#2A9D8F",
"Fusimonas" = "#E84855",
"Paraprevotella" = "#3D5A80",
"Anaerotignum" = "#F18F01",
"Paramuribaculum" = "#6D6875",
"Streptococcus" = "#E76F51",
"Other" = "grey80"
)
rel_c1_genus_avg <- rel_c1_genus %>%
mutate(Order_plot = ifelse(OTU %in% top_genus_c1_filt, OTU, "Other"),
Group = factor(Group, levels = c(
"BALB/c_No_SPF_weaning", "Donor_SPF",
"BALB/c_No_Wild_weaning", "Donor_Wild"
))) %>%
group_by(Sample, Group, Order_plot) %>%
summarise(RelAbund = sum(RelAbund), .groups = "drop") %>%
group_by(Group, Order_plot) %>%
summarise(RelAbund = mean(RelAbund), .groups = "drop") %>%
mutate(Order_plot = factor(Order_plot,
levels = c(top_genus_c1_filt, "Other")))
p_genus_c1 <- ggplot(rel_c1_genus_avg,
aes(x = Group, y = RelAbund, fill = Order_plot)) +
geom_bar(stat = "identity", color = "black", linewidth = 0.3) +
scale_fill_manual(values = genus_col,
breaks = c(top_genus_c1_filt, "Other")) +
scale_x_discrete(
limits = c("BALB/c_No_SPF_weaning", "Donor_SPF",
"BALB/c_No_Wild_weaning", "Donor_Wild"),
labels = c(
"BALB/c_No_SPF_weaning" = "BALB/c\n+ SPF FMT",
"Donor_SPF" = "Donor\nSPF",
"BALB/c_No_Wild_weaning" = "BALB/c\n+ Wild FMT",
"Donor_Wild" = "Donor\nWild"
)) +
theme_bw() +
theme(axis.text.x = element_text(size = 11),
axis.title.y = element_text(size = 13),
legend.text = element_text(size = 9),
legend.title = element_text(size = 10),
plot.title = element_text(size = 15),
plot.subtitle = element_text(size = 13)) +
labs(x = NULL, y = "Relative abundance (%)", fill = "Genus",
title = "Relative Abundance at Genus Level", subtitle = "BALB/c Mice at Weaning by FMT source, including donors")
p_genus_c1
# ggsave("./outputs/comp1_relabund_genus.jpeg", dpi = 300, height = 7, width = 12)
top_genus_c1_001 <- rel_c1_genus %>%
group_by(OTU, Group) %>%
summarise(m = mean(RelAbund), .groups = "drop") %>%
group_by(OTU) %>%
summarise(max_m = max(m)) %>%
filter(max_m >= 0.01) %>%
arrange(desc(max_m)) %>%
pull(OTU)
# Farvepalette til alle 58
genus_col_001 <- c(
# Behold genus_col farver til top 16
"Bacteroides" = "#6B9BA4",
"Alistipes" = "#9B7BAD",
"Lactobacillus" = "#C4936A",
"Schaedlerella" = "#8FAF7E",
"Limosilactobacillus" = "#C47F7F",
"Duncaniella" = "#3D7A8A",
"Xylanibacter" = "#E9C46A",
"Muribaculum" = "#6B4226",
"Ligilactobacillus" = "#A23B72",
"Odoribacter" = "#80B918",
"Helicobacter" = "#2A9D8F",
"Fusimonas" = "#E84855",
"Paraprevotella" = "#3D5A80",
"Anaerotignum" = "#F18F01",
"Paramuribaculum" = "#6D6875",
"Streptococcus" = "#E76F51",
"Desulfovibrio" = "#A8DADC",
"Acetatifactor" = "#457B9D",
"Eubacterium" = "#B5838D",
"Akkermansia" = "#C77B3A",
"Candidatus Borkfalkia" = "#5C8A4A",
"Heminiphilus" = "#7B4F8A",
"Mucispirillum" = "#D4A853",
"Faecalibaculum" = "#4A7B8A",
"Acutalibacter" = "#8A4A5C",
"Bilophila" = "#5C7B3A",
"Pumilibacter" = "#8A7B4A",
"Clostridium" = "#3A5C7B",
"Sporofaciens" = "#7B8A4A",
"Adlercreutzia" = "#C45C8A",
"Blautia" = "#4A8A7B",
"Sangeribacter" = "#8A5C3A",
"Anaeroplasma" = "#5C3A8A",
"Nanosyncoccus" = "#2E86AB",
"Taurinivorans" = "#C73E1D",
"Mediterraneibacter" = "#3BB273",
"Angelakisella" = "#7B2D8B",
"Anaerotruncus" = "#6B8E23",
"Dehalobacterium" = "#C49A6C",
"Zhenpiania" = "#8B6914",
"Amedibacillus" = "#2C4770",
"Staphylococcus" = "#C0392B",
"Parablautia" = "#16A085",
"Vermiculatibacterium" = "#8E44AD",
"Parasutterella" = "#E67E22",
"Phocaeicola" = "#1A5276",
"Elusimicrobium" = "#7D6608",
"Neglectibacter" = "#4A235A",
"Jutongia" = "#5CB87A",
"Stoquefichus" = "#C4845A",
"Prevotellamassilia" = "#5A8AAA",
"Thomasclavelia" = "#A8A85A",
"Tyzzerella" = "#5AA88A",
"Parabacteroides" = "#A85A8A",
"Enterocloster" = "#8AA85A",
"Roseburia" = "#5A6AA8",
"Enterococcus" = "#A8805A",
"Petralouisia" = "#6AA85A"
)
# Plot data
rel_c1_genus_001 <- rel_c1_genus %>%
mutate(Order_plot = OTU, # ingen "Other" — brug OTU direkte
Group = factor(Group, levels = c(
"BALB/c_No_SPF_weaning", "Donor_SPF",
"BALB/c_No_Wild_weaning", "Donor_Wild"
))) %>%
group_by(Sample, Group, Order_plot) %>%
summarise(RelAbund = sum(RelAbund), .groups = "drop") %>%
group_by(Group, Order_plot) %>%
summarise(RelAbund = mean(RelAbund), .groups = "drop") %>%
filter(RelAbund >= 0.01) %>%
mutate(Order_plot = factor(Order_plot,
levels = top_genus_c1_001)) # ingen "Other" i levels
p_genus_c1_all <- ggplot(rel_c1_genus_001, aes(x = Group, y = RelAbund, fill = Order_plot)) +
geom_bar(stat = "identity", color = "black", linewidth = 0.3) +
scale_fill_manual(values = genus_col_001, breaks = top_genus_c1_001) +
scale_x_discrete(
limits = c("BALB/c_No_SPF_weaning", "Donor_SPF",
"BALB/c_No_Wild_weaning", "Donor_Wild"),
labels = c(
"BALB/c_No_SPF_weaning" = "BALB/c\n+ SPF FMT",
"Donor_SPF" = "Donor\nSPF",
"BALB/c_No_Wild_weaning" = "BALB/c\n+ Wild FMT",
"Donor_Wild" = "Donor\nWild"
)) +
theme_bw() +
theme(axis.text.x = element_text(size = 11),
axis.title.y = element_text(size = 13),
legend.text = element_text(size = 9),
legend.title = element_text(size = 10),
plot.title = element_text(size = 15),
plot.subtitle = element_text(size = 13)) +
labs(x = NULL, y = "Relative abundance (%)", fill = "Genus",
title = "Relative Abundance at Genus Level", subtitle = "BALB/c Mice at Weaning by FMT source, including donors")
p_genus_c1_all
# "Genera with mean relative abundance ≥ 0.01% shown" REMEBER!!!!
# ggsave("./outputs/comp1_relabund_genus_all.jpeg", dpi = 300, height = 7, width = 16)
rel_c1_genus_avg %>%
arrange(Group, desc(RelAbund)) %>%
print(n = Inf)
## # A tibble: 92 × 3
## Group Order_plot RelAbund
## <fct> <fct> <dbl>
## 1 BALB/c_No_SPF_weaning Alistipes 17.2
## 2 BALB/c_No_SPF_weaning Lactobacillus 16.8
## 3 BALB/c_No_SPF_weaning Limosilactobacillus 12.7
## 4 BALB/c_No_SPF_weaning Schaedlerella 9.38
## 5 BALB/c_No_SPF_weaning Duncaniella 8.30
## 6 BALB/c_No_SPF_weaning Other 5.95
## 7 BALB/c_No_SPF_weaning Bacteroides 5.60
## 8 BALB/c_No_SPF_weaning Odoribacter 5.19
## 9 BALB/c_No_SPF_weaning Anaerotignum 3.66
## 10 BALB/c_No_SPF_weaning Ligilactobacillus 3.25
## 11 BALB/c_No_SPF_weaning Paramuribaculum 2.80
## 12 BALB/c_No_SPF_weaning Fusimonas 2.60
## 13 BALB/c_No_SPF_weaning Muribaculum 1.89
## 14 BALB/c_No_SPF_weaning Xylanibacter 1.47
## 15 BALB/c_No_SPF_weaning Mucispirillum 1.15
## 16 BALB/c_No_SPF_weaning Desulfovibrio 1.03
## 17 BALB/c_No_SPF_weaning Acetatifactor 0.834
## 18 BALB/c_No_SPF_weaning Heminiphilus 0.104
## 19 BALB/c_No_SPF_weaning Phocaeicola 0.0732
## 20 BALB/c_No_SPF_weaning Streptococcus 0.0429
## 21 BALB/c_No_SPF_weaning Akkermansia 0.0117
## 22 BALB/c_No_SPF_weaning Helicobacter 0
## 23 BALB/c_No_SPF_weaning Paraprevotella 0
## 24 Donor_SPF Muribaculum 20.3
## 25 Donor_SPF Duncaniella 15.5
## 26 Donor_SPF Bacteroides 11.4
## 27 Donor_SPF Lactobacillus 9.96
## 28 Donor_SPF Fusimonas 8.01
## 29 Donor_SPF Xylanibacter 5.74
## 30 Donor_SPF Paramuribaculum 5.63
## 31 Donor_SPF Other 5.24
## 32 Donor_SPF Limosilactobacillus 4.45
## 33 Donor_SPF Alistipes 3.79
## 34 Donor_SPF Heminiphilus 2.46
## 35 Donor_SPF Schaedlerella 1.25
## 36 Donor_SPF Acetatifactor 1.02
## 37 Donor_SPF Phocaeicola 1.02
## 38 Donor_SPF Akkermansia 0.977
## 39 Donor_SPF Ligilactobacillus 0.899
## 40 Donor_SPF Odoribacter 0.899
## 41 Donor_SPF Desulfovibrio 0.742
## 42 Donor_SPF Anaerotignum 0.313
## 43 Donor_SPF Mucispirillum 0.234
## 44 Donor_SPF Streptococcus 0.156
## 45 Donor_SPF Helicobacter 0.0391
## 46 Donor_SPF Paraprevotella 0.0391
## 47 BALB/c_No_Wild_weaning Bacteroides 44.3
## 48 BALB/c_No_Wild_weaning Alistipes 6.92
## 49 BALB/c_No_Wild_weaning Xylanibacter 6.89
## 50 BALB/c_No_Wild_weaning Schaedlerella 6.65
## 51 BALB/c_No_Wild_weaning Other 5.57
## 52 BALB/c_No_Wild_weaning Helicobacter 5.29
## 53 BALB/c_No_Wild_weaning Lactobacillus 4.58
## 54 BALB/c_No_Wild_weaning Duncaniella 3.91
## 55 BALB/c_No_Wild_weaning Paraprevotella 3.74
## 56 BALB/c_No_Wild_weaning Muribaculum 2.75
## 57 BALB/c_No_Wild_weaning Streptococcus 2.09
## 58 BALB/c_No_Wild_weaning Fusimonas 1.36
## 59 BALB/c_No_Wild_weaning Akkermansia 1.27
## 60 BALB/c_No_Wild_weaning Ligilactobacillus 1.05
## 61 BALB/c_No_Wild_weaning Heminiphilus 1.03
## 62 BALB/c_No_Wild_weaning Limosilactobacillus 0.957
## 63 BALB/c_No_Wild_weaning Desulfovibrio 0.681
## 64 BALB/c_No_Wild_weaning Acetatifactor 0.661
## 65 BALB/c_No_Wild_weaning Paramuribaculum 0.257
## 66 BALB/c_No_Wild_weaning Odoribacter 0.0490
## 67 BALB/c_No_Wild_weaning Mucispirillum 0.0175
## 68 BALB/c_No_Wild_weaning Anaerotignum 0.0127
## 69 BALB/c_No_Wild_weaning Phocaeicola 0
## 70 Donor_Wild Ligilactobacillus 37.3
## 71 Donor_Wild Lactobacillus 21.1
## 72 Donor_Wild Bacteroides 9.55
## 73 Donor_Wild Limosilactobacillus 8.73
## 74 Donor_Wild Duncaniella 8.23
## 75 Donor_Wild Muribaculum 4.61
## 76 Donor_Wild Streptococcus 4.13
## 77 Donor_Wild Other 3.66
## 78 Donor_Wild Xylanibacter 0.962
## 79 Donor_Wild Desulfovibrio 0.629
## 80 Donor_Wild Schaedlerella 0.315
## 81 Donor_Wild Paraprevotella 0.278
## 82 Donor_Wild Alistipes 0.222
## 83 Donor_Wild Heminiphilus 0.0925
## 84 Donor_Wild Paramuribaculum 0.0925
## 85 Donor_Wild Acetatifactor 0.0370
## 86 Donor_Wild Fusimonas 0.0370
## 87 Donor_Wild Helicobacter 0.0370
## 88 Donor_Wild Akkermansia 0
## 89 Donor_Wild Anaerotignum 0
## 90 Donor_Wild Mucispirillum 0
## 91 Donor_Wild Odoribacter 0
## 92 Donor_Wild Phocaeicola 0
# Tjek Helicobacter relativ abundans per gruppe
rel_c1_genus %>%
filter(OTU == "Helicobacter") %>%
group_by(Group) %>%
summarise(
n = n(),
mean_RelAbund = round(mean(RelAbund), 3),
min_RelAbund = round(min(RelAbund), 3),
max_RelAbund = round(max(RelAbund), 3),
n_present = sum(RelAbund > 0)
) %>%
arrange(Group)
## # A tibble: 4 × 6
## Group n mean_RelAbund min_RelAbund max_RelAbund n_present
## <fct> <int> <dbl> <dbl> <dbl> <int>
## 1 BALB/c_No_SPF_weaning 19 0 0 0 0
## 2 BALB/c_No_Wild_weaning 20 5.30 0.245 20.1 20
## 3 Donor_SPF 1 0.039 0.039 0.039 1
## 4 Donor_Wild 1 0.037 0.037 0.037 1
# BALB/c + SPF FMT (n=0) ikke i nogen
# BALB/c + Wild FMT (n=20): i alle 20 prøver
# tilstede i begge donor
# Tjek hvilke Helicobacter species der er i Wild FMT gruppen
ps_rel %>%
subset_samples(Group == "BALB/c_No_Wild_weaning") %>%
subset_taxa(Genus == "Helicobacter") %>%
psmelt() %>%
group_by(OTU, Species) %>%
summarise(mean_RelAbund = round(mean(Abundance /
ave(Abundance, Sample, FUN = sum) * 100), 3),
n_present = sum(Abundance > 0),
.groups = "drop") %>%
filter(mean_RelAbund > 0) %>%
arrange(desc(mean_RelAbund))
## # A tibble: 2 × 4
## OTU Species mean_RelAbund n_present
## <chr> <chr> <dbl> <int>
## 1 K__Bacteria;P__Campylobacterota;C__Epsilonpro… Helico… 100 20
## 2 K__Bacteria;P__Campylobacterota;C__Epsilonpro… __ 100 20
ps_rel %>%
subset_samples(Study == "Donor_inoculum_SPF(WP1Amix)") %>%
subset_taxa(Genus == "Helicobacter") %>%
psmelt() %>%
filter(Abundance > 0) %>%
select(OTU, Species, Abundance)
## OTU
## 1 K__Bacteria;P__Campylobacterota;C__Epsilonproteobacteria;O__Campylobacterales;F__Helicobacteraceae;G__Helicobacter;S__Helicobacter ganmani strain CMRI H02
## Species Abundance
## 1 Helicobacter ganmani strain CMRI H02 0.01264063
ps_rel %>%
subset_samples(Study == "Donor_inoculum_Wild(Zahir)") %>%
subset_taxa(Genus == "Helicobacter") %>%
psmelt() %>%
filter(Abundance > 0) %>%
select(OTU, Species, Abundance)
## OTU
## 1 K__Bacteria;P__Campylobacterota;C__Epsilonproteobacteria;O__Campylobacterales;F__Helicobacteraceae;G__Helicobacter;__
## 2 K__Bacteria;P__Campylobacterota;C__Epsilonproteobacteria;O__Campylobacterales;F__Helicobacteraceae;G__Helicobacter;S__Helicobacter ganmani strain CMRI H02
## Species Abundance
## 1 __ 0.01255966
## 2 Helicobacter ganmani strain CMRI H02 0.01255966
# Use non-rarefied (pst_original) for DESeq2
ps_c1_deseq <- subset_samples(pst_original,
sample_data(pst_original)$Group %in% c("BALB/c_No_SPF_weaning",
"BALB/c_No_Wild_weaning"))
# Remove zero-count taxa
ps_c1_deseq <- prune_taxa(taxa_sums(ps_c1_deseq) > 0, ps_c1_deseq)
# Prune unassigned ranks before glomming — prevents duplicate taxa_names
ps_c1_deseq <- subset_taxa(ps_c1_deseq,
!is.na(Genus) & Genus != "" & Genus != "__" &
!is.na(Family) & Family != "" & Family != "__" &
!is.na(Order) & Order != "" & Order != "__")
ps_c1_deseq <- prune_taxa(taxa_sums(ps_c1_deseq) > 0, ps_c1_deseq)
# Agglomerate to genus level
ps_c1_deseq_genus <- tax_glom(ps_c1_deseq, taxrank = "Genus", NArm = TRUE)
# Make genus names unique before assigning as taxa names
genus_names <- tax_table(ps_c1_deseq_genus)[, "Genus"]
taxa_names(ps_c1_deseq_genus) <- make.unique(as.character(genus_names))
library(DESeq2)
library(DESeq2)
# ============================================================
# DIFFERENTIAL ABUNDANCE — DESeq2
# Comparison 1: Wild FMT vs SPF FMT at weaning
# ============================================================
# Forbered phyloseq objekt
m_ps <- ps_c1_deseq_genus
sample_data(m_ps)$Group <- factor(sample_data(m_ps)$Group,
levels = c("BALB/c_No_SPF_weaning",
"BALB/c_No_Wild_weaning"))
set.seed(123)
# Lav DESeq2 objekt og kør
m_dds <- phyloseq_to_deseq2(m_ps, ~ Group)
m_dds <- DESeq(m_dds)
# Tjek levels og resultatnavne
levels(colData(m_dds)$Group)
resultsNames(m_dds)
# Hent resultater
Wild_vs_SPF <- results(m_dds, contrast = c("Group",
"BALB/c_No_Wild_weaning",
"BALB/c_No_SPF_weaning"))
# Konverter til data frame
df_Wild_vs_SPF <- as.data.frame(Wild_vs_SPF)
df_Wild_vs_SPF$taxon <- rownames(df_Wild_vs_SPF)
# Signifikante taxa
sig_Wild_vs_SPF <- subset(df_Wild_vs_SPF, !is.na(padj) & padj < 0.05)
cat("Signifikante taxa:", nrow(sig_Wild_vs_SPF), "\n")
sig_Wild_vs_SPF %>% arrange(log2FoldChange)
# Tilføj diffexpressed kolonne
df_Wild_vs_SPF <- df_Wild_vs_SPF %>%
mutate(diffexpressed = case_when(
log2FoldChange > 0 & padj < 0.05 ~ "Higher in Wild FMT",
log2FoldChange < 0 & padj < 0.05 ~ "Higher in SPF FMT",
TRUE ~ "ns"
),
diffexpressed = factor(diffexpressed,
levels = c("Higher in Wild FMT",
"Higher in SPF FMT",
"ns")))
# ============================================================
# VOLCANO PLOT
# ============================================================
library(ggtext)
library(ggrepel)
deseq_plot_labelled <- ggplot(df_Wild_vs_SPF,
aes(x = log2FoldChange,
y = -log10(padj),
col = diffexpressed)) +
geom_point(size = 2.5, alpha = 0.8) +
geom_vline(xintercept = 0, lty = 2, color = "grey50") +
geom_hline(yintercept = -log10(0.05), lty = 2, color = "grey50") +
geom_label_repel(
data = df_Wild_vs_SPF %>% filter(padj < 0.05),
aes(label = taxon),
size = 3,
max.overlaps = 20,
fontface = "italic",
label.r = 0.10
) +
scale_color_manual(values = c(
"Higher in Wild FMT" = "#3E5871",
"Higher in SPF FMT" = "#8FBF8F",
"ns" = "grey70"
)) +
theme_bw() +
theme(legend.title = element_text(),
axis.title = element_text(),
plot.title = element_markdown(size = 15),
plot.subtitle = element_markdown(size = 13)) +
labs(title = "Differential Abundance at Genus Level",
subtitle = "BALB/c Mice at Weaning: SPF vs Wild FMT",
y = "-log10(p-value)",
x = "log2 Fold Change",
col = NULL)
deseq_plot_labelled
# ggsave("./outputs/comp1_volcano_labelled.jpeg", plot = deseq_plot_labelled, width = 10, height = 6, dpi = 300)
# ============================================================
# LOG2 FOLD CHANGE PLOT
# ============================================================
p_lfc_c1 <- sig_Wild_vs_SPF %>%
arrange(log2FoldChange) %>%
mutate(taxon = factor(taxon, levels = taxon),
direction = ifelse(log2FoldChange > 0,
"Higher in Wild FMT",
"Higher in SPF FMT")) %>%
ggplot(aes(x = log2FoldChange, y = taxon, fill = direction)) +
geom_col(color = "white", linewidth = 0.3) +
geom_errorbarh(aes(xmin = log2FoldChange - lfcSE,
xmax = log2FoldChange + lfcSE),
height = 0.3, color = "grey30") +
geom_vline(xintercept = 0, lty = 2, color = "grey50") +
scale_fill_manual(values = c(
"Higher in Wild FMT" = "#3E5871",
"Higher in SPF FMT" = "#8FBF8F"
)) +
theme_bw() +
theme(legend.title = element_text(size = 11),
legend.text = element_text(size = 10),
axis.title = element_text(size = 12),
axis.text.y = element_text(face = "italic", size = 10),
axis.text.x = element_text(size = 10),
plot.title = element_markdown(size = 15),
plot.subtitle = element_text(size = 13)) +
labs(x = "log2 Fold Change",
y = NULL,
fill = NULL,
title = "Differential Abundance at Genus Level",
subtitle = "BALB/c Mice at Weaning: SPF vs Wild FMT",
caption = "Benjamini-Hochberg correction, only taxa with adjusted p < 0.05 shown")
p_lfc_c1
# ggsave("./outputs/comp1_lfc.jpeg", plot = p_lfc_c1, dpi = 300, height = 7, width = 10)
sig_Wild_vs_SPF %>%
select(taxon, log2FoldChange, padj) %>%
arrange(desc(log2FoldChange))
write.csv(
sig_Wild_vs_SPF %>%
select(taxon, log2FoldChange, padj) %>%
arrange(desc(log2FoldChange)),
file = "./outputs/comp1_deseq2_significant_taxa.csv",
row.names = FALSE
)
write.csv(
sig_Wild_vs_SPF %>%
select(taxon, log2FoldChange, lfcSE, padj) %>%
arrange(desc(log2FoldChange)),
file = "./outputs/comp1_deseq2_significant_taxa_full.csv",
row.names = FALSE
)
comp2_groups <- c("BALB/c_Yes_SPF_Term", "BALB/c_No_Wild_colon",
"Wildling", "Pet Shop", "Wild")
ps_c2 <- subset_samples(ps_rar, sample_data(ps_rar)$Group %in% comp2_groups)
alpha_c2 <- alpha_df %>%
filter(Group %in% comp2_groups) %>%
mutate(Group = factor(Group, levels = comp2_groups))
# Chao1
lm_chao1_c2 <- lm(Chao1 ~ Group, data = alpha_c2)
par(mfrow = c(1, 2))
qqnorm(rstandard(lm_chao1_c2), main = "Q-Q plot — Chao1")
abline(0, 1, lty = 2)
plot(predict(lm_chao1_c2), rstandard(lm_chao1_c2),
main = "Residuals vs Fitted — Chao1",
xlab = "Fitted values", ylab = "Standardized residuals")
abline(h = 0, lty = 2)
par(mfrow = c(1, 1))
# Shannon
lm_shannon_c2 <- lm(Shannon ~ Group, data = alpha_c2)
par(mfrow = c(1, 2))
qqnorm(rstandard(lm_shannon_c2), main = "Q-Q plot — Shannon")
abline(0, 1, lty = 2)
plot(predict(lm_shannon_c2), rstandard(lm_shannon_c2),
main = "Residuals vs Fitted — Shannon",
xlab = "Fitted values", ylab = "Standardized residuals")
abline(h = 0, lty = 2)
par(mfrow = c(1, 1))
# Tilføj Sex til alpha_c2
alpha_c2 <- alpha_df %>%
filter(Group %in% comp2_groups) %>%
mutate(Group = factor(Group, levels = comp2_groups),
Sex = factor(Sex))
alpha_c2 %>%
group_by(Group, Sex) %>%
summarise(n = n(), .groups = "drop") %>%
pivot_wider(names_from = Sex, values_from = n, values_fill = 0)
## # A tibble: 5 × 3
## Group Female Male
## <fct> <int> <int>
## 1 BALB/c_Yes_SPF_Term 7 7
## 2 BALB/c_No_Wild_colon 8 6
## 3 Wildling 5 5
## 4 Pet Shop 6 7
## 5 Wild 3 1
# ============================================================
# CHAO1
# ============================================================
# One-way ANOVA Group + Sex (with sex as covariable)
anova_chao1_c2 <- aov(Chao1 ~ Group + Sex, data = alpha_c2)
summary(anova_chao1_c2)
## Df Sum Sq Mean Sq F value Pr(>F)
## Group 4 22315 5579 11.735 9.21e-07 ***
## Sex 1 666 666 1.401 0.242
## Residuals 49 23295 475
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(anova_chao1_c2)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Chao1 ~ Group + Sex, data = alpha_c2)
##
## $Group
## diff lwr upr
## BALB/c_No_Wild_colon-BALB/c_Yes_SPF_Term 21.119859 -2.2182178 44.45794
## Wildling-BALB/c_Yes_SPF_Term 47.064537 21.4989550 72.63012
## Pet Shop-BALB/c_Yes_SPF_Term 50.895025 27.1123732 74.67768
## Wild-BALB/c_Yes_SPF_Term 35.828647 0.8215319 70.83576
## Wildling-BALB/c_No_Wild_colon 25.944678 0.3790959 51.51026
## Pet Shop-BALB/c_No_Wild_colon 29.775166 5.9925141 53.55782
## Wild-BALB/c_No_Wild_colon 14.708788 -20.2983272 49.71590
## Pet Shop-Wildling 3.830487 -22.1415684 29.80254
## Wild-Wildling -11.235890 -47.7657586 25.29398
## Wild-Pet Shop -15.066378 -50.3714313 20.23868
## p adj
## BALB/c_No_Wild_colon-BALB/c_Yes_SPF_Term 0.0934756
## Wildling-BALB/c_Yes_SPF_Term 0.0000355
## Pet Shop-BALB/c_Yes_SPF_Term 0.0000018
## Wild-BALB/c_Yes_SPF_Term 0.0424754
## Wildling-BALB/c_No_Wild_colon 0.0451209
## Pet Shop-BALB/c_No_Wild_colon 0.0074479
## Wild-BALB/c_No_Wild_colon 0.7570628
## Pet Shop-Wildling 0.9934237
## Wild-Wildling 0.9061034
## Wild-Pet Shop 0.7464684
##
## $Sex
## diff lwr upr p adj
## Male-Female 6.893747 -4.940206 18.7277 0.2473985
# Hvis Sex ikke signifikant — kør Group only
anova_chao1_c2_group <- aov(Chao1 ~ Group, data = alpha_c2)
summary(anova_chao1_c2_group)
## Df Sum Sq Mean Sq F value Pr(>F)
## Group 4 22315 5579 11.64 9.32e-07 ***
## Residuals 50 23961 479
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(anova_chao1_c2_group)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Chao1 ~ Group, data = alpha_c2)
##
## $Group
## diff lwr upr
## BALB/c_No_Wild_colon-BALB/c_Yes_SPF_Term 21.119859 -2.2939786 44.53370
## Wildling-BALB/c_Yes_SPF_Term 47.064537 21.4159632 72.71311
## Pet Shop-BALB/c_Yes_SPF_Term 50.895025 27.0351692 74.75488
## Wild-BALB/c_Yes_SPF_Term 35.828647 0.7078908 70.94940
## Wildling-BALB/c_No_Wild_colon 25.944678 0.2961041 51.59325
## Pet Shop-BALB/c_No_Wild_colon 29.775166 5.9153101 53.63502
## Wild-BALB/c_No_Wild_colon 14.708788 -20.4119683 49.82954
## Pet Shop-Wildling 3.830487 -22.2258797 29.88685
## Wild-Wildling -11.235890 -47.8843429 25.41256
## Wild-Pet Shop -15.066378 -50.4860395 20.35328
## p adj
## BALB/c_No_Wild_colon-BALB/c_Yes_SPF_Term 0.0952907
## Wildling-BALB/c_Yes_SPF_Term 0.0000365
## Pet Shop-BALB/c_Yes_SPF_Term 0.0000019
## Wild-BALB/c_Yes_SPF_Term 0.0434624
## Wildling-BALB/c_No_Wild_colon 0.0461575
## Pet Shop-BALB/c_No_Wild_colon 0.0076633
## Wild-BALB/c_No_Wild_colon 0.7597369
## Pet Shop-Wildling 0.9935277
## Wild-Wildling 0.9073589
## Wild-Pet Shop 0.7492219
# ============================================================
# SHANNON
# ============================================================
# One-way ANOVA Group + Sex (with sex as covariable)
anova_shannon_c2 <- aov(Shannon ~ Group + Sex, data = alpha_c2)
summary(anova_shannon_c2)
## Df Sum Sq Mean Sq F value Pr(>F)
## Group 4 5.780 1.4450 9.145 1.34e-05 ***
## Sex 1 0.177 0.1774 1.122 0.295
## Residuals 49 7.743 0.1580
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(anova_shannon_c2)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Shannon ~ Group + Sex, data = alpha_c2)
##
## $Group
## diff lwr upr
## BALB/c_No_Wild_colon-BALB/c_Yes_SPF_Term 0.7560324 0.3305450 1.1815199
## Wildling-BALB/c_Yes_SPF_Term 0.8575056 0.3914074 1.3236037
## Pet Shop-BALB/c_Yes_SPF_Term 0.5620502 0.1284575 0.9956429
## Wild-BALB/c_Yes_SPF_Term 0.4000831 -0.2381481 1.0383142
## Wildling-BALB/c_No_Wild_colon 0.1014731 -0.3646250 0.5675713
## Pet Shop-BALB/c_No_Wild_colon -0.1939822 -0.6275749 0.2396104
## Wild-BALB/c_No_Wild_colon -0.3559494 -0.9941805 0.2822818
## Pet Shop-Wildling -0.2954554 -0.7689641 0.1780533
## Wild-Wildling -0.4574225 -1.1234157 0.2085707
## Wild-Pet Shop -0.1619671 -0.8056301 0.4816959
## p adj
## BALB/c_No_Wild_colon-BALB/c_Yes_SPF_Term 0.0000660
## Wildling-BALB/c_Yes_SPF_Term 0.0000359
## Pet Shop-BALB/c_Yes_SPF_Term 0.0051684
## Wild-BALB/c_Yes_SPF_Term 0.3993303
## Wildling-BALB/c_No_Wild_colon 0.9717740
## Pet Shop-BALB/c_No_Wild_colon 0.7123056
## Wild-BALB/c_No_Wild_colon 0.5174039
## Pet Shop-Wildling 0.4040356
## Wild-Wildling 0.3080015
## Wild-Pet Shop 0.9526340
##
## $Sex
## diff lwr upr p adj
## Male-Female 0.1124947 -0.1032557 0.328245 0.2998647
# Hvis Sex ikke signifikant — kør Group only
anova_shannon_c2_group <- aov(Shannon ~ Group, data = alpha_c2)
summary(anova_shannon_c2_group)
## Df Sum Sq Mean Sq F value Pr(>F)
## Group 4 5.78 1.4450 9.122 1.3e-05 ***
## Residuals 50 7.92 0.1584
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(anova_shannon_c2_group)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Shannon ~ Group, data = alpha_c2)
##
## $Group
## diff lwr upr
## BALB/c_No_Wild_colon-BALB/c_Yes_SPF_Term 0.7560324 0.3303453 1.1817196
## Wildling-BALB/c_Yes_SPF_Term 0.8575056 0.3911886 1.3238225
## Pet Shop-BALB/c_Yes_SPF_Term 0.5620502 0.1282540 0.9958464
## Wild-BALB/c_Yes_SPF_Term 0.4000831 -0.2384477 1.0386138
## Wildling-BALB/c_No_Wild_colon 0.1014731 -0.3648438 0.5677901
## Pet Shop-BALB/c_No_Wild_colon -0.1939822 -0.6277785 0.2398140
## Wild-BALB/c_No_Wild_colon -0.3559494 -0.9944801 0.2825814
## Pet Shop-Wildling -0.2954554 -0.7691864 0.1782756
## Wild-Wildling -0.4574225 -1.1237283 0.2088833
## Wild-Pet Shop -0.1619671 -0.8059323 0.4819980
## p adj
## BALB/c_No_Wild_colon-BALB/c_Yes_SPF_Term 0.0000648
## Wildling-BALB/c_Yes_SPF_Term 0.0000351
## Pet Shop-BALB/c_Yes_SPF_Term 0.0051648
## Wild-BALB/c_Yes_SPF_Term 0.4003591
## Wildling-BALB/c_No_Wild_colon 0.9719137
## Pet Shop-BALB/c_No_Wild_colon 0.7132132
## Wild-BALB/c_No_Wild_colon 0.5184818
## Pet Shop-Wildling 0.4050686
## Wild-Wildling 0.3089116
## Wild-Pet Shop 0.9528579
library(ggpubr)
library(ggh4x)
long_c2 <- melt(alpha_c2)
long_c2 <- long_c2[long_c2$variable %in% c("Chao1", "Shannon"), ]
long_c2$variable <- factor(long_c2$variable, levels = c("Chao1", "Shannon"))
fill_palette_c2 <- c(
"BALB/c_No_Wild_colon" = "#3E5871", # dusty navy
"BALB/c_Yes_SPF_Term" = "#4E8A4E", # dark green
"Wildling" = "#8B6F47", # brown
"Pet Shop" = "#C9873E", # orange
"Wild" = "#6B2737" # deep muted burgundy
)
# Opdater y-akse limits via coord_cartesian eller scale
alpha_p_c2 <- ggplot(long_c2, aes(x = Group, y = value)) +
geom_violin(aes(fill = Group), trim = FALSE, alpha = 0.7) +
geom_boxplot(width = 0.10, outlier.alpha = 0) +
geom_jitter(aes(fill = Group, shape = Sex), alpha = 1, width = 0.2,
stroke = 1, size = 2) +
facet_wrap(~variable, scales = "free_y") +
theme_bw() +
scale_fill_manual(values = fill_palette_c2) +
scale_shape_manual(values = c("Female" = 21, "Male" = 24),
na.translate = FALSE) +
scale_x_discrete(labels = c(
"BALB/c_Yes_SPF_Term" = "BALB/c\nImmunized\n+ SPF FMT",
"BALB/c_No_Wild_colon" = "BALB/c\n+ Wild FMT",
"Wildling" = "Wildling",
"Pet Shop" = "Pet Shop",
"Wild" = "Wild"
)) +
guides(fill = "none",
shape = guide_legend(title = "Sex")) +
theme(axis.text.x = element_text(angle = 25, hjust = 1, size = 11),
axis.title.y = element_text(size = 13),
strip.text.x = element_text(size = 13),
plot.title = element_markdown(size = 15)) +
labs(x = NULL, y = "Alpha diversity",
title = "Alpha diversity") +
facet_wrap(~variable, scales = "free_y") +
# Sæt y-akse limits per facet via en hjælpefunktion
ggh4x::facetted_pos_scales(y = list(
variable == "Chao1" ~ scale_y_continuous(limits = c(NA, 275), breaks = c(25, 50, 75, 100, 125, 150, 175, 200, 225, 250, 275)),
variable == "Shannon" ~ scale_y_continuous(limits = c(NA, 4.0), breaks = c(0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4))
))
# Signifikansbrackets – y.position spredt ud over den nye plads
sig_chao1 <- data.frame(
variable = "Chao1",
group1 = c("BALB/c_Yes_SPF_Term", "BALB/c_Yes_SPF_Term",
"BALB/c_Yes_SPF_Term", "BALB/c_No_Wild_colon",
"BALB/c_No_Wild_colon"),
group2 = c("Wildling", "Pet Shop",
"Wild", "Wildling",
"Pet Shop"),
y.position = c(261, 268, 275, 247, 254),
label = c("***", "***", "*", "*", "**")
)
sig_shannon <- data.frame(
variable = "Shannon",
group1 = c("BALB/c_Yes_SPF_Term", "BALB/c_Yes_SPF_Term",
"BALB/c_Yes_SPF_Term"),
group2 = c("BALB/c_No_Wild_colon", "Wildling", "Pet Shop"),
y.position = c(3.6, 3.8, 4),
label = c("***", "***", "**")
)
sig_all <- bind_rows(sig_chao1, sig_shannon)
alpha_p_c2 <- alpha_p_c2 +
stat_pvalue_manual(
data = sig_all,
label = "label",
xmin = "group1",
xmax = "group2",
y.position = "y.position",
tip.length = 0.0,
size = 3.0
)
# Add p-value annotation in top right corner of each facet
stat_anova_anno_c2_alpha <- data.frame(
variable = c("Chao1", "Shannon"),
label = c("Group: p < 0.001", "Group: p < 0.001"),
x = c(Inf, Inf),
y = c(Inf, Inf)
)
alpha_p_c2 <- alpha_p_c2 +
geom_text(data = stat_anova_anno_c2_alpha,
aes(x = x, y = y, label = label),
inherit.aes = FALSE,
hjust = 1.1, vjust = 1.5,
size = 3.5, fontface = "italic", color = "black")
alpha_p_c2
# ggsave("./outputs/comp2_alpha.jpeg", height = 8, width = 16, dpi = 300)
fill_palette_c2 <- c(
"BALB/c_Yes_SPF_Term" = "#4E8A4E", # dark green
"BALB/c_No_Wild_colon" = "#3E5871", # dusty navy
"Wildling" = "#8B6F47", # brown
"Pet Shop" = "#C9873E", # orange
"Wild" = "#6B2737" # deep muted burgundy
)
colour_palette_c2 <- c(
"BALB/c_Yes_SPF_Term" = "#3A6F3A", # deeper forest green
"BALB/c_No_Wild_colon" = "#2F4357", # deeper dusty navy
"Wildling" = "#6F5736", # darker brown
"Pet Shop" = "#A86E2E", # darker muted orange
"Wild" = "#4A1825" # darker burgundy
)
ps_c2_log <- transform_sample_counts(ps_c2, function(x){log(1 + x)})
bray_pcoa_c2 <- ordinate(ps_c2_log, method = "PCoA", distance = "bray")
evals_c2 <- bray_pcoa_c2$values$Eigenvalues
bray_p_c2 <- plot_ordination(
ps_c2_log, bray_pcoa_c2, color = "Group", shape = "Sex"
) +
labs(
x = sprintf("PCo1 [%s%%]", round(evals_c2 / sum(evals_c2) * 100, 1)[1]),
y = sprintf("PCo2 [%s%%]", round(evals_c2 / sum(evals_c2) * 100, 1)[2]),
col = "Group", shape = "Sex",
title = "Bray-Curtis PCoA"
) +
stat_ellipse(aes(group = Group, fill = Group), alpha = 0.1, geom = "polygon") +
coord_fixed() +
scale_color_manual(values = colour_palette_c2, # darker outline colours
labels = c(
"BALB/c_Yes_SPF_Term" = "BALB/c Immunized + SPF FMT",
"BALB/c_No_Wild_colon" = "BALB/c + Wild FMT",
"Wildling" = "Wildling",
"Pet Shop" = "Pet Shop",
"Wild" = "Wild"
)) +
scale_fill_manual(values = fill_palette_c2, # lighter fill for ellipses
labels = c(
"BALB/c_Yes_SPF_Term" = "BALB/c Immunized + SPF FMT",
"BALB/c_No_Wild_colon" = "BALB/c + Wild FMT",
"Wildling" = "Wildling",
"Pet Shop" = "Pet Shop",
"Wild" = "Wild"
)) +
geom_vline(xintercept = 0, lty = 2, alpha = 0.5, color = "blue") +
geom_hline(yintercept = 0, lty = 2, alpha = 0.5, color = "blue") +
theme_bw() +
theme(axis.title = element_text(),
legend.title = element_text(size = 10),
legend.text = element_text(),
axis.text = element_text(size = 13),
plot.title = element_text(size = 15)) +
geom_point(size = 3)
library(ggtext)
bray_p_c2 <- bray_p_c2 +
annotate("richtext",
x = Inf, y = Inf,
label = "PERMANOVA: Group<br>*R*² = 0.47, *p* < 0.001",
hjust = 1.1, vjust = 1.5,
size = 4, color = "black",
fill = NA, label.color = NA)
bray_p_c2
# ggsave("./outputs/comp2_bray_pcoa_80perc.jpeg", dpi = 300, height = 7, width = 12)
set.seed(123)
df_c2 <- data.frame(sample_data(ps_c2_log))
bray_dist_c2 <- phyloseq::distance(ps_c2_log, method = "bray")
vegan::adonis2(bray_dist_c2 ~ Group, data = df_c2, permutations = 999)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## vegan::adonis2(formula = bray_dist_c2 ~ Group, data = df_c2, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## Model 4 3.0835 0.46882 11.033 0.001 ***
## Residual 50 3.4937 0.53118
## Total 54 6.5772 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vegan::adonis2(bray_dist_c2 ~ Sex, data = df_c2, permutations = 999) # not significant, explain 1.948 % of variance
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## vegan::adonis2(formula = bray_dist_c2 ~ Sex, data = df_c2, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.1187 0.01805 0.9744 0.424
## Residual 53 6.4585 0.98195
## Total 54 6.5772 1.00000
vegan::adonis2(bray_dist_c2 ~ Group, data = df_c2, permutations = 999) # significant and explain 46.878 % of variance
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## vegan::adonis2(formula = bray_dist_c2 ~ Group, data = df_c2, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## Model 4 3.0835 0.46882 11.033 0.001 ***
## Residual 50 3.4937 0.53118
## Total 54 6.5772 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Beta diversity at term — FMT grupper versus deres donorer (samme donor som ved weaning)
# Donor-grupper: Donor_SPF og Donor_Wild — ekskluder resampling
comp2_term_groups <- c("BALB/c_Yes_SPF_Term", "BALB/c_No_Wild_colon")
comp2_term_donors <- c("Donor_SPF", "Donor_Wild")
# Subset: kun term-grupper + originale donorer (ingen resampling)
ps_c2_donors <- subset_samples(ps_rar,
sample_data(ps_rar)$Group %in% c(comp2_term_groups, comp2_term_donors) &
!Study %in% c("Donor_inoculum_SPF_resampling_1",
"Donor_inoculum_SPF_resampling_2",
"Donor_inoculum_Wild_resampling_1",
"Donor_inoculum_Wild_resampling_2"))
ps_c2_donors_log <- transform_sample_counts(ps_c2_donors, function(x){log(1 + x)})
bray_pcoa_c2_donors <- ordinate(ps_c2_donors_log, method = "PCoA", distance = "bray")
evals_c2_donors <- bray_pcoa_c2_donors$values$Eigenvalues
bray_p_c2_donors <- plot_ordination(
ps_c2_donors_log, bray_pcoa_c2_donors, color = "Group", shape = "Sex"
) +
# Ellipser kun på term-grupper (ikke donors)
stat_ellipse(data = . %>% filter(Group %in% comp2_term_groups),
aes(group = Group, fill = Group, color = Group, shape = NULL),
alpha = 0.15, geom = "polygon", linetype = 2) +
# Term-prøver
geom_point(data = . %>% filter(Group %in% comp2_term_groups),
aes(color = Group, shape = Sex), size = 3, alpha = 0.9) +
# Donor-prøver — plottet som diamonds (shape 18)
geom_point(data = . %>% filter(Group %in% comp2_term_donors),
aes(color = Group), shape = 18, size = 5, alpha = 1) +
scale_color_manual(
values = c(
"BALB/c_Yes_SPF_Term" = "#4E8A4E", # dark green
"BALB/c_No_Wild_colon" = "#3E5871", # dusty navy
"Donor_SPF" = "#2d5a2d", # darker green for donor
"Donor_Wild" = "#1a2f40" # darker navy for donor
),
labels = c(
"BALB/c_Yes_SPF_Term" = "BALB/c Immunized + SPF FMT",
"BALB/c_No_Wild_colon" = "BALB/c + Wild FMT",
"Donor_SPF" = "Donor SPF",
"Donor_Wild" = "Donor Wild"
)
) +
scale_fill_manual(
values = c(
"BALB/c_Yes_SPF_Term" = "#4E8A4E",
"BALB/c_No_Wild_colon" = "#3E5871"
),
labels = c(
"BALB/c_Yes_SPF_Term" = "BALB/c Immunized + SPF FMT",
"BALB/c_No_Wild_colon" = "BALB/c + Wild FMT"
)
) +
scale_shape_manual(
values = c("Female" = 16, "Male" = 17),
na.translate = FALSE,
na.value = NA
) +
guides(
color = guide_legend(title = "Group"),
fill = "none",
shape = guide_legend(title = "Sex", na.translate = FALSE)
) +
labs(
x = sprintf("PCo1 [%s%%]", round(evals_c2_donors / sum(evals_c2_donors) * 100, 1)[1]),
y = sprintf("PCo2 [%s%%]", round(evals_c2_donors / sum(evals_c2_donors) * 100, 1)[2]),
color = "Group", shape = "Sex",
title = "Bray-Curtis PCoA",
subtitle = "Beta Diversity at Termination — FMT Groups vs Donor"
) +
geom_vline(xintercept = 0, lty = 2, alpha = 0.5, color = "blue") +
geom_hline(yintercept = 0, lty = 2, alpha = 0.5, color = "blue") +
theme_bw() +
theme(axis.title = element_text(),
legend.title = element_text(size = 10),
legend.text = element_text(),
axis.text = element_text(size = 13),
plot.title = element_text(size = 15),
plot.subtitle = element_text(size = 13))
library(ggtext)
bray_p_c2_donors <- bray_p_c2_donors +
annotate("richtext",
x = Inf, y = Inf,
label = "PERMANOVA: Group<br>*R*² = 0.39, *p* < 0.001",
hjust = 1.1, vjust = 1.5,
size = 4, color = "black",
fill = NA, label.color = NA)
bray_p_c2_donors
# ggsave("./outputs/comp2_bray_pcoa_donors.jpeg", dpi = 300, height = 7, width = 10)
# PERMANOVA for term FMT groups vs donor comparison
# NB: Donors ekskluderes fra PERMANOVA — testes kun på de biologiske grupper
ps_c2_donors_stat_log <- transform_sample_counts(
subset_samples(ps_rar,
sample_data(ps_rar)$Group %in% comp2_term_groups &
!Study %in% c("Donor_inoculum_SPF_resampling_1",
"Donor_inoculum_SPF_resampling_2",
"Donor_inoculum_Wild_resampling_1",
"Donor_inoculum_Wild_resampling_2")),
function(x){log(1 + x)})
df_c2_donors_stat <- data.frame(sample_data(ps_c2_donors_stat_log))
bray_dist_c2_donors <- phyloseq::distance(ps_c2_donors_stat_log, method = "bray")
set.seed(123)
vegan::adonis2(bray_dist_c2_donors ~ Group * Sex, data = df_c2_donors_stat, permutations = 999)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## vegan::adonis2(formula = bray_dist_c2_donors ~ Group * Sex, data = df_c2_donors_stat, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## Model 3 1.2241 0.48717 7.5998 0.001 ***
## Residual 24 1.2885 0.51283
## Total 27 2.5126 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
set.seed(123)
vegan::adonis2(bray_dist_c2_donors ~ Group, data = df_c2_donors_stat, permutations = 999)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## vegan::adonis2(formula = bray_dist_c2_donors ~ Group, data = df_c2_donors_stat, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.96797 0.38525 16.294 0.001 ***
## Residual 26 1.54459 0.61475
## Total 27 2.51257 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
comp2_groups <- c("BALB/c_Yes_SPF_Term", "BALB/c_No_Wild_colon",
"Wildling", "Pet Shop", "Wild")
# Fælles farvepalette til x-aksen labels
comp2_labels <- c(
"BALB/c_Yes_SPF_Term" = "BALB/c\nImmunized\n+ SPF FMT",
"BALB/c_No_Wild_colon" = "BALB/c\n+ Wild FMT",
"Wildling" = "Wildling",
"Pet Shop" = "Pet Shop",
"Wild" = "Wild"
)
# Subset ps_rel til Comparison 2 grupper
ps_rel_c2 <- subset_samples(ps_rel,
sample_data(ps_rel)$Group %in% comp2_groups)
# Unload MicrobiotaProcess hvis loaded
if ("MicrobiotaProcess" %in% loadedNamespaces()) {
devtools::unload("MicrobiotaProcess")
}
ps_rel_c2_phylum <- ps_rel_c2
ps_rel_c2_phylum <- subset_taxa(ps_rel_c2_phylum,
!is.na(Phylum) & Phylum != "__" & Phylum != "")
ps_rel_c2_phylum <- prune_taxa(taxa_sums(ps_rel_c2_phylum) > 0, ps_rel_c2_phylum)
glom_c2_phylum <- gloomer(ps = ps_rel_c2_phylum, taxa_level = "Phylum", NArm = TRUE)
unique(tax_table(glom_c2_phylum)[, "Phylum"])
## Taxonomy Table: [13 taxa by 1 taxonomic ranks]:
## Phylum
## Actinomycetota "Actinomycetota"
## Bacillota "Bacillota"
## Bacteroidota "Bacteroidota"
## Campylobacterota "Campylobacterota"
## Candidatus Saccharibacteria "Candidatus Saccharibacteria"
## Deferribacterota "Deferribacterota"
## Elusimicrobiota "Elusimicrobiota"
## Fusobacteriota "Fusobacteriota"
## Mycoplasmatota "Mycoplasmatota"
## Pseudomonadota "Pseudomonadota"
## Spirochaetota "Spirochaetota"
## Thermodesulfobacteriota "Thermodesulfobacteriota"
## Verrucomicrobiota "Verrucomicrobiota"
phylum_col_c2 <- c(
"Bacillota" = "#C47F7F",
"Bacteroidota" = "#6B9BA4",
"Pseudomonadota" = "#A67B9B",
"Campylobacterota" = "#C4936A",
"Mycoplasmatota" = "#7B8FA6",
"Thermodesulfobacteriota" = "#6B8E7B",
"Verrucomicrobiota" = "#9B7BAD",
"Fusobacteriota" = "#7B9E87",
"Actinomycetota" = "#8FAF7E",
"Candidatus Saccharibacteria" = "#8FA6A6",
"Spirochaetota" = "#C77B3A",
"Deferribacterota" = "#B8977E",
"Elusimicrobiota" = "#C4A882"
)
rel_c2_phylum <- glom_c2_phylum %>%
psmelt() %>%
group_by(Sample) %>%
mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
ungroup()
top_phylum_c2 <- rel_c2_phylum %>%
group_by(OTU) %>%
summarise(m = mean(RelAbund)) %>%
arrange(desc(m)) %>%
pull(OTU)
rel_c2_phylum_avg <- rel_c2_phylum %>%
mutate(Order_plot = OTU,
Group = factor(Group, levels = comp2_groups)) %>%
group_by(Group, Order_plot) %>%
summarise(RelAbund = mean(RelAbund), .groups = "drop") %>%
mutate(Order_plot = factor(Order_plot, levels = top_phylum_c2))
p_phylum_c2 <- ggplot(rel_c2_phylum_avg,
aes(x = Group, y = RelAbund, fill = Order_plot)) +
geom_bar(stat = "identity", color = "black", linewidth = 0.3) +
scale_fill_manual(values = phylum_col_c2, breaks = top_phylum_c2) +
scale_x_discrete(limits = comp2_groups, labels = comp2_labels) +
theme_bw() +
theme(axis.text.x = element_text(size = 11),
axis.title.y = element_text(size = 13),
legend.text = element_text(size = 9),
legend.title = element_text(size = 10),
plot.title = element_text(size = 15)) +
labs(x = NULL, y = "Relative abundance (%)", fill = "Phylum",
title = "Relative Abundance at Phylum Level")
p_phylum_c2
# ggsave("./outputs/comp2_relabund_phylum.jpeg", dpi = 300, height = 7, width = 14)
ps_rel_c2_genus_all5 <- ps_rel_c2
ps_rel_c2_genus_all5 <- subset_taxa(ps_rel_c2_genus_all5,
!is.na(Genus) & Genus != "__" & Genus != "")
ps_rel_c2_genus_all5 <- prune_taxa(taxa_sums(ps_rel_c2_genus_all5) > 0, ps_rel_c2_genus_all5)
glom_c2_genus_all5 <- gloomer(ps = ps_rel_c2_genus_all5, taxa_level = "Genus", NArm = TRUE)
unique(tax_table(glom_c2_genus_all5)[, "Genus"])
## Taxonomy Table: [80 taxa by 1 taxonomic ranks]:
## Genus
## Bifidobacterium "Bifidobacterium"
## Adlercreutzia "Adlercreutzia"
## Staphylococcus "Staphylococcus"
## Enterococcus "Enterococcus"
## Lactobacillus "Lactobacillus"
## Ligilactobacillus "Ligilactobacillus"
## Limosilactobacillus "Limosilactobacillus"
## Lactococcus "Lactococcus"
## Streptococcus "Streptococcus"
## Candidatus Borkfalkia "Candidatus Borkfalkia"
## Christensenella "Christensenella"
## Acutalibacter "Acutalibacter"
## Butyricicoccus "Butyricicoccus"
## Clostridium "Clostridium"
## Eubacterium "Eubacterium"
## Zhenpiania "Zhenpiania"
## Anaerotruncus "Anaerotruncus"
## Angelakisella "Angelakisella"
## Hominilimicola "Hominilimicola"
## Neglectibacter "Neglectibacter"
## Vermiculatibacterium "Vermiculatibacterium"
## Dehalobacterium "Dehalobacterium"
## Pumilibacter "Pumilibacter"
## Anaerotignum "Anaerotignum"
## Acetatifactor "Acetatifactor"
## Anaerostipes "Anaerostipes"
## Blautia "Blautia"
## Enterocloster "Enterocloster"
## Fusimonas "Fusimonas"
## Jutongia "Jutongia"
## Lachnospira "Lachnospira"
## Mediterraneibacter "Mediterraneibacter"
## Parablautia "Parablautia"
## Petralouisia "Petralouisia"
## Roseburia "Roseburia"
## Schaedlerella "Schaedlerella"
## Sporofaciens "Sporofaciens"
## Tyzzerella "Tyzzerella"
## Clostridioides "Clostridioides"
## Paeniclostridium "Paeniclostridium"
## Coprobacillus "Coprobacillus"
## Thomasclavelia "Thomasclavelia"
## Allobaculum "Allobaculum"
## Amedibacillus "Amedibacillus"
## Dubosiella "Dubosiella"
## Faecalibaculum "Faecalibaculum"
## Stoquefichus "Stoquefichus"
## Bacteroides "Bacteroides"
## Phocaeicola "Phocaeicola"
## Duncaniella "Duncaniella"
## Heminiphilus "Heminiphilus"
## Muribaculum "Muribaculum"
## Paramuribaculum "Paramuribaculum"
## Sangeribacter "Sangeribacter"
## Odoribacter "Odoribacter"
## Palleniella "Palleniella"
## Paraprevotella "Paraprevotella"
## Segatella "Segatella"
## Xylanibacter "Xylanibacter"
## Alistipes "Alistipes"
## Rikenella "Rikenella"
## Parabacteroides "Parabacteroides"
## Helicobacter "Helicobacter"
## Nanosyncoccus "Nanosyncoccus"
## Mucispirillum "Mucispirillum"
## Elusimicrobium "Elusimicrobium"
## Fusobacterium "Fusobacterium"
## Metamycoplasma "Metamycoplasma"
## Malacoplasma "Malacoplasma"
## Anaeroplasma "Anaeroplasma"
## Parasutterella "Parasutterella"
## Sutterella "Sutterella"
## Escherichia "Escherichia"
## Klebsiella "Klebsiella"
## Proteus "Proteus"
## Treponema "Treponema"
## Bilophila "Bilophila"
## Desulfovibrio "Desulfovibrio"
## Taurinivorans "Taurinivorans"
## Akkermansia "Akkermansia"
rel_c2_genus_all5 <- glom_c2_genus_all5 %>%
psmelt() %>%
group_by(Sample) %>%
mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
ungroup()
# Genera >= 1% i mindst én gruppe
top_genus_c2_filt <- rel_c2_genus_all5 %>%
group_by(OTU, Group) %>%
summarise(m = mean(RelAbund), .groups = "drop") %>%
group_by(OTU) %>%
summarise(max_m = max(m)) %>%
filter(max_m >= 1) %>%
arrange(desc(max_m)) %>%
pull(OTU)
# Genera >= 0.01% i mindst én gruppe (til fuld plot)
top_genus_c2_001 <- rel_c2_genus_all5 %>%
group_by(OTU, Group) %>%
summarise(m = mean(RelAbund), .groups = "drop") %>%
group_by(OTU) %>%
summarise(max_m = max(m)) %>%
filter(max_m >= 0.01) %>%
arrange(desc(max_m)) %>%
pull(OTU)
genus_col_c2 <- c(
"Lactobacillus" = "#C4936A",
"Bacteroides" = "#6B9BA4",
"Ligilactobacillus" = "#A23B72",
"Fusimonas" = "#E84855",
"Christensenella" = "#C77B3A",
"Limosilactobacillus" = "#C47F7F",
"Alistipes" = "#9B7BAD",
"Clostridium" = "#1D3557",
"Mediterraneibacter" = "#D4A853",
"Schaedlerella" = "#8FAF7E",
"Acetatifactor" = "#5C8A4A",
"Duncaniella" = "#3D7A8A",
"Palleniella" = "#B5838D",
"Streptococcus" = "#E76F51",
"Phocaeicola" = "#1A5276",
"Allobaculum" = "#E8C3A0",
"Roseburia" = "#5A6AA8",
"Xylanibacter" = "#E9C46A",
"Odoribacter" = "#80B918",
"Helicobacter" = "#2A9D8F",
"Candidatus Borkfalkia" = "#5C7B3A",
"Thomasclavelia" = "#7B5EA7",
"Blautia" = "#4A8A7B",
"Paraprevotella" = "#3D5A80",
"Anaerotignum" = "#F18F01",
"Heminiphilus" = "#7B4F8A",
"Desulfovibrio" = "#A8DADC",
"Pumilibacter" = "#8A7B4A",
"Muribaculum" = "#6B4226",
"Dubosiella" = "#C45C8A",
"Enterocloster" = "#8AA85A",
"Segatella" = "#6B8E23",
"Nanosyncoccus" = "#2E86AB",
"Eubacterium" = "#C4745A",
"Acutalibacter" = "#8A4A5C",
"Sporofaciens" = "#7B8A4A",
"Metamycoplasma" = "#5C7B9A",
"Adlercreutzia" = "#C49A6C",
"Parablautia" = "#FFD700",
"Fusobacterium" = "#E84040",
"Akkermansia" = "#5C3A8A",
"Other" = "grey80"
)
stopifnot(!any(duplicated(genus_col_c2)))
rel_c2_genus_avg <- rel_c2_genus_all5 %>%
mutate(Order_plot = ifelse(OTU %in% top_genus_c2_filt, OTU, "Other"),
Group = factor(Group, levels = comp2_groups)) %>%
group_by(Sample, Group, Order_plot) %>%
summarise(RelAbund = sum(RelAbund), .groups = "drop") %>%
group_by(Group, Order_plot) %>%
summarise(RelAbund = mean(RelAbund), .groups = "drop") %>%
mutate(Order_plot = factor(Order_plot,
levels = c(top_genus_c2_filt, "Other")))
p_genus_c2 <- ggplot(rel_c2_genus_avg,
aes(x = Group, y = RelAbund, fill = Order_plot)) +
geom_bar(stat = "identity", color = "black", linewidth = 0.3) +
scale_fill_manual(values = genus_col_c2,
breaks = c(top_genus_c2_filt, "Other")) +
scale_x_discrete(limits = comp2_groups, labels = comp2_labels) +
theme_bw() +
theme(axis.text.x = element_text(size = 11),
axis.title.y = element_text(size = 13),
legend.text = element_text(size = 9),
legend.title = element_text(size = 10),
plot.title = element_text(size = 15)) +
labs(x = NULL, y = "Relative abundance (%)", fill = "Genus",
title = "Relative Abundance at Genus Level")
p_genus_c2
# ggsave("./outputs/comp2_relabund_genus.jpeg", dpi = 300, height = 7, width = 14)
genus_col_c2_001 <- c(
"Lactobacillus" = "#C4936A",
"Bacteroides" = "#6B9BA4",
"Ligilactobacillus" = "#A23B72",
"Fusimonas" = "#E84855",
"Alistipes" = "#9B7BAD",
"Schaedlerella" = "#8FAF7E",
"Limosilactobacillus" = "#C47F7F",
"Duncaniella" = "#3D7A8A",
"Clostridium" = "#3A5C7B",
"Mediterraneibacter" = "#3BB273",
"Acetatifactor" = "#457B9D",
"Palleniella" = "#B5838D",
"Streptococcus" = "#E76F51",
"Roseburia" = "#5A6AA8",
"Phocaeicola" = "#1A5276",
"Helicobacter" = "#2A9D8F",
"Blautia" = "#4A8A7B",
"Odoribacter" = "#80B918",
"Anaerotignum" = "#F18F01",
"Xylanibacter" = "#E9C46A",
"Thomasclavelia" = "#A8A85A",
"Muribaculum" = "#6B4226",
"Christensenella" = "#7B2D8B",
"Paraprevotella" = "#3D5A80",
"Desulfovibrio" = "#A8DADC",
"Candidatus Borkfalkia" = "#5C8A4A",
"Enterocloster" = "#8AA85A",
"Allobaculum" = "#D4856A",
"Acutalibacter" = "#8A4A5C",
"Segatella" = "#6B8E23",
"Dubosiella" = "#C49A6C",
"Sporofaciens" = "#7B8A4A",
"Heminiphilus" = "#7B4F8A",
"Metamycoplasma" = "#7B8FA6",
"Parablautia" = "#FFD700",
"Petralouisia" = "#6AA85A",
"Eubacterium" = "#F4A261",
"Akkermansia" = "#5C3A8A",
"Paramuribaculum" = "#6D6875",
"Fusobacterium" = "#C73E1D",
"Anaerostipes" = "#5C7B3A",
"Lactococcus" = "#E8A020",
"Jutongia" = "#5CB87A",
"Pumilibacter" = "#8A7B4A",
"Enterococcus" = "#A8805A",
"Nanosyncoccus" = "#2E86AB",
"Huintestinicola" = "#9A5C7B",
"Adlercreutzia" = "#C45C8A",
"Anaeroplasma" = "#C49F6C",
"Parabacteroides" = "#A85A8A",
"Taurinivorans" = "#8E6B3E",
"Angelakisella" = "#6B5EA7",
"Sangeribacter" = "#8A5C3A",
"Anaerotruncus" = "#5C7B9A",
"Faecalibaculum" = "#4A7B8A",
"Neglectibacter" = "#4A235A",
"Lachnospira" = "#9A7B5C",
"Dehalobacterium" = "#2C4770",
"Paeniclostridium" = "#7B9A5C",
"Coprobacillus" = "#C4845A",
"Escherichia" = "#C0392B",
"Mucispirillum" = "#D4A853",
"Sutterella" = "#E67E22",
"Treponema" = "#A07830",
"Rikenella" = "#5A8AAA",
"Stoquefichus" = "#7D6608",
"Bifidobacterium" = "#264653",
"Hominilimicola" = "#6A9A6A",
"Tyzzerella" = "#5AA88A",
"Prevotellamassilia" = "#8A6A9A",
"Vermiculatibacterium" = "#8E44AD",
"Proteus" = "#C77B3A",
"Amedibacillus" = "#7B6914",
"Malacoplasma" = "#4A7B5C",
"Clostridioides" = "#9A4A3A",
"Butyricicoccus" = "#5C9A7B",
"Parasutterella" = "#7B5C9A",
"Elusimicrobium" = "#9A7B4A",
"Bilophila" = "#A8C5A0",
"Klebsiella" = "#C45A3A",
"Zhenpiania" = "#8B6914",
"Staphylococcus" = "#6B3A5C"
)
stopifnot(!any(duplicated(genus_col_c2_001)))
rel_c2_genus_all <- rel_c2_genus_all5 %>%
filter(OTU %in% top_genus_c2_001) %>%
mutate(Order_plot = OTU,
Group = factor(Group, levels = comp2_groups)) %>%
group_by(Sample, Group, Order_plot) %>%
summarise(RelAbund = sum(RelAbund), .groups = "drop") %>%
group_by(Group, Order_plot) %>%
summarise(RelAbund = mean(RelAbund), .groups = "drop") %>%
mutate(Order_plot = factor(Order_plot, levels = top_genus_c2_001))
p_c2_genus_all <- ggplot(rel_c2_genus_all, aes(x = Group, y = RelAbund, fill = Order_plot)) +
geom_bar(stat = "identity", color = "black", linewidth = 0.3) +
scale_fill_manual(values = genus_col_c2_001, breaks = top_genus_c2_001) +
scale_x_discrete(limits = comp2_groups, labels = comp2_labels) +
theme_bw() +
theme(axis.text.x = element_text(size = 11),
axis.title.y = element_text(size = 13),
legend.text = element_text(size = 8),
legend.title = element_text(size = 10),
plot.title = element_text(size = 15)) +
labs(x = NULL, y = "Relative abundance (%)", fill = "Genus",
title = "Relative Abundance at Genus Level")
p_c2_genus_all
# Husk: "Genera with mean relative abundance ≥ 0.01% shown"
# ggsave("./outputs/comp2_relabund_genus_all.jpeg", dpi = 300, height = 7, width = 16)
# Subset to all comp2 groups
ps_rel_c2_persample <- subset_samples(ps_rel,
sample_data(ps_rel)$Group %in% comp2_groups)
ps_rel_c2_persample <- subset_taxa(ps_rel_c2_persample,
!is.na(Genus) & Genus != "" & Genus != "__")
ps_rel_c2_persample <- prune_taxa(taxa_sums(ps_rel_c2_persample) > 0, ps_rel_c2_persample)
# Agglomerate to genus level
ps_rel_c2_persample_genus <- tax_glom(ps_rel_c2_persample, taxrank = "Genus", NArm = TRUE)
taxa_names(ps_rel_c2_persample_genus) <- make.unique(
as.character(tax_table(ps_rel_c2_persample_genus)[, "Genus"]))
# Melt and calculate relative abundance per sample
rel_c2_genus_persample <- psmelt(ps_rel_c2_persample_genus) %>%
group_by(Sample) %>%
mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
ungroup()
# Plot data — reuse top_genus_c2_filt already defined in your pipeline
rel_c2_genus_sample <- rel_c2_genus_persample %>%
mutate(Genus_plot = ifelse(OTU %in% top_genus_c2_filt, OTU, "Other"),
Group = factor(Group, levels = comp2_groups)) %>%
group_by(Sample, Group, Genus_plot) %>%
summarise(RelAbund = sum(RelAbund), .groups = "drop") %>%
mutate(Genus_plot = factor(Genus_plot,
levels = c(top_genus_c2_filt, "Other")))
# Per-sample stacked barplot
p_genus_c2_persample <- ggplot(rel_c2_genus_sample,
aes(x = Sample, y = RelAbund, fill = Genus_plot)) +
geom_bar(stat = "identity", color = "black", linewidth = 0.2) +
facet_wrap(~Group, scales = "free_x", labeller = labeller(Group = comp2_labels)) +
scale_fill_manual(values = c(genus_col_c2, "Other" = "grey80"),
breaks = c(top_genus_c2_filt, "Other")) +
theme_bw() +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size = 13),
legend.text = element_text(size = 9),
legend.title = element_text(size = 10),
strip.text = element_text(size = 11),
plot.title = element_text(size = 15)) +
labs(x = "Sample",
y = "Relative abundance (%)",
fill = "Genus",
title = "Relative Abundance at Genus Level")
p_genus_c2_persample
# ggsave("./outputs/comp2_genus_persample.jpeg", plot = p_genus_c2_persample, dpi = 300, height = 7, width = 18)
# ============================================================
# GENUS NIVEAU — Term BALB/c grupper vs donorer
# ============================================================
comp2_term_groups <- c("BALB/c_Yes_SPF_Term", "BALB/c_No_Wild_colon")
comp2_term_donors <- c("Donor_SPF", "Donor_Wild")
ps_rel_c2_donors <- subset_samples(ps_rel,
sample_data(ps_rel)$Group %in% c(comp2_term_groups, comp2_term_donors) &
!sample_data(ps_rel)$Study %in% c(
"Donor_inoculum_SPF_resampling_1",
"Donor_inoculum_SPF_resampling_2",
"Donor_inoculum_Wild_resampling_1",
"Donor_inoculum_Wild_resampling_2"))
ps_rel_c2_genus_donors <- ps_rel_c2_donors
ps_rel_c2_genus_donors <- subset_taxa(ps_rel_c2_genus_donors,
!is.na(Genus) & Genus != "__" & Genus != "")
ps_rel_c2_genus_donors <- prune_taxa(taxa_sums(ps_rel_c2_genus_donors) > 0, ps_rel_c2_genus_donors)
glom_c2_genus_donors <- gloomer(ps = ps_rel_c2_genus_donors, taxa_level = "Genus", NArm = TRUE)
unique(tax_table(glom_c2_genus_donors)[, "Genus"])
## Taxonomy Table: [78 taxa by 1 taxonomic ranks]:
## Genus
## Adlercreutzia "Adlercreutzia"
## Staphylococcus "Staphylococcus"
## Enterococcus "Enterococcus"
## Lactobacillus "Lactobacillus"
## Ligilactobacillus "Ligilactobacillus"
## Limosilactobacillus "Limosilactobacillus"
## Lactococcus "Lactococcus"
## Streptococcus "Streptococcus"
## Candidatus Borkfalkia "Candidatus Borkfalkia"
## Christensenella "Christensenella"
## Acutalibacter "Acutalibacter"
## Butyricicoccus "Butyricicoccus"
## Clostridium "Clostridium"
## Eubacterium "Eubacterium"
## Zhenpiania "Zhenpiania"
## Anaerotruncus "Anaerotruncus"
## Angelakisella "Angelakisella"
## Hominilimicola "Hominilimicola"
## Neglectibacter "Neglectibacter"
## Vermiculatibacterium "Vermiculatibacterium"
## Dehalobacterium "Dehalobacterium"
## Pumilibacter "Pumilibacter"
## Anaerotignum "Anaerotignum"
## Acetatifactor "Acetatifactor"
## Anaerostipes "Anaerostipes"
## Blautia "Blautia"
## Enterocloster "Enterocloster"
## Fusimonas "Fusimonas"
## Jutongia "Jutongia"
## Lachnospira "Lachnospira"
## Mediterraneibacter "Mediterraneibacter"
## Parablautia "Parablautia"
## Petralouisia "Petralouisia"
## Roseburia "Roseburia"
## Schaedlerella "Schaedlerella"
## Sporofaciens "Sporofaciens"
## Tyzzerella "Tyzzerella"
## Clostridioides "Clostridioides"
## Paeniclostridium "Paeniclostridium"
## Coprobacillus "Coprobacillus"
## Thomasclavelia "Thomasclavelia"
## Allobaculum "Allobaculum"
## Amedibacillus "Amedibacillus"
## Dubosiella "Dubosiella"
## Faecalibaculum "Faecalibaculum"
## Stoquefichus "Stoquefichus"
## Bacteroides "Bacteroides"
## Phocaeicola "Phocaeicola"
## Duncaniella "Duncaniella"
## Heminiphilus "Heminiphilus"
## Muribaculum "Muribaculum"
## Paramuribaculum "Paramuribaculum"
## Sangeribacter "Sangeribacter"
## Odoribacter "Odoribacter"
## Palleniella "Palleniella"
## Paraprevotella "Paraprevotella"
## Segatella "Segatella"
## Xylanibacter "Xylanibacter"
## Alistipes "Alistipes"
## Rikenella "Rikenella"
## Parabacteroides "Parabacteroides"
## Helicobacter "Helicobacter"
## Nanosyncoccus "Nanosyncoccus"
## Mucispirillum "Mucispirillum"
## Elusimicrobium "Elusimicrobium"
## Fusobacterium "Fusobacterium"
## Metamycoplasma "Metamycoplasma"
## Malacoplasma "Malacoplasma"
## Anaeroplasma "Anaeroplasma"
## Parasutterella "Parasutterella"
## Sutterella "Sutterella"
## Escherichia "Escherichia"
## Klebsiella "Klebsiella"
## Proteus "Proteus"
## Bilophila "Bilophila"
## Desulfovibrio "Desulfovibrio"
## Taurinivorans "Taurinivorans"
## Akkermansia "Akkermansia"
rel_c2_genus_donors <- glom_c2_genus_donors %>%
psmelt() %>%
group_by(Sample) %>%
mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
ungroup()
top_genus_c2_filt_donors <- rel_c2_genus_donors %>%
group_by(OTU, Group) %>%
summarise(m = mean(RelAbund), .groups = "drop") %>%
group_by(OTU) %>%
summarise(max_m = max(m)) %>%
filter(max_m >= 1) %>%
arrange(desc(max_m)) %>%
pull(OTU)
# Farvepalette til donors-plot
genus_col_c2_donors <- c(
"Lactobacillus" = "#C4936A",
"Bacteroides" = "#6B9BA4",
"Ligilactobacillus" = "#A23B72",
"Fusimonas" = "#E84855",
"Christensenella" = "#C77B3A",
"Limosilactobacillus" = "#C47F7F",
"Alistipes" = "#9B7BAD",
"Clostridium" = "#1D3557",
"Mediterraneibacter" = "#D4A853",
"Schaedlerella" = "#8FAF7E",
"Acetatifactor" = "#5C8A4A",
"Duncaniella" = "#3D7A8A",
"Palleniella" = "#B5838D",
"Streptococcus" = "#E76F51",
"Phocaeicola" = "#1A5276",
"Allobaculum" = "#E8C3A0",
"Roseburia" = "#5A6AA8",
"Xylanibacter" = "#E9C46A",
"Odoribacter" = "#80B918",
"Helicobacter" = "#2A9D8F",
"Candidatus Borkfalkia" = "#5C7B3A",
"Thomasclavelia" = "#7B5EA7",
"Blautia" = "#4A8A7B",
"Paraprevotella" = "#3D5A80",
"Anaerotignum" = "#F18F01",
"Heminiphilus" = "#7B4F8A",
"Desulfovibrio" = "#A8DADC",
"Pumilibacter" = "#8A7B4A",
"Muribaculum" = "#6B4226",
"Dubosiella" = "#C45C8A",
"Enterocloster" = "#8AA85A",
"Segatella" = "#6B8E23",
"Nanosyncoccus" = "#2E86AB",
"Eubacterium" = "#C4745A",
"Acutalibacter" = "#8A4A5C",
"Sporofaciens" = "#7B8A4A",
"Metamycoplasma" = "#5C7B9A",
"Adlercreutzia" = "#C49A6C",
"Parablautia" = "#FFD700",
"Fusobacterium" = "#E84040",
"Akkermansia" = "#5C3A8A",
"Other" = "grey80"
)
stopifnot(!any(duplicated(genus_col_c2_donors)))
rel_c2_genus_avg_donors <- rel_c2_genus_donors %>%
mutate(Order_plot = ifelse(OTU %in% top_genus_c2_filt_donors, OTU, "Other"),
Group = factor(Group, levels = c(
"BALB/c_Yes_SPF_Term", "Donor_SPF",
"BALB/c_No_Wild_colon", "Donor_Wild"
))) %>%
group_by(Sample, Group, Order_plot) %>%
summarise(RelAbund = sum(RelAbund), .groups = "drop") %>%
group_by(Group, Order_plot) %>%
summarise(RelAbund = mean(RelAbund), .groups = "drop") %>%
mutate(Order_plot = factor(Order_plot,
levels = c(top_genus_c2_filt_donors, "Other")))
p_genus_c2_donors <- ggplot(rel_c2_genus_avg_donors,
aes(x = Group, y = RelAbund, fill = Order_plot)) +
geom_bar(stat = "identity", color = "black", linewidth = 0.3) +
scale_fill_manual(values = genus_col_c2_donors,
breaks = c(top_genus_c2_filt_donors, "Other")) +
scale_x_discrete(
limits = c("BALB/c_Yes_SPF_Term", "Donor_SPF",
"BALB/c_No_Wild_colon", "Donor_Wild"),
labels = c(
"BALB/c_Yes_SPF_Term" = "BALB/c\nImmunized\n+ SPF FMT",
"Donor_SPF" = "Donor\nSPF",
"BALB/c_No_Wild_colon" = "BALB/c\n+ Wild FMT",
"Donor_Wild" = "Donor\nWild"
)) +
theme_bw() +
theme(axis.text.x = element_text(size = 11),
axis.title.y = element_text(size = 13),
legend.text = element_text(size = 9),
legend.title = element_text(size = 10),
plot.title = element_text(size = 15),
plot.subtitle = element_text(size = 13)) +
labs(x = NULL, y = "Relative abundance (%)", fill = "Genus",
title = "Relative Abundance at Genus Level",
subtitle = "BALB/c Mice at Term by FMT Source, Including Donors")
p_genus_c2_donors
# ggsave("./outputs/comp2_relabund_genus_donors.jpeg", dpi = 300, height = 7, width = 13)
set.seed(123)
library(DESeq2)
library(ggtext)
library(ggrepel)
# ============================================================
# DIFFERENTIAL ABUNDANCE — DESeq2
# Comparison 2: Wild-spectrum groups — Wild FMT as reference
# Groups: BALB/c_No_Wild_colon, Wildling, Pet Shop, Wild
# BALB/c_Yes_SPF_Term excluded (distinct colonization trajectory,
# see PCoA — forms isolated cluster separate from wild-spectrum groups)
# ============================================================
# Prepare phyloseq object — use non-rarefied (pst_original) for DESeq2
# Subset to wild-spectrum groups only
comp2_deseq_groups <- c("BALB/c_No_Wild_colon", "Wildling", "Pet Shop", "Wild")
ps_c2_deseq <- subset_samples(pst_original,
sample_data(pst_original)$Group %in% comp2_deseq_groups)
# Remove taxa with zero counts across these samples
ps_c2_deseq <- prune_taxa(taxa_sums(ps_c2_deseq) > 0, ps_c2_deseq)
# Prune unassigned at multiple ranks before glomming — prevents duplicate taxa_names in gloomer
ps_c2_deseq <- subset_taxa(ps_c2_deseq,
!is.na(Genus) & Genus != "" & Genus != "__" &
!is.na(Family) & Family != "" & Family != "__" &
!is.na(Order) & Order != "" & Order != "__")
ps_c2_deseq <- prune_taxa(taxa_sums(ps_c2_deseq) > 0, ps_c2_deseq)
# Check for duplicate genera before glomming
tx_check <- as.data.frame(tax_table(ps_c2_deseq))
tx_check[duplicated(tx_check$Genus) | duplicated(tx_check$Genus, fromLast = TRUE),
c("Family", "Genus")]
# Agglomerate to genus level
# Use tax_glom directly instead of gloomer for DESeq2 object
ps_c2_deseq_genus <- tax_glom(ps_c2_deseq, taxrank = "Genus", NArm = TRUE)
# Make genus names unique before assigning as taxa names
genus_names <- tax_table(ps_c2_deseq_genus)[, "Genus"]
taxa_names(ps_c2_deseq_genus) <- make.unique(as.character(genus_names))
# Set Group factor — BALB/c_No_Wild_colon as reference
sample_data(ps_c2_deseq_genus)$Group <- factor(
sample_data(ps_c2_deseq_genus)$Group,
levels = c("BALB/c_No_Wild_colon", # reference
"Wildling",
"Pet Shop",
"Wild")
)
# Manual conversion — bypasses phyloseq_to_deseq2 entirely
count_mat <- as(otu_table(ps_c2_deseq_genus), "matrix")
# Ensure taxa are rows
if (!taxa_are_rows(ps_c2_deseq_genus)) {
count_mat <- t(count_mat)
}
# Round to integers (DESeq2 requires integer counts)
count_mat <- round(count_mat)
# Sample metadata
col_data <- data.frame(sample_data(ps_c2_deseq_genus))
col_data$Group <- factor(col_data$Group,
levels = c("BALB/c_No_Wild_colon",
"Wildling",
"Pet Shop",
"Wild"))
# Build DESeq2 object directly
dds_c2 <- DESeqDataSetFromMatrix(
countData = count_mat,
colData = col_data,
design = ~ Group
)
dds_c2 <- DESeq(dds_c2)
# Check levels and result names
levels(colData(dds_c2)$Group)
resultsNames(dds_c2)
# ============================================================
# EXTRACT RESULTS — three contrasts vs BALB/c + Wild FMT
# ============================================================
# 1. Wildling vs BALB/c + Wild FMT
res_Wildling <- results(dds_c2, contrast = c("Group", "Wildling", "BALB/c_No_Wild_colon"))
df_Wildling <- as.data.frame(res_Wildling) %>%
mutate(taxon = rownames(.),
contrast = "Wildling vs Wild FMT")
# 2. Wild vs BALB/c + Wild FMT
res_Wild <- results(dds_c2, contrast = c("Group", "Wild", "BALB/c_No_Wild_colon"))
df_Wild <- as.data.frame(res_Wild) %>%
mutate(taxon = rownames(.),
contrast = "Wild vs Wild FMT")
# 3. Pet Shop vs BALB/c + Wild FMT
res_PetShop <- results(dds_c2, contrast = c("Group", "Pet Shop", "BALB/c_No_Wild_colon"))
df_PetShop <- as.data.frame(res_PetShop) %>%
mutate(taxon = rownames(.),
contrast = "Pet Shop vs Wild FMT")
# Significant taxa per contrast
cat("Significant taxa — Wildling vs Wild FMT:",
nrow(subset(df_Wildling, !is.na(padj) & padj < 0.05)), "\n")
cat("Significant taxa — Wild vs Wild FMT:",
nrow(subset(df_Wild, !is.na(padj) & padj < 0.05)), "\n")
cat("Significant taxa — Pet Shop vs Wild FMT:",
nrow(subset(df_PetShop, !is.na(padj) & padj < 0.05)), "\n")
# ============================================================
# VOLCANO PLOT FUNCTION
# ============================================================
volcano_plot_c2 <- function(df, contrast_name, col_pos, col_neg) {
comparison_label <- sub(" vs .*", "", contrast_name)
reference_label <- sub(".* vs ", "", contrast_name)
df <- df %>%
filter(!is.na(padj)) %>%
mutate(diffexpressed = case_when(
log2FoldChange > 0 & padj < 0.05 ~ paste0("Higher in ", comparison_label),
log2FoldChange < 0 & padj < 0.05 ~ paste0("Higher in ", reference_label),
TRUE ~ "ns"
),
diffexpressed = factor(diffexpressed,
levels = c(paste0("Higher in ", comparison_label),
paste0("Higher in ", reference_label),
"ns")))
ggplot(df, aes(x = log2FoldChange,
y = -log10(padj),
col = diffexpressed)) +
geom_point(size = 2.5, alpha = 0.8) +
geom_vline(xintercept = 0, lty = 2, color = "grey50") +
geom_hline(yintercept = -log10(0.05), lty = 2, color = "grey50") +
geom_label_repel(
data = df %>% filter(padj < 0.05),
aes(label = taxon),
size = 3,
max.overlaps = 20,
fontface = "italic",
label.r = 0.10
) +
scale_color_manual(values = c(
setNames(col_pos, paste0("Higher in ", comparison_label)),
setNames(col_neg, paste0("Higher in ", reference_label)),
"ns" = "grey70"
)) +
theme_bw() +
theme(legend.title = element_text(),
axis.title = element_text(),
plot.title = element_markdown(size = 15),
plot.subtitle = element_markdown(size = 13)
) +
labs(title = "Differential Abundance at Genus Level",
subtitle = paste0(contrast_name),
y = "-log10 (adjusted p-value)",
x = "log2 Fold Change",
col = NULL,
caption = "Benjamini-Hochberg correction, only taxa with adjusted p < 0.05 shown")
}
# ============================================================
# GENERATE VOLCANO PLOTS
# ============================================================
p_volcano_wildling <- volcano_plot_c2(df_Wildling,
contrast_name = "Wildling vs Wild FMT",
col_pos = "#8B6F47",
col_neg = "#3E5871")
p_volcano_wild <- volcano_plot_c2(df_Wild,
contrast_name = "Wild vs Wild FMT",
col_pos = "#6B2737",
col_neg = "#3E5871")
p_volcano_petshop <- volcano_plot_c2(df_PetShop,
contrast_name = "Pet Shop vs Wild FMT",
col_pos = "#C9873E",
col_neg = "#3E5871")
p_volcano_wildling
p_volcano_wild
p_volcano_petshop
#ggsave("./outputs/comp2_volcano_wildFMT_wildling.jpeg", plot = p_volcano_wildling, width = 10, height = 6, dpi = 300)
#ggsave("./outputs/comp2_volcano_wildFMT_wild.jpeg", plot = p_volcano_wild, width = 10, height = 6, dpi = 300)
#ggsave("./outputs/comp2_volcano_wildFMT_petshop.jpeg", plot = p_volcano_petshop, width = 10, height = 6, dpi = 300)
# ============================================================
# LFC BAR CHART FUNCTION — Comp2 comparisons vs Wild FMT
# ============================================================
lfc_bar_chart_c2 <- function(df, contrast_name, col_pos, col_neg) {
comparison_label <- sub(" vs .*", "", contrast_name)
reference_label <- sub(".* vs ", "", contrast_name)
df_sig <- df %>%
filter(!is.na(padj) & padj < 0.05) %>%
mutate(
diffexpressed = case_when(
log2FoldChange > 0 ~ paste0("Higher in ", comparison_label),
log2FoldChange < 0 ~ paste0("Higher in ", reference_label)
),
diffexpressed = factor(diffexpressed,
levels = c(paste0("Higher in ", comparison_label),
paste0("Higher in ", reference_label))),
taxon = factor(taxon, levels = taxon[order(log2FoldChange)])
)
ggplot(df_sig, aes(x = log2FoldChange, y = taxon, fill = diffexpressed)) +
geom_bar(stat = "identity") +
geom_errorbarh(aes(xmin = log2FoldChange - lfcSE,
xmax = log2FoldChange + lfcSE),
height = 0.3, color = "grey30") +
geom_vline(xintercept = 0, lty = 2, color = "grey50") +
scale_fill_manual(values = c(
setNames(col_pos, paste0("Higher in ", comparison_label)),
setNames(col_neg, paste0("Higher in ", reference_label))
)) +
theme_bw() +
theme(
axis.text.y = element_text(face = "italic", size = 10),
axis.title = element_text(),
legend.title = element_text(),
plot.title = element_markdown(size = 15),
plot.subtitle = element_markdown(size = 13)
) +
labs(
title = "Differential Abundance at Genus Level",
subtitle = contrast_name,
x = "log2 Fold Change",
y = NULL,
fill = NULL,
caption = "Benjamini-Hochberg correction, only taxa with adjusted p < 0.05 shown"
)
}
# ============================================================
# GENERATE BAR CHARTS
# ============================================================
p_lfc_wildling <- lfc_bar_chart_c2(df_Wildling,
contrast_name = "Wildling vs Wild FMT",
col_pos = "#8B6F47",
col_neg = "#3E5871")
p_lfc_wild <- lfc_bar_chart_c2(df_Wild,
contrast_name = "Wild vs Wild FMT",
col_pos = "#6B2737",
col_neg = "#3E5871")
p_lfc_petshop <- lfc_bar_chart_c2(df_PetShop,
contrast_name = "Pet Shop vs Wild FMT",
col_pos = "#C9873E",
col_neg = "#3E5871")
p_lfc_wildling
p_lfc_wild
p_lfc_petshop
# ggsave("./outputs/comp2_lfc_wildFMT_wildling.jpeg", plot = p_lfc_wildling, width = 10, height = 8, dpi = 300)
# ggsave("./outputs/comp2_lfc_wildFMT_wild.jpeg", plot = p_lfc_wild, width = 10, height = 8, dpi = 300)
# ggsave("./outputs/comp2_lfc_wildFMT_petshop.jpeg", plot = p_lfc_petshop, width = 10, height = 10, dpi = 300)
# Get significant taxa for each comparison with full stats
write.csv(
df_Wildling %>%
filter(!is.na(padj) & padj < 0.05) %>%
select(taxon, log2FoldChange, lfcSE, padj) %>%
arrange(desc(log2FoldChange)),
file = "./outputs/comp2_deseq2_wildling_vs_wildfmt.csv",
row.names = FALSE
)
write.csv(
df_Wild %>%
filter(!is.na(padj) & padj < 0.05) %>%
select(taxon, log2FoldChange, lfcSE, padj) %>%
arrange(desc(log2FoldChange)),
file = "./outputs/comp2_deseq2_wild_vs_wildfmt.csv",
row.names = FALSE
)
write.csv(
df_PetShop %>%
filter(!is.na(padj) & padj < 0.05) %>%
select(taxon, log2FoldChange, lfcSE, padj) %>%
arrange(desc(log2FoldChange)),
file = "./outputs/comp2_deseq2_petshop_vs_wildfmt.csv",
row.names = FALSE
)
# Print counts
cat("Wildling vs Wild FMT significant taxa:", nrow(df_Wildling %>% filter(!is.na(padj) & padj < 0.05)), "\n")
cat("Wild vs Wild FMT significant taxa:", nrow(df_Wild %>% filter(!is.na(padj) & padj < 0.05)), "\n")
cat("Pet Shop vs Wild FMT significant taxa:", nrow(df_PetShop %>% filter(!is.na(padj) & padj < 0.05)), "\n")
set.seed(123)
library(DESeq2)
library(ggtext)
library(ggrepel)
# ============================================================
# DIFFERENTIAL ABUNDANCE — DESeq2
# Comparison 2: Wildling as reference
# Contrasts: Wild vs Wildling, Pet Shop vs Wildling
# ============================================================
# Relevel — Wildling as reference
dds_c2$Group <- relevel(dds_c2$Group, ref = "Wildling")
# Re-run DESeq2 with new reference
dds_c2 <- DESeq(dds_c2)
resultsNames(dds_c2)
# Extract contrasts
res_Wild <- results(dds_c2, contrast = c("Group", "Wild", "Wildling"))
res_PetShop <- results(dds_c2, contrast = c("Group", "Pet Shop", "Wildling"))
df_Wild <- as.data.frame(res_Wild) %>% mutate(taxon = rownames(.))
df_PetShop <- as.data.frame(res_PetShop) %>% mutate(taxon = rownames(.))
cat("Significant taxa — Wild vs Wildling:",
nrow(subset(df_Wild, !is.na(padj) & padj < 0.05)), "\n")
cat("Significant taxa — Pet Shop vs Wildling:",
nrow(subset(df_PetShop, !is.na(padj) & padj < 0.05)), "\n")
# ============================================================
# GENERATE VOLCANO PLOTS
# Colors match fill_palette_c2
# ============================================================
p_volcano_wildling_wild <- volcano_plot_c2(df_Wild,
contrast_name = "Wild vs Wildling",
col_pos = "#6B2737", # Wild burgundy
col_neg = "#8B6F47") # Wildling brown
p_volcano_wildling_petshop <- volcano_plot_c2(df_PetShop,
contrast_name = "Pet Shop vs Wildling",
col_pos = "#C9873E", # Pet Shop orange
col_neg = "#8B6F47") # Wildling brown
p_volcano_wildling_wild
p_volcano_wildling_petshop
# ggsave("./outputs/comp2_volcano_wildlingVSwild.jpeg", plot = p_volcano_wild, width = 10, height = 6, dpi = 300)
# ggsave("./outputs/comp2_volcano_widlingVSpetshop.jpeg", plot = p_volcano_petshop, width = 10, height = 6, dpi = 300)
p_lfc_wildling_petshop <- lfc_bar_chart_c2(
df_PetShop,
contrast_name = "Pet Shop vs Wildling",
col_pos = "#C9873E", # Pet Shop orange
col_neg = "#8B6F47" # Wildling brown
)
p_lfc_wildling_petshop
ggsave("./outputs/comp2_da_wildlingVSpetshop.jpeg",
plot = p_lfc_wildling_petshop, width = 10, height = 8, dpi = 300)
write.csv(
df_PetShop %>%
filter(!is.na(padj) & padj < 0.05) %>%
select(taxon, log2FoldChange, lfcSE, padj) %>%
arrange(desc(log2FoldChange)),
file = "./outputs/comp2_deseq2_petshop_vs_wildling.csv",
row.names = FALSE
)
cat("Pet Shop vs Wildling significant taxa:", nrow(df_PetShop %>% filter(!is.na(padj) & padj < 0.05)), "\n")
# ============================================================
# Helicobacter species prevalence — from non-agglomerated ps_rel
# ============================================================
check_groups <- c("BALB/c_No_Wild_colon", "Wildling", "Pet Shop", "Wild")
heli_otu_ids <- taxa_names(ps_rel)[
grepl("G__Helicobacter", taxa_names(ps_rel))
]
ps_heli_prev <- subset_samples(ps_rel,
sample_data(ps_rel)$Group %in% check_groups)
ps_heli_prev <- prune_taxa(heli_otu_ids, ps_heli_prev)
ps_heli_prev <- prune_taxa(taxa_sums(ps_heli_prev) > 0, ps_heli_prev)
# Clean species names
heli_name_map <- data.frame(otu_id = taxa_names(ps_heli_prev)) %>%
mutate(
species = sub(".*S__", "", otu_id),
species = ifelse(grepl("^__", species),
"Helicobacter sp. (unclassified)", species),
species = sub(" strain.*", "", species),
species = sub(" DSM.*", "", species),
species = sub(" MIT.*", "", species),
species = sub(" CMRI.*", "", species),
species = trimws(species)
)
taxa_names(ps_heli_prev) <- heli_name_map$species
# Melt and calculate prevalence per species per group
heli_melt_prev <- psmelt(ps_heli_prev) %>%
group_by(OTU, Group) %>%
summarise(
n_present = sum(Abundance > 0),
n_total = n(),
prevalence = round(n_present / n_total, 2),
.groups = "drop"
) %>%
pivot_wider(
names_from = Group,
values_from = c(n_present, n_total, prevalence),
values_fill = 0
) %>%
arrange(OTU)
print(heli_melt_prev %>% select(OTU, starts_with("prevalence")), n = Inf)
## # A tibble: 3 × 5
## OTU prevalence_BALB/c_No…¹ prevalence_Wildling `prevalence_Pet Shop`
## <chr> <dbl> <dbl> <dbl>
## 1 Helicobacter… 0.21 0.9 0.85
## 2 Helicobacter… 1 0.4 0.85
## 3 K__Bacteria;… 0.86 1 1
## # ℹ abbreviated name: ¹​`prevalence_BALB/c_No_Wild_colon`
## # ℹ 1 more variable: prevalence_Wild <dbl>
# ============================================================
# GENERA EXCLUSIVE TO WILD MICE — Comparison 2 groups
# ============================================================
# Agglomerate ps_rel to genus level for all comp2 groups
ps_rel_c2_excl <- subset_samples(ps_rel,
sample_data(ps_rel)$Group %in% comp2_groups)
ps_rel_c2_excl <- subset_taxa(ps_rel_c2_excl,
!is.na(Genus) & Genus != "" & Genus != "__")
ps_rel_c2_excl <- prune_taxa(taxa_sums(ps_rel_c2_excl) > 0, ps_rel_c2_excl)
ps_rel_c2_excl_genus <- tax_glom(ps_rel_c2_excl, taxrank = "Genus", NArm = TRUE)
taxa_names(ps_rel_c2_excl_genus) <- make.unique(
as.character(tax_table(ps_rel_c2_excl_genus)[, "Genus"]))
# Melt and calculate relative abundance per sample
rel_c2_excl <- psmelt(ps_rel_c2_excl_genus) %>%
group_by(Sample) %>%
mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
ungroup()
# Calculate prevalence and mean relative abundance per genus per group
prev_c2 <- rel_c2_excl %>%
group_by(OTU, Group) %>%
summarise(
n_present = sum(Abundance > 0),
n_total = n(),
prevalence = n_present / n_total,
mean_rel = mean(RelAbund),
.groups = "drop"
) %>%
pivot_wider(names_from = Group,
values_from = c(n_present, n_total, prevalence, mean_rel),
values_fill = 0)
wild_exclusive_relaxed2 <- prev_c2 %>%
filter(
prevalence_Wild >= 0,
prevalence_Wildling == 0,
`prevalence_Pet Shop` == 0,
`prevalence_BALB/c_Yes_SPF_Term` == 0,
`prevalence_BALB/c_No_Wild_colon` == 0
) %>%
arrange(desc(mean_rel_Wild))
cat("Genera found in Wild but absent in all other groups:", nrow(wild_exclusive_relaxed2), "\n\n")
## Genera found in Wild but absent in all other groups: 0
wild_exclusive_relaxed2 %>%
select(OTU, n_present_Wild, n_total_Wild, prevalence_Wild, mean_rel_Wild) %>%
print(n = Inf)
## # A tibble: 0 × 5
## # ℹ 5 variables: OTU <chr>, n_present_Wild <int>, n_total_Wild <int>,
## # prevalence_Wild <dbl>, mean_rel_Wild <dbl>
# ============================================================
# Helicobacter relative abundance — Comp2 groups
# ============================================================
# Subset to comp2 groups, glom to genus, keep only Helicobacter
ps_heli_c2 <- subset_samples(ps_rel,
sample_data(ps_rel)$Group %in% comp2_groups)
ps_heli_c2 <- subset_taxa(ps_heli_c2,
!is.na(Genus) & Genus == "Helicobacter")
ps_heli_c2 <- prune_taxa(taxa_sums(ps_heli_c2) > 0, ps_heli_c2)
# Aggregate to genus level
ps_heli_c2_glom <- tax_glom(ps_heli_c2, taxrank = "Genus", NArm = TRUE)
# Melt and compute relative abundance within each sample
heli_c2_df <- psmelt(ps_heli_c2_glom) %>%
group_by(Sample) %>%
mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
ungroup() %>%
mutate(Group = factor(Group, levels = comp2_groups))
# Quick summary table: mean ± SD per group
heli_c2_df %>%
group_by(Group) %>%
summarise(
n = n(),
mean_rel = round(mean(RelAbund), 3),
sd_rel = round(sd(RelAbund), 3),
n_present = sum(Abundance > 0),
prevalence = round(n_present / n, 2),
.groups = "drop"
)
## # A tibble: 5 × 6
## Group n mean_rel sd_rel n_present prevalence
## <fct> <int> <dbl> <dbl> <int> <dbl>
## 1 BALB/c_Yes_SPF_Term 14 NaN NA 4 0.29
## 2 BALB/c_No_Wild_colon 14 100 0 14 1
## 3 Wildling 10 100 0 10 1
## 4 Pet Shop 13 100 0 13 1
## 5 Wild 4 NaN NA 3 0.75
heli_c2_df <- psmelt(ps_heli_c2_glom) %>%
mutate(Group = factor(Group, levels = comp2_groups))
# Summary per group
heli_c2_df %>%
group_by(Group) %>%
summarise(
n = n(),
mean_rel = round(mean(Abundance), 3), # Abundance IS already % from ps_rel
sd_rel = round(sd(Abundance), 3),
n_present = sum(Abundance > 0),
prevalence = round(n_present / n(), 2),
.groups = "drop"
)
## # A tibble: 5 × 6
## Group n mean_rel sd_rel n_present prevalence
## <fct> <int> <dbl> <dbl> <int> <dbl>
## 1 BALB/c_Yes_SPF_Term 14 0.004 0.006 4 0.29
## 2 BALB/c_No_Wild_colon 14 0.376 0.534 14 1
## 3 Wildling 10 0.541 0.475 10 1
## 4 Pet Shop 13 2.33 4.95 13 1
## 5 Wild 4 0.111 0.126 3 0.75
rel_c2_excl %>%
filter(OTU == "Helicobacter") %>%
group_by(Group) %>%
summarise(
n_present = sum(Abundance > 0),
n_total = n(),
prevalence = n_present / n_total,
mean_rel = round(mean(RelAbund), 3),
.groups = "drop"
)
## # A tibble: 5 × 5
## Group n_present n_total prevalence mean_rel
## <fct> <int> <int> <dbl> <dbl>
## 1 BALB/c_Yes_SPF_Term 4 14 0.286 0.013
## 2 BALB/c_No_Wild_colon 14 14 1 1.06
## 3 Wildling 10 10 1 0.777
## 4 Pet Shop 13 13 1 3.99
## 5 Wild 3 4 0.75 0.681
heli_c2 <- rel_c2_excl %>%
filter(OTU == "Helicobacter") %>%
mutate(Group = factor(Group, levels = comp2_groups))
p_heli_c2 <- ggplot(heli_c2, aes(x = Group, y = RelAbund, fill = Group)) +
geom_boxplot(width = 0.4, outlier.alpha = 0) +
geom_jitter(aes(shape = Sex), width = 0.15, size = 2.5,
alpha = 0.8, stroke = 1) +
scale_fill_manual(values = fill_palette_c2) +
scale_shape_manual(values = c("Female" = 21, "Male" = 24),
na.translate = FALSE) +
scale_y_continuous(limits = c(0,30), breaks = c(0, 5, 10, 15, 20, 25, 30)) +
scale_x_discrete(labels = comp2_labels) +
theme_bw() +
theme(axis.text.x = element_text(angle = 25, hjust = 1, size = 11),
axis.title.y = element_text(size = 13),
legend.text = element_text(size = 10),
plot.title = element_markdown()) +
guides(fill = "none",
shape = guide_legend(title = "Sex")) +
labs(x = NULL,
y = "Relative abundance (%)",
title = "*Helicobacter*<br>Relative abundance across adult groups")
p_heli_c2
# ggsave("./outputs/comp2_helicobacter.jpeg", plot = p_heli_c2, dpi = 300, height = 6, width = 10)
# ============================================================
# HELICOBACTER SPECIES — relative abundance across adult groups
# ============================================================
library(ggtext)
heli_groups <- c("BALB/c_No_Wild_colon", "Wildling", "Pet Shop", "Wild")
heli_labels <- c(
"BALB/c_No_Wild_colon" = "BALB/c\n+ Wild FMT",
"Wildling" = "Wildling",
"Pet Shop" = "Pet Shop",
"Wild" = "Wild"
)
# ============================================================
# Extract Helicobacter OTUs from the NON-agglomerated ps_rel
# ============================================================
# Get all OTU IDs that are Helicobacter
heli_otu_ids <- taxa_names(ps_rel)[
grepl("G__Helicobacter", taxa_names(ps_rel))
]
# Subset phyloseq to Helicobacter OTUs and target groups
ps_heli <- subset_samples(ps_rel,
sample_data(ps_rel)$Group %in% heli_groups)
ps_heli <- prune_taxa(heli_otu_ids, ps_heli)
ps_heli <- prune_taxa(taxa_sums(ps_heli) > 0, ps_heli)
# Parse species names from OTU ID strings
species_lookup <- data.frame(
otu_id = taxa_names(ps_heli)
) %>%
mutate(
species = sub(".*S__", "", otu_id),
species = case_when(
grepl("^__", species) ~ "Helicobacter sp. (unclassified)",
TRUE ~ species
),
# Strip strain designations for cleaner labels
species = sub(" strain.*", "", species),
species = sub(" DSM.*", "", species),
species = sub(" JCM.*", "", species),
species = sub(" MIT.*", "", species),
species = sub(" CMRI.*", "", species),
species = trimws(species)
)
# Rename taxa in phyloseq object to species names
taxa_names(ps_heli) <- species_lookup$species
# Melt and calculate relative abundance per sample
heli_melt <- psmelt(ps_heli) %>%
group_by(Sample) %>%
mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
ungroup() %>%
mutate(Group = factor(Group, levels = heli_groups))
# ============================================================
# PLOT
# ============================================================
p_heli_species <- ggplot(heli_melt,
aes(x = Group, y = RelAbund, fill = Group)) +
geom_boxplot(width = 0.45, outlier.alpha = 0) +
geom_jitter(aes(shape = Sex), width = 0.15, size = 2.2,
alpha = 0.8, stroke = 0.8) +
facet_wrap(~ OTU, scales = "free_y", ncol = 3) +
scale_fill_manual(values = fill_palette_c2) +
scale_shape_manual(values = c("Female" = 21, "Male" = 24),
na.translate = FALSE) +
scale_x_discrete(labels = heli_labels) +
theme_bw() +
theme(
axis.text.x = element_text(angle = 30, hjust = 1, size = 9),
axis.title.y = element_text(size = 12),
strip.text = element_text(face = "italic", size = 10),
legend.position = "none",
plot.title = element_text(size = 14)
) +
labs(
x = NULL,
y = "Relative abundance (%)",
title = "Relative Abundance of Helicobacter species",
)
p_heli_species
# ggsave("./outputs/comp2_helicobacter_species.jpeg",
# plot = p_heli_species,
# width = 12, height = 10, dpi = 300)
check_groups <- c("BALB/c_No_Wild_colon", "Wildling", "Pet Shop", "Wild")
ps_excl <- subset_samples(ps_rel,
sample_data(ps_rel)$Group %in% check_groups)
ps_excl <- subset_taxa(ps_excl,
!is.na(Genus) & Genus != "" & Genus != "__")
ps_excl <- prune_taxa(taxa_sums(ps_excl) > 0, ps_excl)
ps_excl_genus <- tax_glom(ps_excl, taxrank = "Genus", NArm = TRUE)
taxa_names(ps_excl_genus) <- make.unique(
as.character(tax_table(ps_excl_genus)[, "Genus"]))
rel_excl <- psmelt(ps_excl_genus) %>%
group_by(Sample) %>%
mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
ungroup()
prev_excl <- rel_excl %>%
group_by(OTU, Group) %>%
summarise(
n_present = sum(Abundance > 0),
n_total = n(),
prevalence = n_present / n_total,
mean_rel = mean(RelAbund),
.groups = "drop"
) %>%
pivot_wider(
names_from = Group,
values_from = c(n_present, n_total, prevalence, mean_rel),
values_fill = 0
)
# Then run the filter
absent_in_wildfmt_all3 <- prev_excl %>%
filter(
`prevalence_BALB/c_No_Wild_colon` == 0,
prevalence_Wildling > 0,
`prevalence_Pet Shop` > 0,
prevalence_Wild > 0
) %>%
arrange(desc(mean_rel_Wild))
cat("Genera absent in Wild FMT but present in ALL THREE other groups:\n")
## Genera absent in Wild FMT but present in ALL THREE other groups:
absent_in_wildfmt_all3 %>%
select(OTU,
starts_with("prevalence_"),
starts_with("mean_rel_")) %>%
print(n = Inf)
## # A tibble: 4 × 9
## OTU prevalence_BALB/c_No…¹ prevalence_Wildling `prevalence_Pet Shop`
## <chr> <dbl> <dbl> <dbl>
## 1 Treponema 0 0.7 0.154
## 2 Hominilimico… 0 0.3 0.231
## 3 Enterococcus 0 0.6 0.923
## 4 Escherichia 0 0.1 0.615
## # ℹ abbreviated name: ¹​`prevalence_BALB/c_No_Wild_colon`
## # ℹ 5 more variables: prevalence_Wild <dbl>,
## # `mean_rel_BALB/c_No_Wild_colon` <dbl>, mean_rel_Wildling <dbl>,
## # `mean_rel_Pet Shop` <dbl>, mean_rel_Wild <dbl>
absent_genera <- absent_in_wildfmt_all3$OTU
p_absent <- rel_excl %>%
filter(OTU %in% absent_genera,
Group != "BALB/c_No_Wild_colon") %>%
mutate(Group = factor(Group, levels = c("Wildling", "Pet Shop", "Wild"))) %>%
ggplot(aes(x = Group, y = RelAbund, fill = Group)) +
geom_boxplot(width = 0.45, outlier.alpha = 0) +
geom_jitter(aes(shape = Sex), width = 0.15, size = 2.2,
alpha = 0.8, stroke = 0.8) +
facet_wrap(~ OTU, scales = "free_y", ncol = 2) +
scale_fill_manual(values = fill_palette_c2) +
scale_shape_manual(values = c("Female" = 21, "Male" = 24),
na.translate = FALSE) +
scale_x_discrete(labels = c(
"Wildling" = "Wildling",
"Pet Shop" = "Pet Shop",
"Wild" = "Wild"
)) +
theme_bw() +
theme(
axis.text.x = element_text(angle = 25, hjust = 1, size = 10),
axis.title.y = element_text(size = 12),
strip.text = element_text(face = "italic", size = 11),
legend.position = "none",
plot.title = element_markdown(size = 14),
plot.subtitle = element_text(size = 11, color = "grey40")
) +
labs(
x = NULL,
y = "Relative abundance (%)",
title = "Genera absent in BALB/c + Wild FMT",
subtitle = "Present in Wildling, Pet Shop, and Wild mice"
)
p_absent
# ggsave("./outputs/comp2_absent_wildfmt.jpeg", plot = p_absent, width = 10, height = 8, dpi = 300)
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-apple-darwin20
## Running under: macOS 15.7.5
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/Copenhagen
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ggh4x_0.3.1 S4Vectors_0.44.0 BiocGenerics_0.52.0
## [4] ggtext_0.1.2 decontam_1.26.0 rstatix_0.7.3
## [7] ggpubr_0.6.3 patchwork_1.3.2 readxl_1.4.5
## [10] ape_5.8-1 glue_1.8.0 pheatmap_1.0.13
## [13] reshape2_1.4.5 ggrepel_0.9.8 devtools_2.5.0
## [16] vegan_2.7-3 BiocManager_1.30.27 gridExtra_2.3
## [19] phyloseq_1.50.0 lubridate_1.9.5 forcats_1.0.1
## [22] stringr_1.6.0 dplyr_1.2.0 purrr_1.2.1
## [25] readr_2.2.0 tidyr_1.3.2 tibble_3.3.1
## [28] ggplot2_4.0.2 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] permute_0.9-10 rlang_1.1.7 magrittr_2.0.4
## [4] ade4_1.7-24 otel_0.2.0 compiler_4.4.1
## [7] mgcv_1.9-4 systemfonts_1.3.2 vctrs_0.7.2
## [10] pkgconfig_2.0.3 crayon_1.5.3 fastmap_1.2.0
## [13] backports_1.5.0 XVector_0.46.0 ellipsis_0.3.2
## [16] labeling_0.4.3 utf8_1.2.6 rmarkdown_2.30
## [19] markdown_2.0 sessioninfo_1.2.3 tzdb_0.5.0
## [22] UCSC.utils_1.2.0 ragg_1.5.2 xfun_0.57
## [25] zlibbioc_1.52.0 cachem_1.1.0 litedown_0.9
## [28] GenomeInfoDb_1.42.3 jsonlite_2.0.0 biomformat_1.34.0
## [31] rhdf5filters_1.18.1 Rhdf5lib_1.28.0 broom_1.0.12
## [34] parallel_4.4.1 cluster_2.1.8.2 R6_2.6.1
## [37] bslib_0.10.0 stringi_1.8.7 RColorBrewer_1.1-3
## [40] car_3.1-5 pkgload_1.5.0 jquerylib_0.1.4
## [43] cellranger_1.1.0 Rcpp_1.1.1 iterators_1.0.14
## [46] knitr_1.51 usethis_3.2.1 pacman_0.5.1
## [49] IRanges_2.40.1 Matrix_1.7-5 splines_4.4.1
## [52] igraph_2.2.2 timechange_0.4.0 tidyselect_1.2.1
## [55] abind_1.4-8 rstudioapi_0.18.0 yaml_2.3.12
## [58] codetools_0.2-20 pkgbuild_1.4.8 lattice_0.22-9
## [61] plyr_1.8.9 Biobase_2.66.0 withr_3.0.2
## [64] S7_0.2.1 evaluate_1.0.5 survival_3.8-6
## [67] xml2_1.5.2 Biostrings_2.74.1 pillar_1.11.1
## [70] carData_3.0-6 foreach_1.5.2 generics_0.1.4
## [73] hms_1.1.4 commonmark_2.0.0 scales_1.4.0
## [76] tools_4.4.1 data.table_1.18.2.1 ggsignif_0.6.4
## [79] fs_2.0.1 rhdf5_2.50.2 grid_4.4.1
## [82] colorspace_2.1-2 nlme_3.1-168 GenomeInfoDbData_1.2.13
## [85] Formula_1.2-5 cli_3.6.5 textshaping_1.0.5
## [88] gtable_0.3.6 sass_0.4.10 digest_0.6.39
## [91] farver_2.1.2 memoise_2.0.1 htmltools_0.5.9
## [94] multtest_2.62.0 lifecycle_1.0.5 httr_1.4.8
## [97] gridtext_0.1.6 MASS_7.3-65