1. Packages

bioc.pkg <- c("phyloseq", "BiocManager", "MicrobiotaProcess", "decontam")
cran.pkg <- c("tidyverse", "readxl", "ape", "pacman", "glue", "vegan",
              "devtools", "ggrepel", "reshape2", "BiocManager",
              "pheatmap", "patchwork", "rstatix", "ggpubr")

inst.pkg <- cran.pkg %in% installed.packages()
if (any(!inst.pkg)) { install.packages(cran.pkg[!inst.pkg], repos = "http://cran.rstudio.com/") }

inst.pkg <- bioc.pkg %in% installed.packages()
if (any(!inst.pkg)) { BiocManager::install(bioc.pkg[!inst.pkg]) }

pkg <- c("tidyverse", "phyloseq", "gridExtra", "BiocManager",
         "vegan", "devtools", "ggrepel", "reshape2",
         "pheatmap", "glue", "ape", "readxl")

for (i in 1:length(pkg)) {
  library(pkg[i], character.only = TRUE, verbose = FALSE, attach.required = FALSE)
}

library(patchwork, verbose = FALSE)
library(ggpubr, verbose = FALSE)
library(rstatix, verbose = FALSE)
library(decontam)
library(phyloseq)
library(ggtext)

tryCatch(devtools::unload("tidytree", quiet = TRUE), error = function(e) invisible(NULL))

2. Import Data

getwd()

## [1] "/Users/josefinehaageneklund/Downloads"

setwd("~/OneDrive - Københavns Universitet/Speciale/ON UMI 16S")

Feature table

# Reading abundance/feature table
ft_raw <- read_excel("~/OneDrive - Københavns Universitet/Speciale/ON UMI 16S/IMMU_WILD_merged_table_ONT_Josefine_16s.xlsx") %>%
  as.data.frame()

dim(ft_raw)

## [1] 482 134

head(ft_raw[, 1:5])

##                                                                                                                                                                  #OTU ID
## 1                                                                K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__
## 2                K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040
## 3               K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis strain JCM 1190
## 4 K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis subsp. lactis strain YIT 4121
## 5               K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium bifidum strain KCTC 3202
## 6             K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium breve DSM 20213 = JCM 1192
##   BRK97_P1 BRK98_P1 BRK99_P1 BRK100_P1
## 1        0        0        0         0
## 2        0        0        0         0
## 3        0        0        0         0
## 4        0        0        0         0
## 5        0        0        0         0
## 6        0        0        0         0

tail(ft_raw[, 1:5])

##                                                                                                             #OTU ID
## 477 K__Bacteria;P__Verrucomicrobiota;C__Verrucomicrobiia;O__Verrucomicrobiales;F__Akkermansiaceae;G__Akkermansia;__
## 478                                                                 K__Bacteria;P__Verrucomicrobiota;__;__;__;__;__
## 479                                                                                   K__Bacteria;__;__;__;__;__;__
## 480                                                                                    Unassigned;__;__;__;__;__;__
## 481                                                                           Reads per sample/condition for >10000
## 482                                                                                                       BRK/Plate
##     BRK97_P1 BRK98_P1 BRK99_P1 BRK100_P1
## 477        0        0        1         1
## 478        0        0        0         0
## 479       70       48       33        56
## 480        0        0        0         0
## 481    12648    16308    18279     25356
## 482 BRK97_P1 BRK98_P1 BRK99_P1 BRK100_P1

# Save strings before separating — these become rownames for both ft and tx
otu_ids <- ft_raw[["#OTU ID"]]

Taxonomy

# Parse taxonomy from #OTU ID column
ft_sep <- tidyr::separate(data = ft_raw,
                          col = `#OTU ID`,
                          into = c("Kingdom", "Phylum",
                                   "Class", "Order", "Family",
                                   "Genus", "Species"),
                          sep = ";")

# Remove taxon tags (K__, P__, etc.)
ft_sep <- apply(ft_sep, 2, function(x) { gsub("^.__", "", x) })

# Split into taxonomy matrix (tx) and count matrix (ft)
taxonomy_cols <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")

# Taxonomy matrix
tx <- ft_sep[, taxonomy_cols, drop = FALSE]
rownames(tx) <- otu_ids   # restore rownames

# Count matrix — convert to numeric
ft_numeric <- ft_sep[, !colnames(ft_sep) %in% taxonomy_cols, drop = FALSE]
ft_numeric <- apply(ft_numeric, 2, function(x) as.numeric(as.character(x)))
ft_numeric <- as.matrix(ft_numeric)
rownames(ft_numeric) <- otu_ids   # restore rownames
ft <- ft_numeric

# Verify
cat("ft rownames (first 2):\n"); print(head(rownames(ft), 2))

## ft rownames (first 2):

## [1] "K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__"                                                
## [2] "K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040"

cat("tx rownames (first 2):\n"); print(head(rownames(tx), 2))

## tx rownames (first 2):

## [1] "K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__"                                                
## [2] "K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040"

dim(ft)

## [1] 482 133

colSums(ft) %>% sort()

## named numeric(0)

sum(colSums(ft) == 0)

## [1] NA

range(colSums(ft))

## [1] NA NA

Metadata

mt <- read_excel("~/OneDrive - Københavns Universitet/Speciale/ON UMI 16S/Metadata_for_feature_table_Final.xlsx", 
    na = "NA")

head(mt)

## # A tibble: 6 × 16
##   Seq_ID Study `Mouse ID` Strain Sex    `Mother ID`  Cage Isolater Eartag
##    <dbl> <chr> <chr>      <chr>  <chr>        <dbl> <dbl>    <dbl>  <dbl>
## 1      1 IMU   78         BALB/c Female           6     6        1      3
## 2      2 IMU   79         BALB/c Female           6     6        1     30
## 3      3 IMU   80         BALB/c Female           6     6        1      5
## 4      4 IMU   81         BALB/c Male             6     7        4      3
## 5      5 IMU   82         BALB/c Male             6     7        4     30
## 6      6 IMU   83         BALB/c Male             6     7        4      5
## # ℹ 7 more variables: `Pre-immunized` <chr>, `Pre-mix` <chr>, FMT <chr>,
## #   Round <dbl>, Trial <chr>, Sample <chr>, Barcode <chr>

mt <- as.data.frame(mt)           # tibble rownames virker ikke altid
rownames(mt) <- mt$Barcode
mt <- mt[colnames(ft), ]          # align til feature table

# Create Groups
# Ensures group names are consistent across microbiome and flow data analyses
mt <- mt %>%
  mutate(
    Group = case_when(
      grepl("Donor_inoculum_SPF", Study)                             ~ "Donor_SPF",
      grepl("Donor_inoculum_Wild", Study)                            ~ "Donor_Wild",
      grepl("Negative", Study)                                       ~ "Negative_Control",
      grepl("Mock", Study)                                           ~ "Mock_Positive",
      Strain == "BALB/c" & `Pre-immunized` == "No"  & FMT == "SPF"  & Sample == "weaning"             ~ "BALB/c_No_SPF_weaning",
      Strain == "BALB/c" & `Pre-immunized` == "Yes" & FMT == "SPF"  & Sample == "Term"                ~ "BALB/c_Yes_SPF_Term",
      Strain == "BALB/c" & `Pre-immunized` == "No"  & FMT == "Wild" & Sample == "weaning"             ~ "BALB/c_No_Wild_weaning",
      Strain == "BALB/c" & `Pre-immunized` == "No"  & FMT == "Wild" & Sample == "Term, colon content" ~ "BALB/c_No_Wild_colon",
      Strain == "Wildling"                                           ~ "Wildling",
      Strain == "Pet Shop"                                           ~ "Pet Shop",
      Strain == "Wild"                                               ~ "Wild",
      Study == "IMU_mother" & FMT == "SPF"  & Sample == "Pre_ABX"    ~ "Mother_SPF_PreABX",
      Study == "IMU_mother" & FMT == "SPF"  & Sample == "post_1.FMT" ~ "Mother_SPF_post1FMT",
      Study == "IMU_mother" & FMT == "SPF"  & Sample == "Term"       ~ "Mother_SPF_Term",
      Study == "IMU_mother" & FMT == "Wild" & Sample == "Pre_ABX"    ~ "Mother_Wild_PreABX",
      TRUE ~ NA_character_
    ),
    Group = factor(Group, levels = c(
      "BALB/c_No_SPF_weaning", "BALB/c_No_Wild_weaning",
      "BALB/c_Yes_SPF_Term", "BALB/c_No_Wild_colon",
      "Wildling", "Pet Shop", "Wild",
      "Mother_SPF_PreABX", "Mother_SPF_post1FMT", "Mother_SPF_Term",
      "Mother_Wild_PreABX",
      "Donor_SPF", "Donor_Wild",
      "Negative_Control", "Mock_Positive"
    ))
  )

dim(mt)

## [1] 133  17

table(mt$Group, useNA = "ifany")

## 
##  BALB/c_No_SPF_weaning BALB/c_No_Wild_weaning    BALB/c_Yes_SPF_Term 
##                     19                     20                     14 
##   BALB/c_No_Wild_colon               Wildling               Pet Shop 
##                     14                     11                     14 
##                   Wild      Mother_SPF_PreABX    Mother_SPF_post1FMT 
##                      4                      3                      3 
##        Mother_SPF_Term     Mother_Wild_PreABX              Donor_SPF 
##                      3                      6                      3 
##             Donor_Wild       Negative_Control          Mock_Positive 
##                      3                     13                      3

colnames(ft)

##   [1] "BRK97_P1"  "BRK98_P1"  "BRK99_P1"  "BRK100_P1" "BRK101_P1" "BRK102_P1"
##   [7] "BRK103_P1" "BRK104_P1" "BRK105_P1" "BRK106_P1" "BRK107_P1" "BRK108_P1"
##  [13] "BRK109_P1" "BRK110_P1" "BRK111_P1" "BRK112_P1" "BRK113_P1" "BRK114_P1"
##  [19] "BRK115_P1" "BRK116_P1" "BRK117_P1" "BRK118_P1" "BRK119_P1" "BRK120_P1"
##  [25] "BRK121_P1" "BRK122_P1" "BRK123_P1" "BRK124_P1" "BRK125_P1" "BRK126_P1"
##  [31] "BRK127_P1" "BRK128_P1" "BRK129_P1" "BRK130_P1" "BRK131_P1" "BRK132_P1"
##  [37] "BRK133_P1" "BRK134_P1" "BRK135_P1" "BRK136_P1" "BRK137_P1" "BRK138_P1"
##  [43] "BRK139_P1" "BRK141_P1" "BRK142_P1" "BRK143_P1" "BRK144_P1" "BRK145_P1"
##  [49] "BRK146_P1" "BRK147_P1" "BRK148_P1" "BRK149_P1" "BRK150_P1" "BRK151_P1"
##  [55] "BRK152_P1" "BRK153_P1" "BRK154_P1" "BRK155_P1" "BRK156_P1" "BRK157_P1"
##  [61] "BRK158_P1" "BRK159_P1" "BRK160_P1" "BRK161_P1" "BRK162_P1" "BRK163_P1"
##  [67] "BRK164_P1" "BRK165_P1" "BRK166_P1" "BRK167_P1" "BRK168_P1" "BRK169_P1"
##  [73] "BRK77_P2"  "BRK78_P2"  "BRK80_P2"  "BRK81_P2"  "BRK82_P2"  "BRK83_P2" 
##  [79] "BRK84_P2"  "BRK85_P2"  "BRK86_P2"  "BRK87_P2"  "BRK88_P2"  "BRK89_P2" 
##  [85] "BRK90_P2"  "BRK91_P2"  "BRK92_P2"  "BRK93_P2"  "BRK94_P2"  "BRK95_P2" 
##  [91] "BRK96_P2"  "BRK01_P3"  "BRK02_P3"  "BRK03_P3"  "BRK04_P3"  "BRK05_P3" 
##  [97] "BRK06_P3"  "BRK07_P3"  "BRK08_P3"  "BRK09_P3"  "BRK10_P3"  "BRK11_P3" 
## [103] "BRK12_P3"  "BRK13_P3"  "BRK14_P3"  "BRK15_P3"  "BRK16_P3"  "BRK17_P3" 
## [109] "BRK18_P3"  "BRK19_P3"  "BRK20_P3"  "BRK21_P3"  "BRK22_P3"  "BRK23_P3" 
## [115] "BRK24_P3"  "BRK25_P3"  "BRK26_P3"  "BRK65_P3"  "BRK66_P3"  "BRK67_P3" 
## [121] "BRK68_P3"  "BRK69_P3"  "BRK70_P3"  "BRK71_P3"  "BRK72_P3"  "BRK73_P3" 
## [127] "BRK74_P3"  "BRK75_P3"  "BRK76_P3"  "BRK77_P3"  "BRK78_P3"  "BRK79_P3" 
## [133] "BRK80_P3"

rownames(mt)

##   [1] "BRK97_P1"  "BRK98_P1"  "BRK99_P1"  "BRK100_P1" "BRK101_P1" "BRK102_P1"
##   [7] "BRK103_P1" "BRK104_P1" "BRK105_P1" "BRK106_P1" "BRK107_P1" "BRK108_P1"
##  [13] "BRK109_P1" "BRK110_P1" "BRK111_P1" "BRK112_P1" "BRK113_P1" "BRK114_P1"
##  [19] "BRK115_P1" "BRK116_P1" "BRK117_P1" "BRK118_P1" "BRK119_P1" "BRK120_P1"
##  [25] "BRK121_P1" "BRK122_P1" "BRK123_P1" "BRK124_P1" "BRK125_P1" "BRK126_P1"
##  [31] "BRK127_P1" "BRK128_P1" "BRK129_P1" "BRK130_P1" "BRK131_P1" "BRK132_P1"
##  [37] "BRK133_P1" "BRK134_P1" "BRK135_P1" "BRK136_P1" "BRK137_P1" "BRK138_P1"
##  [43] "BRK139_P1" "BRK141_P1" "BRK142_P1" "BRK143_P1" "BRK144_P1" "BRK145_P1"
##  [49] "BRK146_P1" "BRK147_P1" "BRK148_P1" "BRK149_P1" "BRK150_P1" "BRK151_P1"
##  [55] "BRK152_P1" "BRK153_P1" "BRK154_P1" "BRK155_P1" "BRK156_P1" "BRK157_P1"
##  [61] "BRK158_P1" "BRK159_P1" "BRK160_P1" "BRK161_P1" "BRK162_P1" "BRK163_P1"
##  [67] "BRK164_P1" "BRK165_P1" "BRK166_P1" "BRK167_P1" "BRK168_P1" "BRK169_P1"
##  [73] "BRK77_P2"  "BRK78_P2"  "BRK80_P2"  "BRK81_P2"  "BRK82_P2"  "BRK83_P2" 
##  [79] "BRK84_P2"  "BRK85_P2"  "BRK86_P2"  "BRK87_P2"  "BRK88_P2"  "BRK89_P2" 
##  [85] "BRK90_P2"  "BRK91_P2"  "BRK92_P2"  "BRK93_P2"  "BRK94_P2"  "BRK95_P2" 
##  [91] "BRK96_P2"  "BRK01_P3"  "BRK02_P3"  "BRK03_P3"  "BRK04_P3"  "BRK05_P3" 
##  [97] "BRK06_P3"  "BRK07_P3"  "BRK08_P3"  "BRK09_P3"  "BRK10_P3"  "BRK11_P3" 
## [103] "BRK12_P3"  "BRK13_P3"  "BRK14_P3"  "BRK15_P3"  "BRK16_P3"  "BRK17_P3" 
## [109] "BRK18_P3"  "BRK19_P3"  "BRK20_P3"  "BRK21_P3"  "BRK22_P3"  "BRK23_P3" 
## [115] "BRK24_P3"  "BRK25_P3"  "BRK26_P3"  "BRK65_P3"  "BRK66_P3"  "BRK67_P3" 
## [121] "BRK68_P3"  "BRK69_P3"  "BRK70_P3"  "BRK71_P3"  "BRK72_P3"  "BRK73_P3" 
## [127] "BRK74_P3"  "BRK75_P3"  "BRK76_P3"  "BRK77_P3"  "BRK78_P3"  "BRK79_P3" 
## [133] "BRK80_P3"

3. Build Phyloseq Object

# Verify rownames match before building
stopifnot(rownames(ft) == rownames(tx))   
stopifnot(colnames(ft) == rownames(mt))   # sample names must match

pst = phyloseq(
  otu_table(ft, taxa_are_rows = TRUE),
  tax_table(as.matrix(tx)),
  sample_data(mt)
)

pst

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 482 taxa and 133 samples ]
## sample_data() Sample Data:       [ 133 samples by 17 sample variables ]
## tax_table()   Taxonomy Table:    [ 482 taxa by 7 taxonomic ranks ]

sample_data(pst)

##           Seq_ID                            Study Mouse ID   Strain    Sex
## BRK97_P1       1                              IMU       78   BALB/c Female
## BRK98_P1       2                              IMU       79   BALB/c Female
## BRK99_P1       3                              IMU       80   BALB/c Female
## BRK100_P1      4                              IMU       81   BALB/c   Male
## BRK101_P1      5                              IMU       82   BALB/c   Male
## BRK102_P1      6                              IMU       83   BALB/c   Male
## BRK103_P1      7                              IMU       84   BALB/c   Male
## BRK104_P1      8                              IMU       85   BALB/c   Male
## BRK105_P1      9                              IMU       86   BALB/c   Male
## BRK106_P1     10                              IMU       87   BALB/c   Male
## BRK107_P1     11                              IMU       88   BALB/c Female
## BRK108_P1     12                              IMU       89   BALB/c Female
## BRK109_P1     13                              IMU       90   BALB/c Female
## BRK110_P1     14                              IMU       91   BALB/c Female
## BRK111_P1     15                              IMU       18   BALB/c Female
## BRK112_P1     16                              IMU       20   BALB/c Female
## BRK113_P1     17                              IMU       21   BALB/c Female
## BRK114_P1     18                              IMU       23   BALB/c Female
## BRK115_P1     19                              IMU       26   BALB/c Female
## BRK116_P1     20                              IMU       28   BALB/c Female
## BRK117_P1     21                              IMU       29   BALB/c Female
## BRK118_P1     22                              IMU       31   BALB/c Female
## BRK119_P1     23                              IMU       34   BALB/c   Male
## BRK120_P1     24                              IMU       35   BALB/c   Male
## BRK121_P1     25                              IMU       37   BALB/c   Male
## BRK122_P1     26                              IMU       39   BALB/c   Male
## BRK123_P1     27                              IMU       40   BALB/c   Male
## BRK124_P1     28                              IMU       44   BALB/c   Male
## BRK125_P1     29                       IMU_mother        6   BALB/c Female
## BRK126_P1     30                       IMU_mother       12   BALB/c Female
## BRK127_P1     31                       IMU_mother       16   BALB/c Female
## BRK128_P1     32                       IMU_mother      154   BALB/c Female
## BRK129_P1     33                       IMU_mother       19   BALB/c Female
## BRK130_P1     34                       IMU_mother        3   BALB/c Female
## BRK131_P1     35                       IMU_mother       13   BALB/c Female
## BRK132_P1     36                       IMU_mother       20   BALB/c Female
## BRK133_P1     37                       IMU_mother        5   BALB/c Female
## BRK134_P1     38                       IMU_mother        6   BALB/c Female
## BRK135_P1     39                       IMU_mother       12   BALB/c Female
## BRK136_P1     40                       IMU_mother       16   BALB/c Female
## BRK137_P1     47                       IMU_mother        6   BALB/c Female
## BRK138_P1     48                       IMU_mother       12   BALB/c Female
## BRK139_P1     49                       IMU_mother       16   BALB/c Female
## BRK141_P1     51                             WILD        2 Wildling Female
## BRK142_P1     52                             WILD        3 Wildling Female
## BRK143_P1     53                             WILD        4 Wildling Female
## BRK144_P1     54                             WILD        5 Wildling Female
## BRK145_P1     55                             WILD        6 Wildling Female
## BRK146_P1     56                             WILD        8 Wildling   Male
## BRK147_P1     57                             WILD        9 Wildling   Male
## BRK148_P1     58                             WILD       10 Wildling   Male
## BRK149_P1     59                             WILD       11 Wildling   Male
## BRK150_P1     60                             WILD       12 Wildling   Male
## BRK151_P1     61                             WILD       13 Wildling   Male
## BRK152_P1     62                             WILD       14 Pet Shop   Male
## BRK153_P1     63                             WILD       15 Pet Shop   Male
## BRK154_P1     64                             WILD       16 Pet Shop   Male
## BRK155_P1     65                             WILD       17 Pet Shop   Male
## BRK156_P1     66                             WILD       18 Pet Shop   Male
## BRK157_P1     67                             WILD       19 Pet Shop   Male
## BRK158_P1     68                             WILD       20 Pet Shop   Male
## BRK159_P1     69                             WILD       21 Pet Shop Female
## BRK160_P1     70                             WILD       22 Pet Shop Female
## BRK161_P1     71                             WILD       23 Pet Shop Female
## BRK162_P1     72                             WILD       24 Pet Shop Female
## BRK163_P1     73                             WILD       25 Pet Shop Female
## BRK164_P1     74                             WILD       26 Pet Shop Female
## BRK165_P1     75                             WILD       27 Pet Shop Female
## BRK166_P1     76                             WILD       28     Wild Female
## BRK167_P1     77                             WILD       29     Wild   Male
## BRK168_P1     78                             WILD       30     Wild Female
## BRK169_P1     79                             WILD       31     Wild Female
## BRK77_P2     184                              IMU     <NA>   BALB/c   Male
## BRK78_P2     185                              IMU     <NA>   BALB/c   Male
## BRK80_P2     187                              IMU     <NA>   BALB/c   Male
## BRK81_P2     188                              IMU     <NA>   BALB/c   Male
## BRK82_P2     189                              IMU     <NA>   BALB/c   Male
## BRK83_P2     190                              IMU     <NA>   BALB/c   Male
## BRK84_P2     191                              IMU     <NA>   BALB/c   Male
## BRK85_P2     192                              IMU     <NA>   BALB/c   Male
## BRK86_P2     193                              IMU     <NA>   BALB/c   Male
## BRK87_P2     194                              IMU     <NA>   BALB/c Female
## BRK88_P2     195                              IMU     <NA>   BALB/c Female
## BRK89_P2     196                              IMU     <NA>   BALB/c Female
## BRK90_P2     197                              IMU     <NA>   BALB/c Female
## BRK91_P2     198                              IMU     <NA>   BALB/c Female
## BRK92_P2     199                              IMU     <NA>   BALB/c Female
## BRK93_P2     200                              IMU     <NA>   BALB/c Female
## BRK94_P2     201                              IMU     <NA>   BALB/c Female
## BRK95_P2     202                              IMU     <NA>   BALB/c Female
## BRK96_P2     203                              IMU     <NA>   BALB/c Female
## BRK01_P3     204                              IMU     <NA>   BALB/c   Male
## BRK02_P3     205                              IMU     <NA>   BALB/c   Male
## BRK03_P3     206                              IMU     <NA>   BALB/c   Male
## BRK04_P3     207                              IMU     <NA>   BALB/c   Male
## BRK05_P3     208                              IMU     <NA>   BALB/c   Male
## BRK06_P3     209                              IMU     <NA>   BALB/c   Male
## BRK07_P3     210                              IMU     <NA>   BALB/c   Male
## BRK08_P3     211                              IMU     <NA>   BALB/c   Male
## BRK09_P3     212                              IMU     <NA>   BALB/c   Male
## BRK10_P3     213                              IMU     <NA>   BALB/c   Male
## BRK11_P3     214                              IMU     <NA>   BALB/c Female
## BRK12_P3     215                              IMU     <NA>   BALB/c Female
## BRK13_P3     216                              IMU     <NA>   BALB/c Female
## BRK14_P3     217                              IMU     <NA>   BALB/c Female
## BRK15_P3     218                              IMU     <NA>   BALB/c Female
## BRK16_P3     219                              IMU     <NA>   BALB/c Female
## BRK17_P3     220                              IMU     <NA>   BALB/c Female
## BRK18_P3     221                              IMU     <NA>   BALB/c Female
## BRK19_P3     222                              IMU     <NA>   BALB/c Female
## BRK20_P3     223                              IMU     <NA>   BALB/c Female
## BRK21_P3     224       Donor_inoculum_Wild(Zahir)       DW     Wild   <NA>
## BRK22_P3     225 Donor_inoculum_Wild_resampling_1     DWr1     Wild   <NA>
## BRK23_P3     226 Donor_inoculum_Wild_resampling_2     DWr2     Wild   <NA>
## BRK24_P3     227      Donor_inoculum_SPF(WP1Amix)       DS   BALB/c   <NA>
## BRK25_P3     228  Donor_inoculum_SPF_resampling_1     DSr1   BALB/c   <NA>
## BRK26_P3     229  Donor_inoculum_SPF_resampling_2     DSr2   BALB/c   <NA>
## BRK65_P3     268                 Negative Control     <NA>     <NA>   <NA>
## BRK66_P3     269                 Negative Control     <NA>     <NA>   <NA>
## BRK67_P3     270                 Negative Control     <NA>     <NA>   <NA>
## BRK68_P3     271                 Negative Control     <NA>     <NA>   <NA>
## BRK69_P3     272                 Negative Control     <NA>     <NA>   <NA>
## BRK70_P3     273                 Negative Control     <NA>     <NA>   <NA>
## BRK71_P3     274                 Negative Control     <NA>     <NA>   <NA>
## BRK72_P3     275                 Negative Control     <NA>     <NA>   <NA>
## BRK73_P3     276                 Negative Control     <NA>     <NA>   <NA>
## BRK74_P3     277                 Negative Control     <NA>     <NA>   <NA>
## BRK75_P3     278           Negative Control (NC1)     <NA>     <NA>   <NA>
## BRK76_P3     279           Negative Control (NC2)     <NA>     <NA>   <NA>
## BRK77_P3     280           Negative Control (NC3)     <NA>     <NA>   <NA>
## BRK78_P3     281              Mock Positive (PC1)     <NA>     <NA>   <NA>
## BRK79_P3     282              Mock Positive (PC2)     <NA>     <NA>   <NA>
## BRK80_P3     283              Mock Positive (PC3)     <NA>     <NA>   <NA>
##           Mother ID Cage Isolater Eartag Pre-immunized        Pre-mix  FMT
## BRK97_P1          6    6        1      3           Yes Falken_and_new  SPF
## BRK98_P1          6    6        1     30           Yes Falken_and_new  SPF
## BRK99_P1          6    6        1      5           Yes Falken_and_new  SPF
## BRK100_P1         6    7        4      3           Yes Falken_and_new  SPF
## BRK101_P1         6    7        4     30           Yes Falken_and_new  SPF
## BRK102_P1         6    7        4      5           Yes Falken_and_new  SPF
## BRK103_P1       112    8        4      3           Yes Falken_and_new  SPF
## BRK104_P1       112    8        4     30           Yes Falken_and_new  SPF
## BRK105_P1       112    8        4      5           Yes Falken_and_new  SPF
## BRK106_P1       112    8        4     50           Yes Falken_and_new  SPF
## BRK107_P1       112    9        1      3           Yes Falken_and_new  SPF
## BRK108_P1       112    9        1     30           Yes Falken_and_new  SPF
## BRK109_P1       112    9        1      5           Yes Falken_and_new  SPF
## BRK110_P1        16    9        1     50           Yes Falken_and_new  SPF
## BRK111_P1        45    1        2      3            No             No Wild
## BRK112_P1        45    1        2      5            No             No Wild
## BRK113_P1        46    2        2      3            No             No Wild
## BRK114_P1        46    2        2      5            No             No Wild
## BRK115_P1        40    3        2      3            No             No Wild
## BRK116_P1        40    3        2      5            No             No Wild
## BRK117_P1        46    4        2      3            No             No Wild
## BRK118_P1        38    4        2      5            No             No Wild
## BRK119_P1        15    1        3      3            No             No Wild
## BRK120_P1        15    1        3      5            No             No Wild
## BRK121_P1        46    2        3      3            No             No Wild
## BRK122_P1         1    3        3      3            No             No Wild
## BRK123_P1         1    3        3      5            No             No Wild
## BRK124_P1         6    4        3      5            No             No Wild
## BRK125_P1        NA   NA       NA     NA          <NA>           <NA>  SPF
## BRK126_P1        NA   NA       NA     NA          <NA>           <NA>  SPF
## BRK127_P1        NA   NA       NA     NA          <NA>           <NA>  SPF
## BRK128_P1        NA   NA       NA     NA          <NA>           <NA> Wild
## BRK129_P1        NA   NA       NA     NA          <NA>           <NA> Wild
## BRK130_P1        NA   NA       NA     NA          <NA>           <NA> Wild
## BRK131_P1        NA   NA       NA     NA          <NA>           <NA> Wild
## BRK132_P1        NA   NA       NA     NA          <NA>           <NA> Wild
## BRK133_P1        NA   NA       NA     NA          <NA>           <NA> Wild
## BRK134_P1        NA   NA       NA     NA          <NA>           <NA>  SPF
## BRK135_P1        NA   NA       NA     NA          <NA>           <NA>  SPF
## BRK136_P1        NA   NA       NA     NA          <NA>           <NA>  SPF
## BRK137_P1        NA   NA       NA     NA          <NA>           <NA>  SPF
## BRK138_P1        NA   NA       NA     NA          <NA>           <NA>  SPF
## BRK139_P1        NA   NA       NA     NA          <NA>           <NA>  SPF
## BRK141_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK142_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK143_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK144_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK145_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK146_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK147_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK148_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK149_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK150_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK151_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK152_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK153_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK154_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK155_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK156_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK157_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK158_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK159_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK160_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK161_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK162_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK163_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK164_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK165_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK166_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK167_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK168_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK169_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK77_P2         NA   NA       NA     NA            No             No  SPF
## BRK78_P2         NA   NA       NA     NA            No             No  SPF
## BRK80_P2         NA   NA       NA     NA            No             No  SPF
## BRK81_P2         NA   NA       NA     NA            No             No  SPF
## BRK82_P2         NA   NA       NA     NA            No             No  SPF
## BRK83_P2         NA   NA       NA     NA            No             No  SPF
## BRK84_P2         NA   NA       NA     NA            No             No  SPF
## BRK85_P2         NA   NA       NA     NA            No             No  SPF
## BRK86_P2         NA   NA       NA     NA            No             No  SPF
## BRK87_P2         NA   NA       NA     NA            No             No  SPF
## BRK88_P2         NA   NA       NA     NA            No             No  SPF
## BRK89_P2         NA   NA       NA     NA            No             No  SPF
## BRK90_P2         NA   NA       NA     NA            No             No  SPF
## BRK91_P2         NA   NA       NA     NA            No             No  SPF
## BRK92_P2         NA   NA       NA     NA            No             No  SPF
## BRK93_P2         NA   NA       NA     NA            No             No  SPF
## BRK94_P2         NA   NA       NA     NA            No             No  SPF
## BRK95_P2         NA   NA       NA     NA            No             No  SPF
## BRK96_P2         NA   NA       NA     NA            No             No  SPF
## BRK01_P3         NA   NA       NA     NA            No             No Wild
## BRK02_P3         NA   NA       NA     NA            No             No Wild
## BRK03_P3         NA   NA       NA     NA            No             No Wild
## BRK04_P3         NA   NA       NA     NA            No             No Wild
## BRK05_P3         NA   NA       NA     NA            No             No Wild
## BRK06_P3         NA   NA       NA     NA            No             No Wild
## BRK07_P3         NA   NA       NA     NA            No             No Wild
## BRK08_P3         NA   NA       NA     NA            No             No Wild
## BRK09_P3         NA   NA       NA     NA            No             No Wild
## BRK10_P3         NA   NA       NA     NA            No             No Wild
## BRK11_P3         NA   NA       NA     NA            No             No Wild
## BRK12_P3         NA   NA       NA     NA            No             No Wild
## BRK13_P3         NA   NA       NA     NA            No             No Wild
## BRK14_P3         NA   NA       NA     NA            No             No Wild
## BRK15_P3         NA   NA       NA     NA            No             No Wild
## BRK16_P3         NA   NA       NA     NA            No             No Wild
## BRK17_P3         NA   NA       NA     NA            No             No Wild
## BRK18_P3         NA   NA       NA     NA            No             No Wild
## BRK19_P3         NA   NA       NA     NA            No             No Wild
## BRK20_P3         NA   NA       NA     NA            No             No Wild
## BRK21_P3         NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK22_P3         NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK23_P3         NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK24_P3         NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK25_P3         NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK26_P3         NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK65_P3         NA   NA       NA     NA            No             No <NA>
## BRK66_P3         NA   NA       NA     NA            No             No <NA>
## BRK67_P3         NA   NA       NA     NA            No             No <NA>
## BRK68_P3         NA   NA       NA     NA            No             No <NA>
## BRK69_P3         NA   NA       NA     NA            No             No <NA>
## BRK70_P3         NA   NA       NA     NA            No             No <NA>
## BRK71_P3         NA   NA       NA     NA            No             No <NA>
## BRK72_P3         NA   NA       NA     NA            No             No <NA>
## BRK73_P3         NA   NA       NA     NA            No             No <NA>
## BRK74_P3         NA   NA       NA     NA            No             No <NA>
## BRK75_P3         NA   NA       NA     NA            No             No <NA>
## BRK76_P3         NA   NA       NA     NA            No             No <NA>
## BRK77_P3         NA   NA       NA     NA            No             No <NA>
## BRK78_P3         NA   NA       NA     NA            No             No <NA>
## BRK79_P3         NA   NA       NA     NA            No             No <NA>
## BRK80_P3         NA   NA       NA     NA            No             No <NA>
##           Round   Trial                      Sample   Barcode
## BRK97_P1      3   Rikke                        Term  BRK97_P1
## BRK98_P1      3   Rikke                        Term  BRK98_P1
## BRK99_P1      3   Rikke                        Term  BRK99_P1
## BRK100_P1     3   Rikke                        Term BRK100_P1
## BRK101_P1     3   Rikke                        Term BRK101_P1
## BRK102_P1     3   Rikke                        Term BRK102_P1
## BRK103_P1     3   Rikke                        Term BRK103_P1
## BRK104_P1     3   Rikke                        Term BRK104_P1
## BRK105_P1     3   Rikke                        Term BRK105_P1
## BRK106_P1     3   Rikke                        Term BRK106_P1
## BRK107_P1     3   Rikke                        Term BRK107_P1
## BRK108_P1     3   Rikke                        Term BRK108_P1
## BRK109_P1     3   Rikke                        Term BRK109_P1
## BRK110_P1     3   Rikke                        Term BRK110_P1
## BRK111_P1     1 Therese         Term, colon content BRK111_P1
## BRK112_P1     1 Therese         Term, colon content BRK112_P1
## BRK113_P1     1 Therese         Term, colon content BRK113_P1
## BRK114_P1     1 Therese         Term, colon content BRK114_P1
## BRK115_P1     1 Therese         Term, colon content BRK115_P1
## BRK116_P1     1 Therese         Term, colon content BRK116_P1
## BRK117_P1     1 Therese         Term, colon content BRK117_P1
## BRK118_P1     1 Therese         Term, colon content BRK118_P1
## BRK119_P1     1 Therese         Term, colon content BRK119_P1
## BRK120_P1     1 Therese         Term, colon content BRK120_P1
## BRK121_P1     1 Therese         Term, colon content BRK121_P1
## BRK122_P1     1 Therese         Term, colon content BRK122_P1
## BRK123_P1     1 Therese         Term, colon content BRK123_P1
## BRK124_P1     1 Therese         Term, colon content BRK124_P1
## BRK125_P1    NA   Rikke                     Pre_ABX BRK125_P1
## BRK126_P1    NA   Rikke                     Pre_ABX BRK126_P1
## BRK127_P1    NA   Rikke                     Pre_ABX BRK127_P1
## BRK128_P1    NA Therese                     Pre_ABX BRK128_P1
## BRK129_P1    NA Therese                     Pre_ABX BRK129_P1
## BRK130_P1    NA Therese                     Pre_ABX BRK130_P1
## BRK131_P1    NA Therese                     Pre_ABX BRK131_P1
## BRK132_P1    NA Therese                     Pre_ABX BRK132_P1
## BRK133_P1    NA Therese                     Pre_ABX BRK133_P1
## BRK134_P1    NA   Rikke                  post_1.FMT BRK134_P1
## BRK135_P1    NA   Rikke                  post_1.FMT BRK135_P1
## BRK136_P1    NA   Rikke                  post_1.FMT BRK136_P1
## BRK137_P1    NA   Rikke                        Term BRK137_P1
## BRK138_P1    NA   Rikke                        Term BRK138_P1
## BRK139_P1    NA   Rikke                        Term BRK139_P1
## BRK141_P1    NA     Ida                        Term BRK141_P1
## BRK142_P1    NA     Ida                        Term BRK142_P1
## BRK143_P1    NA     Ida                        Term BRK143_P1
## BRK144_P1    NA     Ida                        Term BRK144_P1
## BRK145_P1    NA     Ida                        Term BRK145_P1
## BRK146_P1    NA     Ida                        Term BRK146_P1
## BRK147_P1    NA     Ida                        Term BRK147_P1
## BRK148_P1    NA     Ida                        Term BRK148_P1
## BRK149_P1    NA     Ida                        Term BRK149_P1
## BRK150_P1    NA     Ida                        Term BRK150_P1
## BRK151_P1    NA     Ida                        Term BRK151_P1
## BRK152_P1    NA     Ida                        Term BRK152_P1
## BRK153_P1    NA     Ida                        Term BRK153_P1
## BRK154_P1    NA     Ida                        Term BRK154_P1
## BRK155_P1    NA     Ida                        Term BRK155_P1
## BRK156_P1    NA     Ida                        Term BRK156_P1
## BRK157_P1    NA     Ida                        Term BRK157_P1
## BRK158_P1    NA     Ida                        Term BRK158_P1
## BRK159_P1    NA     Ida                        Term BRK159_P1
## BRK160_P1    NA     Ida                        Term BRK160_P1
## BRK161_P1    NA     Ida                        Term BRK161_P1
## BRK162_P1    NA     Ida                        Term BRK162_P1
## BRK163_P1    NA     Ida                        Term BRK163_P1
## BRK164_P1    NA     Ida                        Term BRK164_P1
## BRK165_P1    NA     Ida                        Term BRK165_P1
## BRK166_P1    NA     Ida                        Term BRK166_P1
## BRK167_P1    NA     Ida                        Term BRK167_P1
## BRK168_P1    NA     Ida                        Term BRK168_P1
## BRK169_P1    NA     Ida                        Term BRK169_P1
## BRK77_P2      1 Therese                     weaning  BRK77_P2
## BRK78_P2      1 Therese                     weaning  BRK78_P2
## BRK80_P2      1 Therese                     weaning  BRK80_P2
## BRK81_P2      1 Therese                     weaning  BRK81_P2
## BRK82_P2      1 Therese                     weaning  BRK82_P2
## BRK83_P2      1 Therese                     weaning  BRK83_P2
## BRK84_P2      1 Therese                     weaning  BRK84_P2
## BRK85_P2      1 Therese                     weaning  BRK85_P2
## BRK86_P2      1 Therese                     weaning  BRK86_P2
## BRK87_P2      1 Therese                     weaning  BRK87_P2
## BRK88_P2      1 Therese                     weaning  BRK88_P2
## BRK89_P2      1 Therese                     weaning  BRK89_P2
## BRK90_P2      1 Therese                     weaning  BRK90_P2
## BRK91_P2      1 Therese                     weaning  BRK91_P2
## BRK92_P2      1 Therese                     weaning  BRK92_P2
## BRK93_P2      1 Therese                     weaning  BRK93_P2
## BRK94_P2      1 Therese                     weaning  BRK94_P2
## BRK95_P2      1 Therese                     weaning  BRK95_P2
## BRK96_P2      1 Therese                     weaning  BRK96_P2
## BRK01_P3      1 Therese                     weaning  BRK01_P3
## BRK02_P3      1 Therese                     weaning  BRK02_P3
## BRK03_P3      1 Therese                     weaning  BRK03_P3
## BRK04_P3      1 Therese                     weaning  BRK04_P3
## BRK05_P3      1 Therese                     weaning  BRK05_P3
## BRK06_P3      1 Therese                     weaning  BRK06_P3
## BRK07_P3      1 Therese                     weaning  BRK07_P3
## BRK08_P3      1 Therese                     weaning  BRK08_P3
## BRK09_P3      1 Therese                     weaning  BRK09_P3
## BRK10_P3      1 Therese                     weaning  BRK10_P3
## BRK11_P3      1 Therese                     weaning  BRK11_P3
## BRK12_P3      1 Therese                     weaning  BRK12_P3
## BRK13_P3      1 Therese                     weaning  BRK13_P3
## BRK14_P3      1 Therese                     weaning  BRK14_P3
## BRK15_P3      1 Therese                     weaning  BRK15_P3
## BRK16_P3      1 Therese                     weaning  BRK16_P3
## BRK17_P3      1 Therese                     weaning  BRK17_P3
## BRK18_P3      1 Therese                     weaning  BRK18_P3
## BRK19_P3      1 Therese                     weaning  BRK19_P3
## BRK20_P3      1 Therese                     weaning  BRK20_P3
## BRK21_P3     NA    <NA> donor, only liquid no feces  BRK21_P3
## BRK22_P3     NA    <NA> donor, only liquid no feces  BRK22_P3
## BRK23_P3     NA    <NA> donor, only liquid no feces  BRK23_P3
## BRK24_P3     NA    <NA> donor, only liquid no feces  BRK24_P3
## BRK25_P3     NA    <NA> donor, only liquid no feces  BRK25_P3
## BRK26_P3     NA    <NA> donor, only liquid no feces  BRK26_P3
## BRK65_P3     NA    <NA>                        <NA>  BRK65_P3
## BRK66_P3     NA    <NA>                        <NA>  BRK66_P3
## BRK67_P3     NA    <NA>                        <NA>  BRK67_P3
## BRK68_P3     NA    <NA>                        <NA>  BRK68_P3
## BRK69_P3     NA    <NA>                        <NA>  BRK69_P3
## BRK70_P3     NA    <NA>                        <NA>  BRK70_P3
## BRK71_P3     NA    <NA>                        <NA>  BRK71_P3
## BRK72_P3     NA    <NA>                        <NA>  BRK72_P3
## BRK73_P3     NA    <NA>                        <NA>  BRK73_P3
## BRK74_P3     NA    <NA>                        <NA>  BRK74_P3
## BRK75_P3     NA    <NA>                        <NA>  BRK75_P3
## BRK76_P3     NA    <NA>                        <NA>  BRK76_P3
## BRK77_P3     NA    <NA>                        <NA>  BRK77_P3
## BRK78_P3     NA    <NA>                        <NA>  BRK78_P3
## BRK79_P3     NA    <NA>                        <NA>  BRK79_P3
## BRK80_P3     NA    <NA>                        <NA>  BRK80_P3
##                            Group
## BRK97_P1     BALB/c_Yes_SPF_Term
## BRK98_P1     BALB/c_Yes_SPF_Term
## BRK99_P1     BALB/c_Yes_SPF_Term
## BRK100_P1    BALB/c_Yes_SPF_Term
## BRK101_P1    BALB/c_Yes_SPF_Term
## BRK102_P1    BALB/c_Yes_SPF_Term
## BRK103_P1    BALB/c_Yes_SPF_Term
## BRK104_P1    BALB/c_Yes_SPF_Term
## BRK105_P1    BALB/c_Yes_SPF_Term
## BRK106_P1    BALB/c_Yes_SPF_Term
## BRK107_P1    BALB/c_Yes_SPF_Term
## BRK108_P1    BALB/c_Yes_SPF_Term
## BRK109_P1    BALB/c_Yes_SPF_Term
## BRK110_P1    BALB/c_Yes_SPF_Term
## BRK111_P1   BALB/c_No_Wild_colon
## BRK112_P1   BALB/c_No_Wild_colon
## BRK113_P1   BALB/c_No_Wild_colon
## BRK114_P1   BALB/c_No_Wild_colon
## BRK115_P1   BALB/c_No_Wild_colon
## BRK116_P1   BALB/c_No_Wild_colon
## BRK117_P1   BALB/c_No_Wild_colon
## BRK118_P1   BALB/c_No_Wild_colon
## BRK119_P1   BALB/c_No_Wild_colon
## BRK120_P1   BALB/c_No_Wild_colon
## BRK121_P1   BALB/c_No_Wild_colon
## BRK122_P1   BALB/c_No_Wild_colon
## BRK123_P1   BALB/c_No_Wild_colon
## BRK124_P1   BALB/c_No_Wild_colon
## BRK125_P1      Mother_SPF_PreABX
## BRK126_P1      Mother_SPF_PreABX
## BRK127_P1      Mother_SPF_PreABX
## BRK128_P1     Mother_Wild_PreABX
## BRK129_P1     Mother_Wild_PreABX
## BRK130_P1     Mother_Wild_PreABX
## BRK131_P1     Mother_Wild_PreABX
## BRK132_P1     Mother_Wild_PreABX
## BRK133_P1     Mother_Wild_PreABX
## BRK134_P1    Mother_SPF_post1FMT
## BRK135_P1    Mother_SPF_post1FMT
## BRK136_P1    Mother_SPF_post1FMT
## BRK137_P1        Mother_SPF_Term
## BRK138_P1        Mother_SPF_Term
## BRK139_P1        Mother_SPF_Term
## BRK141_P1               Wildling
## BRK142_P1               Wildling
## BRK143_P1               Wildling
## BRK144_P1               Wildling
## BRK145_P1               Wildling
## BRK146_P1               Wildling
## BRK147_P1               Wildling
## BRK148_P1               Wildling
## BRK149_P1               Wildling
## BRK150_P1               Wildling
## BRK151_P1               Wildling
## BRK152_P1               Pet Shop
## BRK153_P1               Pet Shop
## BRK154_P1               Pet Shop
## BRK155_P1               Pet Shop
## BRK156_P1               Pet Shop
## BRK157_P1               Pet Shop
## BRK158_P1               Pet Shop
## BRK159_P1               Pet Shop
## BRK160_P1               Pet Shop
## BRK161_P1               Pet Shop
## BRK162_P1               Pet Shop
## BRK163_P1               Pet Shop
## BRK164_P1               Pet Shop
## BRK165_P1               Pet Shop
## BRK166_P1                   Wild
## BRK167_P1                   Wild
## BRK168_P1                   Wild
## BRK169_P1                   Wild
## BRK77_P2   BALB/c_No_SPF_weaning
## BRK78_P2   BALB/c_No_SPF_weaning
## BRK80_P2   BALB/c_No_SPF_weaning
## BRK81_P2   BALB/c_No_SPF_weaning
## BRK82_P2   BALB/c_No_SPF_weaning
## BRK83_P2   BALB/c_No_SPF_weaning
## BRK84_P2   BALB/c_No_SPF_weaning
## BRK85_P2   BALB/c_No_SPF_weaning
## BRK86_P2   BALB/c_No_SPF_weaning
## BRK87_P2   BALB/c_No_SPF_weaning
## BRK88_P2   BALB/c_No_SPF_weaning
## BRK89_P2   BALB/c_No_SPF_weaning
## BRK90_P2   BALB/c_No_SPF_weaning
## BRK91_P2   BALB/c_No_SPF_weaning
## BRK92_P2   BALB/c_No_SPF_weaning
## BRK93_P2   BALB/c_No_SPF_weaning
## BRK94_P2   BALB/c_No_SPF_weaning
## BRK95_P2   BALB/c_No_SPF_weaning
## BRK96_P2   BALB/c_No_SPF_weaning
## BRK01_P3  BALB/c_No_Wild_weaning
## BRK02_P3  BALB/c_No_Wild_weaning
## BRK03_P3  BALB/c_No_Wild_weaning
## BRK04_P3  BALB/c_No_Wild_weaning
## BRK05_P3  BALB/c_No_Wild_weaning
## BRK06_P3  BALB/c_No_Wild_weaning
## BRK07_P3  BALB/c_No_Wild_weaning
## BRK08_P3  BALB/c_No_Wild_weaning
## BRK09_P3  BALB/c_No_Wild_weaning
## BRK10_P3  BALB/c_No_Wild_weaning
## BRK11_P3  BALB/c_No_Wild_weaning
## BRK12_P3  BALB/c_No_Wild_weaning
## BRK13_P3  BALB/c_No_Wild_weaning
## BRK14_P3  BALB/c_No_Wild_weaning
## BRK15_P3  BALB/c_No_Wild_weaning
## BRK16_P3  BALB/c_No_Wild_weaning
## BRK17_P3  BALB/c_No_Wild_weaning
## BRK18_P3  BALB/c_No_Wild_weaning
## BRK19_P3  BALB/c_No_Wild_weaning
## BRK20_P3  BALB/c_No_Wild_weaning
## BRK21_P3              Donor_Wild
## BRK22_P3              Donor_Wild
## BRK23_P3              Donor_Wild
## BRK24_P3               Donor_SPF
## BRK25_P3               Donor_SPF
## BRK26_P3               Donor_SPF
## BRK65_P3        Negative_Control
## BRK66_P3        Negative_Control
## BRK67_P3        Negative_Control
## BRK68_P3        Negative_Control
## BRK69_P3        Negative_Control
## BRK70_P3        Negative_Control
## BRK71_P3        Negative_Control
## BRK72_P3        Negative_Control
## BRK73_P3        Negative_Control
## BRK74_P3        Negative_Control
## BRK75_P3        Negative_Control
## BRK76_P3        Negative_Control
## BRK77_P3        Negative_Control
## BRK78_P3           Mock_Positive
## BRK79_P3           Mock_Positive
## BRK80_P3           Mock_Positive

sum(rowSums(pst@otu_table) == 0)   # not present in any sample

## [1] NA

4. Filtering and Preprocessing

# Remove unassigned and NA phylum taxa
pst <- subset_taxa(pst, !is.na(Phylum) & !Phylum %in% c("", "uncharacterized", "unassigned"))

# Check kingdoms and keep only Bacteria
# Check whether there are archea (eucaryota) in the kingdom, and if so remove it and (keeping only bacterial kingdom) 
pst@tax_table[,c(1)] %>% unique()   # Viser den første kolonne af tax_table, altsp kingdom kolonnen. unique betyder at vi kun får en af hver slags.

## Taxonomy Table:     [2 taxa by 1 taxonomic ranks]:
##                                                                                                         Kingdom     
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__ "Bacteria"  
## Unassigned;__;__;__;__;__;__                                                                            "Unassigned"

pst <- subset_taxa(pst, Kingdom %in% "Bacteria")   # her fjerner vi alt det andet så vi sørger for at vi kun har "bacteria".
pst

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 479 taxa and 133 samples ]
## sample_data() Sample Data:       [ 133 samples by 17 sample variables ]
## tax_table()   Taxonomy Table:    [ 479 taxa by 7 taxonomic ranks ]

Prevalence filtering

#### Taxonomic filtering based on (low) prevalence: supervsed
#monitoring the number of the samples in which the prevalence of a taxon is at least one
# Monitor number of samples in which the prevalence of a taxon is at least one
prevdf <- apply(otu_table(pst), ifelse(taxa_are_rows(pst), 1, 2), function(x){sum(x > 0)})

weight_df <- data.frame(
  ASVprev   = prevdf,
  TaxaAbund = taxa_sums(pst),
  phyloseq::tax_table(pst)
)
head(weight_df)

##                                                                                                                                                                        ASVprev
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__                                                                      2
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040                      1
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis strain JCM 1190                    12
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis subsp. lactis strain YIT 4121       0
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium bifidum strain KCTC 3202                     0
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium breve DSM 20213 = JCM 1192                  16
##                                                                                                                                                                        TaxaAbund
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__                                                                        3
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040                        3
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis strain JCM 1190                     365
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis subsp. lactis strain YIT 4121         0
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium bifidum strain KCTC 3202                       0
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium breve DSM 20213 = JCM 1192                    18
##                                                                                                                                                                         Kingdom
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__                                                                Bacteria
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040                Bacteria
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis strain JCM 1190               Bacteria
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis subsp. lactis strain YIT 4121 Bacteria
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium bifidum strain KCTC 3202               Bacteria
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium breve DSM 20213 = JCM 1192             Bacteria
##                                                                                                                                                                                Phylum
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__                                                                Actinomycetota
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040                Actinomycetota
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis strain JCM 1190               Actinomycetota
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis subsp. lactis strain YIT 4121 Actinomycetota
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium bifidum strain KCTC 3202               Actinomycetota
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium breve DSM 20213 = JCM 1192             Actinomycetota
##                                                                                                                                                                                Class
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__                                                                Actinomycetes
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040                Actinomycetes
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis strain JCM 1190               Actinomycetes
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis subsp. lactis strain YIT 4121 Actinomycetes
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium bifidum strain KCTC 3202               Actinomycetes
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium breve DSM 20213 = JCM 1192             Actinomycetes
##                                                                                                                                                                                    Order
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__                                                                  Actinomycetales
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040                  Actinomycetales
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis strain JCM 1190               Bifidobacteriales
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis subsp. lactis strain YIT 4121 Bifidobacteriales
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium bifidum strain KCTC 3202               Bifidobacteriales
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium breve DSM 20213 = JCM 1192             Bifidobacteriales
##                                                                                                                                                                                    Family
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__                                                                  Actinomycetaceae
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040                  Actinomycetaceae
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis strain JCM 1190               Bifidobacteriaceae
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis subsp. lactis strain YIT 4121 Bifidobacteriaceae
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium bifidum strain KCTC 3202               Bifidobacteriaceae
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium breve DSM 20213 = JCM 1192             Bifidobacteriaceae
##                                                                                                                                                                                   Genus
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__                                                                     Actinomyces
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040                Changpingibacter
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis strain JCM 1190                Bifidobacterium
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis subsp. lactis strain YIT 4121  Bifidobacterium
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium bifidum strain KCTC 3202                Bifidobacterium
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium breve DSM 20213 = JCM 1192              Bifidobacterium
##                                                                                                                                                                                                                       Species
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Actinomyces;__                                                                                                                    __
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Actinomycetales;F__Actinomycetaceae;G__Changpingibacter;S__Changpingibacter yushuensis strain JY-X040                            Changpingibacter yushuensis strain JY-X040
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis strain JCM 1190                             Bifidobacterium animalis strain JCM 1190
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium animalis subsp. lactis strain YIT 4121 Bifidobacterium animalis subsp. lactis strain YIT 4121
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium bifidum strain KCTC 3202                             Bifidobacterium bifidum strain KCTC 3202
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;S__Bifidobacterium breve DSM 20213 = JCM 1192                         Bifidobacterium breve DSM 20213 = JCM 1192

# Find phyla that are of mostly low-prevalence
plyr::ddply(weight_df, "Phylum", function(x){
  cbind(means = round(mean(x$ASVprev), 2), sums = round(sum(x$ASVprev), 2))
}) %>% mutate(sanity = ifelse(means == sums, "TRUE", "FALSE"))

##                         Phylum  means sums sanity
## 1                           __ 121.00  121   TRUE
## 2               Actinomycetota  16.66  783  FALSE
## 3                    Bacillota  24.46 6702  FALSE
## 4                 Bacteroidota  43.18 3066  FALSE
## 5             Campylobacterota  23.45  258  FALSE
## 6  Candidatus Saccharibacteria  83.00   83   TRUE
## 7             Deferribacterota  32.50   65  FALSE
## 8              Elusimicrobiota  12.00   12   TRUE
## 9               Fusobacteriota   6.67   20  FALSE
## 10              Mycoplasmatota  14.64  161  FALSE
## 11              Pseudomonadota  17.14  634  FALSE
## 12               Spirochaetota  13.67   41  FALSE
## 13                Synergistota   1.50    3  FALSE
## 14     Thermodesulfobacteriota  45.89  413  FALSE
## 15           Verrucomicrobiota  19.83  119  FALSE

5. Singletone Detection & Removal

# A function to find singletones — inspired by Nutella2.R script from Ida Wang Henriksen
out.ASV = function(phyloseq, threshold = 1, binwidth = 0.01) {

  pacman::p_load(glue, tidyverse, reshape2, ggrepel, S4Vectors)

  if (sum(colSums(otu_table(phyloseq))) / ncol(otu_table(phyloseq)) == 100) {
    rel_abund = as(t(otu_table(phyloseq)), "matrix")
  } else if (sum(colSums(otu_table(phyloseq))) / ncol(otu_table(phyloseq)) == 1) {
    rel_abund = as(t(otu_table(phyloseq)), "matrix")
  } else {
    rel_abund = as(t(apply(otu_table(phyloseq),
                           ifelse(taxa_are_rows(phyloseq), 1, 2),
                           function(x) x / sum(x))), "matrix")
  }

  names.single = apply(rel_abund, 1, function(x){
    ifelse(x == threshold, TRUE, ifelse(x == sum(x), TRUE, FALSE))
  }) %>% reshape2::melt() %>%
    filter(value == TRUE) %>%
    dplyr::select(2) %>%
    pull %>% as.vector()

  if (length(names.single) == 0) {
    print(glue("WOW! {length(names.single)} singletones detected in this dataset"))
    qplot.noSing = qplot(rel_abund, geom = "histogram", binwidth = binwidth,
                         show.legend = F,
                         main = "Frequency count of relative abundance, no singletones detected") +
      xlab("Relative abundance in samples") + ylab("Frequency") + theme_bw()
    return(structure(list(qplot.noSing)))
  } else {
    single.ASV = rel_abund[rownames(rel_abund) %in% names.single, ]
    single.ASV[single.ASV == 0] <- NA
    qplot.withSing = qplot(rel_abund, geom = "histogram", binwidth = binwidth,
                           main = "Frequency count of relative abundance with singletones") +
      geom_bar(aes(single.ASV), fill = "red", color = NA, width = binwidth) +
      xlab("Relative abundance in samples") + ylab("Frequency") +
      geom_label_repel(aes(x = 1, y = length(rel_abund) / 5),
                       label.padding = unit(0.55, "lines"),
                       label = glue("{length(names.single)}\n Singletones"), color = "black") +
      theme_bw()
    qplot.rmSing = qplot(rel_abund[!rownames(rel_abund) %in% names.single, ],
                         geom = "histogram", binwidth = binwidth,
                         main = "Frequency count of relative abundance without singletones") +
      xlab("Relative abundance in samples") + ylab("Frequency") + theme_bw()
    print(glue("Oh no..! {length(names.single)} singletones detected in the dataset"))
    return(structure(list(qplot.withSing, qplot.rmSing, unlist(names.single))))
  }
}

single.test = out.ASV(phyloseq = pst, threshold = 1, binwidth = 0.01) # threshold = 1 betyder relativ abundans = 1 (100%) i ét sample

## Oh no..! 61 singletones detected in the dataset

singletones = single.test[[3]]   # extract names of singletones

single.test[[2]]   # plot without singletones

# removing singletones from 
pst = subset_taxa(pst, !taxa_names(pst) %in% singletones)
rm(single.test)
pst

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 418 taxa and 133 samples ]
## sample_data() Sample Data:       [ 133 samples by 17 sample variables ]
## tax_table()   Taxonomy Table:    [ 418 taxa by 7 taxonomic ranks ]

6. Decontam — Remove Contaminants Using Negative Controls

# Flag negative controls using Group — only Negative_Control, not Mock_Positive
sample_data(pst)$is.neg <- ifelse(
  sample_data(pst)$Group == "Negative_Control", TRUE, FALSE
)
table(sample_data(pst)$is.neg)

## 
## FALSE  TRUE 
##   120    13

# Run decontam using prevalence method
contam_df <- isContaminant(pst, method = "prevalence", neg = "is.neg", threshold = 0.1)
contam_df %>% filter(contaminant == TRUE)

##                                                                                                                                                             freq
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;__                                     0.0303451318
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Propionibacteriales;F__Propionibacteriaceae;G__Cutibacterium;__                                   0.0007522273
## K__Bacteria;P__Actinomycetota;C__Coriobacteriia;O__Coriobacteriales;F__Coriobacteriaceae;G__Collinsella;S__Collinsella aerofaciens strain JCM 10188 0.0053235593
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Lapidilactobacillus;__                                                0.0002455918
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Leuconostoc;__                                                        0.0016770835
## K__Bacteria;P__Bacillota;C__Clostridia;O__Eubacteriales;F__;G__Gemmiger;S__Gemmiger qucibialis                                                      0.0002432113
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia faecis strain M25                                 0.0020541771
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia wexlerae DSM 19850                                0.0033323479
##                                                                                                                                                     prev
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;__                                       73
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Propionibacteriales;F__Propionibacteriaceae;G__Cutibacterium;__                                      2
## K__Bacteria;P__Actinomycetota;C__Coriobacteriia;O__Coriobacteriales;F__Coriobacteriaceae;G__Collinsella;S__Collinsella aerofaciens strain JCM 10188   16
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Lapidilactobacillus;__                                                   2
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Leuconostoc;__                                                           4
## K__Bacteria;P__Bacillota;C__Clostridia;O__Eubacteriales;F__;G__Gemmiger;S__Gemmiger qucibialis                                                         2
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia faecis strain M25                                    4
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia wexlerae DSM 19850                                  16
##                                                                                                                                                     p.freq
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;__                                         NA
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Propionibacteriales;F__Propionibacteriaceae;G__Cutibacterium;__                                       NA
## K__Bacteria;P__Actinomycetota;C__Coriobacteriia;O__Coriobacteriales;F__Coriobacteriaceae;G__Collinsella;S__Collinsella aerofaciens strain JCM 10188     NA
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Lapidilactobacillus;__                                                    NA
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Leuconostoc;__                                                            NA
## K__Bacteria;P__Bacillota;C__Clostridia;O__Eubacteriales;F__;G__Gemmiger;S__Gemmiger qucibialis                                                          NA
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia faecis strain M25                                     NA
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia wexlerae DSM 19850                                    NA
##                                                                                                                                                           p.prev
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;__                                     0.0052168751
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Propionibacteriales;F__Propionibacteriaceae;G__Cutibacterium;__                                   0.0977443609
## K__Bacteria;P__Actinomycetota;C__Coriobacteriia;O__Coriobacteriales;F__Coriobacteriaceae;G__Collinsella;S__Collinsella aerofaciens strain JCM 10188 0.0006626731
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Lapidilactobacillus;__                                                0.0977443609
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Leuconostoc;__                                                        0.0251652727
## K__Bacteria;P__Bacillota;C__Clostridia;O__Eubacteriales;F__;G__Gemmiger;S__Gemmiger qucibialis                                                      0.0977443609
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia faecis strain M25                                 0.0014348849
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia wexlerae DSM 19850                                0.0306113033
##                                                                                                                                                                p
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;__                                     0.0052168751
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Propionibacteriales;F__Propionibacteriaceae;G__Cutibacterium;__                                   0.0977443609
## K__Bacteria;P__Actinomycetota;C__Coriobacteriia;O__Coriobacteriales;F__Coriobacteriaceae;G__Collinsella;S__Collinsella aerofaciens strain JCM 10188 0.0006626731
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Lapidilactobacillus;__                                                0.0977443609
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Leuconostoc;__                                                        0.0251652727
## K__Bacteria;P__Bacillota;C__Clostridia;O__Eubacteriales;F__;G__Gemmiger;S__Gemmiger qucibialis                                                      0.0977443609
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia faecis strain M25                                 0.0014348849
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia wexlerae DSM 19850                                0.0306113033
##                                                                                                                                                     contaminant
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Bifidobacteriales;F__Bifidobacteriaceae;G__Bifidobacterium;__                                            TRUE
## K__Bacteria;P__Actinomycetota;C__Actinomycetes;O__Propionibacteriales;F__Propionibacteriaceae;G__Cutibacterium;__                                          TRUE
## K__Bacteria;P__Actinomycetota;C__Coriobacteriia;O__Coriobacteriales;F__Coriobacteriaceae;G__Collinsella;S__Collinsella aerofaciens strain JCM 10188        TRUE
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Lapidilactobacillus;__                                                       TRUE
## K__Bacteria;P__Bacillota;C__Bacilli;O__Lactobacillales;F__Lactobacillaceae;G__Leuconostoc;__                                                               TRUE
## K__Bacteria;P__Bacillota;C__Clostridia;O__Eubacteriales;F__;G__Gemmiger;S__Gemmiger qucibialis                                                             TRUE
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia faecis strain M25                                        TRUE
## K__Bacteria;P__Bacillota;C__Clostridia;O__Lachnospirales;F__Lachnospiraceae;G__Blautia;S__Blautia wexlerae DSM 19850                                       TRUE

# Remove contaminants
pst <- subset_taxa(pst,
  !taxa_names(pst) %in% rownames(contam_df)[contam_df$contaminant == TRUE])

# Remove both negative controls and mock positives from data set so they won't be part of analysis
pst <- subset_samples(pst,
  !sample_data(pst)$Group %in% c("Negative_Control", "Mock_Positive"))
sample_data(pst)$is.neg <- NULL

cat("After decontam and control removal:", ntaxa(pst), "OTUs,",
    nsamples(pst), "samples\n")

## After decontam and control removal: 410 OTUs, 117 samples

# **`"After decontam and control removal:"`** — fast tekst der beskriver hvad der netop er sket
# **`ntaxa(pst)`** — tæller antal taxa tilbage i phyloseq objektet efter fjernelse af kontaminanter
# **`"OTUs,"`** — fast tekst
# **`nsamples(pst)`** — tæller antal samples tilbage efter fjernelse af negative kontroller og mock positives
# **`"samples\n"`** — fast tekst, `\n` laver et linjeskift efter beskeden

pst

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 410 taxa and 117 samples ]
## sample_data() Sample Data:       [ 117 samples by 17 sample variables ]
## tax_table()   Taxonomy Table:    [ 410 taxa by 7 taxonomic ranks ]

7. Rarefaction

Rarefaction curve

# create rarefraction curve
ps_rar_curve <- MicrobiotaProcess::ggrarecurve(
  obj        = pst,
  indexNames = c("Observe", "Shannon"),
  chunks     = 400,
  theme(legend.spacing.y = unit(0.02, "cm"),
        legend.text = element_text(size = 6)),
  show.legend = FALSE
)

ps_rar_curve + theme_bw() +
  geom_vline(xintercept = 15000, lty = 2, color = alpha("red", 0.5)) +
  ggtitle("Rarefaction curves based on Richness and evenness",
          "Each line is a sample")

# ggsave("./outputs/rarefaction_curve.jpeg", device = "jpeg", dpi = 300, height = 6, width = 9)

colSums(otu_table(pst)) %>% sort()

## BRK163_P1 BRK146_P1  BRK24_P3 BRK109_P1 BRK112_P1 BRK104_P1  BRK23_P3 BRK108_P1 
##      1715      7515      8408      8904     10848     12493     12589     12639 
##  BRK97_P1 BRK152_P1 BRK168_P1 BRK101_P1 BRK103_P1 BRK107_P1 BRK142_P1 BRK156_P1 
##     12647     13528     14397     15096     15837     15925     15985     16003 
## BRK102_P1  BRK98_P1 BRK158_P1 BRK160_P1 BRK123_P1 BRK144_P1 BRK110_P1 BRK128_P1 
##     16069     16308     16434     16708     17039     17262     17279     17591 
##  BRK05_P3 BRK145_P1 BRK111_P1 BRK150_P1  BRK07_P3  BRK22_P3  BRK99_P1 BRK169_P1 
##     17613     17769     17899     18018     18086     18141     18279     18363 
## BRK114_P1 BRK118_P1 BRK113_P1 BRK159_P1 BRK161_P1 BRK119_P1 BRK149_P1 BRK162_P1 
##     18376     18548     18604     18679     18876     19028     19224     19225 
## BRK148_P1 BRK115_P1 BRK153_P1 BRK131_P1 BRK143_P1 BRK164_P1  BRK21_P3 BRK154_P1 
##     19397     19427     19458     19506     19604     19634     19722     19999 
## BRK139_P1 BRK165_P1 BRK138_P1  BRK88_P2  BRK96_P2  BRK77_P2 BRK122_P1 BRK116_P1 
##     20089     20099     20178     20536     20614     20763     20780     20787 
## BRK105_P1  BRK17_P3  BRK85_P2 BRK126_P1 BRK117_P1 BRK127_P1  BRK04_P3 BRK151_P1 
##     20832     20897     20957     21018     21055     21098     21148     21321 
##  BRK06_P3  BRK94_P2 BRK134_P1  BRK19_P3  BRK01_P3  BRK20_P3 BRK120_P1  BRK92_P2 
##     21326     21401     21482     21578     21621     21665     21898     21936 
##  BRK80_P2  BRK18_P3  BRK93_P2  BRK02_P3  BRK15_P3  BRK87_P2 BRK147_P1  BRK08_P3 
##     22053     22148     22278     22293     22355     22460     22463     22467 
## BRK121_P1  BRK03_P3 BRK124_P1  BRK95_P2 BRK125_P1 BRK155_P1  BRK84_P2 BRK167_P1 
##     22502     22624     22686     22698     22764     22766     23101     23315 
## BRK129_P1  BRK16_P3  BRK82_P2 BRK137_P1  BRK86_P2 BRK135_P1  BRK90_P2  BRK81_P2 
##     23337     23482     23613     23709     23879     23916     23931     23963 
##  BRK91_P2 BRK157_P1  BRK12_P3 BRK133_P1 BRK106_P1  BRK09_P3  BRK78_P2  BRK83_P2 
##     23963     23991     24147     24345     24348     24565     24606     24634 
##  BRK89_P2  BRK13_P3  BRK11_P3 BRK166_P1 BRK100_P1 BRK130_P1  BRK14_P3  BRK25_P3 
##     24641     24879     24926     25215     25355     25426     25555     25557 
## BRK132_P1  BRK10_P3  BRK26_P3 BRK136_P1 BRK141_P1 
##     25737     25894     27318     27537     27751

range(colSums(otu_table(pst)))

## [1]  1715 27751

Rarefy

set.seed(123)

# Adjust rarefy_depth based on rarefaction curve above
rarefy_depth <- 8000   # <-- adjust as needed, chosen to avoid loosing too important samples

# Rarefaction (putting all samples to the same level of sequencing depth)
ps_rar = rarefy_even_depth(pst, sample.size = rarefy_depth, replace = FALSE)
sample_data(ps_rar)

##           Seq_ID                            Study Mouse.ID   Strain    Sex
## BRK97_P1       1                              IMU       78   BALB/c Female
## BRK98_P1       2                              IMU       79   BALB/c Female
## BRK99_P1       3                              IMU       80   BALB/c Female
## BRK100_P1      4                              IMU       81   BALB/c   Male
## BRK101_P1      5                              IMU       82   BALB/c   Male
## BRK102_P1      6                              IMU       83   BALB/c   Male
## BRK103_P1      7                              IMU       84   BALB/c   Male
## BRK104_P1      8                              IMU       85   BALB/c   Male
## BRK105_P1      9                              IMU       86   BALB/c   Male
## BRK106_P1     10                              IMU       87   BALB/c   Male
## BRK107_P1     11                              IMU       88   BALB/c Female
## BRK108_P1     12                              IMU       89   BALB/c Female
## BRK109_P1     13                              IMU       90   BALB/c Female
## BRK110_P1     14                              IMU       91   BALB/c Female
## BRK111_P1     15                              IMU       18   BALB/c Female
## BRK112_P1     16                              IMU       20   BALB/c Female
## BRK113_P1     17                              IMU       21   BALB/c Female
## BRK114_P1     18                              IMU       23   BALB/c Female
## BRK115_P1     19                              IMU       26   BALB/c Female
## BRK116_P1     20                              IMU       28   BALB/c Female
## BRK117_P1     21                              IMU       29   BALB/c Female
## BRK118_P1     22                              IMU       31   BALB/c Female
## BRK119_P1     23                              IMU       34   BALB/c   Male
## BRK120_P1     24                              IMU       35   BALB/c   Male
## BRK121_P1     25                              IMU       37   BALB/c   Male
## BRK122_P1     26                              IMU       39   BALB/c   Male
## BRK123_P1     27                              IMU       40   BALB/c   Male
## BRK124_P1     28                              IMU       44   BALB/c   Male
## BRK125_P1     29                       IMU_mother        6   BALB/c Female
## BRK126_P1     30                       IMU_mother       12   BALB/c Female
## BRK127_P1     31                       IMU_mother       16   BALB/c Female
## BRK128_P1     32                       IMU_mother      154   BALB/c Female
## BRK129_P1     33                       IMU_mother       19   BALB/c Female
## BRK130_P1     34                       IMU_mother        3   BALB/c Female
## BRK131_P1     35                       IMU_mother       13   BALB/c Female
## BRK132_P1     36                       IMU_mother       20   BALB/c Female
## BRK133_P1     37                       IMU_mother        5   BALB/c Female
## BRK134_P1     38                       IMU_mother        6   BALB/c Female
## BRK135_P1     39                       IMU_mother       12   BALB/c Female
## BRK136_P1     40                       IMU_mother       16   BALB/c Female
## BRK137_P1     47                       IMU_mother        6   BALB/c Female
## BRK138_P1     48                       IMU_mother       12   BALB/c Female
## BRK139_P1     49                       IMU_mother       16   BALB/c Female
## BRK141_P1     51                             WILD        2 Wildling Female
## BRK142_P1     52                             WILD        3 Wildling Female
## BRK143_P1     53                             WILD        4 Wildling Female
## BRK144_P1     54                             WILD        5 Wildling Female
## BRK145_P1     55                             WILD        6 Wildling Female
## BRK147_P1     57                             WILD        9 Wildling   Male
## BRK148_P1     58                             WILD       10 Wildling   Male
## BRK149_P1     59                             WILD       11 Wildling   Male
## BRK150_P1     60                             WILD       12 Wildling   Male
## BRK151_P1     61                             WILD       13 Wildling   Male
## BRK152_P1     62                             WILD       14 Pet Shop   Male
## BRK153_P1     63                             WILD       15 Pet Shop   Male
## BRK154_P1     64                             WILD       16 Pet Shop   Male
## BRK155_P1     65                             WILD       17 Pet Shop   Male
## BRK156_P1     66                             WILD       18 Pet Shop   Male
## BRK157_P1     67                             WILD       19 Pet Shop   Male
## BRK158_P1     68                             WILD       20 Pet Shop   Male
## BRK159_P1     69                             WILD       21 Pet Shop Female
## BRK160_P1     70                             WILD       22 Pet Shop Female
## BRK161_P1     71                             WILD       23 Pet Shop Female
## BRK162_P1     72                             WILD       24 Pet Shop Female
## BRK164_P1     74                             WILD       26 Pet Shop Female
## BRK165_P1     75                             WILD       27 Pet Shop Female
## BRK166_P1     76                             WILD       28     Wild Female
## BRK167_P1     77                             WILD       29     Wild   Male
## BRK168_P1     78                             WILD       30     Wild Female
## BRK169_P1     79                             WILD       31     Wild Female
## BRK77_P2     184                              IMU     <NA>   BALB/c   Male
## BRK78_P2     185                              IMU     <NA>   BALB/c   Male
## BRK80_P2     187                              IMU     <NA>   BALB/c   Male
## BRK81_P2     188                              IMU     <NA>   BALB/c   Male
## BRK82_P2     189                              IMU     <NA>   BALB/c   Male
## BRK83_P2     190                              IMU     <NA>   BALB/c   Male
## BRK84_P2     191                              IMU     <NA>   BALB/c   Male
## BRK85_P2     192                              IMU     <NA>   BALB/c   Male
## BRK86_P2     193                              IMU     <NA>   BALB/c   Male
## BRK87_P2     194                              IMU     <NA>   BALB/c Female
## BRK88_P2     195                              IMU     <NA>   BALB/c Female
## BRK89_P2     196                              IMU     <NA>   BALB/c Female
## BRK90_P2     197                              IMU     <NA>   BALB/c Female
## BRK91_P2     198                              IMU     <NA>   BALB/c Female
## BRK92_P2     199                              IMU     <NA>   BALB/c Female
## BRK93_P2     200                              IMU     <NA>   BALB/c Female
## BRK94_P2     201                              IMU     <NA>   BALB/c Female
## BRK95_P2     202                              IMU     <NA>   BALB/c Female
## BRK96_P2     203                              IMU     <NA>   BALB/c Female
## BRK01_P3     204                              IMU     <NA>   BALB/c   Male
## BRK02_P3     205                              IMU     <NA>   BALB/c   Male
## BRK03_P3     206                              IMU     <NA>   BALB/c   Male
## BRK04_P3     207                              IMU     <NA>   BALB/c   Male
## BRK05_P3     208                              IMU     <NA>   BALB/c   Male
## BRK06_P3     209                              IMU     <NA>   BALB/c   Male
## BRK07_P3     210                              IMU     <NA>   BALB/c   Male
## BRK08_P3     211                              IMU     <NA>   BALB/c   Male
## BRK09_P3     212                              IMU     <NA>   BALB/c   Male
## BRK10_P3     213                              IMU     <NA>   BALB/c   Male
## BRK11_P3     214                              IMU     <NA>   BALB/c Female
## BRK12_P3     215                              IMU     <NA>   BALB/c Female
## BRK13_P3     216                              IMU     <NA>   BALB/c Female
## BRK14_P3     217                              IMU     <NA>   BALB/c Female
## BRK15_P3     218                              IMU     <NA>   BALB/c Female
## BRK16_P3     219                              IMU     <NA>   BALB/c Female
## BRK17_P3     220                              IMU     <NA>   BALB/c Female
## BRK18_P3     221                              IMU     <NA>   BALB/c Female
## BRK19_P3     222                              IMU     <NA>   BALB/c Female
## BRK20_P3     223                              IMU     <NA>   BALB/c Female
## BRK21_P3     224       Donor_inoculum_Wild(Zahir)       DW     Wild   <NA>
## BRK22_P3     225 Donor_inoculum_Wild_resampling_1     DWr1     Wild   <NA>
## BRK23_P3     226 Donor_inoculum_Wild_resampling_2     DWr2     Wild   <NA>
## BRK24_P3     227      Donor_inoculum_SPF(WP1Amix)       DS   BALB/c   <NA>
## BRK25_P3     228  Donor_inoculum_SPF_resampling_1     DSr1   BALB/c   <NA>
## BRK26_P3     229  Donor_inoculum_SPF_resampling_2     DSr2   BALB/c   <NA>
##           Mother.ID Cage Isolater Eartag Pre.immunized        Pre.mix  FMT
## BRK97_P1          6    6        1      3           Yes Falken_and_new  SPF
## BRK98_P1          6    6        1     30           Yes Falken_and_new  SPF
## BRK99_P1          6    6        1      5           Yes Falken_and_new  SPF
## BRK100_P1         6    7        4      3           Yes Falken_and_new  SPF
## BRK101_P1         6    7        4     30           Yes Falken_and_new  SPF
## BRK102_P1         6    7        4      5           Yes Falken_and_new  SPF
## BRK103_P1       112    8        4      3           Yes Falken_and_new  SPF
## BRK104_P1       112    8        4     30           Yes Falken_and_new  SPF
## BRK105_P1       112    8        4      5           Yes Falken_and_new  SPF
## BRK106_P1       112    8        4     50           Yes Falken_and_new  SPF
## BRK107_P1       112    9        1      3           Yes Falken_and_new  SPF
## BRK108_P1       112    9        1     30           Yes Falken_and_new  SPF
## BRK109_P1       112    9        1      5           Yes Falken_and_new  SPF
## BRK110_P1        16    9        1     50           Yes Falken_and_new  SPF
## BRK111_P1        45    1        2      3            No             No Wild
## BRK112_P1        45    1        2      5            No             No Wild
## BRK113_P1        46    2        2      3            No             No Wild
## BRK114_P1        46    2        2      5            No             No Wild
## BRK115_P1        40    3        2      3            No             No Wild
## BRK116_P1        40    3        2      5            No             No Wild
## BRK117_P1        46    4        2      3            No             No Wild
## BRK118_P1        38    4        2      5            No             No Wild
## BRK119_P1        15    1        3      3            No             No Wild
## BRK120_P1        15    1        3      5            No             No Wild
## BRK121_P1        46    2        3      3            No             No Wild
## BRK122_P1         1    3        3      3            No             No Wild
## BRK123_P1         1    3        3      5            No             No Wild
## BRK124_P1         6    4        3      5            No             No Wild
## BRK125_P1        NA   NA       NA     NA          <NA>           <NA>  SPF
## BRK126_P1        NA   NA       NA     NA          <NA>           <NA>  SPF
## BRK127_P1        NA   NA       NA     NA          <NA>           <NA>  SPF
## BRK128_P1        NA   NA       NA     NA          <NA>           <NA> Wild
## BRK129_P1        NA   NA       NA     NA          <NA>           <NA> Wild
## BRK130_P1        NA   NA       NA     NA          <NA>           <NA> Wild
## BRK131_P1        NA   NA       NA     NA          <NA>           <NA> Wild
## BRK132_P1        NA   NA       NA     NA          <NA>           <NA> Wild
## BRK133_P1        NA   NA       NA     NA          <NA>           <NA> Wild
## BRK134_P1        NA   NA       NA     NA          <NA>           <NA>  SPF
## BRK135_P1        NA   NA       NA     NA          <NA>           <NA>  SPF
## BRK136_P1        NA   NA       NA     NA          <NA>           <NA>  SPF
## BRK137_P1        NA   NA       NA     NA          <NA>           <NA>  SPF
## BRK138_P1        NA   NA       NA     NA          <NA>           <NA>  SPF
## BRK139_P1        NA   NA       NA     NA          <NA>           <NA>  SPF
## BRK141_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK142_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK143_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK144_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK145_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK147_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK148_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK149_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK150_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK151_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK152_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK153_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK154_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK155_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK156_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK157_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK158_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK159_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK160_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK161_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK162_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK164_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK165_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK166_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK167_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK168_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK169_P1        NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK77_P2         NA   NA       NA     NA            No             No  SPF
## BRK78_P2         NA   NA       NA     NA            No             No  SPF
## BRK80_P2         NA   NA       NA     NA            No             No  SPF
## BRK81_P2         NA   NA       NA     NA            No             No  SPF
## BRK82_P2         NA   NA       NA     NA            No             No  SPF
## BRK83_P2         NA   NA       NA     NA            No             No  SPF
## BRK84_P2         NA   NA       NA     NA            No             No  SPF
## BRK85_P2         NA   NA       NA     NA            No             No  SPF
## BRK86_P2         NA   NA       NA     NA            No             No  SPF
## BRK87_P2         NA   NA       NA     NA            No             No  SPF
## BRK88_P2         NA   NA       NA     NA            No             No  SPF
## BRK89_P2         NA   NA       NA     NA            No             No  SPF
## BRK90_P2         NA   NA       NA     NA            No             No  SPF
## BRK91_P2         NA   NA       NA     NA            No             No  SPF
## BRK92_P2         NA   NA       NA     NA            No             No  SPF
## BRK93_P2         NA   NA       NA     NA            No             No  SPF
## BRK94_P2         NA   NA       NA     NA            No             No  SPF
## BRK95_P2         NA   NA       NA     NA            No             No  SPF
## BRK96_P2         NA   NA       NA     NA            No             No  SPF
## BRK01_P3         NA   NA       NA     NA            No             No Wild
## BRK02_P3         NA   NA       NA     NA            No             No Wild
## BRK03_P3         NA   NA       NA     NA            No             No Wild
## BRK04_P3         NA   NA       NA     NA            No             No Wild
## BRK05_P3         NA   NA       NA     NA            No             No Wild
## BRK06_P3         NA   NA       NA     NA            No             No Wild
## BRK07_P3         NA   NA       NA     NA            No             No Wild
## BRK08_P3         NA   NA       NA     NA            No             No Wild
## BRK09_P3         NA   NA       NA     NA            No             No Wild
## BRK10_P3         NA   NA       NA     NA            No             No Wild
## BRK11_P3         NA   NA       NA     NA            No             No Wild
## BRK12_P3         NA   NA       NA     NA            No             No Wild
## BRK13_P3         NA   NA       NA     NA            No             No Wild
## BRK14_P3         NA   NA       NA     NA            No             No Wild
## BRK15_P3         NA   NA       NA     NA            No             No Wild
## BRK16_P3         NA   NA       NA     NA            No             No Wild
## BRK17_P3         NA   NA       NA     NA            No             No Wild
## BRK18_P3         NA   NA       NA     NA            No             No Wild
## BRK19_P3         NA   NA       NA     NA            No             No Wild
## BRK20_P3         NA   NA       NA     NA            No             No Wild
## BRK21_P3         NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK22_P3         NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK23_P3         NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK24_P3         NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK25_P3         NA   NA       NA     NA          <NA>           <NA> <NA>
## BRK26_P3         NA   NA       NA     NA          <NA>           <NA> <NA>
##           Round   Trial                      Sample   Barcode
## BRK97_P1      3   Rikke                        Term  BRK97_P1
## BRK98_P1      3   Rikke                        Term  BRK98_P1
## BRK99_P1      3   Rikke                        Term  BRK99_P1
## BRK100_P1     3   Rikke                        Term BRK100_P1
## BRK101_P1     3   Rikke                        Term BRK101_P1
## BRK102_P1     3   Rikke                        Term BRK102_P1
## BRK103_P1     3   Rikke                        Term BRK103_P1
## BRK104_P1     3   Rikke                        Term BRK104_P1
## BRK105_P1     3   Rikke                        Term BRK105_P1
## BRK106_P1     3   Rikke                        Term BRK106_P1
## BRK107_P1     3   Rikke                        Term BRK107_P1
## BRK108_P1     3   Rikke                        Term BRK108_P1
## BRK109_P1     3   Rikke                        Term BRK109_P1
## BRK110_P1     3   Rikke                        Term BRK110_P1
## BRK111_P1     1 Therese         Term, colon content BRK111_P1
## BRK112_P1     1 Therese         Term, colon content BRK112_P1
## BRK113_P1     1 Therese         Term, colon content BRK113_P1
## BRK114_P1     1 Therese         Term, colon content BRK114_P1
## BRK115_P1     1 Therese         Term, colon content BRK115_P1
## BRK116_P1     1 Therese         Term, colon content BRK116_P1
## BRK117_P1     1 Therese         Term, colon content BRK117_P1
## BRK118_P1     1 Therese         Term, colon content BRK118_P1
## BRK119_P1     1 Therese         Term, colon content BRK119_P1
## BRK120_P1     1 Therese         Term, colon content BRK120_P1
## BRK121_P1     1 Therese         Term, colon content BRK121_P1
## BRK122_P1     1 Therese         Term, colon content BRK122_P1
## BRK123_P1     1 Therese         Term, colon content BRK123_P1
## BRK124_P1     1 Therese         Term, colon content BRK124_P1
## BRK125_P1    NA   Rikke                     Pre_ABX BRK125_P1
## BRK126_P1    NA   Rikke                     Pre_ABX BRK126_P1
## BRK127_P1    NA   Rikke                     Pre_ABX BRK127_P1
## BRK128_P1    NA Therese                     Pre_ABX BRK128_P1
## BRK129_P1    NA Therese                     Pre_ABX BRK129_P1
## BRK130_P1    NA Therese                     Pre_ABX BRK130_P1
## BRK131_P1    NA Therese                     Pre_ABX BRK131_P1
## BRK132_P1    NA Therese                     Pre_ABX BRK132_P1
## BRK133_P1    NA Therese                     Pre_ABX BRK133_P1
## BRK134_P1    NA   Rikke                  post_1.FMT BRK134_P1
## BRK135_P1    NA   Rikke                  post_1.FMT BRK135_P1
## BRK136_P1    NA   Rikke                  post_1.FMT BRK136_P1
## BRK137_P1    NA   Rikke                        Term BRK137_P1
## BRK138_P1    NA   Rikke                        Term BRK138_P1
## BRK139_P1    NA   Rikke                        Term BRK139_P1
## BRK141_P1    NA     Ida                        Term BRK141_P1
## BRK142_P1    NA     Ida                        Term BRK142_P1
## BRK143_P1    NA     Ida                        Term BRK143_P1
## BRK144_P1    NA     Ida                        Term BRK144_P1
## BRK145_P1    NA     Ida                        Term BRK145_P1
## BRK147_P1    NA     Ida                        Term BRK147_P1
## BRK148_P1    NA     Ida                        Term BRK148_P1
## BRK149_P1    NA     Ida                        Term BRK149_P1
## BRK150_P1    NA     Ida                        Term BRK150_P1
## BRK151_P1    NA     Ida                        Term BRK151_P1
## BRK152_P1    NA     Ida                        Term BRK152_P1
## BRK153_P1    NA     Ida                        Term BRK153_P1
## BRK154_P1    NA     Ida                        Term BRK154_P1
## BRK155_P1    NA     Ida                        Term BRK155_P1
## BRK156_P1    NA     Ida                        Term BRK156_P1
## BRK157_P1    NA     Ida                        Term BRK157_P1
## BRK158_P1    NA     Ida                        Term BRK158_P1
## BRK159_P1    NA     Ida                        Term BRK159_P1
## BRK160_P1    NA     Ida                        Term BRK160_P1
## BRK161_P1    NA     Ida                        Term BRK161_P1
## BRK162_P1    NA     Ida                        Term BRK162_P1
## BRK164_P1    NA     Ida                        Term BRK164_P1
## BRK165_P1    NA     Ida                        Term BRK165_P1
## BRK166_P1    NA     Ida                        Term BRK166_P1
## BRK167_P1    NA     Ida                        Term BRK167_P1
## BRK168_P1    NA     Ida                        Term BRK168_P1
## BRK169_P1    NA     Ida                        Term BRK169_P1
## BRK77_P2      1 Therese                     weaning  BRK77_P2
## BRK78_P2      1 Therese                     weaning  BRK78_P2
## BRK80_P2      1 Therese                     weaning  BRK80_P2
## BRK81_P2      1 Therese                     weaning  BRK81_P2
## BRK82_P2      1 Therese                     weaning  BRK82_P2
## BRK83_P2      1 Therese                     weaning  BRK83_P2
## BRK84_P2      1 Therese                     weaning  BRK84_P2
## BRK85_P2      1 Therese                     weaning  BRK85_P2
## BRK86_P2      1 Therese                     weaning  BRK86_P2
## BRK87_P2      1 Therese                     weaning  BRK87_P2
## BRK88_P2      1 Therese                     weaning  BRK88_P2
## BRK89_P2      1 Therese                     weaning  BRK89_P2
## BRK90_P2      1 Therese                     weaning  BRK90_P2
## BRK91_P2      1 Therese                     weaning  BRK91_P2
## BRK92_P2      1 Therese                     weaning  BRK92_P2
## BRK93_P2      1 Therese                     weaning  BRK93_P2
## BRK94_P2      1 Therese                     weaning  BRK94_P2
## BRK95_P2      1 Therese                     weaning  BRK95_P2
## BRK96_P2      1 Therese                     weaning  BRK96_P2
## BRK01_P3      1 Therese                     weaning  BRK01_P3
## BRK02_P3      1 Therese                     weaning  BRK02_P3
## BRK03_P3      1 Therese                     weaning  BRK03_P3
## BRK04_P3      1 Therese                     weaning  BRK04_P3
## BRK05_P3      1 Therese                     weaning  BRK05_P3
## BRK06_P3      1 Therese                     weaning  BRK06_P3
## BRK07_P3      1 Therese                     weaning  BRK07_P3
## BRK08_P3      1 Therese                     weaning  BRK08_P3
## BRK09_P3      1 Therese                     weaning  BRK09_P3
## BRK10_P3      1 Therese                     weaning  BRK10_P3
## BRK11_P3      1 Therese                     weaning  BRK11_P3
## BRK12_P3      1 Therese                     weaning  BRK12_P3
## BRK13_P3      1 Therese                     weaning  BRK13_P3
## BRK14_P3      1 Therese                     weaning  BRK14_P3
## BRK15_P3      1 Therese                     weaning  BRK15_P3
## BRK16_P3      1 Therese                     weaning  BRK16_P3
## BRK17_P3      1 Therese                     weaning  BRK17_P3
## BRK18_P3      1 Therese                     weaning  BRK18_P3
## BRK19_P3      1 Therese                     weaning  BRK19_P3
## BRK20_P3      1 Therese                     weaning  BRK20_P3
## BRK21_P3     NA    <NA> donor, only liquid no feces  BRK21_P3
## BRK22_P3     NA    <NA> donor, only liquid no feces  BRK22_P3
## BRK23_P3     NA    <NA> donor, only liquid no feces  BRK23_P3
## BRK24_P3     NA    <NA> donor, only liquid no feces  BRK24_P3
## BRK25_P3     NA    <NA> donor, only liquid no feces  BRK25_P3
## BRK26_P3     NA    <NA> donor, only liquid no feces  BRK26_P3
##                            Group
## BRK97_P1     BALB/c_Yes_SPF_Term
## BRK98_P1     BALB/c_Yes_SPF_Term
## BRK99_P1     BALB/c_Yes_SPF_Term
## BRK100_P1    BALB/c_Yes_SPF_Term
## BRK101_P1    BALB/c_Yes_SPF_Term
## BRK102_P1    BALB/c_Yes_SPF_Term
## BRK103_P1    BALB/c_Yes_SPF_Term
## BRK104_P1    BALB/c_Yes_SPF_Term
## BRK105_P1    BALB/c_Yes_SPF_Term
## BRK106_P1    BALB/c_Yes_SPF_Term
## BRK107_P1    BALB/c_Yes_SPF_Term
## BRK108_P1    BALB/c_Yes_SPF_Term
## BRK109_P1    BALB/c_Yes_SPF_Term
## BRK110_P1    BALB/c_Yes_SPF_Term
## BRK111_P1   BALB/c_No_Wild_colon
## BRK112_P1   BALB/c_No_Wild_colon
## BRK113_P1   BALB/c_No_Wild_colon
## BRK114_P1   BALB/c_No_Wild_colon
## BRK115_P1   BALB/c_No_Wild_colon
## BRK116_P1   BALB/c_No_Wild_colon
## BRK117_P1   BALB/c_No_Wild_colon
## BRK118_P1   BALB/c_No_Wild_colon
## BRK119_P1   BALB/c_No_Wild_colon
## BRK120_P1   BALB/c_No_Wild_colon
## BRK121_P1   BALB/c_No_Wild_colon
## BRK122_P1   BALB/c_No_Wild_colon
## BRK123_P1   BALB/c_No_Wild_colon
## BRK124_P1   BALB/c_No_Wild_colon
## BRK125_P1      Mother_SPF_PreABX
## BRK126_P1      Mother_SPF_PreABX
## BRK127_P1      Mother_SPF_PreABX
## BRK128_P1     Mother_Wild_PreABX
## BRK129_P1     Mother_Wild_PreABX
## BRK130_P1     Mother_Wild_PreABX
## BRK131_P1     Mother_Wild_PreABX
## BRK132_P1     Mother_Wild_PreABX
## BRK133_P1     Mother_Wild_PreABX
## BRK134_P1    Mother_SPF_post1FMT
## BRK135_P1    Mother_SPF_post1FMT
## BRK136_P1    Mother_SPF_post1FMT
## BRK137_P1        Mother_SPF_Term
## BRK138_P1        Mother_SPF_Term
## BRK139_P1        Mother_SPF_Term
## BRK141_P1               Wildling
## BRK142_P1               Wildling
## BRK143_P1               Wildling
## BRK144_P1               Wildling
## BRK145_P1               Wildling
## BRK147_P1               Wildling
## BRK148_P1               Wildling
## BRK149_P1               Wildling
## BRK150_P1               Wildling
## BRK151_P1               Wildling
## BRK152_P1               Pet Shop
## BRK153_P1               Pet Shop
## BRK154_P1               Pet Shop
## BRK155_P1               Pet Shop
## BRK156_P1               Pet Shop
## BRK157_P1               Pet Shop
## BRK158_P1               Pet Shop
## BRK159_P1               Pet Shop
## BRK160_P1               Pet Shop
## BRK161_P1               Pet Shop
## BRK162_P1               Pet Shop
## BRK164_P1               Pet Shop
## BRK165_P1               Pet Shop
## BRK166_P1                   Wild
## BRK167_P1                   Wild
## BRK168_P1                   Wild
## BRK169_P1                   Wild
## BRK77_P2   BALB/c_No_SPF_weaning
## BRK78_P2   BALB/c_No_SPF_weaning
## BRK80_P2   BALB/c_No_SPF_weaning
## BRK81_P2   BALB/c_No_SPF_weaning
## BRK82_P2   BALB/c_No_SPF_weaning
## BRK83_P2   BALB/c_No_SPF_weaning
## BRK84_P2   BALB/c_No_SPF_weaning
## BRK85_P2   BALB/c_No_SPF_weaning
## BRK86_P2   BALB/c_No_SPF_weaning
## BRK87_P2   BALB/c_No_SPF_weaning
## BRK88_P2   BALB/c_No_SPF_weaning
## BRK89_P2   BALB/c_No_SPF_weaning
## BRK90_P2   BALB/c_No_SPF_weaning
## BRK91_P2   BALB/c_No_SPF_weaning
## BRK92_P2   BALB/c_No_SPF_weaning
## BRK93_P2   BALB/c_No_SPF_weaning
## BRK94_P2   BALB/c_No_SPF_weaning
## BRK95_P2   BALB/c_No_SPF_weaning
## BRK96_P2   BALB/c_No_SPF_weaning
## BRK01_P3  BALB/c_No_Wild_weaning
## BRK02_P3  BALB/c_No_Wild_weaning
## BRK03_P3  BALB/c_No_Wild_weaning
## BRK04_P3  BALB/c_No_Wild_weaning
## BRK05_P3  BALB/c_No_Wild_weaning
## BRK06_P3  BALB/c_No_Wild_weaning
## BRK07_P3  BALB/c_No_Wild_weaning
## BRK08_P3  BALB/c_No_Wild_weaning
## BRK09_P3  BALB/c_No_Wild_weaning
## BRK10_P3  BALB/c_No_Wild_weaning
## BRK11_P3  BALB/c_No_Wild_weaning
## BRK12_P3  BALB/c_No_Wild_weaning
## BRK13_P3  BALB/c_No_Wild_weaning
## BRK14_P3  BALB/c_No_Wild_weaning
## BRK15_P3  BALB/c_No_Wild_weaning
## BRK16_P3  BALB/c_No_Wild_weaning
## BRK17_P3  BALB/c_No_Wild_weaning
## BRK18_P3  BALB/c_No_Wild_weaning
## BRK19_P3  BALB/c_No_Wild_weaning
## BRK20_P3  BALB/c_No_Wild_weaning
## BRK21_P3              Donor_Wild
## BRK22_P3              Donor_Wild
## BRK23_P3              Donor_Wild
## BRK24_P3               Donor_SPF
## BRK25_P3               Donor_SPF
## BRK26_P3               Donor_SPF

cat("After rarefaction:", ntaxa(ps_rar), "OTUs,", nsamples(ps_rar), "samples\n")

## After rarefaction: 357 OTUs, 115 samples

cat("Samples removed due to low depth:", nsamples(pst) - nsamples(ps_rar), "\n")

## Samples removed due to low depth: 2

Abundance threshold filtering (post-rarefaction)

# Remove taxa with abundance < 0.0001% of total (as in Nutella2 script by Ida Wang Henriksen)
# Taxonomic filtering based on abundance for rarefied data: supervsed
## Abumdance: > 0.0001% overall abundance across all samples
# We remove all the taxa with abundance less than 0.0001 %. 

total.depth     <- sum(otu_table(ps_rar))
totAbuThreshold <- 1e-4 * total.depth
ps_rar <- prune_taxa(taxa_sums(ps_rar) > totAbuThreshold, ps_rar)
ps_rar

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 152 taxa and 115 samples ]
## sample_data() Sample Data:       [ 115 samples by 17 sample variables ]
## tax_table()   Taxonomy Table:    [ 152 taxa by 7 taxonomic ranks ]

sum(rowSums(ps_rar@otu_table) == 0)

## [1] 0

colSums(ps_rar@otu_table)  # det er summen i hvert sample

##  BRK97_P1  BRK98_P1  BRK99_P1 BRK100_P1 BRK101_P1 BRK102_P1 BRK103_P1 BRK104_P1 
##      7911      7941      7979      7969      7980      7997      7970      7994 
## BRK105_P1 BRK106_P1 BRK107_P1 BRK108_P1 BRK109_P1 BRK110_P1 BRK111_P1 BRK112_P1 
##      7997      7993      7994      7974      7919      7971      7994      7994 
## BRK113_P1 BRK114_P1 BRK115_P1 BRK116_P1 BRK117_P1 BRK118_P1 BRK119_P1 BRK120_P1 
##      7984      7986      7986      7977      7992      7997      7976      7992 
## BRK121_P1 BRK122_P1 BRK123_P1 BRK124_P1 BRK125_P1 BRK126_P1 BRK127_P1 BRK128_P1 
##      7955      7990      7985      7983      7980      7959      7961      7989 
## BRK129_P1 BRK130_P1 BRK131_P1 BRK132_P1 BRK133_P1 BRK134_P1 BRK135_P1 BRK136_P1 
##      7978      7989      7957      7985      7951      7994      7992      7991 
## BRK137_P1 BRK138_P1 BRK139_P1 BRK141_P1 BRK142_P1 BRK143_P1 BRK144_P1 BRK145_P1 
##      7986      7941      7977      7949      7907      7944      7985      7957 
## BRK147_P1 BRK148_P1 BRK149_P1 BRK150_P1 BRK151_P1 BRK152_P1 BRK153_P1 BRK154_P1 
##      7944      7983      7958      7939      7916      7969      7984      7846 
## BRK155_P1 BRK156_P1 BRK157_P1 BRK158_P1 BRK159_P1 BRK160_P1 BRK161_P1 BRK162_P1 
##      7958      7881      7975      7972      7949      7984      7869      7927 
## BRK164_P1 BRK165_P1 BRK166_P1 BRK167_P1 BRK168_P1 BRK169_P1  BRK77_P2  BRK78_P2 
##      7920      7930      7829      7971      7891      7839      7967      7966 
##  BRK80_P2  BRK81_P2  BRK82_P2  BRK83_P2  BRK84_P2  BRK85_P2  BRK86_P2  BRK87_P2 
##      7981      7984      7978      7982      7987      7987      7980      7982 
##  BRK88_P2  BRK89_P2  BRK90_P2  BRK91_P2  BRK92_P2  BRK93_P2  BRK94_P2  BRK95_P2 
##      7980      7977      7981      7978      7991      7987      7982      7986 
##  BRK96_P2  BRK01_P3  BRK02_P3  BRK03_P3  BRK04_P3  BRK05_P3  BRK06_P3  BRK07_P3 
##      7995      7979      7980      7967      7959      7982      7985      7977 
##  BRK08_P3  BRK09_P3  BRK10_P3  BRK11_P3  BRK12_P3  BRK13_P3  BRK14_P3  BRK15_P3 
##      7980      7981      7965      7970      7968      7988      7976      7983 
##  BRK16_P3  BRK17_P3  BRK18_P3  BRK19_P3  BRK20_P3  BRK21_P3  BRK22_P3  BRK23_P3 
##      7975      7968      7979      7976      7975      7962      7996      7983 
##  BRK24_P3  BRK25_P3  BRK26_P3 
##      7911      7994      7993

pst_original    = pst
ps_rar_original = ps_rar

Transformation objects

# Relative abundance as percentage (as in Nutella2) 
ps_rel = transform_sample_counts(ps_rar, function(x) { x / sum(x) * 100 })

# Natural log transformation for beta diversity (as in Nutella2)
ps_log = transform_sample_counts(ps_rar, function(x) { log(1 + x) })

8. Prep for Alpha Diversity

# Richness: Chao1 — use non-rarefied table (as in Nutella2)
Chao1 = estimate_richness(pst, split = TRUE, measures = "Chao1") # for richness, we don't use rarefied table

# Evenness: Shannon — use rarefied table (as in Nutella2)
Shannon = estimate_richness(ps_rar, split = TRUE, measures = "Shannon")

# Add indexes to sample data
sample_data(ps_rar) <- data.frame(
  sample_data(ps_rar),
  Chao1   = Chao1[rownames(Chao1) %in% rownames(sample_data(ps_rar)), ][[1]],
  Shannon = Shannon$Shannon
)

alpha_df = sample_data(ps_rar) %>% data.frame()
head(alpha_df[, c("Group", "Strain", "Sex", "FMT", "Chao1", "Shannon")])

##                         Group Strain    Sex FMT    Chao1  Shannon
## BRK97_P1  BALB/c_Yes_SPF_Term BALB/c Female SPF 108.3529 2.039375
## BRK98_P1  BALB/c_Yes_SPF_Term BALB/c Female SPF 143.3333 2.098244
## BRK99_P1  BALB/c_Yes_SPF_Term BALB/c Female SPF 126.3636 1.401313
## BRK100_P1 BALB/c_Yes_SPF_Term BALB/c   Male SPF 148.0000 1.337422
## BRK101_P1 BALB/c_Yes_SPF_Term BALB/c   Male SPF 143.0000 1.925153
## BRK102_P1 BALB/c_Yes_SPF_Term BALB/c   Male SPF 111.4286 2.108371

# alpha diversitet per gruppe
alpha_df %>%
  group_by(Group) %>%
  summarise(
    n            = n(),
    mean_Chao1   = round(mean(Chao1,   na.rm = TRUE), 2),
    mean_Shannon = round(mean(Shannon, na.rm = TRUE), 2)
  ) %>%
  arrange(Group)

## # A tibble: 13 × 4
##    Group                      n mean_Chao1 mean_Shannon
##    <fct>                  <int>      <dbl>        <dbl>
##  1 BALB/c_No_SPF_weaning     19      122.          2.65
##  2 BALB/c_No_Wild_weaning    20      122.          2.51
##  3 BALB/c_Yes_SPF_Term       14      118.          1.64
##  4 BALB/c_No_Wild_colon      14      139.          2.4 
##  5 Wildling                  10      165.          2.5 
##  6 Pet Shop                  13      169.          2.21
##  7 Wild                       4      154.          2.04
##  8 Mother_SPF_PreABX          3      172.          2.62
##  9 Mother_SPF_post1FMT        3      109.          0.53
## 10 Mother_SPF_Term            3       93.2         1.78
## 11 Mother_Wild_PreABX         6      147.          2.04
## 12 Donor_SPF                  3       84.2         1.15
## 13 Donor_Wild                 3       90.4         1.65

# Melt for plotting (as in Nutella2)
long_mtdat <- melt(alpha_df)
long_mtdat <- long_mtdat[long_mtdat$variable %in% c("Chao1", "Shannon"), ]
long_mtdat$variable <- factor(long_mtdat$variable, levels = c("Chao1", "Shannon"))
head(long_mtdat)

##     Study Mouse.ID Strain    Sex Pre.immunized        Pre.mix FMT Trial Sample
## 691   IMU       78 BALB/c Female           Yes Falken_and_new SPF Rikke   Term
## 692   IMU       79 BALB/c Female           Yes Falken_and_new SPF Rikke   Term
## 693   IMU       80 BALB/c Female           Yes Falken_and_new SPF Rikke   Term
## 694   IMU       81 BALB/c   Male           Yes Falken_and_new SPF Rikke   Term
## 695   IMU       82 BALB/c   Male           Yes Falken_and_new SPF Rikke   Term
## 696   IMU       83 BALB/c   Male           Yes Falken_and_new SPF Rikke   Term
##       Barcode               Group variable    value
## 691  BRK97_P1 BALB/c_Yes_SPF_Term    Chao1 108.3529
## 692  BRK98_P1 BALB/c_Yes_SPF_Term    Chao1 143.3333
## 693  BRK99_P1 BALB/c_Yes_SPF_Term    Chao1 126.3636
## 694 BRK100_P1 BALB/c_Yes_SPF_Term    Chao1 148.0000
## 695 BRK101_P1 BALB/c_Yes_SPF_Term    Chao1 143.0000
## 696 BRK102_P1 BALB/c_Yes_SPF_Term    Chao1 111.4286

9. Gloomer Function

# Gloomer: wraps tax_glom() and adds unique names to taxa
# Directly from Nutella2.R / Farhad Panah's pipeline
gloomer = function(ps = data, taxa_level = taxa_level, NArm = "TRUE") {
  rank.names = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")

  if (sum(loadedNamespaces() == "MicrobiotaProcess") > 0) {
    devtools::unload("MicrobiotaProcess", FALSE)
  }

  # Remove uncultured Family
  ps = subset_taxa(ps, !Family %in% c("uncultured", "NA", "uncategorized", "unassigend", "", " "))

  if (taxa_level == "Species") {
    ps = subset_taxa(ps, !Genus %in% NA)
    tax_table(ps)[, taxa_level] <- ifelse(is.na(tax_table(ps)[, taxa_level]),
                                          "unknown", paste(tax_table(ps)[, taxa_level]))
    physeq = tax_glom(physeq = ps, taxrank = taxa_level, NArm = NArm)
    taxdat = tax_table(physeq)[, seq_along(rank.names[1:which(rank.names == taxa_level)])]
    taxdat = taxdat[complete.cases(taxdat), ] %>% as.data.frame
    otudat = otu_table(physeq)
    taxdat[, 6] = ifelse(taxdat[, 6] %in% c("uncategorized", NA, "uncultured", "unassigend", "", " "),
                         paste0("[", taxdat[, length(rank.names[1:which(rank.names == "Genus")]) - 1], "]_", taxdat[, 6]),
                         taxdat[, 6])
    spec1 = taxdat[, taxa_level] %>% as.vector
    spec2 = taxdat[, taxa_level] %>% as.vector
    uni   = matrix(NA, ncol = length(spec2), nrow = length(spec1))
    for (i in seq_along(spec1)) { for (j in seq_along(spec2)) {
      uni[i, j] = ifelse(spec1[i] == spec2[j], "TRUE", "FALSE") } }
    rownames(uni) <- spec1; colnames(uni) <- spec2
    uni[upper.tri(uni, diag = TRUE)] = 0
    duplis = uni %>% reshape2::melt() %>% filter(value == "TRUE")
    if (dim(duplis)[[1]] > 0) {
      duplis = uni %>% reshape2::melt() %>% filter(value == "TRUE") %>%
        dplyr::select(1) %>% unique() %>% unlist %>% as.vector
      taxdat = taxdat %>% mutate(uni = ifelse(taxdat[, taxa_level] %in% duplis,
                                              paste0("[", taxdat[, length(rank.names[1:which(rank.names == taxa_level)]) - 1], "]_", taxdat[, taxa_level]),
                                              taxdat[, taxa_level]))
      dupies <- taxdat[duplicated(taxdat[, "uni"]), "uni"]
      if (length(dupies) > 0) {
        taxdat = taxdat %>% data.frame %>%
          mutate(uni2 = ifelse(taxdat[, "uni"] %in% dupies,
                               paste0("[", taxdat[, length(rank.names[1:which(rank.names == taxa_level)]) - 2], "]_", taxdat[, "uni"]),
                               taxdat[, "uni"]))
        taxdat[, taxa_level] = taxdat[, "uni2"]
        taxdat[, "uni"] <- NULL; taxdat[, "uni2"] <- NULL
      } else {
        taxdat[, taxa_level] = taxdat[, "uni"]; taxdat[, "uni"] <- NULL
      }
    }
    taxdat <- as(taxdat, "matrix")
    rownames(otudat) <- taxdat[rownames(taxdat) %in% rownames(otudat), taxa_level]
    rownames(taxdat) <- taxdat[, taxa_level]; taxdat <- tax_table(taxdat)
    taxa_names(physeq) <- taxa_names(taxdat); tax_table(physeq) <- taxdat; otu_table(physeq) <- otudat

  } else if (taxa_level == "Genus") {
    physeq = tax_glom(physeq = ps, taxrank = taxa_level, NArm = NArm)
    taxdat = tax_table(physeq)[, seq_along(rank.names[1:which(rank.names == taxa_level)])]
    taxdat = taxdat[complete.cases(taxdat), ] %>% as.data.frame
    otudat = otu_table(physeq)
    taxdat[, 6] = ifelse(taxdat[, 6] %in% c("uncategorized", NA, "uncultured", "unassigend", "", " "),
                         paste0("[", taxdat[, length(rank.names[1:which(rank.names == taxa_level)]) - 1], "]_", taxdat[, taxa_level]),
                         taxdat[, taxa_level])
    gen1 = taxdat[, taxa_level] %>% as.vector
    gen2 = taxdat[, taxa_level] %>% as.vector
    uni  = matrix(NA, ncol = length(gen2), nrow = length(gen1))
    for (i in seq_along(gen1)) { for (j in seq_along(gen2)) {
      uni[i, j] = ifelse(gen1[i] == gen2[j], "TRUE", "FALSE") } }
    rownames(uni) <- gen1; colnames(uni) <- gen2
    uni[upper.tri(uni, diag = TRUE)] = 0
    duplis = uni %>% reshape2::melt() %>% filter(value == "TRUE")
    if (dim(duplis)[[1]] > 0) {
      duplis = uni %>% reshape2::melt() %>% filter(value == "TRUE") %>%
        dplyr::select(1) %>% unique() %>% unlist %>% as.vector
      taxdat = taxdat %>% mutate(uni = ifelse(taxdat[, taxa_level] %in% duplis,
                                              paste0("[", taxdat[, length(rank.names[1:which(rank.names == taxa_level)]) - 1], "]_", taxdat[, taxa_level]),
                                              taxdat[, taxa_level]))
      taxdat[, taxa_level] = taxdat[, "uni"]; taxdat[, "uni"] <- NULL
      taxdat <- as(taxdat, "matrix")
      rownames(otudat) <- taxdat[rownames(taxdat) %in% rownames(otudat), taxa_level]
      rownames(taxdat) <- taxdat[taxdat[, taxa_level] %in% rownames(otudat), taxa_level]
      taxdat <- as.matrix(taxdat); taxdat <- tax_table(taxdat)
    } else {
      taxdat <- as.matrix(taxdat); taxdat <- tax_table(taxdat)
      rownames(otudat) <- taxdat[rownames(taxdat) %in% rownames(otudat), taxa_level]
      rownames(taxdat) <- taxdat[, taxa_level]; taxdat <- tax_table(taxdat)
    }
    taxa_names(physeq) <- taxa_names(taxdat); tax_table(physeq) <- taxdat; otu_table(physeq) <- otudat

  } else {
    physeq = tax_glom(physeq = ps, taxrank = taxa_level, NArm = TRUE)
    taxdat = tax_table(physeq)[, seq_along(rank.names[1:which(rank.names == taxa_level)])]
    taxdat = taxdat[complete.cases(taxdat), ] %>% as.data.frame
    otudat = otu_table(physeq)
    spec1 = taxdat[, taxa_level] %>% as.vector
    spec2 = taxdat[, taxa_level] %>% as.vector
    uni   = matrix(NA, ncol = length(spec2), nrow = length(spec1))
    for (i in seq_along(spec1)) { for (j in seq_along(spec2)) {
      uni[i, j] = ifelse(spec1[i] == spec2[j], "TRUE", "FALSE") } }
    rownames(uni) <- spec1; colnames(uni) <- spec2
    uni[upper.tri(uni, diag = TRUE)] = 0
    duplis = uni %>% reshape2::melt() %>% filter(value == "TRUE")
    if (dim(duplis)[[1]] > 0) {
      duplis = uni %>% reshape2::melt() %>% filter(value == "TRUE") %>%
        dplyr::select(1) %>% unique() %>% unlist %>% as.vector
      taxdat = taxdat %>% mutate(uni = ifelse(taxdat[, taxa_level] %in% duplis,
                                              paste(taxdat[, length(rank.names[1:which(rank.names == taxa_level)]) - 1], "_", taxdat[, taxa_level]),
                                              taxdat[, taxa_level]))
      taxdat[, taxa_level] = taxdat[, "uni"]; taxdat[, "uni"] <- NULL
      taxdat <- as.matrix(taxdat)
      rownames(otudat) <- taxdat[rownames(taxdat) %in% rownames(otudat), taxa_level]
      rownames(taxdat) <- taxdat[, taxa_level]; taxdat <- tax_table(taxdat)
    } else {
      taxdat <- as.matrix(taxdat); taxdat <- tax_table(taxdat)
      rownames(otudat) <- taxdat[rownames(taxdat) %in% rownames(otudat), taxa_level]
      rownames(taxdat) <- taxdat[, taxa_level]; taxdat <- tax_table(taxdat)
    }
    taxa_names(physeq) <- taxa_names(taxdat); tax_table(physeq) <- taxdat; otu_table(physeq) <- otudat
  }
  return(physeq)
}

# Color palettes (as in Nutella2)
speccol = c("darkmagenta", "antiquewhite4", "cornflowerblue", "plum4",
            "purple", "cadetblue2", "pink", "steelblue2", "lightblue", "lightgreen",
            "violet", "lightpink", "skyblue", "red", "yellow", "limegreen",
            "magenta", "green", "coral", "blue")

cols = colors()[c(190, 20, 30, 15, 8, 90, 35, 120, 42)]

10. Comparison 1 — FMT Engraftment at Weaning

Alpha diversity

comp1_groups <- c("BALB/c_No_SPF_weaning", "BALB/c_No_Wild_weaning")
comp1_donors <- c("Donor_SPF", "Donor_Wild")

# Subset til kun originale donorer — ikke resampling
alpha_df_donors_orig <- alpha_df %>%
  filter(Study %in% c("Donor_inoculum_Wild(Zahir)", 
                      "Donor_inoculum_SPF(WP1Amix)"))

ps_c1 <- subset_samples(ps_rar,
  sample_data(ps_rar)$Group %in% c(comp1_groups, comp1_donors))

alpha_c1_stat <- alpha_df %>%
  filter(Group %in% comp1_groups) %>%
  mutate(FMT = factor(FMT), Sex = factor(Sex))

# Chao1
lm_chao1 <- lm(Chao1 ~ FMT, data = alpha_c1_stat)

par(mfrow = c(1, 2))
qqnorm(rstandard(lm_chao1), main = "Q-Q plot — Chao1")
abline(0, 1, lty = 2)

plot(predict(lm_chao1), rstandard(lm_chao1),
     main = "Residuals vs Fitted — Chao1",
     xlab = "Fitted values", ylab = "Standardized residuals")
abline(h = 0, lty = 2)

# Shannon
lm_shannon <- lm(Shannon ~ FMT, data = alpha_c1_stat)

par(mfrow = c(1, 2))
qqnorm(rstandard(lm_shannon), main = "Q-Q plot — Shannon")
abline(0, 1, lty = 2)

plot(predict(lm_shannon), rstandard(lm_shannon),
     main = "Residuals vs Fitted — Shannon",
     xlab = "Fitted values", ylab = "Standardized residuals")
abline(h = 0, lty = 2)

par(mfrow = c(1, 1))

# Rank-transformer data
alpha_c1_stat$Chao1_rank   <- rank(alpha_c1_stat$Chao1)
alpha_c1_stat$Shannon_rank <- rank(alpha_c1_stat$Shannon)

lm_chao1_rank   <- lm(Chao1_rank   ~ FMT, data = alpha_c1_stat)
lm_shannon_rank <- lm(Shannon_rank ~ FMT, data = alpha_c1_stat)

par(mfrow = c(1, 2))
qqnorm(rstandard(lm_chao1_rank),   main = "Q-Q plot — Chao1 ranked");   abline(0, 1, lty = 2)
plot(predict(lm_chao1_rank), rstandard(lm_chao1_rank), main = "Residuals vs Fitted — Chao1 ranked", xlab = "Fitted values", ylab = "Standardized residuals"); abline(h = 0, lty = 2)

par(mfrow = c(1, 2))
qqnorm(rstandard(lm_shannon_rank), main = "Q-Q plot — Shannon ranked"); abline(0, 1, lty = 2)
plot(predict(lm_shannon_rank), rstandard(lm_shannon_rank), main = "Residuals vs Fitted — Shannon ranked", xlab = "Fitted values", ylab = "Standardized residuals"); abline(h = 0, lty = 2)

par(mfrow = c(1, 1))

# ANOVA with FMT, since no FMT * Sex interaction
## chao1
anova_chao1_rank   <- aov(Chao1_rank ~ FMT, data = alpha_c1_stat)
summary(anova_chao1_rank)

##             Df Sum Sq Mean Sq F value Pr(>F)
## FMT          1      0    0.23   0.002  0.967
## Residuals   37   4939  133.48

TukeyHSD(anova_chao1_rank)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Chao1_rank ~ FMT, data = alpha_c1_stat)
## 
## $FMT
##               diff       lwr      upr     p adj
## Wild-SPF 0.1539474 -7.345508 7.653402 0.9670466

## shannon
anova_shannon_rank <- aov(Shannon_rank ~ FMT, data = alpha_c1_stat)
summary(anova_shannon_rank)

##             Df Sum Sq Mean Sq F value Pr(>F)  
## FMT          1    357   357.3   2.884 0.0978 .
## Residuals   37   4583   123.9                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TukeyHSD(anova_shannon_rank)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Shannon_rank ~ FMT, data = alpha_c1_stat)
## 
## $FMT
##               diff       lwr      upr     p adj
## Wild-SPF -6.055263 -13.27935 1.168823 0.0978349

wilcox.test(Chao1 ~ FMT, data = alpha_c1_stat)

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Chao1 by FMT
## W = 188.5, p-value = 0.9776
## alternative hypothesis: true location shift is not equal to 0

wilcox.test(Shannon ~ FMT, data = alpha_c1_stat)

## 
##  Wilcoxon rank sum exact test
## 
## data:  Shannon by FMT
## W = 249, p-value = 0.1006
## alternative hypothesis: true location shift is not equal to 0

# checking for sex effect
wilcox.test(Chao1 ~ Sex, data = alpha_c1_stat)

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Chao1 by Sex
## W = 248.5, p-value = 0.1031
## alternative hypothesis: true location shift is not equal to 0

wilcox.test(Shannon ~ Sex, data = alpha_c1_stat)

## 
##  Wilcoxon rank sum exact test
## 
## data:  Shannon by Sex
## W = 177, p-value = 0.7284
## alternative hypothesis: true location shift is not equal to 0

Plot w/o donors

library(ggpubr)
library(ggplot2)

long_c1 <- alpha_df %>%
  filter(Group %in% comp1_groups) %>%
  mutate(Group = factor(Group, levels = comp1_groups)) %>%
  melt()
long_c1 <- long_c1[long_c1$variable %in% c("Chao1", "Shannon"), ]

alpha_p_c1 <- ggplot(long_c1, aes(x = Group, y = value)) +
  geom_violin(aes(fill = Group), trim = FALSE, alpha = 0.7) +
  geom_boxplot(width = 0.10, outlier.alpha = 0) +
  geom_jitter(aes(fill = Group, shape = Sex), alpha = 1, width = 0.2,
              stroke = 1, size = 2) +
  facet_wrap(~variable, scales = "free_y") +
  theme_bw() +
  scale_fill_manual(values = c("BALB/c_No_SPF_weaning" = "#8FBF8F",
                               "BALB/c_No_Wild_weaning" = "#3E5871")) +
  scale_x_discrete(labels = c("BALB/c_No_SPF_weaning" = "BALB/c\n+ SPF FMT",
                               "BALB/c_No_Wild_weaning" = "BALB/c\n+ Wild FMT")) +
  theme(legend.position = "right",
        axis.title.x    = element_text(size = 13),
        axis.text.x     = element_text(size = 13),
        axis.title.y    = element_text(size = 13),
        strip.text.x    = element_text(size = 13),
        plot.title      = element_text(size = 15, hjust = 0),
        plot.subtitle = element_markdown(size = 13)) + 
  guides(fill = "none") + 
  labs(x = NULL, y = "Alpha diversity",
       title = "Alpha diversity at weaning", subtitle = "Shannon FMT <i>p</i> = 0.087 (.)")

alpha_p_c1

# ggsave("./outputs/Comp1_weaning_alpha.jpeg", height = 7, width = 13, dpi = 300)

Plot with donors

# Recipients at weaning together with respective donors
long_c1_donors <- bind_rows(
  alpha_df_donors_orig,
  alpha_df %>% filter(Group %in% comp1_groups)
) %>%
  mutate(
    Group_plot = case_when(
      Study == "Donor_inoculum_SPF(WP1Amix)"  ~ "Donor_SPF",
      Study == "Donor_inoculum_Wild(Zahir)"    ~ "Donor_Wild",
      TRUE ~ as.character(Group)
    ),
    Group_plot = factor(Group_plot, levels = c(
  "BALB/c_No_SPF_weaning", "Donor_SPF",
  "BALB/c_No_Wild_weaning", "Donor_Wild"
    )),
    FMT_type = case_when(
      Group_plot %in% c("Donor_SPF", "BALB/c_No_SPF_weaning") ~ "SPF",
      Group_plot %in% c("Donor_Wild", "BALB/c_No_Wild_weaning") ~ "Wild"
    )
  ) %>%
  melt()
long_c1_donors <- long_c1_donors[long_c1_donors$variable %in% c("Chao1", "Shannon"), ]

alpha_p_donors_c1 <- ggplot(long_c1_donors, aes(x = Group_plot, y = value)) +
  geom_violin(data = long_c1_donors %>% filter(!Group_plot %in% c("Donor_SPF", "Donor_Wild")),
              aes(fill = Group_plot), trim = FALSE, alpha = 0.7) +
  geom_boxplot(data = long_c1_donors %>% filter(!Group_plot %in% c("Donor_SPF", "Donor_Wild")),
               width = 0.10, outlier.alpha = 0) +
  geom_jitter(data = long_c1_donors %>% filter(!Group_plot %in% c("Donor_SPF", "Donor_Wild")),
              aes(fill = Group_plot, shape = Sex), alpha = 1, width = 0.2,
              stroke = 1, size = 2) +
  # Donorer som enkelt diamond punkt — samme farve som recipient
  geom_point(data = long_c1_donors %>% filter(Group_plot %in% c("Donor_SPF", "Donor_Wild")),
             aes(color = FMT_type),
             shape = 18, size = 5, alpha = 1) +
  facet_wrap(~variable, scales = "free_y") +
  theme_bw() +
  scale_fill_manual(values  = c("BALB/c_No_SPF_weaning"  = "#8FBF8F",
                                "BALB/c_No_Wild_weaning" = "#3E5871"),
                    name = "Group") +
  scale_color_manual(values = c("SPF"  = "#8FBF8F",
                                "Wild" = "#3E5871"), guide = "none") +
  scale_shape_manual(values = c("Female" = 21, "Male" = 24), name = "Sex") +
  scale_x_discrete(
    limits = c("BALB/c_No_SPF_weaning", "Donor_SPF",
             "BALB/c_No_Wild_weaning", "Donor_Wild"),
    labels = c(
  "BALB/c_No_SPF_weaning"  = "BALB/c\n+ SPF FMT",
  "Donor_SPF"              = "Donor\nSPF",
  "BALB/c_No_Wild_weaning" = "BALB/c\n+ Wild FMT",
  "Donor_Wild"             = "Donor\nWild"
  )) +
  guides(fill  = "none",
         shape = guide_legend(override.aes = list(size = 4))) +
  theme(axis.title.x = element_text(size = 13),
        axis.text.x  = element_text(size = 11),
        axis.title.y = element_text(size = 13),
        strip.text.x = element_text(size = 13),
        plot.title   = element_text(size = 15, hjust = 0),
        plot.subtitle = element_markdown(size = 13)) +
  labs(x = NULL, y = "Alpha diversity",
       title = "Alpha diversity at Weaning", subtitle = "Shannon FMT <i>p</i> = 0.087 (.)")

alpha_p_donors_c1

# ggsave("./outputs/Comp1_weaning_donors_alpha.jpeg", height = 7, width = 13, dpi = 300)

Beta diversity — Bray-Curtis PCoA

Transformation examples - logarithm

# Histogrammer for de tre mest relevante transformationer
p_raw <- ggplot() +
  geom_histogram(aes(x = rowSums(otu_table(ps_rar))),
                 fill = "#4a8d78", color = "grey", binwidth = 2000) +
  theme_bw() + ggtitle("Raw counts") +
  xlab("Taxa sum") + ylab("Count")

p_log <- ggplot() +
  geom_histogram(aes(x = rowSums(otu_table(ps_log))),
                 fill = "#4a8d78", color = "grey", binwidth = 5) +
  theme_bw() + ggtitle("Natural log — log(1+x)") +
  xlab("Taxa sum") + ylab("Count")

p_rel <- ggplot() +
  geom_histogram(aes(x = rowSums(otu_table(ps_rel))),
                 fill = "#4a8d78", color = "grey", binwidth = 5) +
  theme_bw() + ggtitle("Relative abundance (%)") +
  xlab("Taxa sum") + ylab("Count")

p_raw + p_log + p_rel + plot_layout(ncol = 3)

ggsave("./outputs/Transformation_BetaDiversity.png", 
       plot = p_raw + p_log + p_rel + plot_layout(ncol = 3),
       width = 12, height = 5, dpi = 300)

Conclusions - Natural logarithm concluded as the best - Naturlig logaritme har betydeligt bedre fordeling, de fleste taxa-summer ligger mellem 0-200 med en mere jævn fordeling. Stadig skæv men langt mindre ekstrem end de andre to.

Bray PCoA

# Behold kun originale donorer — ikke resampling
ps_c1 <- subset_samples(ps_c1,
  !Study %in% c("Donor_inoculum_SPF_resampling_1",
                "Donor_inoculum_SPF_resampling_2",
                "Donor_inoculum_Wild_resampling_1",
                "Donor_inoculum_Wild_resampling_2"))

# Log-transformed rarefied data
ps_c1_log    <- transform_sample_counts(ps_c1, function(x){log(1 + x)})
bray_pcoa_c1 <- ordinate(ps_c1_log, method = "PCoA", distance = "bray")
evals_c1     <- bray_pcoa_c1$values$Eigenvalues

bray_p_c1 <- plot_ordination(
  ps_c1_log, bray_pcoa_c1, color = "Group", shape = "Sex"
) +
  stat_ellipse(data = . %>% filter(Group %in% comp1_groups),
               aes(group = Group, fill = Group, color = Group),
               alpha = 0.15, geom = "polygon", linetype = 2) +
  # Weaning samples
  geom_point(data = . %>% filter(Group %in% comp1_groups),
             aes(color = Group, shape = Sex), size = 3, alpha = 0.9) +
  # Donor samples — eksplicit plottet som stjerner
  geom_point(data = . %>% filter(Group %in% comp1_donors),
             aes(color = Group), shape = 18, size = 5, alpha = 1) +
  scale_color_manual(
  values = c(
    "BALB/c_No_SPF_weaning"  = "#8FBF8F",
    "BALB/c_No_Wild_weaning" = "#3E5871",
    "Donor_SPF"              = "#4a7a4a",
    "Donor_Wild"             = "#1a2f40"
  ),
  labels = c(
    "BALB/c_No_SPF_weaning"  = "BALB/c + SPF FMT",
    "BALB/c_No_Wild_weaning" = "BALB/c + Wild FMT",
    "Donor_SPF"              = "Donor SPF",
    "Donor_Wild"             = "Donor Wild"
  )
) +
scale_fill_manual(
  values = c(
    "BALB/c_No_SPF_weaning"  = "#8FBF8F",
    "BALB/c_No_Wild_weaning" = "#3E5871"
  ),
  labels = c(
    "BALB/c_No_SPF_weaning"  = "BALB/c + SPF FMT",
    "BALB/c_No_Wild_weaning" = "BALB/c + Wild FMT"
  )
) + scale_shape_manual(
  values       = c("Female" = 16, "Male" = 17),
  na.translate = FALSE,
  na.value     = NA
) +
guides(
  color = guide_legend(title = "Group"),
  fill  = "none",
  shape = guide_legend(title = "Sex", na.translate = FALSE)
) +
  labs(
    x     = sprintf("PCo1 [%s%%]", round(evals_c1 / sum(evals_c1) * 100, 1)[1]),
    y     = sprintf("PCo2 [%s%%]", round(evals_c1 / sum(evals_c1) * 100, 1)[2]),
    color = "Group", shape = "Sex",
    title = "Bray-Curtis PCoA",
    subtitle = "Beta Diversity at Weaning Following FMT"
  ) +
  geom_vline(xintercept = 0, lty = 2, alpha = 0.5, color = "blue") +
  geom_hline(yintercept = 0, lty = 2, alpha = 0.5, color = "blue") +
  theme_bw() +
  theme(axis.title   = element_text(),
        legend.title = element_text(size = 10),
        legend.text  = element_text(),
        axis.text    = element_text(size = 13), 
        plot.title = element_text(size = 15),
        plot.subtitle = element_text(size = 13))

library(ggtext)

bray_p_c1 <- bray_p_c1 +
  annotate("richtext",
           x = Inf, y = Inf,
           label = "PERMANOVA: FMT<br>*R*² = 0.48, *p* < 0.001",
           hjust = 1.1, vjust = 1.5,
           size = 4, color = "black",
           fill = NA, label.color = NA)

bray_p_c1

# ggsave("./outputs/comp1_bray_pcoa.jpeg", dpi = 300, height = 7, width = 10)

# PERMANOVA (statistics for beta diversity)
# PERMANOVA — test if groups are significantly different
ps_c1_stat_log <- transform_sample_counts(
  subset_samples(ps_rar, sample_data(ps_rar)$Group %in% comp1_groups),
  function(x){log(1 + x)})

df_c1_stat   <- data.frame(sample_data(ps_c1_stat_log))
bray_dist_c1 <- phyloseq::distance(ps_c1_stat_log, method = "bray")

set.seed(123)
vegan::adonis2(bray_dist_c1 ~ Group * Sex, data = df_c1_stat, permutations = 999)

## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## vegan::adonis2(formula = bray_dist_c1 ~ Group * Sex, data = df_c1_stat, permutations = 999)
##          Df SumOfSqs     R2      F Pr(>F)    
## Model     3   1.2598 0.5364 13.499  0.001 ***
## Residual 35   1.0888 0.4636                  
## Total    38   2.3486 1.0000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

set.seed(123)
vegan::adonis2(bray_dist_c1 ~ Group, data = df_c1_stat, permutations = 999)

## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## vegan::adonis2(formula = bray_dist_c1 ~ Group, data = df_c1_stat, permutations = 999)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   1.1430 0.48669 35.081  0.001 ***
## Residual 37   1.2056 0.51331                  
## Total    38   2.3486 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Test Sex alene
set.seed(123)
vegan::adonis2(bray_dist_c1 ~ Sex, data = df_c1_stat, permutations = 999)

## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## vegan::adonis2(formula = bray_dist_c1 ~ Sex, data = df_c1_stat, permutations = 999)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1  0.04972 0.02117 0.8003  0.493
## Residual 37  2.29885 0.97883              
## Total    38  2.34857 1.00000

Relative abundance barplot

Order level

# Unload MicrobiotaProcess hvis loaded
if ("MicrobiotaProcess" %in% loadedNamespaces()) {
  devtools::unload("MicrobiotaProcess")
}

# Subset
ps_rel_c1 <- subset_samples(ps_rel,
  sample_data(ps_rel)$Group %in% c(comp1_groups, comp1_donors) &
  !sample_data(ps_rel)$Study %in% c(
    "Donor_inoculum_SPF_resampling_1",
    "Donor_inoculum_SPF_resampling_2",
    "Donor_inoculum_Wild_resampling_1",
    "Donor_inoculum_Wild_resampling_2"
  ))

# Fjern uassignerede Orders og nul-reads taxa
ps_rel_c1 <- subset_taxa(ps_rel_c1, !is.na(Order) & Order != "__" & Order != "")
ps_rel_c1 <- prune_taxa(taxa_sums(ps_rel_c1) > 0, ps_rel_c1)

# Kør gloomer
glom_c1 <- gloomer(
  ps         = ps_rel_c1,
  taxa_level = "Order",
  NArm       = TRUE)

ntaxa(glom_c1)

## [1] 21

rel_c1_df <- glom_c1 %>%
  psmelt() %>%
  group_by(Sample) %>%
  mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
  ungroup()

Phylum level

# ============================================================
# PHYLUM NIVEAU
# ============================================================

ps_rel_c1_phylum <- ps_rel_c1
ps_rel_c1_phylum <- subset_taxa(ps_rel_c1_phylum, 
                                !is.na(Phylum) & Phylum != "__" & Phylum != "")
ps_rel_c1_phylum <- prune_taxa(taxa_sums(ps_rel_c1_phylum) > 0, ps_rel_c1_phylum)

glom_c1_phylum <- gloomer(ps = ps_rel_c1_phylum, taxa_level = "Phylum", NArm = TRUE)

unique(tax_table(glom_c1_phylum)[, "Phylum"])

## Taxonomy Table:     [12 taxa by 1 taxonomic ranks]:
##                             Phylum                       
## Actinomycetota              "Actinomycetota"             
## Bacillota                   "Bacillota"                  
## Bacteroidota                "Bacteroidota"               
## Campylobacterota            "Campylobacterota"           
## Candidatus Saccharibacteria "Candidatus Saccharibacteria"
## Deferribacterota            "Deferribacterota"           
## Elusimicrobiota             "Elusimicrobiota"            
## Fusobacteriota              "Fusobacteriota"             
## Mycoplasmatota              "Mycoplasmatota"             
## Pseudomonadota              "Pseudomonadota"             
## Thermodesulfobacteriota     "Thermodesulfobacteriota"    
## Verrucomicrobiota           "Verrucomicrobiota"

phylum_col <- c(
  "Bacillota"                   = "#C47F7F",
  "Bacteroidota"                = "#6B9BA4",
  "Actinomycetota"              = "#8FAF7E",
  "Campylobacterota"            = "#C4936A",
  "Verrucomicrobiota"           = "#9B7BAD",
  "Fusobacteriota"              = "#7B9E87",
  "Deferribacterota"            = "#B8977E",
  "Mycoplasmatota"              = "#7B8FA6",
  "Pseudomonadota"              = "#A67B9B",
  "Thermodesulfobacteriota"     = "#6B8E7B",
  "Elusimicrobiota"             = "#C4A882",
  "Candidatus Saccharibacteria" = "#8FA6A6"
)

rel_c1_phylum <- glom_c1_phylum %>%
  psmelt() %>%
  group_by(Sample) %>%
  mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
  ungroup()

top_phylum_c1 <- rel_c1_phylum %>%
  group_by(OTU) %>% summarise(m = mean(RelAbund)) %>%
  arrange(desc(m)) %>% pull(OTU)

rel_c1_phylum_avg <- rel_c1_phylum %>%
  mutate(Order_plot = OTU,
         Group = factor(Group, levels = c(
           "BALB/c_No_SPF_weaning", "Donor_SPF",
           "BALB/c_No_Wild_weaning", "Donor_Wild"
         ))) %>%
  group_by(Group, Order_plot) %>%
  summarise(RelAbund = mean(RelAbund), .groups = "drop") %>%
  mutate(Order_plot = factor(Order_plot, levels = top_phylum_c1))

p_phylum_c1 <- ggplot(rel_c1_phylum_avg, 
                      aes(x = Group, y = RelAbund, fill = Order_plot)) +
  geom_bar(stat = "identity", color = "black", linewidth = 0.3) +
  scale_fill_manual(values = phylum_col,
                  breaks = top_phylum_c1) +
  scale_x_discrete(
    limits = c("BALB/c_No_SPF_weaning", "Donor_SPF",
               "BALB/c_No_Wild_weaning", "Donor_Wild"),
    labels = c(
      "BALB/c_No_SPF_weaning"  = "BALB/c\n+ SPF FMT",
      "Donor_SPF"              = "Donor\nSPF",
      "BALB/c_No_Wild_weaning" = "BALB/c\n+ Wild FMT",
      "Donor_Wild"             = "Donor\nWild"
    )) +
  theme_bw() +
  theme(axis.text.x  = element_text(size = 11),
        axis.title.y = element_text(size = 13),
        legend.text  = element_text(size = 9),
        legend.title = element_text(size = 10),
        plot.title = element_text(size = 15),
        plot.subtitle = element_text(size = 13)) +
  labs(x = NULL, y = "Relative abundance (%)", fill = "Phylum",
       title = "Relative Abundance at Phylum Level", subtitle = "BALB/c Mice at Weaning by FMT source, including donors")

p_phylum_c1

# ggsave("./outputs/comp1_relabund_phylum.jpeg", dpi = 300, height = 7, width = 10)

Family level

# ============================================================
# FAMILY NIVEAU
# ============================================================

ps_rel_c1_family <- ps_rel_c1
ps_rel_c1_family <- subset_taxa(ps_rel_c1_family,
                                !is.na(Family) & Family != "__" & Family != "")
ps_rel_c1_family <- prune_taxa(taxa_sums(ps_rel_c1_family) > 0, ps_rel_c1_family)

glom_c1_family <- gloomer(ps = ps_rel_c1_family, taxa_level = "Family", NArm = TRUE)

unique(tax_table(glom_c1_family)[, "Family"])

## Taxonomy Table:     [35 taxa by 1 taxonomic ranks]:
##                                           Family                                     
## Eggerthellaceae                           "Eggerthellaceae"                          
## Staphylococcaceae                         "Staphylococcaceae"                        
## Enterococcaceae                           "Enterococcaceae"                          
## Lactobacillaceae                          "Lactobacillaceae"                         
## Streptococcaceae                          "Streptococcaceae"                         
## Christensenellaceae                       "Christensenellaceae"                      
## Acutalibacteraceae                        "Acutalibacteraceae"                       
## Butyricicoccaceae                         "Butyricicoccaceae"                        
## Clostridiaceae                            "Clostridiaceae"                           
## Eubacteriaceae                            "Eubacteriaceae"                           
## Eubacteriales Family XIII. Incertae Sedis "Eubacteriales Family XIII. Incertae Sedis"
## Oscillospiraceae                          "Oscillospiraceae"                         
## Peptococcaceae                            "Peptococcaceae"                           
## Pumilibacteraceae                         "Pumilibacteraceae"                        
## Anaerotignaceae                           "Anaerotignaceae"                          
## Lachnospiraceae                           "Lachnospiraceae"                          
## Peptostreptococcaceae                     "Peptostreptococcaceae"                    
## Coprobacillaceae                          "Coprobacillaceae"                         
## Erysipelotrichaceae                       "Erysipelotrichaceae"                      
## Bacteroidaceae                            "Bacteroidaceae"                           
## Muribaculaceae                            "Muribaculaceae"                           
## Odoribacteraceae                          "Odoribacteraceae"                         
## Prevotellaceae                            "Prevotellaceae"                           
## Rikenellaceae                             "Rikenellaceae"                            
## Tannerellaceae                            "Tannerellaceae"                           
## Helicobacteraceae                         "Helicobacteraceae"                        
## Candidatus Nanosyncoccacae                "Candidatus Nanosyncoccacae"               
## Mucispirillaceae                          "Mucispirillaceae"                         
## Elusimicrobiaceae                         "Elusimicrobiaceae"                        
## Fusobacteriaceae                          "Fusobacteriaceae"                         
## Anaeroplasmataceae                        "Anaeroplasmataceae"                       
## Sutterellaceae                            "Sutterellaceae"                           
## Enterobacteriaceae                        "Enterobacteriaceae"                       
## Desulfovibrionaceae                       "Desulfovibrionaceae"                      
## Akkermansiaceae                           "Akkermansiaceae"

rel_c1_family <- glom_c1_family %>%
  psmelt() %>%
  group_by(Sample) %>%
  mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
  ungroup()

top_family_c1 <- rel_c1_family %>%
  group_by(OTU) %>% summarise(m = mean(RelAbund)) %>%
  arrange(desc(m)) %>% pull(OTU)

top_family_c1_filt <- rel_c1_family %>%
  group_by(OTU, Group) %>%
  summarise(m = mean(RelAbund), .groups = "drop") %>%
  group_by(OTU) %>%
  summarise(max_m = max(m)) %>%
  filter(max_m >= 1) %>%
  arrange(desc(max_m)) %>%
  pull(OTU)

rel_c1_family_avg <- rel_c1_family %>%
  mutate(Order_plot = ifelse(OTU %in% top_family_c1_filt, OTU, "Other"),
         Group = factor(Group, levels = c(
           "BALB/c_No_SPF_weaning", "Donor_SPF",
           "BALB/c_No_Wild_weaning", "Donor_Wild"
         ))) %>%
  group_by(Sample, Group, Order_plot) %>%
  summarise(RelAbund = sum(RelAbund), .groups = "drop") %>%
  group_by(Group, Order_plot) %>%
  summarise(RelAbund = mean(RelAbund), .groups = "drop") %>%
  mutate(Order_plot = factor(Order_plot,
                             levels = c(top_family_c1_filt, "Other")))

family_col <- c(
  "Lachnospiraceae"   = "#9B7BAD",   # lilla
  "Muribaculaceae"    = "#6B9BA4",   # blågrøn
  "Bacteroidaceae"    = "#C47F7F",   # støvet rød
  "Lactobacillaceae"  = "#C4936A",   # varm brun
  "Rikenellaceae"     = "#3D7A8A",   # mørk petrol
  "Prevotellaceae"    = "#E9C46A",   # gul
  "Oscillospiraceae"  = "#6B4226",   # chokolade
  "Odoribacteraceae"  = "#A23B72",   # pink-lilla
  "Helicobacteraceae" = "#80B918",   # lime grøn
  "Other"             = "grey80"
)

p_family_c1 <- ggplot(rel_c1_family_avg,
                      aes(x = Group, y = RelAbund, fill = Order_plot)) +
  geom_bar(stat = "identity", color = "black", linewidth = 0.3) +
  scale_fill_manual(values = family_col,
                  breaks = c(top_family_c1_filt, "Other")) +
  scale_x_discrete(
    limits = c("BALB/c_No_SPF_weaning", "Donor_SPF",
               "BALB/c_No_Wild_weaning", "Donor_Wild"),
    labels = c(
      "BALB/c_No_SPF_weaning"  = "BALB/c\n+ SPF FMT",
      "Donor_SPF"              = "Donor\nSPF",
      "BALB/c_No_Wild_weaning" = "BALB/c\n+ Wild FMT",
      "Donor_Wild"             = "Donor\nWild"
    )) +
  theme_bw() +
  theme(axis.text.x  = element_text(size = 11),
        axis.title.y = element_text(size = 13),
        legend.text  = element_text(size = 9),
        legend.title = element_text(size = 10),
        plot.title = element_text(size = 15),
        plot.subtitle = element_text(size = 13)) +
  labs(x = NULL, y = "Relative abundance (%)", fill = "Family",
       title = "Relative Abundance at Family Level", subtitle = "BALB/c Mice at Weaning by FMT source, including donors")

p_family_c1

# ggsave("./outputs/comp1_relabund_family.jpeg", dpi = 300, height = 7, width = 10)

# Ranger alle 35 familier fra flest til mindst
family_ranked_all <- rel_c1_family %>%
  group_by(OTU) %>%
  summarise(m = mean(RelAbund)) %>%
  arrange(desc(m)) %>%
  pull(OTU)

# Farvepalette til alle 35 familier
family_col_all <- c(
  # Samme farver som family_col
  "Lachnospiraceae"                          = "#9B7BAD",
  "Muribaculaceae"                           = "#6B9BA4",
  "Bacteroidaceae"                           = "#C47F7F",
  "Lactobacillaceae"                         = "#C4936A",
  "Rikenellaceae"                            = "#3D7A8A",
  "Prevotellaceae"                           = "#E9C46A",
  "Oscillospiraceae"                         = "#6B4226",
  "Odoribacteraceae"                         = "#A23B72",
  "Helicobacteraceae"                        = "#80B918",
  "Eggerthellaceae"                          = "#2A9D8F",
  "Staphylococcaceae"                        = "#E84855",
  "Enterococcaceae"                          = "#3D5A80",
  "Streptococcaceae"                         = "#F18F01",
  "Christensenellaceae"                      = "#264653",
  "Acutalibacteraceae"                       = "#E76F51",
  "Butyricicoccaceae"                        = "#6D6875",
  "Clostridiaceae"                           = "#1D3557",
  "Eubacteriaceae"                           = "#F4A261",
  "Eubacteriales Family XIII. Incertae Sedis"= "#457B9D",
  "Peptococcaceae"                           = "#B5838D",
  "Pumilibacteraceae"                        = "#A8DADC",
  "Anaerotignaceae"                          = "#C77B3A",
  "Peptostreptococcaceae"                    = "#5C8A4A",
  "Coprobacillaceae"                         = "#7B4F8A",
  "Erysipelotrichaceae"                      = "#D4A853",
  "Tannerellaceae"                           = "#4A7B8A",
  "Candidatus Nanosyncoccacae"               = "#8A4A5C",
  "Mucispirillaceae"                         = "#5C7B3A",
  "Elusimicrobiaceae"                        = "#8A7B4A",
  "Fusobacteriaceae"                         = "#3A5C7B",
  "Anaeroplasmataceae"                       = "#7B8A4A",
  "Sutterellaceae"                           = "#C45C8A",
  "Enterobacteriaceae"                       = "#4A8A7B",
  "Desulfovibrionaceae"                      = "#8A5C3A",
  "Akkermansiaceae"                          = "#5C3A8A"
)

# Plot data
rel_c1_family_all <- rel_c1_family %>%
  mutate(Order_plot = OTU,
         Group = factor(Group, levels = c(
           "BALB/c_No_SPF_weaning", "Donor_SPF",
           "BALB/c_No_Wild_weaning", "Donor_Wild"
         ))) %>%
  group_by(Sample, Group, Order_plot) %>%
  summarise(RelAbund = sum(RelAbund), .groups = "drop") %>%
  group_by(Group, Order_plot) %>%
  summarise(RelAbund = mean(RelAbund), .groups = "drop") %>%
  filter(RelAbund >= 0.01) %>%
  mutate(Order_plot = factor(Order_plot, levels = family_ranked_all))

plot_c1_family_all <- ggplot(rel_c1_family_all, aes(x = Group, y = RelAbund, fill = Order_plot)) +
  geom_bar(stat = "identity", color = "black", linewidth = 0.3) +
  scale_fill_manual(values = family_col_all,
                    breaks = family_ranked_all) +
  scale_x_discrete(
    limits = c("BALB/c_No_SPF_weaning", "Donor_SPF",
               "BALB/c_No_Wild_weaning", "Donor_Wild"),
    labels = c(
      "BALB/c_No_SPF_weaning"  = "BALB/c\n+ SPF FMT",
      "Donor_SPF"              = "Donor\nSPF",
      "BALB/c_No_Wild_weaning" = "BALB/c\n+ Wild FMT",
      "Donor_Wild"             = "Donor\nWild"
    )) +
  theme_bw() +
  theme(axis.text.x  = element_text(size = 11),
        axis.title.y = element_text(size = 13),
        legend.text  = element_text(size = 9),
        legend.title = element_text(size = 10),
        plot.title = element_text(size = 15),
        plot.subtitle = element_text(size = 13)) +
  labs(x = NULL, y = "Relative abundance (%)", fill = "Family",
       title = "Relative Abundance at Family Level", subtitle = "BALB/c Mice at Weaning by FMT source, including donors")

plot_c1_family_all

# ggsave("./outputs/comp1_relabund_family_all.jpeg", dpi = 300, height = 7, width = 14)

Genus level

# ============================================================
# GENUS NIVEAU
# ============================================================

ps_rel_c1_genus <- ps_rel_c1
ps_rel_c1_genus <- subset_taxa(ps_rel_c1_genus,
                               !is.na(Genus) & Genus != "__" & Genus != "")
ps_rel_c1_genus <- prune_taxa(taxa_sums(ps_rel_c1_genus) > 0, ps_rel_c1_genus)

glom_c1_genus <- gloomer(ps = ps_rel_c1_genus, taxa_level = "Genus", NArm = TRUE)

unique(tax_table(glom_c1_genus)[, "Genus"])

## Taxonomy Table:     [70 taxa by 1 taxonomic ranks]:
##                       Genus                  
## Adlercreutzia         "Adlercreutzia"        
## Staphylococcus        "Staphylococcus"       
## Enterococcus          "Enterococcus"         
## Lactobacillus         "Lactobacillus"        
## Ligilactobacillus     "Ligilactobacillus"    
## Limosilactobacillus   "Limosilactobacillus"  
## Lactococcus           "Lactococcus"          
## Streptococcus         "Streptococcus"        
## Candidatus Borkfalkia "Candidatus Borkfalkia"
## Christensenella       "Christensenella"      
## Acutalibacter         "Acutalibacter"        
## Clostridium           "Clostridium"          
## Eubacterium           "Eubacterium"          
## Zhenpiania            "Zhenpiania"           
## Anaerotruncus         "Anaerotruncus"        
## Angelakisella         "Angelakisella"        
## Hominilimicola        "Hominilimicola"       
## Neglectibacter        "Neglectibacter"       
## Vermiculatibacterium  "Vermiculatibacterium" 
## Dehalobacterium       "Dehalobacterium"      
## Pumilibacter          "Pumilibacter"         
## Anaerotignum          "Anaerotignum"         
## Acetatifactor         "Acetatifactor"        
## Anaerostipes          "Anaerostipes"         
## Blautia               "Blautia"              
## Enterocloster         "Enterocloster"        
## Fusimonas             "Fusimonas"            
## Jutongia              "Jutongia"             
## Lachnospira           "Lachnospira"          
## Mediterraneibacter    "Mediterraneibacter"   
## Parablautia           "Parablautia"          
## Petralouisia          "Petralouisia"         
## Roseburia             "Roseburia"            
## Schaedlerella         "Schaedlerella"        
## Sporofaciens          "Sporofaciens"         
## Tyzzerella            "Tyzzerella"           
## Clostridioides        "Clostridioides"       
## Coprobacillus         "Coprobacillus"        
## Thomasclavelia        "Thomasclavelia"       
## Amedibacillus         "Amedibacillus"        
## Faecalibaculum        "Faecalibaculum"       
## Stoquefichus          "Stoquefichus"         
## Bacteroides           "Bacteroides"          
## Phocaeicola           "Phocaeicola"          
## Duncaniella           "Duncaniella"          
## Heminiphilus          "Heminiphilus"         
## Muribaculum           "Muribaculum"          
## Paramuribaculum       "Paramuribaculum"      
## Sangeribacter         "Sangeribacter"        
## Odoribacter           "Odoribacter"          
## Palleniella           "Palleniella"          
## Paraprevotella        "Paraprevotella"       
## Segatella             "Segatella"            
## Xylanibacter          "Xylanibacter"         
## Alistipes             "Alistipes"            
## Rikenella             "Rikenella"            
## Parabacteroides       "Parabacteroides"      
## Helicobacter          "Helicobacter"         
## Nanosyncoccus         "Nanosyncoccus"        
## Mucispirillum         "Mucispirillum"        
## Elusimicrobium        "Elusimicrobium"       
## Fusobacterium         "Fusobacterium"        
## Anaeroplasma          "Anaeroplasma"         
## Parasutterella        "Parasutterella"       
## Escherichia           "Escherichia"          
## Klebsiella            "Klebsiella"           
## Bilophila             "Bilophila"            
## Desulfovibrio         "Desulfovibrio"        
## Taurinivorans         "Taurinivorans"        
## Akkermansia           "Akkermansia"

rel_c1_genus <- glom_c1_genus %>%
  psmelt() %>%
  group_by(Sample) %>%
  mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
  ungroup()

top_genus_c1_filt <- rel_c1_genus %>%
  group_by(OTU, Group) %>%
  summarise(m = mean(RelAbund), .groups = "drop") %>%
  group_by(OTU) %>%
  summarise(max_m = max(m)) %>%
  filter(max_m >= 1) %>%
  arrange(desc(max_m)) %>%
  pull(OTU)

genus_col <- c(
  "Bacteroides"         = "#6B9BA4",
  "Alistipes"           = "#9B7BAD",
  "Lactobacillus"       = "#C4936A",
  "Schaedlerella"       = "#8FAF7E",
  "Limosilactobacillus" = "#C47F7F",
  "Duncaniella"         = "#3D7A8A",
  "Xylanibacter"        = "#E9C46A",
  "Muribaculum"         = "#6B4226",
  "Ligilactobacillus"   = "#A23B72",
  "Odoribacter"         = "#80B918",
  "Helicobacter"        = "#2A9D8F",
  "Fusimonas"           = "#E84855",
  "Paraprevotella"      = "#3D5A80",
  "Anaerotignum"        = "#F18F01",
  "Paramuribaculum"     = "#6D6875",
  "Streptococcus"       = "#E76F51",
  "Other"               = "grey80"
)

rel_c1_genus_avg <- rel_c1_genus %>%
  mutate(Order_plot = ifelse(OTU %in% top_genus_c1_filt, OTU, "Other"),
         Group = factor(Group, levels = c(
           "BALB/c_No_SPF_weaning", "Donor_SPF",
           "BALB/c_No_Wild_weaning", "Donor_Wild"
         ))) %>%
  group_by(Sample, Group, Order_plot) %>%
  summarise(RelAbund = sum(RelAbund), .groups = "drop") %>%
  group_by(Group, Order_plot) %>%
  summarise(RelAbund = mean(RelAbund), .groups = "drop") %>%
  mutate(Order_plot = factor(Order_plot,
                             levels = c(top_genus_c1_filt, "Other")))

p_genus_c1 <- ggplot(rel_c1_genus_avg,
                     aes(x = Group, y = RelAbund, fill = Order_plot)) +
  geom_bar(stat = "identity", color = "black", linewidth = 0.3) +
  scale_fill_manual(values = genus_col,
                  breaks = c(top_genus_c1_filt, "Other")) +
  scale_x_discrete(
    limits = c("BALB/c_No_SPF_weaning", "Donor_SPF",
               "BALB/c_No_Wild_weaning", "Donor_Wild"),
    labels = c(
      "BALB/c_No_SPF_weaning"  = "BALB/c\n+ SPF FMT",
      "Donor_SPF"              = "Donor\nSPF",
      "BALB/c_No_Wild_weaning" = "BALB/c\n+ Wild FMT",
      "Donor_Wild"             = "Donor\nWild"
    )) +
  theme_bw() +
  theme(axis.text.x  = element_text(size = 11),
        axis.title.y = element_text(size = 13),
        legend.text  = element_text(size = 9),
        legend.title = element_text(size = 10),
        plot.title = element_text(size = 15),
        plot.subtitle = element_text(size = 13)) +
  labs(x = NULL, y = "Relative abundance (%)", fill = "Genus",
       title = "Relative Abundance at Genus Level", subtitle = "BALB/c Mice at Weaning by FMT source, including donors")

p_genus_c1

# ggsave("./outputs/comp1_relabund_genus.jpeg", dpi = 300, height = 7, width = 12)

top_genus_c1_001 <- rel_c1_genus %>%
  group_by(OTU, Group) %>%
  summarise(m = mean(RelAbund), .groups = "drop") %>%
  group_by(OTU) %>%
  summarise(max_m = max(m)) %>%
  filter(max_m >= 0.01) %>%
  arrange(desc(max_m)) %>%
  pull(OTU)

# Farvepalette til alle 58
genus_col_001 <- c(
  # Behold genus_col farver til top 16
  "Bacteroides"             = "#6B9BA4",
  "Alistipes"               = "#9B7BAD",
  "Lactobacillus"           = "#C4936A",
  "Schaedlerella"           = "#8FAF7E",
  "Limosilactobacillus"     = "#C47F7F",
  "Duncaniella"             = "#3D7A8A",
  "Xylanibacter"            = "#E9C46A",
  "Muribaculum"             = "#6B4226",
  "Ligilactobacillus"       = "#A23B72",
  "Odoribacter"             = "#80B918",
  "Helicobacter"            = "#2A9D8F",
  "Fusimonas"               = "#E84855",
  "Paraprevotella"          = "#3D5A80",
  "Anaerotignum"            = "#F18F01",
  "Paramuribaculum"         = "#6D6875",
  "Streptococcus"           = "#E76F51",
  "Desulfovibrio"           = "#A8DADC",
  "Acetatifactor"           = "#457B9D",
  "Eubacterium"             = "#B5838D",
  "Akkermansia"             = "#C77B3A",
  "Candidatus Borkfalkia"   = "#5C8A4A",
  "Heminiphilus"            = "#7B4F8A",
  "Mucispirillum"           = "#D4A853",
  "Faecalibaculum"          = "#4A7B8A",
  "Acutalibacter"           = "#8A4A5C",
  "Bilophila"               = "#5C7B3A",
  "Pumilibacter"            = "#8A7B4A",
  "Clostridium"             = "#3A5C7B",
  "Sporofaciens"            = "#7B8A4A",
  "Adlercreutzia"           = "#C45C8A",
  "Blautia"                 = "#4A8A7B",
  "Sangeribacter"           = "#8A5C3A",
  "Anaeroplasma"            = "#5C3A8A",
  "Nanosyncoccus"           = "#2E86AB",
  "Taurinivorans"           = "#C73E1D",
  "Mediterraneibacter"      = "#3BB273",
  "Angelakisella"           = "#7B2D8B",
  "Anaerotruncus"           = "#6B8E23",
  "Dehalobacterium"         = "#C49A6C",
  "Zhenpiania"              = "#8B6914",
  "Amedibacillus"           = "#2C4770",
  "Staphylococcus"          = "#C0392B",
  "Parablautia"             = "#16A085",
  "Vermiculatibacterium"    = "#8E44AD",
  "Parasutterella"          = "#E67E22",
  "Phocaeicola"             = "#1A5276",
  "Elusimicrobium"          = "#7D6608",
  "Neglectibacter"          = "#4A235A",
  "Jutongia"                = "#5CB87A",
  "Stoquefichus"            = "#C4845A",
  "Prevotellamassilia"      = "#5A8AAA",
  "Thomasclavelia"          = "#A8A85A",
  "Tyzzerella"              = "#5AA88A",
  "Parabacteroides"         = "#A85A8A",
  "Enterocloster"           = "#8AA85A",
  "Roseburia"               = "#5A6AA8",
  "Enterococcus"            = "#A8805A",
  "Petralouisia"            = "#6AA85A"
)

# Plot data
rel_c1_genus_001 <- rel_c1_genus %>%
  mutate(Order_plot = OTU,   # ingen "Other" — brug OTU direkte
         Group = factor(Group, levels = c(
           "BALB/c_No_SPF_weaning", "Donor_SPF",
           "BALB/c_No_Wild_weaning", "Donor_Wild"
         ))) %>%
  group_by(Sample, Group, Order_plot) %>%
  summarise(RelAbund = sum(RelAbund), .groups = "drop") %>%
  group_by(Group, Order_plot) %>%
  summarise(RelAbund = mean(RelAbund), .groups = "drop") %>%
  filter(RelAbund >= 0.01) %>%
  mutate(Order_plot = factor(Order_plot,
                             levels = top_genus_c1_001))   # ingen "Other" i levels

p_genus_c1_all <- ggplot(rel_c1_genus_001, aes(x = Group, y = RelAbund, fill = Order_plot)) +
  geom_bar(stat = "identity", color = "black", linewidth = 0.3) +
  scale_fill_manual(values = genus_col_001, breaks = top_genus_c1_001) +
  scale_x_discrete(
    limits = c("BALB/c_No_SPF_weaning", "Donor_SPF",
               "BALB/c_No_Wild_weaning", "Donor_Wild"),
    labels = c(
      "BALB/c_No_SPF_weaning"  = "BALB/c\n+ SPF FMT",
      "Donor_SPF"              = "Donor\nSPF",
      "BALB/c_No_Wild_weaning" = "BALB/c\n+ Wild FMT",
      "Donor_Wild"             = "Donor\nWild"
    )) +
  theme_bw() +
  theme(axis.text.x  = element_text(size = 11),
        axis.title.y = element_text(size = 13),
        legend.text  = element_text(size = 9),
        legend.title = element_text(size = 10),
        plot.title = element_text(size = 15),
        plot.subtitle = element_text(size = 13)) +
  labs(x = NULL, y = "Relative abundance (%)", fill = "Genus",
       title = "Relative Abundance at Genus Level", subtitle = "BALB/c Mice at Weaning by FMT source, including donors")

p_genus_c1_all

# "Genera with mean relative abundance ≥ 0.01% shown" REMEBER!!!!
# ggsave("./outputs/comp1_relabund_genus_all.jpeg", dpi = 300, height = 7, width = 16)

rel_c1_genus_avg %>%
  arrange(Group, desc(RelAbund)) %>%
  print(n = Inf)

## # A tibble: 92 × 3
##    Group                  Order_plot          RelAbund
##    <fct>                  <fct>                  <dbl>
##  1 BALB/c_No_SPF_weaning  Alistipes            17.2   
##  2 BALB/c_No_SPF_weaning  Lactobacillus        16.8   
##  3 BALB/c_No_SPF_weaning  Limosilactobacillus  12.7   
##  4 BALB/c_No_SPF_weaning  Schaedlerella         9.38  
##  5 BALB/c_No_SPF_weaning  Duncaniella           8.30  
##  6 BALB/c_No_SPF_weaning  Other                 5.95  
##  7 BALB/c_No_SPF_weaning  Bacteroides           5.60  
##  8 BALB/c_No_SPF_weaning  Odoribacter           5.19  
##  9 BALB/c_No_SPF_weaning  Anaerotignum          3.66  
## 10 BALB/c_No_SPF_weaning  Ligilactobacillus     3.25  
## 11 BALB/c_No_SPF_weaning  Paramuribaculum       2.80  
## 12 BALB/c_No_SPF_weaning  Fusimonas             2.60  
## 13 BALB/c_No_SPF_weaning  Muribaculum           1.89  
## 14 BALB/c_No_SPF_weaning  Xylanibacter          1.47  
## 15 BALB/c_No_SPF_weaning  Mucispirillum         1.15  
## 16 BALB/c_No_SPF_weaning  Desulfovibrio         1.03  
## 17 BALB/c_No_SPF_weaning  Acetatifactor         0.834 
## 18 BALB/c_No_SPF_weaning  Heminiphilus          0.104 
## 19 BALB/c_No_SPF_weaning  Phocaeicola           0.0732
## 20 BALB/c_No_SPF_weaning  Streptococcus         0.0429
## 21 BALB/c_No_SPF_weaning  Akkermansia           0.0117
## 22 BALB/c_No_SPF_weaning  Helicobacter          0     
## 23 BALB/c_No_SPF_weaning  Paraprevotella        0     
## 24 Donor_SPF              Muribaculum          20.3   
## 25 Donor_SPF              Duncaniella          15.5   
## 26 Donor_SPF              Bacteroides          11.4   
## 27 Donor_SPF              Lactobacillus         9.96  
## 28 Donor_SPF              Fusimonas             8.01  
## 29 Donor_SPF              Xylanibacter          5.74  
## 30 Donor_SPF              Paramuribaculum       5.63  
## 31 Donor_SPF              Other                 5.24  
## 32 Donor_SPF              Limosilactobacillus   4.45  
## 33 Donor_SPF              Alistipes             3.79  
## 34 Donor_SPF              Heminiphilus          2.46  
## 35 Donor_SPF              Schaedlerella         1.25  
## 36 Donor_SPF              Acetatifactor         1.02  
## 37 Donor_SPF              Phocaeicola           1.02  
## 38 Donor_SPF              Akkermansia           0.977 
## 39 Donor_SPF              Ligilactobacillus     0.899 
## 40 Donor_SPF              Odoribacter           0.899 
## 41 Donor_SPF              Desulfovibrio         0.742 
## 42 Donor_SPF              Anaerotignum          0.313 
## 43 Donor_SPF              Mucispirillum         0.234 
## 44 Donor_SPF              Streptococcus         0.156 
## 45 Donor_SPF              Helicobacter          0.0391
## 46 Donor_SPF              Paraprevotella        0.0391
## 47 BALB/c_No_Wild_weaning Bacteroides          44.3   
## 48 BALB/c_No_Wild_weaning Alistipes             6.92  
## 49 BALB/c_No_Wild_weaning Xylanibacter          6.89  
## 50 BALB/c_No_Wild_weaning Schaedlerella         6.65  
## 51 BALB/c_No_Wild_weaning Other                 5.57  
## 52 BALB/c_No_Wild_weaning Helicobacter          5.29  
## 53 BALB/c_No_Wild_weaning Lactobacillus         4.58  
## 54 BALB/c_No_Wild_weaning Duncaniella           3.91  
## 55 BALB/c_No_Wild_weaning Paraprevotella        3.74  
## 56 BALB/c_No_Wild_weaning Muribaculum           2.75  
## 57 BALB/c_No_Wild_weaning Streptococcus         2.09  
## 58 BALB/c_No_Wild_weaning Fusimonas             1.36  
## 59 BALB/c_No_Wild_weaning Akkermansia           1.27  
## 60 BALB/c_No_Wild_weaning Ligilactobacillus     1.05  
## 61 BALB/c_No_Wild_weaning Heminiphilus          1.03  
## 62 BALB/c_No_Wild_weaning Limosilactobacillus   0.957 
## 63 BALB/c_No_Wild_weaning Desulfovibrio         0.681 
## 64 BALB/c_No_Wild_weaning Acetatifactor         0.661 
## 65 BALB/c_No_Wild_weaning Paramuribaculum       0.257 
## 66 BALB/c_No_Wild_weaning Odoribacter           0.0490
## 67 BALB/c_No_Wild_weaning Mucispirillum         0.0175
## 68 BALB/c_No_Wild_weaning Anaerotignum          0.0127
## 69 BALB/c_No_Wild_weaning Phocaeicola           0     
## 70 Donor_Wild             Ligilactobacillus    37.3   
## 71 Donor_Wild             Lactobacillus        21.1   
## 72 Donor_Wild             Bacteroides           9.55  
## 73 Donor_Wild             Limosilactobacillus   8.73  
## 74 Donor_Wild             Duncaniella           8.23  
## 75 Donor_Wild             Muribaculum           4.61  
## 76 Donor_Wild             Streptococcus         4.13  
## 77 Donor_Wild             Other                 3.66  
## 78 Donor_Wild             Xylanibacter          0.962 
## 79 Donor_Wild             Desulfovibrio         0.629 
## 80 Donor_Wild             Schaedlerella         0.315 
## 81 Donor_Wild             Paraprevotella        0.278 
## 82 Donor_Wild             Alistipes             0.222 
## 83 Donor_Wild             Heminiphilus          0.0925
## 84 Donor_Wild             Paramuribaculum       0.0925
## 85 Donor_Wild             Acetatifactor         0.0370
## 86 Donor_Wild             Fusimonas             0.0370
## 87 Donor_Wild             Helicobacter          0.0370
## 88 Donor_Wild             Akkermansia           0     
## 89 Donor_Wild             Anaerotignum          0     
## 90 Donor_Wild             Mucispirillum         0     
## 91 Donor_Wild             Odoribacter           0     
## 92 Donor_Wild             Phocaeicola           0

Helicobacter

# Tjek Helicobacter relativ abundans per gruppe
rel_c1_genus %>%
  filter(OTU == "Helicobacter") %>%
  group_by(Group) %>%
  summarise(
    n              = n(),
    mean_RelAbund  = round(mean(RelAbund), 3),
    min_RelAbund   = round(min(RelAbund),  3),
    max_RelAbund   = round(max(RelAbund),  3),
    n_present      = sum(RelAbund > 0)
  ) %>%
  arrange(Group)

## # A tibble: 4 × 6
##   Group                      n mean_RelAbund min_RelAbund max_RelAbund n_present
##   <fct>                  <int>         <dbl>        <dbl>        <dbl>     <int>
## 1 BALB/c_No_SPF_weaning     19         0            0            0             0
## 2 BALB/c_No_Wild_weaning    20         5.30         0.245       20.1          20
## 3 Donor_SPF                  1         0.039        0.039        0.039         1
## 4 Donor_Wild                 1         0.037        0.037        0.037         1

# BALB/c + SPF FMT (n=0) ikke i nogen
# BALB/c + Wild FMT (n=20): i alle 20 prøver
# tilstede i begge donor

# Tjek hvilke Helicobacter species der er i Wild FMT gruppen
ps_rel %>%
  subset_samples(Group == "BALB/c_No_Wild_weaning") %>%
  subset_taxa(Genus == "Helicobacter") %>%
  psmelt() %>%
  group_by(OTU, Species) %>%
  summarise(mean_RelAbund = round(mean(Abundance / 
    ave(Abundance, Sample, FUN = sum) * 100), 3),
    n_present = sum(Abundance > 0),
    .groups = "drop") %>%
  filter(mean_RelAbund > 0) %>%
  arrange(desc(mean_RelAbund))

## # A tibble: 2 × 4
##   OTU                                            Species mean_RelAbund n_present
##   <chr>                                          <chr>           <dbl>     <int>
## 1 K__Bacteria;P__Campylobacterota;C__Epsilonpro… Helico…           100        20
## 2 K__Bacteria;P__Campylobacterota;C__Epsilonpro… __                100        20

ps_rel %>%
  subset_samples(Study == "Donor_inoculum_SPF(WP1Amix)") %>%
  subset_taxa(Genus == "Helicobacter") %>%
  psmelt() %>%
  filter(Abundance > 0) %>%
  select(OTU, Species, Abundance)

##                                                                                                                                                          OTU
## 1 K__Bacteria;P__Campylobacterota;C__Epsilonproteobacteria;O__Campylobacterales;F__Helicobacteraceae;G__Helicobacter;S__Helicobacter ganmani strain CMRI H02
##                                Species  Abundance
## 1 Helicobacter ganmani strain CMRI H02 0.01264063

ps_rel %>%
  subset_samples(Study == "Donor_inoculum_Wild(Zahir)") %>%
  subset_taxa(Genus == "Helicobacter") %>%
  psmelt() %>%
  filter(Abundance > 0) %>%
  select(OTU, Species, Abundance)

##                                                                                                                                                          OTU
## 1                                      K__Bacteria;P__Campylobacterota;C__Epsilonproteobacteria;O__Campylobacterales;F__Helicobacteraceae;G__Helicobacter;__
## 2 K__Bacteria;P__Campylobacterota;C__Epsilonproteobacteria;O__Campylobacterales;F__Helicobacteraceae;G__Helicobacter;S__Helicobacter ganmani strain CMRI H02
##                                Species  Abundance
## 1                                   __ 0.01255966
## 2 Helicobacter ganmani strain CMRI H02 0.01255966

# Use non-rarefied (pst_original) for DESeq2
ps_c1_deseq <- subset_samples(pst_original,
  sample_data(pst_original)$Group %in% c("BALB/c_No_SPF_weaning",
                                          "BALB/c_No_Wild_weaning"))

# Remove zero-count taxa
ps_c1_deseq <- prune_taxa(taxa_sums(ps_c1_deseq) > 0, ps_c1_deseq)

# Prune unassigned ranks before glomming — prevents duplicate taxa_names
ps_c1_deseq <- subset_taxa(ps_c1_deseq,
  !is.na(Genus)  & Genus  != "" & Genus  != "__" &
  !is.na(Family) & Family != "" & Family != "__" &
  !is.na(Order)  & Order  != "" & Order  != "__")

ps_c1_deseq <- prune_taxa(taxa_sums(ps_c1_deseq) > 0, ps_c1_deseq)

# Agglomerate to genus level
ps_c1_deseq_genus <- tax_glom(ps_c1_deseq, taxrank = "Genus", NArm = TRUE)

# Make genus names unique before assigning as taxa names
genus_names <- tax_table(ps_c1_deseq_genus)[, "Genus"]
taxa_names(ps_c1_deseq_genus) <- make.unique(as.character(genus_names))

Differential abundance

library(DESeq2)

library(DESeq2)
# ============================================================
# DIFFERENTIAL ABUNDANCE — DESeq2
# Comparison 1: Wild FMT vs SPF FMT at weaning
# ============================================================

# Forbered phyloseq objekt
m_ps <- ps_c1_deseq_genus
sample_data(m_ps)$Group <- factor(sample_data(m_ps)$Group,
                                   levels = c("BALB/c_No_SPF_weaning",
                                              "BALB/c_No_Wild_weaning"))

set.seed(123)

# Lav DESeq2 objekt og kør
m_dds <- phyloseq_to_deseq2(m_ps, ~ Group)
m_dds <- DESeq(m_dds)

# Tjek levels og resultatnavne
levels(colData(m_dds)$Group)
resultsNames(m_dds)

# Hent resultater
Wild_vs_SPF <- results(m_dds, contrast = c("Group",
                                           "BALB/c_No_Wild_weaning",
                                           "BALB/c_No_SPF_weaning"))

# Konverter til data frame
df_Wild_vs_SPF <- as.data.frame(Wild_vs_SPF)
df_Wild_vs_SPF$taxon <- rownames(df_Wild_vs_SPF)

# Signifikante taxa
sig_Wild_vs_SPF <- subset(df_Wild_vs_SPF, !is.na(padj) & padj < 0.05)
cat("Signifikante taxa:", nrow(sig_Wild_vs_SPF), "\n")
sig_Wild_vs_SPF %>% arrange(log2FoldChange)

# Tilføj diffexpressed kolonne
df_Wild_vs_SPF <- df_Wild_vs_SPF %>%
  mutate(diffexpressed = case_when(
    log2FoldChange > 0 & padj < 0.05 ~ "Higher in Wild FMT",
    log2FoldChange < 0 & padj < 0.05 ~ "Higher in SPF FMT",
    TRUE                             ~ "ns"
  ),
  diffexpressed = factor(diffexpressed,
                         levels = c("Higher in Wild FMT",
                                    "Higher in SPF FMT",
                                    "ns")))

# ============================================================
# VOLCANO PLOT
# ============================================================
library(ggtext)
library(ggrepel)

deseq_plot_labelled <- ggplot(df_Wild_vs_SPF,
                              aes(x = log2FoldChange,
                                  y = -log10(padj),
                                  col = diffexpressed)) +
  geom_point(size = 2.5, alpha = 0.8) +
  geom_vline(xintercept = 0,            lty = 2, color = "grey50") +
  geom_hline(yintercept = -log10(0.05), lty = 2, color = "grey50") +
  geom_label_repel(
    data     = df_Wild_vs_SPF %>% filter(padj < 0.05),
    aes(label = taxon),
    size         = 3,
    max.overlaps = 20,
    fontface     = "italic",
    label.r      = 0.10
  ) +
  scale_color_manual(values = c(
    "Higher in Wild FMT" = "#3E5871",
    "Higher in SPF FMT"  = "#8FBF8F",
    "ns"                 = "grey70"
  )) +
  theme_bw() +
  theme(legend.title = element_text(),
        axis.title   = element_text(),
        plot.title   = element_markdown(size = 15),
        plot.subtitle = element_markdown(size = 13)) +
  labs(title = "Differential Abundance at Genus Level",
       subtitle = "BALB/c Mice at Weaning: SPF vs Wild FMT",
       y     = "-log10(p-value)",
       x     = "log2 Fold Change",
       col   = NULL)

deseq_plot_labelled
# ggsave("./outputs/comp1_volcano_labelled.jpeg", plot = deseq_plot_labelled, width = 10, height = 6, dpi = 300)

# ============================================================
# LOG2 FOLD CHANGE PLOT
# ============================================================
p_lfc_c1 <- sig_Wild_vs_SPF %>%
  arrange(log2FoldChange) %>%
  mutate(taxon     = factor(taxon, levels = taxon),
         direction = ifelse(log2FoldChange > 0,
                            "Higher in Wild FMT",
                            "Higher in SPF FMT")) %>%
  ggplot(aes(x = log2FoldChange, y = taxon, fill = direction)) +
  geom_col(color = "white", linewidth = 0.3) +
  geom_errorbarh(aes(xmin = log2FoldChange - lfcSE,
                     xmax = log2FoldChange + lfcSE),
                 height = 0.3, color = "grey30") +
  geom_vline(xintercept = 0, lty = 2, color = "grey50") +
  scale_fill_manual(values = c(
    "Higher in Wild FMT" = "#3E5871",
    "Higher in SPF FMT"  = "#8FBF8F"
  )) +
  theme_bw() +
  theme(legend.title  = element_text(size = 11),
        legend.text   = element_text(size = 10),
        axis.title    = element_text(size = 12),
        axis.text.y   = element_text(face = "italic", size = 10),
        axis.text.x   = element_text(size = 10),
        plot.title    = element_markdown(size = 15),
        plot.subtitle = element_text(size = 13)) +
  labs(x        = "log2 Fold Change",
       y        = NULL,
       fill     = NULL,
       title    = "Differential Abundance at Genus Level",
       subtitle = "BALB/c Mice at Weaning: SPF vs Wild FMT",
       caption  = "Benjamini-Hochberg correction, only taxa with adjusted p < 0.05 shown")

p_lfc_c1
# ggsave("./outputs/comp1_lfc.jpeg", plot = p_lfc_c1, dpi = 300, height = 7, width = 10)

sig_Wild_vs_SPF %>%
  select(taxon, log2FoldChange, padj) %>%
  arrange(desc(log2FoldChange))

write.csv(
  sig_Wild_vs_SPF %>%
    select(taxon, log2FoldChange, padj) %>%
    arrange(desc(log2FoldChange)),
  file = "./outputs/comp1_deseq2_significant_taxa.csv",
  row.names = FALSE
)

write.csv(
  sig_Wild_vs_SPF %>%
    select(taxon, log2FoldChange, lfcSE, padj) %>%
    arrange(desc(log2FoldChange)),
  file = "./outputs/comp1_deseq2_significant_taxa_full.csv",
  row.names = FALSE
)

11. Comparison 2 — Adult Mice

comp2_groups <- c("BALB/c_Yes_SPF_Term", "BALB/c_No_Wild_colon",
                  "Wildling", "Pet Shop", "Wild")

ps_c2 <- subset_samples(ps_rar, sample_data(ps_rar)$Group %in% comp2_groups)

alpha_c2 <- alpha_df %>%
  filter(Group %in% comp2_groups) %>%
  mutate(Group = factor(Group, levels = comp2_groups))

Alpha diversity

# Chao1
lm_chao1_c2 <- lm(Chao1 ~ Group, data = alpha_c2)

par(mfrow = c(1, 2))
qqnorm(rstandard(lm_chao1_c2), main = "Q-Q plot — Chao1")
abline(0, 1, lty = 2)

plot(predict(lm_chao1_c2), rstandard(lm_chao1_c2),
     main = "Residuals vs Fitted — Chao1",
     xlab = "Fitted values", ylab = "Standardized residuals")
abline(h = 0, lty = 2)

par(mfrow = c(1, 1))

# Shannon
lm_shannon_c2 <- lm(Shannon ~ Group, data = alpha_c2)

par(mfrow = c(1, 2))
qqnorm(rstandard(lm_shannon_c2), main = "Q-Q plot — Shannon")
abline(0, 1, lty = 2)

plot(predict(lm_shannon_c2), rstandard(lm_shannon_c2),
     main = "Residuals vs Fitted — Shannon",
     xlab = "Fitted values", ylab = "Standardized residuals")
abline(h = 0, lty = 2)

par(mfrow = c(1, 1))

# Tilføj Sex til alpha_c2
alpha_c2 <- alpha_df %>%
  filter(Group %in% comp2_groups) %>%
  mutate(Group = factor(Group, levels = comp2_groups),
         Sex   = factor(Sex))

alpha_c2 %>%
  group_by(Group, Sex) %>%
  summarise(n = n(), .groups = "drop") %>%
  pivot_wider(names_from = Sex, values_from = n, values_fill = 0)

## # A tibble: 5 × 3
##   Group                Female  Male
##   <fct>                 <int> <int>
## 1 BALB/c_Yes_SPF_Term       7     7
## 2 BALB/c_No_Wild_colon      8     6
## 3 Wildling                  5     5
## 4 Pet Shop                  6     7
## 5 Wild                      3     1

# ============================================================
# CHAO1
# ============================================================
# One-way ANOVA Group + Sex (with sex as covariable)
anova_chao1_c2 <- aov(Chao1 ~ Group + Sex, data = alpha_c2)
summary(anova_chao1_c2)

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## Group        4  22315    5579  11.735 9.21e-07 ***
## Sex          1    666     666   1.401    0.242    
## Residuals   49  23295     475                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TukeyHSD(anova_chao1_c2)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Chao1 ~ Group + Sex, data = alpha_c2)
## 
## $Group
##                                                diff         lwr      upr
## BALB/c_No_Wild_colon-BALB/c_Yes_SPF_Term  21.119859  -2.2182178 44.45794
## Wildling-BALB/c_Yes_SPF_Term              47.064537  21.4989550 72.63012
## Pet Shop-BALB/c_Yes_SPF_Term              50.895025  27.1123732 74.67768
## Wild-BALB/c_Yes_SPF_Term                  35.828647   0.8215319 70.83576
## Wildling-BALB/c_No_Wild_colon             25.944678   0.3790959 51.51026
## Pet Shop-BALB/c_No_Wild_colon             29.775166   5.9925141 53.55782
## Wild-BALB/c_No_Wild_colon                 14.708788 -20.2983272 49.71590
## Pet Shop-Wildling                          3.830487 -22.1415684 29.80254
## Wild-Wildling                            -11.235890 -47.7657586 25.29398
## Wild-Pet Shop                            -15.066378 -50.3714313 20.23868
##                                              p adj
## BALB/c_No_Wild_colon-BALB/c_Yes_SPF_Term 0.0934756
## Wildling-BALB/c_Yes_SPF_Term             0.0000355
## Pet Shop-BALB/c_Yes_SPF_Term             0.0000018
## Wild-BALB/c_Yes_SPF_Term                 0.0424754
## Wildling-BALB/c_No_Wild_colon            0.0451209
## Pet Shop-BALB/c_No_Wild_colon            0.0074479
## Wild-BALB/c_No_Wild_colon                0.7570628
## Pet Shop-Wildling                        0.9934237
## Wild-Wildling                            0.9061034
## Wild-Pet Shop                            0.7464684
## 
## $Sex
##                 diff       lwr     upr     p adj
## Male-Female 6.893747 -4.940206 18.7277 0.2473985

# Hvis Sex ikke signifikant — kør Group only
anova_chao1_c2_group <- aov(Chao1 ~ Group, data = alpha_c2)
summary(anova_chao1_c2_group)

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## Group        4  22315    5579   11.64 9.32e-07 ***
## Residuals   50  23961     479                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TukeyHSD(anova_chao1_c2_group)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Chao1 ~ Group, data = alpha_c2)
## 
## $Group
##                                                diff         lwr      upr
## BALB/c_No_Wild_colon-BALB/c_Yes_SPF_Term  21.119859  -2.2939786 44.53370
## Wildling-BALB/c_Yes_SPF_Term              47.064537  21.4159632 72.71311
## Pet Shop-BALB/c_Yes_SPF_Term              50.895025  27.0351692 74.75488
## Wild-BALB/c_Yes_SPF_Term                  35.828647   0.7078908 70.94940
## Wildling-BALB/c_No_Wild_colon             25.944678   0.2961041 51.59325
## Pet Shop-BALB/c_No_Wild_colon             29.775166   5.9153101 53.63502
## Wild-BALB/c_No_Wild_colon                 14.708788 -20.4119683 49.82954
## Pet Shop-Wildling                          3.830487 -22.2258797 29.88685
## Wild-Wildling                            -11.235890 -47.8843429 25.41256
## Wild-Pet Shop                            -15.066378 -50.4860395 20.35328
##                                              p adj
## BALB/c_No_Wild_colon-BALB/c_Yes_SPF_Term 0.0952907
## Wildling-BALB/c_Yes_SPF_Term             0.0000365
## Pet Shop-BALB/c_Yes_SPF_Term             0.0000019
## Wild-BALB/c_Yes_SPF_Term                 0.0434624
## Wildling-BALB/c_No_Wild_colon            0.0461575
## Pet Shop-BALB/c_No_Wild_colon            0.0076633
## Wild-BALB/c_No_Wild_colon                0.7597369
## Pet Shop-Wildling                        0.9935277
## Wild-Wildling                            0.9073589
## Wild-Pet Shop                            0.7492219

# ============================================================
# SHANNON
# ============================================================
# One-way ANOVA Group + Sex (with sex as covariable)
anova_shannon_c2 <- aov(Shannon ~ Group + Sex, data = alpha_c2)
summary(anova_shannon_c2)

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## Group        4  5.780  1.4450   9.145 1.34e-05 ***
## Sex          1  0.177  0.1774   1.122    0.295    
## Residuals   49  7.743  0.1580                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TukeyHSD(anova_shannon_c2)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Shannon ~ Group + Sex, data = alpha_c2)
## 
## $Group
##                                                diff        lwr       upr
## BALB/c_No_Wild_colon-BALB/c_Yes_SPF_Term  0.7560324  0.3305450 1.1815199
## Wildling-BALB/c_Yes_SPF_Term              0.8575056  0.3914074 1.3236037
## Pet Shop-BALB/c_Yes_SPF_Term              0.5620502  0.1284575 0.9956429
## Wild-BALB/c_Yes_SPF_Term                  0.4000831 -0.2381481 1.0383142
## Wildling-BALB/c_No_Wild_colon             0.1014731 -0.3646250 0.5675713
## Pet Shop-BALB/c_No_Wild_colon            -0.1939822 -0.6275749 0.2396104
## Wild-BALB/c_No_Wild_colon                -0.3559494 -0.9941805 0.2822818
## Pet Shop-Wildling                        -0.2954554 -0.7689641 0.1780533
## Wild-Wildling                            -0.4574225 -1.1234157 0.2085707
## Wild-Pet Shop                            -0.1619671 -0.8056301 0.4816959
##                                              p adj
## BALB/c_No_Wild_colon-BALB/c_Yes_SPF_Term 0.0000660
## Wildling-BALB/c_Yes_SPF_Term             0.0000359
## Pet Shop-BALB/c_Yes_SPF_Term             0.0051684
## Wild-BALB/c_Yes_SPF_Term                 0.3993303
## Wildling-BALB/c_No_Wild_colon            0.9717740
## Pet Shop-BALB/c_No_Wild_colon            0.7123056
## Wild-BALB/c_No_Wild_colon                0.5174039
## Pet Shop-Wildling                        0.4040356
## Wild-Wildling                            0.3080015
## Wild-Pet Shop                            0.9526340
## 
## $Sex
##                  diff        lwr      upr     p adj
## Male-Female 0.1124947 -0.1032557 0.328245 0.2998647

# Hvis Sex ikke signifikant — kør Group only
anova_shannon_c2_group <- aov(Shannon ~ Group, data = alpha_c2)
summary(anova_shannon_c2_group)

##             Df Sum Sq Mean Sq F value  Pr(>F)    
## Group        4   5.78  1.4450   9.122 1.3e-05 ***
## Residuals   50   7.92  0.1584                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TukeyHSD(anova_shannon_c2_group)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Shannon ~ Group, data = alpha_c2)
## 
## $Group
##                                                diff        lwr       upr
## BALB/c_No_Wild_colon-BALB/c_Yes_SPF_Term  0.7560324  0.3303453 1.1817196
## Wildling-BALB/c_Yes_SPF_Term              0.8575056  0.3911886 1.3238225
## Pet Shop-BALB/c_Yes_SPF_Term              0.5620502  0.1282540 0.9958464
## Wild-BALB/c_Yes_SPF_Term                  0.4000831 -0.2384477 1.0386138
## Wildling-BALB/c_No_Wild_colon             0.1014731 -0.3648438 0.5677901
## Pet Shop-BALB/c_No_Wild_colon            -0.1939822 -0.6277785 0.2398140
## Wild-BALB/c_No_Wild_colon                -0.3559494 -0.9944801 0.2825814
## Pet Shop-Wildling                        -0.2954554 -0.7691864 0.1782756
## Wild-Wildling                            -0.4574225 -1.1237283 0.2088833
## Wild-Pet Shop                            -0.1619671 -0.8059323 0.4819980
##                                              p adj
## BALB/c_No_Wild_colon-BALB/c_Yes_SPF_Term 0.0000648
## Wildling-BALB/c_Yes_SPF_Term             0.0000351
## Pet Shop-BALB/c_Yes_SPF_Term             0.0051648
## Wild-BALB/c_Yes_SPF_Term                 0.4003591
## Wildling-BALB/c_No_Wild_colon            0.9719137
## Pet Shop-BALB/c_No_Wild_colon            0.7132132
## Wild-BALB/c_No_Wild_colon                0.5184818
## Pet Shop-Wildling                        0.4050686
## Wild-Wildling                            0.3089116
## Wild-Pet Shop                            0.9528579

Plot

library(ggpubr)
library(ggh4x)

long_c2 <- melt(alpha_c2)
long_c2  <- long_c2[long_c2$variable %in% c("Chao1", "Shannon"), ]
long_c2$variable <- factor(long_c2$variable, levels = c("Chao1", "Shannon"))

fill_palette_c2 <- c(
  "BALB/c_No_Wild_colon"  = "#3E5871",   # dusty navy
  "BALB/c_Yes_SPF_Term"   = "#4E8A4E",   # dark green
  "Wildling"              = "#8B6F47",   # brown
  "Pet Shop"              = "#C9873E",   # orange
  "Wild"                  = "#6B2737"    # deep muted burgundy
)

# Opdater y-akse limits via coord_cartesian eller scale
alpha_p_c2 <- ggplot(long_c2, aes(x = Group, y = value)) +
  geom_violin(aes(fill = Group), trim = FALSE, alpha = 0.7) +
  geom_boxplot(width = 0.10, outlier.alpha = 0) +
  geom_jitter(aes(fill = Group, shape = Sex), alpha = 1, width = 0.2,
              stroke = 1, size = 2) +
  facet_wrap(~variable, scales = "free_y") +
  theme_bw() +
  scale_fill_manual(values = fill_palette_c2) +
  scale_shape_manual(values = c("Female" = 21, "Male" = 24),
                     na.translate = FALSE) +
  scale_x_discrete(labels = c(
    "BALB/c_Yes_SPF_Term"  = "BALB/c\nImmunized\n+ SPF FMT",
    "BALB/c_No_Wild_colon" = "BALB/c\n+ Wild FMT",
    "Wildling"             = "Wildling",
    "Pet Shop"             = "Pet Shop",
    "Wild"                 = "Wild"
  )) +
  guides(fill  = "none",
         shape = guide_legend(title = "Sex")) +
  theme(axis.text.x  = element_text(angle = 25, hjust = 1, size = 11),
        axis.title.y = element_text(size = 13),
        strip.text.x = element_text(size = 13),
        plot.title   = element_markdown(size = 15)) +
  labs(x = NULL, y = "Alpha diversity",
       title = "Alpha diversity") +
  facet_wrap(~variable, scales = "free_y") +
  # Sæt y-akse limits per facet via en hjælpefunktion
  ggh4x::facetted_pos_scales(y = list(
    variable == "Chao1"   ~ scale_y_continuous(limits = c(NA, 275), breaks = c(25, 50, 75, 100, 125, 150, 175, 200, 225, 250, 275)),
    variable == "Shannon" ~ scale_y_continuous(limits = c(NA, 4.0), breaks = c(0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4))
  ))

# Signifikansbrackets – y.position spredt ud over den nye plads
sig_chao1 <- data.frame(
  variable   = "Chao1",
  group1     = c("BALB/c_Yes_SPF_Term", "BALB/c_Yes_SPF_Term",
                 "BALB/c_Yes_SPF_Term", "BALB/c_No_Wild_colon",
                 "BALB/c_No_Wild_colon"),
  group2     = c("Wildling", "Pet Shop",
                 "Wild", "Wildling",
                 "Pet Shop"),
  y.position = c(261, 268, 275, 247, 254), 
  label      = c("***", "***", "*", "*", "**")
)

sig_shannon <- data.frame(
  variable   = "Shannon",
  group1     = c("BALB/c_Yes_SPF_Term", "BALB/c_Yes_SPF_Term",
                 "BALB/c_Yes_SPF_Term"),
  group2     = c("BALB/c_No_Wild_colon", "Wildling", "Pet Shop"),
  y.position = c(3.6, 3.8, 4),  
  label      = c("***", "***", "**")
)

sig_all <- bind_rows(sig_chao1, sig_shannon)

alpha_p_c2 <- alpha_p_c2 +
  stat_pvalue_manual(
    data       = sig_all,
    label      = "label",
    xmin       = "group1",
    xmax       = "group2",
    y.position = "y.position",
    tip.length = 0.0,
    size       = 3.0
  )

# Add p-value annotation in top right corner of each facet
stat_anova_anno_c2_alpha <- data.frame(
  variable = c("Chao1",  "Shannon"),
  label    = c("Group: p < 0.001", "Group: p < 0.001"),
  x        = c(Inf,      Inf),
  y        = c(Inf,      Inf)
)

alpha_p_c2 <- alpha_p_c2 +
  geom_text(data = stat_anova_anno_c2_alpha,
            aes(x = x, y = y, label = label),
            inherit.aes = FALSE,
            hjust = 1.1, vjust = 1.5,
            size = 3.5, fontface = "italic", color = "black")

alpha_p_c2

# ggsave("./outputs/comp2_alpha.jpeg", height = 8, width = 16, dpi = 300)

Beta diversity — Bray-Curtis PCoA

PCoA

fill_palette_c2 <- c(
  "BALB/c_Yes_SPF_Term"   = "#4E8A4E",   # dark green
  "BALB/c_No_Wild_colon"  = "#3E5871",   # dusty navy
  "Wildling"              = "#8B6F47",   # brown
  "Pet Shop"              = "#C9873E",   # orange
  "Wild"                  = "#6B2737"    # deep muted burgundy
)

colour_palette_c2 <- c(
  "BALB/c_Yes_SPF_Term"   = "#3A6F3A",   # deeper forest green
  "BALB/c_No_Wild_colon"  = "#2F4357",   # deeper dusty navy
  "Wildling"              = "#6F5736",   # darker brown
  "Pet Shop"              = "#A86E2E",   # darker muted orange
  "Wild"                  = "#4A1825"    # darker burgundy
)

ps_c2_log    <- transform_sample_counts(ps_c2, function(x){log(1 + x)})
bray_pcoa_c2 <- ordinate(ps_c2_log, method = "PCoA", distance = "bray")
evals_c2     <- bray_pcoa_c2$values$Eigenvalues

bray_p_c2 <- plot_ordination(
  ps_c2_log, bray_pcoa_c2, color = "Group", shape = "Sex"
) +
  labs(
    x     = sprintf("PCo1 [%s%%]", round(evals_c2 / sum(evals_c2) * 100, 1)[1]),
    y     = sprintf("PCo2 [%s%%]", round(evals_c2 / sum(evals_c2) * 100, 1)[2]),
    col   = "Group", shape = "Sex",
    title = "Bray-Curtis PCoA"
  ) +
  stat_ellipse(aes(group = Group, fill = Group), alpha = 0.1, geom = "polygon") +
  coord_fixed() +
  scale_color_manual(values = colour_palette_c2,   # darker outline colours
                     labels = c(
                       "BALB/c_Yes_SPF_Term"  = "BALB/c Immunized + SPF FMT",
                       "BALB/c_No_Wild_colon" = "BALB/c + Wild FMT",
                       "Wildling"             = "Wildling",
                       "Pet Shop"             = "Pet Shop",
                       "Wild"                 = "Wild"
                     )) +
  scale_fill_manual(values = fill_palette_c2,      # lighter fill for ellipses
                    labels = c(
                      "BALB/c_Yes_SPF_Term"  = "BALB/c Immunized + SPF FMT",
                      "BALB/c_No_Wild_colon" = "BALB/c + Wild FMT",
                      "Wildling"             = "Wildling",
                      "Pet Shop"             = "Pet Shop",
                      "Wild"                 = "Wild"
                    )) +
  geom_vline(xintercept = 0, lty = 2, alpha = 0.5, color = "blue") +
  geom_hline(yintercept = 0, lty = 2, alpha = 0.5, color = "blue") +
  theme_bw() +
  theme(axis.title   = element_text(),
        legend.title = element_text(size = 10),
        legend.text  = element_text(),
        axis.text    = element_text(size = 13),
        plot.title = element_text(size = 15)) +
  geom_point(size = 3)

library(ggtext)

bray_p_c2 <- bray_p_c2 +
  annotate("richtext",
           x = Inf, y = Inf,
           label = "PERMANOVA: Group<br>*R*² = 0.47, *p* < 0.001",
           hjust = 1.1, vjust = 1.5,
           size = 4, color = "black",
           fill = NA, label.color = NA)

bray_p_c2

# ggsave("./outputs/comp2_bray_pcoa_80perc.jpeg", dpi = 300, height = 7, width = 12)

set.seed(123)

df_c2        <- data.frame(sample_data(ps_c2_log))
bray_dist_c2 <- phyloseq::distance(ps_c2_log, method = "bray")
vegan::adonis2(bray_dist_c2 ~ Group, data = df_c2, permutations = 999)

## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## vegan::adonis2(formula = bray_dist_c2 ~ Group, data = df_c2, permutations = 999)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     4   3.0835 0.46882 11.033  0.001 ***
## Residual 50   3.4937 0.53118                  
## Total    54   6.5772 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

vegan::adonis2(bray_dist_c2 ~ Sex, data = df_c2, permutations = 999) # not significant, explain 1.948 % of variance

## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## vegan::adonis2(formula = bray_dist_c2 ~ Sex, data = df_c2, permutations = 999)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.1187 0.01805 0.9744  0.424
## Residual 53   6.4585 0.98195              
## Total    54   6.5772 1.00000

vegan::adonis2(bray_dist_c2 ~ Group, data = df_c2, permutations = 999) # significant and explain 46.878 % of variance

## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## vegan::adonis2(formula = bray_dist_c2 ~ Group, data = df_c2, permutations = 999)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     4   3.0835 0.46882 11.033  0.001 ***
## Residual 50   3.4937 0.53118                  
## Total    54   6.5772 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Donor vs term

# Beta diversity at term — FMT grupper versus deres donorer (samme donor som ved weaning)
# Donor-grupper: Donor_SPF og Donor_Wild — ekskluder resampling

comp2_term_groups  <- c("BALB/c_Yes_SPF_Term", "BALB/c_No_Wild_colon")
comp2_term_donors  <- c("Donor_SPF", "Donor_Wild")

# Subset: kun term-grupper + originale donorer (ingen resampling)
ps_c2_donors <- subset_samples(ps_rar,
  sample_data(ps_rar)$Group %in% c(comp2_term_groups, comp2_term_donors) &
  !Study %in% c("Donor_inoculum_SPF_resampling_1",
                "Donor_inoculum_SPF_resampling_2",
                "Donor_inoculum_Wild_resampling_1",
                "Donor_inoculum_Wild_resampling_2"))

ps_c2_donors_log    <- transform_sample_counts(ps_c2_donors, function(x){log(1 + x)})
bray_pcoa_c2_donors <- ordinate(ps_c2_donors_log, method = "PCoA", distance = "bray")
evals_c2_donors     <- bray_pcoa_c2_donors$values$Eigenvalues

bray_p_c2_donors <- plot_ordination(
  ps_c2_donors_log, bray_pcoa_c2_donors, color = "Group", shape = "Sex"
) +
  # Ellipser kun på term-grupper (ikke donors)
  stat_ellipse(data = . %>% filter(Group %in% comp2_term_groups),
             aes(group = Group, fill = Group, color = Group, shape = NULL),
             alpha = 0.15, geom = "polygon", linetype = 2) +
  # Term-prøver
  geom_point(data = . %>% filter(Group %in% comp2_term_groups),
             aes(color = Group, shape = Sex), size = 3, alpha = 0.9) +
  # Donor-prøver — plottet som diamonds (shape 18)
  geom_point(data = . %>% filter(Group %in% comp2_term_donors),
             aes(color = Group), shape = 18, size = 5, alpha = 1) +
  scale_color_manual(
    values = c(
      "BALB/c_Yes_SPF_Term"  = "#4E8A4E",   # dark green
      "BALB/c_No_Wild_colon" = "#3E5871",   # dusty navy
      "Donor_SPF"            = "#2d5a2d",   # darker green for donor
      "Donor_Wild"           = "#1a2f40"    # darker navy for donor
    ),
    labels = c(
      "BALB/c_Yes_SPF_Term"  = "BALB/c Immunized + SPF FMT",
      "BALB/c_No_Wild_colon" = "BALB/c + Wild FMT",
      "Donor_SPF"            = "Donor SPF",
      "Donor_Wild"           = "Donor Wild"
    )
  ) +
  scale_fill_manual(
    values = c(
      "BALB/c_Yes_SPF_Term"  = "#4E8A4E",
      "BALB/c_No_Wild_colon" = "#3E5871"
    ),
    labels = c(
      "BALB/c_Yes_SPF_Term"  = "BALB/c Immunized + SPF FMT",
      "BALB/c_No_Wild_colon" = "BALB/c + Wild FMT"
    )
  ) +
  scale_shape_manual(
    values       = c("Female" = 16, "Male" = 17),
    na.translate = FALSE,
    na.value     = NA
  ) +
  guides(
    color = guide_legend(title = "Group"),
    fill  = "none",
    shape = guide_legend(title = "Sex", na.translate = FALSE)
  ) +
  labs(
    x        = sprintf("PCo1 [%s%%]", round(evals_c2_donors / sum(evals_c2_donors) * 100, 1)[1]),
    y        = sprintf("PCo2 [%s%%]", round(evals_c2_donors / sum(evals_c2_donors) * 100, 1)[2]),
    color    = "Group", shape = "Sex",
    title    = "Bray-Curtis PCoA",
    subtitle = "Beta Diversity at Termination — FMT Groups vs Donor"
  ) +
  geom_vline(xintercept = 0, lty = 2, alpha = 0.5, color = "blue") +
  geom_hline(yintercept = 0, lty = 2, alpha = 0.5, color = "blue") +
  theme_bw() +
  theme(axis.title    = element_text(),
        legend.title  = element_text(size = 10),
        legend.text   = element_text(),
        axis.text     = element_text(size = 13),
        plot.title    = element_text(size = 15),
        plot.subtitle = element_text(size = 13))

library(ggtext)

bray_p_c2_donors <- bray_p_c2_donors +
  annotate("richtext",
           x = Inf, y = Inf,
           label = "PERMANOVA: Group<br>*R*² = 0.39, *p* < 0.001",
           hjust = 1.1, vjust = 1.5,
           size = 4, color = "black",
           fill = NA, label.color = NA)

bray_p_c2_donors

# ggsave("./outputs/comp2_bray_pcoa_donors.jpeg", dpi = 300, height = 7, width = 10)

# PERMANOVA for term FMT groups vs donor comparison
# NB: Donors ekskluderes fra PERMANOVA — testes kun på de biologiske grupper

ps_c2_donors_stat_log <- transform_sample_counts(
  subset_samples(ps_rar,
    sample_data(ps_rar)$Group %in% comp2_term_groups &
    !Study %in% c("Donor_inoculum_SPF_resampling_1",
                  "Donor_inoculum_SPF_resampling_2",
                  "Donor_inoculum_Wild_resampling_1",
                  "Donor_inoculum_Wild_resampling_2")),
  function(x){log(1 + x)})

df_c2_donors_stat   <- data.frame(sample_data(ps_c2_donors_stat_log))
bray_dist_c2_donors <- phyloseq::distance(ps_c2_donors_stat_log, method = "bray")

set.seed(123)
vegan::adonis2(bray_dist_c2_donors ~ Group * Sex, data = df_c2_donors_stat, permutations = 999)

## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## vegan::adonis2(formula = bray_dist_c2_donors ~ Group * Sex, data = df_c2_donors_stat, permutations = 999)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     3   1.2241 0.48717 7.5998  0.001 ***
## Residual 24   1.2885 0.51283                  
## Total    27   2.5126 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

set.seed(123)
vegan::adonis2(bray_dist_c2_donors ~ Group, data = df_c2_donors_stat, permutations = 999)

## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## vegan::adonis2(formula = bray_dist_c2_donors ~ Group, data = df_c2_donors_stat, permutations = 999)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1  0.96797 0.38525 16.294  0.001 ***
## Residual 26  1.54459 0.61475                  
## Total    27  2.51257 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Relative abundance barplot

comp2_groups <- c("BALB/c_Yes_SPF_Term", "BALB/c_No_Wild_colon",
                  "Wildling", "Pet Shop", "Wild")

# Fælles farvepalette til x-aksen labels
comp2_labels <- c(
  "BALB/c_Yes_SPF_Term"  = "BALB/c\nImmunized\n+ SPF FMT",
  "BALB/c_No_Wild_colon" = "BALB/c\n+ Wild FMT",
  "Wildling"             = "Wildling",
  "Pet Shop"             = "Pet Shop",
  "Wild"                 = "Wild"
)

# Subset ps_rel til Comparison 2 grupper
ps_rel_c2 <- subset_samples(ps_rel,
  sample_data(ps_rel)$Group %in% comp2_groups)

# Unload MicrobiotaProcess hvis loaded
if ("MicrobiotaProcess" %in% loadedNamespaces()) {
  devtools::unload("MicrobiotaProcess")
}

Phylum level

ps_rel_c2_phylum <- ps_rel_c2
ps_rel_c2_phylum <- subset_taxa(ps_rel_c2_phylum,
                                !is.na(Phylum) & Phylum != "__" & Phylum != "")
ps_rel_c2_phylum <- prune_taxa(taxa_sums(ps_rel_c2_phylum) > 0, ps_rel_c2_phylum)

glom_c2_phylum <- gloomer(ps = ps_rel_c2_phylum, taxa_level = "Phylum", NArm = TRUE)

unique(tax_table(glom_c2_phylum)[, "Phylum"])

## Taxonomy Table:     [13 taxa by 1 taxonomic ranks]:
##                             Phylum                       
## Actinomycetota              "Actinomycetota"             
## Bacillota                   "Bacillota"                  
## Bacteroidota                "Bacteroidota"               
## Campylobacterota            "Campylobacterota"           
## Candidatus Saccharibacteria "Candidatus Saccharibacteria"
## Deferribacterota            "Deferribacterota"           
## Elusimicrobiota             "Elusimicrobiota"            
## Fusobacteriota              "Fusobacteriota"             
## Mycoplasmatota              "Mycoplasmatota"             
## Pseudomonadota              "Pseudomonadota"             
## Spirochaetota               "Spirochaetota"              
## Thermodesulfobacteriota     "Thermodesulfobacteriota"    
## Verrucomicrobiota           "Verrucomicrobiota"

phylum_col_c2 <- c(
  "Bacillota"                   = "#C47F7F",
  "Bacteroidota"                = "#6B9BA4",
  "Pseudomonadota"              = "#A67B9B",
  "Campylobacterota"            = "#C4936A",
  "Mycoplasmatota"              = "#7B8FA6",
  "Thermodesulfobacteriota"     = "#6B8E7B",
  "Verrucomicrobiota"           = "#9B7BAD",
  "Fusobacteriota"              = "#7B9E87",
  "Actinomycetota"              = "#8FAF7E",
  "Candidatus Saccharibacteria" = "#8FA6A6",
  "Spirochaetota"               = "#C77B3A",
  "Deferribacterota"            = "#B8977E",
  "Elusimicrobiota"             = "#C4A882"
)

rel_c2_phylum <- glom_c2_phylum %>%
  psmelt() %>%
  group_by(Sample) %>%
  mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
  ungroup()

top_phylum_c2 <- rel_c2_phylum %>%
  group_by(OTU) %>%
  summarise(m = mean(RelAbund)) %>%
  arrange(desc(m)) %>%
  pull(OTU)

rel_c2_phylum_avg <- rel_c2_phylum %>%
  mutate(Order_plot = OTU,
         Group = factor(Group, levels = comp2_groups)) %>%
  group_by(Group, Order_plot) %>%
  summarise(RelAbund = mean(RelAbund), .groups = "drop") %>%
  mutate(Order_plot = factor(Order_plot, levels = top_phylum_c2))

p_phylum_c2 <- ggplot(rel_c2_phylum_avg,
                      aes(x = Group, y = RelAbund, fill = Order_plot)) +
  geom_bar(stat = "identity", color = "black", linewidth = 0.3) +
  scale_fill_manual(values = phylum_col_c2, breaks = top_phylum_c2) +
  scale_x_discrete(limits = comp2_groups, labels = comp2_labels) +
  theme_bw() +
  theme(axis.text.x  = element_text(size = 11),
        axis.title.y = element_text(size = 13),
        legend.text  = element_text(size = 9),
        legend.title = element_text(size = 10),
        plot.title   = element_text(size = 15)) +
  labs(x = NULL, y = "Relative abundance (%)", fill = "Phylum",
       title = "Relative Abundance at Phylum Level")

p_phylum_c2

# ggsave("./outputs/comp2_relabund_phylum.jpeg", dpi = 300, height = 7, width = 14)

Genus level

ps_rel_c2_genus_all5 <- ps_rel_c2
ps_rel_c2_genus_all5 <- subset_taxa(ps_rel_c2_genus_all5,
                               !is.na(Genus) & Genus != "__" & Genus != "")
ps_rel_c2_genus_all5 <- prune_taxa(taxa_sums(ps_rel_c2_genus_all5) > 0, ps_rel_c2_genus_all5)

glom_c2_genus_all5 <- gloomer(ps = ps_rel_c2_genus_all5, taxa_level = "Genus", NArm = TRUE)

unique(tax_table(glom_c2_genus_all5)[, "Genus"])

## Taxonomy Table:     [80 taxa by 1 taxonomic ranks]:
##                       Genus                  
## Bifidobacterium       "Bifidobacterium"      
## Adlercreutzia         "Adlercreutzia"        
## Staphylococcus        "Staphylococcus"       
## Enterococcus          "Enterococcus"         
## Lactobacillus         "Lactobacillus"        
## Ligilactobacillus     "Ligilactobacillus"    
## Limosilactobacillus   "Limosilactobacillus"  
## Lactococcus           "Lactococcus"          
## Streptococcus         "Streptococcus"        
## Candidatus Borkfalkia "Candidatus Borkfalkia"
## Christensenella       "Christensenella"      
## Acutalibacter         "Acutalibacter"        
## Butyricicoccus        "Butyricicoccus"       
## Clostridium           "Clostridium"          
## Eubacterium           "Eubacterium"          
## Zhenpiania            "Zhenpiania"           
## Anaerotruncus         "Anaerotruncus"        
## Angelakisella         "Angelakisella"        
## Hominilimicola        "Hominilimicola"       
## Neglectibacter        "Neglectibacter"       
## Vermiculatibacterium  "Vermiculatibacterium" 
## Dehalobacterium       "Dehalobacterium"      
## Pumilibacter          "Pumilibacter"         
## Anaerotignum          "Anaerotignum"         
## Acetatifactor         "Acetatifactor"        
## Anaerostipes          "Anaerostipes"         
## Blautia               "Blautia"              
## Enterocloster         "Enterocloster"        
## Fusimonas             "Fusimonas"            
## Jutongia              "Jutongia"             
## Lachnospira           "Lachnospira"          
## Mediterraneibacter    "Mediterraneibacter"   
## Parablautia           "Parablautia"          
## Petralouisia          "Petralouisia"         
## Roseburia             "Roseburia"            
## Schaedlerella         "Schaedlerella"        
## Sporofaciens          "Sporofaciens"         
## Tyzzerella            "Tyzzerella"           
## Clostridioides        "Clostridioides"       
## Paeniclostridium      "Paeniclostridium"     
## Coprobacillus         "Coprobacillus"        
## Thomasclavelia        "Thomasclavelia"       
## Allobaculum           "Allobaculum"          
## Amedibacillus         "Amedibacillus"        
## Dubosiella            "Dubosiella"           
## Faecalibaculum        "Faecalibaculum"       
## Stoquefichus          "Stoquefichus"         
## Bacteroides           "Bacteroides"          
## Phocaeicola           "Phocaeicola"          
## Duncaniella           "Duncaniella"          
## Heminiphilus          "Heminiphilus"         
## Muribaculum           "Muribaculum"          
## Paramuribaculum       "Paramuribaculum"      
## Sangeribacter         "Sangeribacter"        
## Odoribacter           "Odoribacter"          
## Palleniella           "Palleniella"          
## Paraprevotella        "Paraprevotella"       
## Segatella             "Segatella"            
## Xylanibacter          "Xylanibacter"         
## Alistipes             "Alistipes"            
## Rikenella             "Rikenella"            
## Parabacteroides       "Parabacteroides"      
## Helicobacter          "Helicobacter"         
## Nanosyncoccus         "Nanosyncoccus"        
## Mucispirillum         "Mucispirillum"        
## Elusimicrobium        "Elusimicrobium"       
## Fusobacterium         "Fusobacterium"        
## Metamycoplasma        "Metamycoplasma"       
## Malacoplasma          "Malacoplasma"         
## Anaeroplasma          "Anaeroplasma"         
## Parasutterella        "Parasutterella"       
## Sutterella            "Sutterella"           
## Escherichia           "Escherichia"          
## Klebsiella            "Klebsiella"           
## Proteus               "Proteus"              
## Treponema             "Treponema"            
## Bilophila             "Bilophila"            
## Desulfovibrio         "Desulfovibrio"        
## Taurinivorans         "Taurinivorans"        
## Akkermansia           "Akkermansia"

rel_c2_genus_all5 <- glom_c2_genus_all5 %>%
  psmelt() %>%
  group_by(Sample) %>%
  mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
  ungroup()

# Genera >= 1% i mindst én gruppe
top_genus_c2_filt <- rel_c2_genus_all5 %>%
  group_by(OTU, Group) %>%
  summarise(m = mean(RelAbund), .groups = "drop") %>%
  group_by(OTU) %>%
  summarise(max_m = max(m)) %>%
  filter(max_m >= 1) %>%
  arrange(desc(max_m)) %>%
  pull(OTU)

# Genera >= 0.01% i mindst én gruppe (til fuld plot)
top_genus_c2_001 <- rel_c2_genus_all5 %>%
  group_by(OTU, Group) %>%
  summarise(m = mean(RelAbund), .groups = "drop") %>%
  group_by(OTU) %>%
  summarise(max_m = max(m)) %>%
  filter(max_m >= 0.01) %>%
  arrange(desc(max_m)) %>%
  pull(OTU)

genus_col_c2 <- c(
  "Lactobacillus"         = "#C4936A",
  "Bacteroides"           = "#6B9BA4",
  "Ligilactobacillus"     = "#A23B72",
  "Fusimonas"             = "#E84855",
  "Christensenella"       = "#C77B3A",
  "Limosilactobacillus"   = "#C47F7F",
  "Alistipes"             = "#9B7BAD",
  "Clostridium"           = "#1D3557",
  "Mediterraneibacter"    = "#D4A853",
  "Schaedlerella"         = "#8FAF7E",
  "Acetatifactor"         = "#5C8A4A",
  "Duncaniella"           = "#3D7A8A",
  "Palleniella"           = "#B5838D",
  "Streptococcus"         = "#E76F51",
  "Phocaeicola"           = "#1A5276",
  "Allobaculum"           = "#E8C3A0",
  "Roseburia"             = "#5A6AA8",
  "Xylanibacter"          = "#E9C46A",
  "Odoribacter"           = "#80B918",
  "Helicobacter"          = "#2A9D8F",
  "Candidatus Borkfalkia" = "#5C7B3A",
  "Thomasclavelia"        = "#7B5EA7",
  "Blautia"               = "#4A8A7B",
  "Paraprevotella"        = "#3D5A80",
  "Anaerotignum"          = "#F18F01",
  "Heminiphilus"          = "#7B4F8A",
  "Desulfovibrio"         = "#A8DADC",
  "Pumilibacter"          = "#8A7B4A",
  "Muribaculum"           = "#6B4226",
  "Dubosiella"            = "#C45C8A",
  "Enterocloster"         = "#8AA85A",
  "Segatella"             = "#6B8E23",
  "Nanosyncoccus"         = "#2E86AB",
  "Eubacterium"           = "#C4745A",
  "Acutalibacter"         = "#8A4A5C",
  "Sporofaciens"          = "#7B8A4A",
  "Metamycoplasma"        = "#5C7B9A",
  "Adlercreutzia"         = "#C49A6C",
  "Parablautia"           = "#FFD700",
  "Fusobacterium"         = "#E84040",
  "Akkermansia"           = "#5C3A8A",
  "Other"                 = "grey80"
)

stopifnot(!any(duplicated(genus_col_c2)))

Grouped mean barplot (genera ≥ 1%)

rel_c2_genus_avg <- rel_c2_genus_all5 %>%
  mutate(Order_plot = ifelse(OTU %in% top_genus_c2_filt, OTU, "Other"),
         Group = factor(Group, levels = comp2_groups)) %>%
  group_by(Sample, Group, Order_plot) %>%
  summarise(RelAbund = sum(RelAbund), .groups = "drop") %>%
  group_by(Group, Order_plot) %>%
  summarise(RelAbund = mean(RelAbund), .groups = "drop") %>%
  mutate(Order_plot = factor(Order_plot,
                             levels = c(top_genus_c2_filt, "Other")))

p_genus_c2 <- ggplot(rel_c2_genus_avg,
                     aes(x = Group, y = RelAbund, fill = Order_plot)) +
  geom_bar(stat = "identity", color = "black", linewidth = 0.3) +
  scale_fill_manual(values = genus_col_c2,
                    breaks = c(top_genus_c2_filt, "Other")) +
  scale_x_discrete(limits = comp2_groups, labels = comp2_labels) +
  theme_bw() +
  theme(axis.text.x  = element_text(size = 11),
        axis.title.y = element_text(size = 13),
        legend.text  = element_text(size = 9),
        legend.title = element_text(size = 10),
        plot.title   = element_text(size = 15)) +
  labs(x = NULL, y = "Relative abundance (%)", fill = "Genus",
       title = "Relative Abundance at Genus Level")

p_genus_c2

# ggsave("./outputs/comp2_relabund_genus.jpeg", dpi = 300, height = 7, width = 14)

Grouped mean barplot (alle genera ≥ 0.01%)

genus_col_c2_001 <- c(
  "Lactobacillus"         = "#C4936A",
  "Bacteroides"           = "#6B9BA4",
  "Ligilactobacillus"     = "#A23B72",
  "Fusimonas"             = "#E84855",
  "Alistipes"             = "#9B7BAD",
  "Schaedlerella"         = "#8FAF7E",
  "Limosilactobacillus"   = "#C47F7F",
  "Duncaniella"           = "#3D7A8A",
  "Clostridium"           = "#3A5C7B",
  "Mediterraneibacter"    = "#3BB273",
  "Acetatifactor"         = "#457B9D",
  "Palleniella"           = "#B5838D",
  "Streptococcus"         = "#E76F51",
  "Roseburia"             = "#5A6AA8",
  "Phocaeicola"           = "#1A5276",
  "Helicobacter"          = "#2A9D8F",
  "Blautia"               = "#4A8A7B",
  "Odoribacter"           = "#80B918",
  "Anaerotignum"          = "#F18F01",
  "Xylanibacter"          = "#E9C46A",
  "Thomasclavelia"        = "#A8A85A",
  "Muribaculum"           = "#6B4226",
  "Christensenella"       = "#7B2D8B",
  "Paraprevotella"        = "#3D5A80",
  "Desulfovibrio"         = "#A8DADC",
  "Candidatus Borkfalkia" = "#5C8A4A",
  "Enterocloster"         = "#8AA85A",
  "Allobaculum"           = "#D4856A",
  "Acutalibacter"         = "#8A4A5C",
  "Segatella"             = "#6B8E23",
  "Dubosiella"            = "#C49A6C",
  "Sporofaciens"          = "#7B8A4A",
  "Heminiphilus"          = "#7B4F8A",
  "Metamycoplasma"        = "#7B8FA6",
  "Parablautia"           = "#FFD700",
  "Petralouisia"          = "#6AA85A",
  "Eubacterium"           = "#F4A261",
  "Akkermansia"           = "#5C3A8A",
  "Paramuribaculum"       = "#6D6875",
  "Fusobacterium"         = "#C73E1D",
  "Anaerostipes"          = "#5C7B3A",
  "Lactococcus"           = "#E8A020",
  "Jutongia"              = "#5CB87A",
  "Pumilibacter"          = "#8A7B4A",
  "Enterococcus"          = "#A8805A",
  "Nanosyncoccus"         = "#2E86AB",
  "Huintestinicola"       = "#9A5C7B",
  "Adlercreutzia"         = "#C45C8A",
  "Anaeroplasma"          = "#C49F6C",
  "Parabacteroides"       = "#A85A8A",
  "Taurinivorans"         = "#8E6B3E",
  "Angelakisella"         = "#6B5EA7",
  "Sangeribacter"         = "#8A5C3A",
  "Anaerotruncus"         = "#5C7B9A",
  "Faecalibaculum"        = "#4A7B8A",
  "Neglectibacter"        = "#4A235A",
  "Lachnospira"           = "#9A7B5C",
  "Dehalobacterium"       = "#2C4770",
  "Paeniclostridium"      = "#7B9A5C",
  "Coprobacillus"         = "#C4845A",
  "Escherichia"           = "#C0392B",
  "Mucispirillum"         = "#D4A853",
  "Sutterella"            = "#E67E22",
  "Treponema"             = "#A07830",
  "Rikenella"             = "#5A8AAA",
  "Stoquefichus"          = "#7D6608",
  "Bifidobacterium"       = "#264653",
  "Hominilimicola"        = "#6A9A6A",
  "Tyzzerella"            = "#5AA88A",
  "Prevotellamassilia"    = "#8A6A9A",
  "Vermiculatibacterium"  = "#8E44AD",
  "Proteus"               = "#C77B3A",
  "Amedibacillus"         = "#7B6914",
  "Malacoplasma"          = "#4A7B5C",
  "Clostridioides"        = "#9A4A3A",
  "Butyricicoccus"        = "#5C9A7B",
  "Parasutterella"        = "#7B5C9A",
  "Elusimicrobium"        = "#9A7B4A",
  "Bilophila"             = "#A8C5A0",
  "Klebsiella"            = "#C45A3A",
  "Zhenpiania"            = "#8B6914",
  "Staphylococcus"        = "#6B3A5C"
)

stopifnot(!any(duplicated(genus_col_c2_001)))

rel_c2_genus_all <- rel_c2_genus_all5 %>%
  filter(OTU %in% top_genus_c2_001) %>%
  mutate(Order_plot = OTU,
         Group = factor(Group, levels = comp2_groups)) %>%
  group_by(Sample, Group, Order_plot) %>%
  summarise(RelAbund = sum(RelAbund), .groups = "drop") %>%
  group_by(Group, Order_plot) %>%
  summarise(RelAbund = mean(RelAbund), .groups = "drop") %>%
  mutate(Order_plot = factor(Order_plot, levels = top_genus_c2_001))

p_c2_genus_all <- ggplot(rel_c2_genus_all, aes(x = Group, y = RelAbund, fill = Order_plot)) +
  geom_bar(stat = "identity", color = "black", linewidth = 0.3) +
  scale_fill_manual(values = genus_col_c2_001, breaks = top_genus_c2_001) +
  scale_x_discrete(limits = comp2_groups, labels = comp2_labels) +
  theme_bw() +
  theme(axis.text.x  = element_text(size = 11),
        axis.title.y = element_text(size = 13),
        legend.text  = element_text(size = 8),
        legend.title = element_text(size = 10),
        plot.title   = element_text(size = 15)) +
  labs(x = NULL, y = "Relative abundance (%)", fill = "Genus",
       title = "Relative Abundance at Genus Level")

p_c2_genus_all

# Husk: "Genera with mean relative abundance ≥ 0.01% shown"
# ggsave("./outputs/comp2_relabund_genus_all.jpeg", dpi = 300, height = 7, width = 16)

Per sample

# Subset to all comp2 groups
ps_rel_c2_persample <- subset_samples(ps_rel,
  sample_data(ps_rel)$Group %in% comp2_groups)

ps_rel_c2_persample <- subset_taxa(ps_rel_c2_persample,
  !is.na(Genus) & Genus != "" & Genus != "__")

ps_rel_c2_persample <- prune_taxa(taxa_sums(ps_rel_c2_persample) > 0, ps_rel_c2_persample)

# Agglomerate to genus level
ps_rel_c2_persample_genus <- tax_glom(ps_rel_c2_persample, taxrank = "Genus", NArm = TRUE)
taxa_names(ps_rel_c2_persample_genus) <- make.unique(
  as.character(tax_table(ps_rel_c2_persample_genus)[, "Genus"]))

# Melt and calculate relative abundance per sample
rel_c2_genus_persample <- psmelt(ps_rel_c2_persample_genus) %>%
  group_by(Sample) %>%
  mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
  ungroup()

# Plot data — reuse top_genus_c2_filt already defined in your pipeline
rel_c2_genus_sample <- rel_c2_genus_persample %>%
  mutate(Genus_plot = ifelse(OTU %in% top_genus_c2_filt, OTU, "Other"),
         Group = factor(Group, levels = comp2_groups)) %>%
  group_by(Sample, Group, Genus_plot) %>%
  summarise(RelAbund = sum(RelAbund), .groups = "drop") %>%
  mutate(Genus_plot = factor(Genus_plot,
                             levels = c(top_genus_c2_filt, "Other")))

# Per-sample stacked barplot
p_genus_c2_persample <- ggplot(rel_c2_genus_sample,
                                aes(x = Sample, y = RelAbund, fill = Genus_plot)) +
  geom_bar(stat = "identity", color = "black", linewidth = 0.2) +
  facet_wrap(~Group, scales = "free_x", labeller = labeller(Group = comp2_labels)) +
  scale_fill_manual(values = c(genus_col_c2, "Other" = "grey80"),
                    breaks = c(top_genus_c2_filt, "Other")) +
  theme_bw() +
  theme(axis.text.x  = element_blank(),
        axis.ticks.x = element_blank(),
        axis.title.y = element_text(size = 13),
        legend.text  = element_text(size = 9),
        legend.title = element_text(size = 10),
        strip.text   = element_text(size = 11),
        plot.title   = element_text(size = 15)) +
  labs(x     = "Sample",
       y     = "Relative abundance (%)",
       fill  = "Genus",
       title = "Relative Abundance at Genus Level")

p_genus_c2_persample

# ggsave("./outputs/comp2_genus_persample.jpeg", plot = p_genus_c2_persample, dpi = 300, height = 7, width = 18)

term fmt groups vs donor

# ============================================================
# GENUS NIVEAU — Term BALB/c grupper vs donorer
# ============================================================

comp2_term_groups <- c("BALB/c_Yes_SPF_Term", "BALB/c_No_Wild_colon")
comp2_term_donors <- c("Donor_SPF", "Donor_Wild")

ps_rel_c2_donors <- subset_samples(ps_rel,
  sample_data(ps_rel)$Group %in% c(comp2_term_groups, comp2_term_donors) &
  !sample_data(ps_rel)$Study %in% c(
    "Donor_inoculum_SPF_resampling_1",
    "Donor_inoculum_SPF_resampling_2",
    "Donor_inoculum_Wild_resampling_1",
    "Donor_inoculum_Wild_resampling_2"))

ps_rel_c2_genus_donors <- ps_rel_c2_donors
ps_rel_c2_genus_donors <- subset_taxa(ps_rel_c2_genus_donors,
                               !is.na(Genus) & Genus != "__" & Genus != "")
ps_rel_c2_genus_donors <- prune_taxa(taxa_sums(ps_rel_c2_genus_donors) > 0, ps_rel_c2_genus_donors)

glom_c2_genus_donors <- gloomer(ps = ps_rel_c2_genus_donors, taxa_level = "Genus", NArm = TRUE)

unique(tax_table(glom_c2_genus_donors)[, "Genus"])

## Taxonomy Table:     [78 taxa by 1 taxonomic ranks]:
##                       Genus                  
## Adlercreutzia         "Adlercreutzia"        
## Staphylococcus        "Staphylococcus"       
## Enterococcus          "Enterococcus"         
## Lactobacillus         "Lactobacillus"        
## Ligilactobacillus     "Ligilactobacillus"    
## Limosilactobacillus   "Limosilactobacillus"  
## Lactococcus           "Lactococcus"          
## Streptococcus         "Streptococcus"        
## Candidatus Borkfalkia "Candidatus Borkfalkia"
## Christensenella       "Christensenella"      
## Acutalibacter         "Acutalibacter"        
## Butyricicoccus        "Butyricicoccus"       
## Clostridium           "Clostridium"          
## Eubacterium           "Eubacterium"          
## Zhenpiania            "Zhenpiania"           
## Anaerotruncus         "Anaerotruncus"        
## Angelakisella         "Angelakisella"        
## Hominilimicola        "Hominilimicola"       
## Neglectibacter        "Neglectibacter"       
## Vermiculatibacterium  "Vermiculatibacterium" 
## Dehalobacterium       "Dehalobacterium"      
## Pumilibacter          "Pumilibacter"         
## Anaerotignum          "Anaerotignum"         
## Acetatifactor         "Acetatifactor"        
## Anaerostipes          "Anaerostipes"         
## Blautia               "Blautia"              
## Enterocloster         "Enterocloster"        
## Fusimonas             "Fusimonas"            
## Jutongia              "Jutongia"             
## Lachnospira           "Lachnospira"          
## Mediterraneibacter    "Mediterraneibacter"   
## Parablautia           "Parablautia"          
## Petralouisia          "Petralouisia"         
## Roseburia             "Roseburia"            
## Schaedlerella         "Schaedlerella"        
## Sporofaciens          "Sporofaciens"         
## Tyzzerella            "Tyzzerella"           
## Clostridioides        "Clostridioides"       
## Paeniclostridium      "Paeniclostridium"     
## Coprobacillus         "Coprobacillus"        
## Thomasclavelia        "Thomasclavelia"       
## Allobaculum           "Allobaculum"          
## Amedibacillus         "Amedibacillus"        
## Dubosiella            "Dubosiella"           
## Faecalibaculum        "Faecalibaculum"       
## Stoquefichus          "Stoquefichus"         
## Bacteroides           "Bacteroides"          
## Phocaeicola           "Phocaeicola"          
## Duncaniella           "Duncaniella"          
## Heminiphilus          "Heminiphilus"         
## Muribaculum           "Muribaculum"          
## Paramuribaculum       "Paramuribaculum"      
## Sangeribacter         "Sangeribacter"        
## Odoribacter           "Odoribacter"          
## Palleniella           "Palleniella"          
## Paraprevotella        "Paraprevotella"       
## Segatella             "Segatella"            
## Xylanibacter          "Xylanibacter"         
## Alistipes             "Alistipes"            
## Rikenella             "Rikenella"            
## Parabacteroides       "Parabacteroides"      
## Helicobacter          "Helicobacter"         
## Nanosyncoccus         "Nanosyncoccus"        
## Mucispirillum         "Mucispirillum"        
## Elusimicrobium        "Elusimicrobium"       
## Fusobacterium         "Fusobacterium"        
## Metamycoplasma        "Metamycoplasma"       
## Malacoplasma          "Malacoplasma"         
## Anaeroplasma          "Anaeroplasma"         
## Parasutterella        "Parasutterella"       
## Sutterella            "Sutterella"           
## Escherichia           "Escherichia"          
## Klebsiella            "Klebsiella"           
## Proteus               "Proteus"              
## Bilophila             "Bilophila"            
## Desulfovibrio         "Desulfovibrio"        
## Taurinivorans         "Taurinivorans"        
## Akkermansia           "Akkermansia"

rel_c2_genus_donors <- glom_c2_genus_donors %>%
  psmelt() %>%
  group_by(Sample) %>%
  mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
  ungroup()

top_genus_c2_filt_donors <- rel_c2_genus_donors %>%
  group_by(OTU, Group) %>%
  summarise(m = mean(RelAbund), .groups = "drop") %>%
  group_by(OTU) %>%
  summarise(max_m = max(m)) %>%
  filter(max_m >= 1) %>%
  arrange(desc(max_m)) %>%
  pull(OTU)

# Farvepalette til donors-plot
genus_col_c2_donors <- c(
  "Lactobacillus"         = "#C4936A",
  "Bacteroides"           = "#6B9BA4",
  "Ligilactobacillus"     = "#A23B72",
  "Fusimonas"             = "#E84855",
  "Christensenella"       = "#C77B3A",
  "Limosilactobacillus"   = "#C47F7F",
  "Alistipes"             = "#9B7BAD",
  "Clostridium"           = "#1D3557",
  "Mediterraneibacter"    = "#D4A853",
  "Schaedlerella"         = "#8FAF7E",
  "Acetatifactor"         = "#5C8A4A",
  "Duncaniella"           = "#3D7A8A",
  "Palleniella"           = "#B5838D",
  "Streptococcus"         = "#E76F51",
  "Phocaeicola"           = "#1A5276",
  "Allobaculum"           = "#E8C3A0",
  "Roseburia"             = "#5A6AA8",
  "Xylanibacter"          = "#E9C46A",
  "Odoribacter"           = "#80B918",
  "Helicobacter"          = "#2A9D8F",
  "Candidatus Borkfalkia" = "#5C7B3A",
  "Thomasclavelia"        = "#7B5EA7",
  "Blautia"               = "#4A8A7B",
  "Paraprevotella"        = "#3D5A80",
  "Anaerotignum"          = "#F18F01",
  "Heminiphilus"          = "#7B4F8A",
  "Desulfovibrio"         = "#A8DADC",
  "Pumilibacter"          = "#8A7B4A",
  "Muribaculum"           = "#6B4226",
  "Dubosiella"            = "#C45C8A",
  "Enterocloster"         = "#8AA85A",
  "Segatella"             = "#6B8E23",
  "Nanosyncoccus"         = "#2E86AB",
  "Eubacterium"           = "#C4745A",
  "Acutalibacter"         = "#8A4A5C",
  "Sporofaciens"          = "#7B8A4A",
  "Metamycoplasma"        = "#5C7B9A",
  "Adlercreutzia"         = "#C49A6C",
  "Parablautia"           = "#FFD700",
  "Fusobacterium"         = "#E84040",
  "Akkermansia"           = "#5C3A8A",
  "Other"                 = "grey80"
)

stopifnot(!any(duplicated(genus_col_c2_donors)))

rel_c2_genus_avg_donors <- rel_c2_genus_donors %>%
  mutate(Order_plot = ifelse(OTU %in% top_genus_c2_filt_donors, OTU, "Other"),
         Group = factor(Group, levels = c(
           "BALB/c_Yes_SPF_Term", "Donor_SPF",
           "BALB/c_No_Wild_colon", "Donor_Wild"
         ))) %>%
  group_by(Sample, Group, Order_plot) %>%
  summarise(RelAbund = sum(RelAbund), .groups = "drop") %>%
  group_by(Group, Order_plot) %>%
  summarise(RelAbund = mean(RelAbund), .groups = "drop") %>%
  mutate(Order_plot = factor(Order_plot,
                             levels = c(top_genus_c2_filt_donors, "Other")))

p_genus_c2_donors <- ggplot(rel_c2_genus_avg_donors,
                            aes(x = Group, y = RelAbund, fill = Order_plot)) +
  geom_bar(stat = "identity", color = "black", linewidth = 0.3) +
  scale_fill_manual(values = genus_col_c2_donors,
                    breaks = c(top_genus_c2_filt_donors, "Other")) +
  scale_x_discrete(
    limits = c("BALB/c_Yes_SPF_Term", "Donor_SPF",
               "BALB/c_No_Wild_colon", "Donor_Wild"),
    labels = c(
      "BALB/c_Yes_SPF_Term"  = "BALB/c\nImmunized\n+ SPF FMT",
      "Donor_SPF"            = "Donor\nSPF",
      "BALB/c_No_Wild_colon" = "BALB/c\n+ Wild FMT",
      "Donor_Wild"           = "Donor\nWild"
    )) +
  theme_bw() +
  theme(axis.text.x  = element_text(size = 11),
        axis.title.y = element_text(size = 13),
        legend.text  = element_text(size = 9),
        legend.title = element_text(size = 10),
        plot.title   = element_text(size = 15),
        plot.subtitle = element_text(size = 13)) +
  labs(x = NULL, y = "Relative abundance (%)", fill = "Genus",
       title = "Relative Abundance at Genus Level",
       subtitle = "BALB/c Mice at Term by FMT Source, Including Donors")

p_genus_c2_donors

# ggsave("./outputs/comp2_relabund_genus_donors.jpeg", dpi = 300, height = 7, width = 13)

Deseq - Differential abundance

Wild FMT as ref

set.seed(123)

library(DESeq2)
library(ggtext)
library(ggrepel)

# ============================================================
# DIFFERENTIAL ABUNDANCE — DESeq2
# Comparison 2: Wild-spectrum groups — Wild FMT as reference
# Groups: BALB/c_No_Wild_colon, Wildling, Pet Shop, Wild
# BALB/c_Yes_SPF_Term excluded (distinct colonization trajectory,
# see PCoA — forms isolated cluster separate from wild-spectrum groups)
# ============================================================

# Prepare phyloseq object — use non-rarefied (pst_original) for DESeq2
# Subset to wild-spectrum groups only
comp2_deseq_groups <- c("BALB/c_No_Wild_colon", "Wildling", "Pet Shop", "Wild")

ps_c2_deseq <- subset_samples(pst_original,
  sample_data(pst_original)$Group %in% comp2_deseq_groups)

# Remove taxa with zero counts across these samples
ps_c2_deseq <- prune_taxa(taxa_sums(ps_c2_deseq) > 0, ps_c2_deseq)

# Prune unassigned at multiple ranks before glomming — prevents duplicate taxa_names in gloomer
ps_c2_deseq <- subset_taxa(ps_c2_deseq,
  !is.na(Genus)  & Genus  != "" & Genus  != "__" &
  !is.na(Family) & Family != "" & Family != "__" &
  !is.na(Order)  & Order  != "" & Order  != "__")

ps_c2_deseq <- prune_taxa(taxa_sums(ps_c2_deseq) > 0, ps_c2_deseq)

# Check for duplicate genera before glomming
tx_check <- as.data.frame(tax_table(ps_c2_deseq))
tx_check[duplicated(tx_check$Genus) | duplicated(tx_check$Genus, fromLast = TRUE),
         c("Family", "Genus")]

# Agglomerate to genus level
# Use tax_glom directly instead of gloomer for DESeq2 object

ps_c2_deseq_genus <- tax_glom(ps_c2_deseq, taxrank = "Genus", NArm = TRUE)

# Make genus names unique before assigning as taxa names
genus_names <- tax_table(ps_c2_deseq_genus)[, "Genus"]
taxa_names(ps_c2_deseq_genus) <- make.unique(as.character(genus_names))

# Set Group factor — BALB/c_No_Wild_colon as reference
sample_data(ps_c2_deseq_genus)$Group <- factor(
  sample_data(ps_c2_deseq_genus)$Group,
  levels = c("BALB/c_No_Wild_colon",   # reference
             "Wildling",
             "Pet Shop",
             "Wild")
)

# Manual conversion — bypasses phyloseq_to_deseq2 entirely
count_mat <- as(otu_table(ps_c2_deseq_genus), "matrix")

# Ensure taxa are rows
if (!taxa_are_rows(ps_c2_deseq_genus)) {
  count_mat <- t(count_mat)
}

# Round to integers (DESeq2 requires integer counts)
count_mat <- round(count_mat)

# Sample metadata
col_data <- data.frame(sample_data(ps_c2_deseq_genus))
col_data$Group <- factor(col_data$Group,
                         levels = c("BALB/c_No_Wild_colon",
                                    "Wildling",
                                    "Pet Shop",
                                    "Wild"))

# Build DESeq2 object directly
dds_c2 <- DESeqDataSetFromMatrix(
  countData = count_mat,
  colData   = col_data,
  design    = ~ Group
)

dds_c2 <- DESeq(dds_c2)

# Check levels and result names
levels(colData(dds_c2)$Group)
resultsNames(dds_c2)

# ============================================================
# EXTRACT RESULTS — three contrasts vs BALB/c + Wild FMT
# ============================================================

# 1. Wildling vs BALB/c + Wild FMT
res_Wildling <- results(dds_c2, contrast = c("Group", "Wildling", "BALB/c_No_Wild_colon"))
df_Wildling  <- as.data.frame(res_Wildling) %>%
  mutate(taxon    = rownames(.),
         contrast = "Wildling vs Wild FMT")

# 2. Wild vs BALB/c + Wild FMT
res_Wild <- results(dds_c2, contrast = c("Group", "Wild", "BALB/c_No_Wild_colon"))
df_Wild  <- as.data.frame(res_Wild) %>%
  mutate(taxon    = rownames(.),
         contrast = "Wild vs Wild FMT")

# 3. Pet Shop vs BALB/c + Wild FMT
res_PetShop <- results(dds_c2, contrast = c("Group", "Pet Shop", "BALB/c_No_Wild_colon"))
df_PetShop  <- as.data.frame(res_PetShop) %>%
  mutate(taxon    = rownames(.),
         contrast = "Pet Shop vs Wild FMT")

# Significant taxa per contrast
cat("Significant taxa — Wildling vs Wild FMT:",
    nrow(subset(df_Wildling,  !is.na(padj) & padj < 0.05)), "\n")
cat("Significant taxa — Wild vs Wild FMT:",
    nrow(subset(df_Wild,      !is.na(padj) & padj < 0.05)), "\n")
cat("Significant taxa — Pet Shop vs Wild FMT:",
    nrow(subset(df_PetShop,   !is.na(padj) & padj < 0.05)), "\n")

# ============================================================
# VOLCANO PLOT FUNCTION
# ============================================================

volcano_plot_c2 <- function(df, contrast_name, col_pos, col_neg) {
  comparison_label <- sub(" vs .*", "", contrast_name)
  reference_label  <- sub(".* vs ", "", contrast_name)
  
  df <- df %>%
    filter(!is.na(padj)) %>%
    mutate(diffexpressed = case_when(
      log2FoldChange > 0 & padj < 0.05 ~ paste0("Higher in ", comparison_label),
      log2FoldChange < 0 & padj < 0.05 ~ paste0("Higher in ", reference_label),
      TRUE                             ~ "ns"
    ),
    diffexpressed = factor(diffexpressed,
                           levels = c(paste0("Higher in ", comparison_label),
                                      paste0("Higher in ", reference_label),
                                      "ns")))
  
  ggplot(df, aes(x = log2FoldChange,
                 y = -log10(padj),
                 col = diffexpressed)) +
    geom_point(size = 2.5, alpha = 0.8) +
    geom_vline(xintercept = 0,            lty = 2, color = "grey50") +
    geom_hline(yintercept = -log10(0.05), lty = 2, color = "grey50") +
    geom_label_repel(
      data     = df %>% filter(padj < 0.05),
      aes(label = taxon),
      size         = 3,
      max.overlaps = 20,
      fontface     = "italic",
      label.r      = 0.10
    ) +
    scale_color_manual(values = c(
      setNames(col_pos, paste0("Higher in ", comparison_label)),
      setNames(col_neg, paste0("Higher in ", reference_label)),
      "ns" = "grey70"
    )) +
    theme_bw() +
    theme(legend.title  = element_text(),
          axis.title    = element_text(),
          plot.title    = element_markdown(size = 15),  
          plot.subtitle = element_markdown(size = 13)   
    ) +
    labs(title    = "Differential Abundance at Genus Level",
         subtitle = paste0(contrast_name),
         y        = "-log10 (adjusted p-value)",
         x        = "log2 Fold Change",
         col      = NULL,
         caption  = "Benjamini-Hochberg correction, only taxa with adjusted p < 0.05 shown")
}

# ============================================================
# GENERATE VOLCANO PLOTS
# ============================================================

p_volcano_wildling <- volcano_plot_c2(df_Wildling,
                                      contrast_name = "Wildling vs Wild FMT",
                                      col_pos       = "#8B6F47",
                                      col_neg       = "#3E5871")

p_volcano_wild     <- volcano_plot_c2(df_Wild,
                                      contrast_name = "Wild vs Wild FMT",
                                      col_pos       = "#6B2737",
                                      col_neg       = "#3E5871")

p_volcano_petshop  <- volcano_plot_c2(df_PetShop,
                                      contrast_name = "Pet Shop vs Wild FMT",
                                      col_pos       = "#C9873E",
                                      col_neg       = "#3E5871")

p_volcano_wildling
p_volcano_wild
p_volcano_petshop

#ggsave("./outputs/comp2_volcano_wildFMT_wildling.jpeg", plot = p_volcano_wildling, width = 10, height = 6, dpi = 300)
#ggsave("./outputs/comp2_volcano_wildFMT_wild.jpeg",     plot = p_volcano_wild,     width = 10, height = 6, dpi = 300)
#ggsave("./outputs/comp2_volcano_wildFMT_petshop.jpeg",  plot = p_volcano_petshop,  width = 10, height = 6, dpi = 300)

# ============================================================
# LFC BAR CHART FUNCTION — Comp2 comparisons vs Wild FMT
# ============================================================

lfc_bar_chart_c2 <- function(df, contrast_name, col_pos, col_neg) {
  
  comparison_label <- sub(" vs .*", "", contrast_name)
  reference_label  <- sub(".* vs ", "", contrast_name)
  
  df_sig <- df %>%
    filter(!is.na(padj) & padj < 0.05) %>%
    mutate(
      diffexpressed = case_when(
        log2FoldChange > 0 ~ paste0("Higher in ", comparison_label),
        log2FoldChange < 0 ~ paste0("Higher in ", reference_label)
      ),
      diffexpressed = factor(diffexpressed,
                             levels = c(paste0("Higher in ", comparison_label),
                                        paste0("Higher in ", reference_label))),
      taxon = factor(taxon, levels = taxon[order(log2FoldChange)])
    )
  
  ggplot(df_sig, aes(x = log2FoldChange, y = taxon, fill = diffexpressed)) +
    geom_bar(stat = "identity") +
    geom_errorbarh(aes(xmin = log2FoldChange - lfcSE,
                       xmax = log2FoldChange + lfcSE),
                   height = 0.3, color = "grey30") +
    geom_vline(xintercept = 0, lty = 2, color = "grey50") +
    scale_fill_manual(values = c(
      setNames(col_pos, paste0("Higher in ", comparison_label)),
      setNames(col_neg, paste0("Higher in ", reference_label))
    )) +
    theme_bw() +
    theme(
      axis.text.y    = element_text(face = "italic", size = 10),
      axis.title     = element_text(),
      legend.title   = element_text(),
      plot.title     = element_markdown(size = 15),
      plot.subtitle  = element_markdown(size = 13)
    ) +
    labs(
      title    = "Differential Abundance at Genus Level",
      subtitle = contrast_name,
      x        = "log2 Fold Change",
      y        = NULL,
      fill     = NULL,
      caption  = "Benjamini-Hochberg correction, only taxa with adjusted p < 0.05 shown"
    )
}

# ============================================================
# GENERATE BAR CHARTS
# ============================================================

p_lfc_wildling <- lfc_bar_chart_c2(df_Wildling,
                                    contrast_name = "Wildling vs Wild FMT",
                                    col_pos       = "#8B6F47",
                                    col_neg       = "#3E5871")

p_lfc_wild     <- lfc_bar_chart_c2(df_Wild,
                                    contrast_name = "Wild vs Wild FMT",
                                    col_pos       = "#6B2737",
                                    col_neg       = "#3E5871")

p_lfc_petshop  <- lfc_bar_chart_c2(df_PetShop,
                                    contrast_name = "Pet Shop vs Wild FMT",
                                    col_pos       = "#C9873E",
                                    col_neg       = "#3E5871")

p_lfc_wildling
p_lfc_wild
p_lfc_petshop

# ggsave("./outputs/comp2_lfc_wildFMT_wildling.jpeg", plot = p_lfc_wildling, width = 10, height = 8, dpi = 300)
# ggsave("./outputs/comp2_lfc_wildFMT_wild.jpeg",     plot = p_lfc_wild,     width = 10, height = 8, dpi = 300)
# ggsave("./outputs/comp2_lfc_wildFMT_petshop.jpeg",  plot = p_lfc_petshop,  width = 10, height = 10, dpi = 300)

# Get significant taxa for each comparison with full stats
write.csv(
  df_Wildling %>%
    filter(!is.na(padj) & padj < 0.05) %>%
    select(taxon, log2FoldChange, lfcSE, padj) %>%
    arrange(desc(log2FoldChange)),
  file = "./outputs/comp2_deseq2_wildling_vs_wildfmt.csv",
  row.names = FALSE
)

write.csv(
  df_Wild %>%
    filter(!is.na(padj) & padj < 0.05) %>%
    select(taxon, log2FoldChange, lfcSE, padj) %>%
    arrange(desc(log2FoldChange)),
  file = "./outputs/comp2_deseq2_wild_vs_wildfmt.csv",
  row.names = FALSE
)

write.csv(
  df_PetShop %>%
    filter(!is.na(padj) & padj < 0.05) %>%
    select(taxon, log2FoldChange, lfcSE, padj) %>%
    arrange(desc(log2FoldChange)),
  file = "./outputs/comp2_deseq2_petshop_vs_wildfmt.csv",
  row.names = FALSE
)

# Print counts
cat("Wildling vs Wild FMT significant taxa:", nrow(df_Wildling %>% filter(!is.na(padj) & padj < 0.05)), "\n")
cat("Wild vs Wild FMT significant taxa:", nrow(df_Wild %>% filter(!is.na(padj) & padj < 0.05)), "\n")
cat("Pet Shop vs Wild FMT significant taxa:", nrow(df_PetShop %>% filter(!is.na(padj) & padj < 0.05)), "\n")

widling as ref

set.seed(123)

library(DESeq2)
library(ggtext)
library(ggrepel)

# ============================================================
# DIFFERENTIAL ABUNDANCE — DESeq2
# Comparison 2: Wildling as reference
# Contrasts: Wild vs Wildling, Pet Shop vs Wildling
# ============================================================

# Relevel — Wildling as reference
dds_c2$Group <- relevel(dds_c2$Group, ref = "Wildling")

# Re-run DESeq2 with new reference
dds_c2 <- DESeq(dds_c2)

resultsNames(dds_c2)

# Extract contrasts
res_Wild    <- results(dds_c2, contrast = c("Group", "Wild",     "Wildling"))
res_PetShop <- results(dds_c2, contrast = c("Group", "Pet Shop", "Wildling"))

df_Wild    <- as.data.frame(res_Wild)    %>% mutate(taxon = rownames(.))
df_PetShop <- as.data.frame(res_PetShop) %>% mutate(taxon = rownames(.))

cat("Significant taxa — Wild vs Wildling:",
    nrow(subset(df_Wild,    !is.na(padj) & padj < 0.05)), "\n")
cat("Significant taxa — Pet Shop vs Wildling:",
    nrow(subset(df_PetShop, !is.na(padj) & padj < 0.05)), "\n")

# ============================================================
# GENERATE VOLCANO PLOTS
# Colors match fill_palette_c2
# ============================================================

p_volcano_wildling_wild    <- volcano_plot_c2(df_Wild,
                                     contrast_name = "Wild vs Wildling",
                                     col_pos       = "#6B2737",   # Wild burgundy
                                     col_neg       = "#8B6F47")   # Wildling brown

p_volcano_wildling_petshop <- volcano_plot_c2(df_PetShop,
                                     contrast_name = "Pet Shop vs Wildling",
                                     col_pos       = "#C9873E",   # Pet Shop orange
                                     col_neg       = "#8B6F47")   # Wildling brown

p_volcano_wildling_wild
p_volcano_wildling_petshop

# ggsave("./outputs/comp2_volcano_wildlingVSwild.jpeg",    plot = p_volcano_wild,    width = 10, height = 6, dpi = 300)
# ggsave("./outputs/comp2_volcano_widlingVSpetshop.jpeg", plot = p_volcano_petshop, width = 10, height = 6, dpi = 300)

p_lfc_wildling_petshop <- lfc_bar_chart_c2(
  df_PetShop,
  contrast_name = "Pet Shop vs Wildling",
  col_pos       = "#C9873E",   # Pet Shop orange
  col_neg       = "#8B6F47"    # Wildling brown
)

p_lfc_wildling_petshop

ggsave("./outputs/comp2_da_wildlingVSpetshop.jpeg",
       plot = p_lfc_wildling_petshop, width = 10, height = 8, dpi = 300)

write.csv(
  df_PetShop %>%
    filter(!is.na(padj) & padj < 0.05) %>%
    select(taxon, log2FoldChange, lfcSE, padj) %>%
    arrange(desc(log2FoldChange)),
  file = "./outputs/comp2_deseq2_petshop_vs_wildling.csv",
  row.names = FALSE
)

cat("Pet Shop vs Wildling significant taxa:", nrow(df_PetShop %>% filter(!is.na(padj) & padj < 0.05)), "\n")

# ============================================================
# Helicobacter species prevalence — from non-agglomerated ps_rel
# ============================================================

check_groups <- c("BALB/c_No_Wild_colon", "Wildling", "Pet Shop", "Wild")

heli_otu_ids <- taxa_names(ps_rel)[
  grepl("G__Helicobacter", taxa_names(ps_rel))
]

ps_heli_prev <- subset_samples(ps_rel,
  sample_data(ps_rel)$Group %in% check_groups)
ps_heli_prev <- prune_taxa(heli_otu_ids, ps_heli_prev)
ps_heli_prev <- prune_taxa(taxa_sums(ps_heli_prev) > 0, ps_heli_prev)

# Clean species names
heli_name_map <- data.frame(otu_id = taxa_names(ps_heli_prev)) %>%
  mutate(
    species = sub(".*S__", "", otu_id),
    species = ifelse(grepl("^__", species), 
                     "Helicobacter sp. (unclassified)", species),
    species = sub(" strain.*", "", species),
    species = sub(" DSM.*",    "", species),
    species = sub(" MIT.*",    "", species),
    species = sub(" CMRI.*",   "", species),
    species = trimws(species)
  )

taxa_names(ps_heli_prev) <- heli_name_map$species

# Melt and calculate prevalence per species per group
heli_melt_prev <- psmelt(ps_heli_prev) %>%
  group_by(OTU, Group) %>%
  summarise(
    n_present  = sum(Abundance > 0),
    n_total    = n(),
    prevalence = round(n_present / n_total, 2),
    .groups    = "drop"
  ) %>%
  pivot_wider(
    names_from  = Group,
    values_from = c(n_present, n_total, prevalence),
    values_fill = 0
  ) %>%
  arrange(OTU)

print(heli_melt_prev %>% select(OTU, starts_with("prevalence")), n = Inf)

## # A tibble: 3 × 5
##   OTU           prevalence_BALB/c_No…¹ prevalence_Wildling `prevalence_Pet Shop`
##   <chr>                          <dbl>               <dbl>                 <dbl>
## 1 Helicobacter…                   0.21                 0.9                  0.85
## 2 Helicobacter…                   1                    0.4                  0.85
## 3 K__Bacteria;…                   0.86                 1                    1   
## # ℹ abbreviated name: ¹`prevalence_BALB/c_No_Wild_colon`
## # ℹ 1 more variable: prevalence_Wild <dbl>

Wild mice exclusive genera

# ============================================================
# GENERA EXCLUSIVE TO WILD MICE — Comparison 2 groups
# ============================================================

# Agglomerate ps_rel to genus level for all comp2 groups
ps_rel_c2_excl <- subset_samples(ps_rel,
  sample_data(ps_rel)$Group %in% comp2_groups)

ps_rel_c2_excl <- subset_taxa(ps_rel_c2_excl,
  !is.na(Genus) & Genus != "" & Genus != "__")

ps_rel_c2_excl <- prune_taxa(taxa_sums(ps_rel_c2_excl) > 0, ps_rel_c2_excl)

ps_rel_c2_excl_genus <- tax_glom(ps_rel_c2_excl, taxrank = "Genus", NArm = TRUE)
taxa_names(ps_rel_c2_excl_genus) <- make.unique(
  as.character(tax_table(ps_rel_c2_excl_genus)[, "Genus"]))

# Melt and calculate relative abundance per sample
rel_c2_excl <- psmelt(ps_rel_c2_excl_genus) %>%
  group_by(Sample) %>%
  mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
  ungroup()

# Calculate prevalence and mean relative abundance per genus per group
prev_c2 <- rel_c2_excl %>%
  group_by(OTU, Group) %>%
  summarise(
    n_present  = sum(Abundance > 0),
    n_total    = n(),
    prevalence = n_present / n_total,
    mean_rel   = mean(RelAbund),
    .groups    = "drop"
  ) %>%
  pivot_wider(names_from  = Group,
              values_from = c(n_present, n_total, prevalence, mean_rel),
              values_fill = 0)

wild_exclusive_relaxed2 <- prev_c2 %>%
  filter(
    prevalence_Wild                   >= 0,  
    prevalence_Wildling               == 0,
    `prevalence_Pet Shop`             == 0,
    `prevalence_BALB/c_Yes_SPF_Term`  == 0,
    `prevalence_BALB/c_No_Wild_colon` == 0
  ) %>%
  arrange(desc(mean_rel_Wild))

cat("Genera found in Wild but absent in all other groups:", nrow(wild_exclusive_relaxed2), "\n\n")

## Genera found in Wild but absent in all other groups: 0

wild_exclusive_relaxed2 %>%
  select(OTU, n_present_Wild, n_total_Wild, prevalence_Wild, mean_rel_Wild) %>%
  print(n = Inf)

## # A tibble: 0 × 5
## # ℹ 5 variables: OTU <chr>, n_present_Wild <int>, n_total_Wild <int>,
## #   prevalence_Wild <dbl>, mean_rel_Wild <dbl>

Helicobacter

# ============================================================
# Helicobacter relative abundance — Comp2 groups
# ============================================================

# Subset to comp2 groups, glom to genus, keep only Helicobacter
ps_heli_c2 <- subset_samples(ps_rel,
  sample_data(ps_rel)$Group %in% comp2_groups)

ps_heli_c2 <- subset_taxa(ps_heli_c2,
  !is.na(Genus) & Genus == "Helicobacter")

ps_heli_c2 <- prune_taxa(taxa_sums(ps_heli_c2) > 0, ps_heli_c2)

# Aggregate to genus level
ps_heli_c2_glom <- tax_glom(ps_heli_c2, taxrank = "Genus", NArm = TRUE)

# Melt and compute relative abundance within each sample
heli_c2_df <- psmelt(ps_heli_c2_glom) %>%
  group_by(Sample) %>%
  mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
  ungroup() %>%
  mutate(Group = factor(Group, levels = comp2_groups))

# Quick summary table: mean ± SD per group
heli_c2_df %>%
  group_by(Group) %>%
  summarise(
    n         = n(),
    mean_rel  = round(mean(RelAbund), 3),
    sd_rel    = round(sd(RelAbund),   3),
    n_present = sum(Abundance > 0),
    prevalence = round(n_present / n, 2),
    .groups = "drop"
  )

## # A tibble: 5 × 6
##   Group                    n mean_rel sd_rel n_present prevalence
##   <fct>                <int>    <dbl>  <dbl>     <int>      <dbl>
## 1 BALB/c_Yes_SPF_Term     14      NaN     NA         4       0.29
## 2 BALB/c_No_Wild_colon    14      100      0        14       1   
## 3 Wildling                10      100      0        10       1   
## 4 Pet Shop                13      100      0        13       1   
## 5 Wild                     4      NaN     NA         3       0.75

heli_c2_df <- psmelt(ps_heli_c2_glom) %>%
  mutate(Group = factor(Group, levels = comp2_groups))

# Summary per group
heli_c2_df %>%
  group_by(Group) %>%
  summarise(
    n          = n(),
    mean_rel   = round(mean(Abundance), 3),   # Abundance IS already % from ps_rel
    sd_rel     = round(sd(Abundance),   3),
    n_present  = sum(Abundance > 0),
    prevalence = round(n_present / n(),  2),
    .groups    = "drop"
  )

## # A tibble: 5 × 6
##   Group                    n mean_rel sd_rel n_present prevalence
##   <fct>                <int>    <dbl>  <dbl>     <int>      <dbl>
## 1 BALB/c_Yes_SPF_Term     14    0.004  0.006         4       0.29
## 2 BALB/c_No_Wild_colon    14    0.376  0.534        14       1   
## 3 Wildling                10    0.541  0.475        10       1   
## 4 Pet Shop                13    2.33   4.95         13       1   
## 5 Wild                     4    0.111  0.126         3       0.75

rel_c2_excl %>%
  filter(OTU == "Helicobacter") %>%
  group_by(Group) %>%
  summarise(
    n_present  = sum(Abundance > 0),
    n_total    = n(),
    prevalence = n_present / n_total,
    mean_rel   = round(mean(RelAbund), 3),
    .groups    = "drop"
  )

## # A tibble: 5 × 5
##   Group                n_present n_total prevalence mean_rel
##   <fct>                    <int>   <int>      <dbl>    <dbl>
## 1 BALB/c_Yes_SPF_Term          4      14      0.286    0.013
## 2 BALB/c_No_Wild_colon        14      14      1        1.06 
## 3 Wildling                    10      10      1        0.777
## 4 Pet Shop                    13      13      1        3.99 
## 5 Wild                         3       4      0.75     0.681

heli_c2 <- rel_c2_excl %>%
  filter(OTU == "Helicobacter") %>%
  mutate(Group = factor(Group, levels = comp2_groups))

p_heli_c2 <- ggplot(heli_c2, aes(x = Group, y = RelAbund, fill = Group)) +
  geom_boxplot(width = 0.4, outlier.alpha = 0) +
  geom_jitter(aes(shape = Sex), width = 0.15, size = 2.5,
              alpha = 0.8, stroke = 1) +
  scale_fill_manual(values = fill_palette_c2) +
  scale_shape_manual(values = c("Female" = 21, "Male" = 24),
                     na.translate = FALSE) +
  scale_y_continuous(limits = c(0,30), breaks = c(0, 5, 10, 15, 20, 25, 30)) +
  scale_x_discrete(labels = comp2_labels) +
  theme_bw() +
  theme(axis.text.x  = element_text(angle = 25, hjust = 1, size = 11),
        axis.title.y = element_text(size = 13),
        legend.text  = element_text(size = 10),
        plot.title   = element_markdown()) +
  guides(fill  = "none",
         shape = guide_legend(title = "Sex")) +
  labs(x     = NULL,
       y     = "Relative abundance (%)",
       title = "*Helicobacter*<br>Relative abundance across adult groups")

p_heli_c2

# ggsave("./outputs/comp2_helicobacter.jpeg", plot = p_heli_c2, dpi = 300, height = 6, width = 10)

# ============================================================
# HELICOBACTER SPECIES — relative abundance across adult groups
# ============================================================

library(ggtext)

heli_groups <- c("BALB/c_No_Wild_colon", "Wildling", "Pet Shop", "Wild")

heli_labels <- c(
  "BALB/c_No_Wild_colon" = "BALB/c\n+ Wild FMT",
  "Wildling"             = "Wildling",
  "Pet Shop"             = "Pet Shop",
  "Wild"                 = "Wild"
)

# ============================================================
# Extract Helicobacter OTUs from the NON-agglomerated ps_rel
# ============================================================

# Get all OTU IDs that are Helicobacter
heli_otu_ids <- taxa_names(ps_rel)[
  grepl("G__Helicobacter", taxa_names(ps_rel))
]

# Subset phyloseq to Helicobacter OTUs and target groups
ps_heli <- subset_samples(ps_rel,
  sample_data(ps_rel)$Group %in% heli_groups)
ps_heli <- prune_taxa(heli_otu_ids, ps_heli)
ps_heli <- prune_taxa(taxa_sums(ps_heli) > 0, ps_heli)

# Parse species names from OTU ID strings
species_lookup <- data.frame(
  otu_id = taxa_names(ps_heli)
) %>%
  mutate(
    species = sub(".*S__", "", otu_id),
    species = case_when(
      grepl("^__", species) ~ "Helicobacter sp. (unclassified)",
      TRUE                  ~ species
    ),
    # Strip strain designations for cleaner labels
    species = sub(" strain.*", "", species),
    species = sub(" DSM.*",    "", species),
    species = sub(" JCM.*",    "", species),
    species = sub(" MIT.*",    "", species),
    species = sub(" CMRI.*",   "", species),
    species = trimws(species)
  )

# Rename taxa in phyloseq object to species names
taxa_names(ps_heli) <- species_lookup$species

# Melt and calculate relative abundance per sample
heli_melt <- psmelt(ps_heli) %>%
  group_by(Sample) %>%
  mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
  ungroup() %>%
  mutate(Group = factor(Group, levels = heli_groups))

# ============================================================
# PLOT
# ============================================================

p_heli_species <- ggplot(heli_melt,
                         aes(x = Group, y = RelAbund, fill = Group)) +
  geom_boxplot(width = 0.45, outlier.alpha = 0) +
  geom_jitter(aes(shape = Sex), width = 0.15, size = 2.2,
              alpha = 0.8, stroke = 0.8) +
  facet_wrap(~ OTU, scales = "free_y", ncol = 3) +
  scale_fill_manual(values = fill_palette_c2) +
  scale_shape_manual(values = c("Female" = 21, "Male" = 24),
                     na.translate = FALSE) +
  scale_x_discrete(labels = heli_labels) +
  theme_bw() +
  theme(
    axis.text.x  = element_text(angle = 30, hjust = 1, size = 9),
    axis.title.y = element_text(size = 12),
    strip.text   = element_text(face = "italic", size = 10),
    legend.position = "none",
    plot.title   = element_text(size = 14)
  ) +
  labs(
    x       = NULL,
    y       = "Relative abundance (%)",
    title   = "Relative Abundance of Helicobacter species",
  )

p_heli_species

# ggsave("./outputs/comp2_helicobacter_species.jpeg",
#       plot   = p_heli_species,
#       width  = 12, height = 10, dpi = 300)

check_groups <- c("BALB/c_No_Wild_colon", "Wildling", "Pet Shop", "Wild")

ps_excl <- subset_samples(ps_rel,
  sample_data(ps_rel)$Group %in% check_groups)
ps_excl <- subset_taxa(ps_excl,
  !is.na(Genus) & Genus != "" & Genus != "__")
ps_excl <- prune_taxa(taxa_sums(ps_excl) > 0, ps_excl)

ps_excl_genus <- tax_glom(ps_excl, taxrank = "Genus", NArm = TRUE)
taxa_names(ps_excl_genus) <- make.unique(
  as.character(tax_table(ps_excl_genus)[, "Genus"]))

rel_excl <- psmelt(ps_excl_genus) %>%
  group_by(Sample) %>%
  mutate(RelAbund = Abundance / sum(Abundance) * 100) %>%
  ungroup()

prev_excl <- rel_excl %>%
  group_by(OTU, Group) %>%
  summarise(
    n_present  = sum(Abundance > 0),
    n_total    = n(),
    prevalence = n_present / n_total,
    mean_rel   = mean(RelAbund),
    .groups    = "drop"
  ) %>%
  pivot_wider(
    names_from  = Group,
    values_from = c(n_present, n_total, prevalence, mean_rel),
    values_fill = 0
  )

# Then run the filter
absent_in_wildfmt_all3 <- prev_excl %>%
  filter(
    `prevalence_BALB/c_No_Wild_colon` == 0,
    prevalence_Wildling   > 0,
    `prevalence_Pet Shop` > 0,
    prevalence_Wild       > 0
  ) %>%
  arrange(desc(mean_rel_Wild))

cat("Genera absent in Wild FMT but present in ALL THREE other groups:\n")

## Genera absent in Wild FMT but present in ALL THREE other groups:

absent_in_wildfmt_all3 %>%
  select(OTU,
         starts_with("prevalence_"),
         starts_with("mean_rel_")) %>%
  print(n = Inf)

## # A tibble: 4 × 9
##   OTU           prevalence_BALB/c_No…¹ prevalence_Wildling `prevalence_Pet Shop`
##   <chr>                          <dbl>               <dbl>                 <dbl>
## 1 Treponema                          0                 0.7                 0.154
## 2 Hominilimico…                      0                 0.3                 0.231
## 3 Enterococcus                       0                 0.6                 0.923
## 4 Escherichia                        0                 0.1                 0.615
## # ℹ abbreviated name: ¹`prevalence_BALB/c_No_Wild_colon`
## # ℹ 5 more variables: prevalence_Wild <dbl>,
## #   `mean_rel_BALB/c_No_Wild_colon` <dbl>, mean_rel_Wildling <dbl>,
## #   `mean_rel_Pet Shop` <dbl>, mean_rel_Wild <dbl>

absent_genera <- absent_in_wildfmt_all3$OTU

p_absent <- rel_excl %>%
  filter(OTU %in% absent_genera,
         Group != "BALB/c_No_Wild_colon") %>%
  mutate(Group = factor(Group, levels = c("Wildling", "Pet Shop", "Wild"))) %>%
  ggplot(aes(x = Group, y = RelAbund, fill = Group)) +
  geom_boxplot(width = 0.45, outlier.alpha = 0) +
  geom_jitter(aes(shape = Sex), width = 0.15, size = 2.2,
              alpha = 0.8, stroke = 0.8) +
  facet_wrap(~ OTU, scales = "free_y", ncol = 2) +
  scale_fill_manual(values = fill_palette_c2) +
  scale_shape_manual(values = c("Female" = 21, "Male" = 24),
                     na.translate = FALSE) +
  scale_x_discrete(labels = c(
    "Wildling"  = "Wildling",
    "Pet Shop"  = "Pet Shop",
    "Wild"      = "Wild"
  )) +
  theme_bw() +
  theme(
    axis.text.x   = element_text(angle = 25, hjust = 1, size = 10),
    axis.title.y  = element_text(size = 12),
    strip.text    = element_text(face = "italic", size = 11),
    legend.position = "none",
    plot.title    = element_markdown(size = 14),
    plot.subtitle = element_text(size = 11, color = "grey40")
  ) +
  labs(
    x        = NULL,
    y        = "Relative abundance (%)",
    title    = "Genera absent in BALB/c + Wild FMT",
    subtitle = "Present in Wildling, Pet Shop, and Wild mice"
  )

p_absent

# ggsave("./outputs/comp2_absent_wildfmt.jpeg", plot = p_absent, width = 10, height = 8, dpi = 300)

Session Info

sessionInfo()

## R version 4.4.1 (2024-06-14)
## Platform: x86_64-apple-darwin20
## Running under: macOS 15.7.5
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Copenhagen
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] ggh4x_0.3.1         S4Vectors_0.44.0    BiocGenerics_0.52.0
##  [4] ggtext_0.1.2        decontam_1.26.0     rstatix_0.7.3      
##  [7] ggpubr_0.6.3        patchwork_1.3.2     readxl_1.4.5       
## [10] ape_5.8-1           glue_1.8.0          pheatmap_1.0.13    
## [13] reshape2_1.4.5      ggrepel_0.9.8       devtools_2.5.0     
## [16] vegan_2.7-3         BiocManager_1.30.27 gridExtra_2.3      
## [19] phyloseq_1.50.0     lubridate_1.9.5     forcats_1.0.1      
## [22] stringr_1.6.0       dplyr_1.2.0         purrr_1.2.1        
## [25] readr_2.2.0         tidyr_1.3.2         tibble_3.3.1       
## [28] ggplot2_4.0.2       tidyverse_2.0.0    
## 
## loaded via a namespace (and not attached):
##  [1] permute_0.9-10          rlang_1.1.7             magrittr_2.0.4         
##  [4] ade4_1.7-24             otel_0.2.0              compiler_4.4.1         
##  [7] mgcv_1.9-4              systemfonts_1.3.2       vctrs_0.7.2            
## [10] pkgconfig_2.0.3         crayon_1.5.3            fastmap_1.2.0          
## [13] backports_1.5.0         XVector_0.46.0          ellipsis_0.3.2         
## [16] labeling_0.4.3          utf8_1.2.6              rmarkdown_2.30         
## [19] markdown_2.0            sessioninfo_1.2.3       tzdb_0.5.0             
## [22] UCSC.utils_1.2.0        ragg_1.5.2              xfun_0.57              
## [25] zlibbioc_1.52.0         cachem_1.1.0            litedown_0.9           
## [28] GenomeInfoDb_1.42.3     jsonlite_2.0.0          biomformat_1.34.0      
## [31] rhdf5filters_1.18.1     Rhdf5lib_1.28.0         broom_1.0.12           
## [34] parallel_4.4.1          cluster_2.1.8.2         R6_2.6.1               
## [37] bslib_0.10.0            stringi_1.8.7           RColorBrewer_1.1-3     
## [40] car_3.1-5               pkgload_1.5.0           jquerylib_0.1.4        
## [43] cellranger_1.1.0        Rcpp_1.1.1              iterators_1.0.14       
## [46] knitr_1.51              usethis_3.2.1           pacman_0.5.1           
## [49] IRanges_2.40.1          Matrix_1.7-5            splines_4.4.1          
## [52] igraph_2.2.2            timechange_0.4.0        tidyselect_1.2.1       
## [55] abind_1.4-8             rstudioapi_0.18.0       yaml_2.3.12            
## [58] codetools_0.2-20        pkgbuild_1.4.8          lattice_0.22-9         
## [61] plyr_1.8.9              Biobase_2.66.0          withr_3.0.2            
## [64] S7_0.2.1                evaluate_1.0.5          survival_3.8-6         
## [67] xml2_1.5.2              Biostrings_2.74.1       pillar_1.11.1          
## [70] carData_3.0-6           foreach_1.5.2           generics_0.1.4         
## [73] hms_1.1.4               commonmark_2.0.0        scales_1.4.0           
## [76] tools_4.4.1             data.table_1.18.2.1     ggsignif_0.6.4         
## [79] fs_2.0.1                rhdf5_2.50.2            grid_4.4.1             
## [82] colorspace_2.1-2        nlme_3.1-168            GenomeInfoDbData_1.2.13
## [85] Formula_1.2-5           cli_3.6.5               textshaping_1.0.5      
## [88] gtable_0.3.6            sass_0.4.10             digest_0.6.39          
## [91] farver_2.1.2            memoise_2.0.1           htmltools_0.5.9        
## [94] multtest_2.62.0         lifecycle_1.0.5         httr_1.4.8             
## [97] gridtext_0.1.6          MASS_7.3-65

Microbiome Analysis Pipeline - Naturalization study

Josefine Haagen Eklund

2026-05-16

1. Packages

2. Import Data

Feature table

Taxonomy

Metadata

3. Build Phyloseq Object

4. Filtering and Preprocessing

Prevalence filtering

5. Singletone Detection & Removal

6. Decontam — Remove Contaminants Using Negative Controls

7. Rarefaction

Rarefaction curve

Rarefy

Abundance threshold filtering (post-rarefaction)

Transformation objects

8. Prep for Alpha Diversity

9. Gloomer Function

10. Comparison 1 — FMT Engraftment at Weaning

Alpha diversity

Plot w/o donors

Plot with donors

Beta diversity — Bray-Curtis PCoA

Transformation examples - logarithm

Bray PCoA

Relative abundance barplot

Order level

Phylum level

Family level

Genus level

Helicobacter

Differential abundance

11. Comparison 2 — Adult Mice

Alpha diversity

Plot

Beta diversity — Bray-Curtis PCoA

PCoA

Donor vs term

Relative abundance barplot

Phylum level

Genus level

Grouped mean barplot (genera ≥ 1%)

Grouped mean barplot (alle genera ≥ 0.01%)

Per sample

term fmt groups vs donor

Deseq - Differential abundance

Wild FMT as ref

widling as ref

Wild mice exclusive genera

Helicobacter

Session Info