Loading Custom scripts

Before analyzing, run the following codes:

# ANCOMBC function 
source("/Users/qcvp/R/custom script/ANCOMBC.R")

# Pairwise PERMANOVA
source("/Users/qcvp/R/custom script/pairwise.adonis2.R")

# Indices normality
source("/Users/qcvp/R/custom script/Indices_normality.R")

Loading context

Import additional the metadata information of Nem

context <- utils::read.table("/Users/qcvp/R/outputs/dada2_total/asv_table/metadata_longitudinal.txt", header = TRUE, row.names = 1, sep="\t")
rownames(context) <- base::gsub("-",".",rownames(context))
head(context)
##            Sequencing     Date  Transect Station   Biosample Biosample_type
## F1w1b          HM-W-1 Nov_2022 Fish_Farm      HM Environment          Water
## F1w2b          HM-W-2 Nov_2022 Fish_Farm      HM Environment          Water
## F1w3b          HM-W-3 Nov_2022 Fish_Farm      HM Environment          Water
## Fi.10.3.GM    Fi-10-3 Jul_2023  Hon_Mieu Farm_10     Animals            Gut
## Fi.10.4.GB    Fi-10-4 Jul_2023  Hon_Mieu Farm_10     Animals            Gut
## Fi.10.5.GB    Fi-10-5 Jul_2023  Hon_Mieu Farm_10     Animals            Gut
##            Biosample_cluster   Host_type Host_subtype Host_genus  Host_species
## F1w1b                  Water Environment        Water       <NA>          <NA>
## F1w2b                  Water Environment        Water       <NA>          <NA>
## F1w3b                  Water Environment        Water       <NA>          <NA>
## Fi.10.3.GM        Vertebrate  Vertebrate         Fish    Siganus canaliculatus
## Fi.10.4.GB        Vertebrate  Vertebrate         Fish    Siganus canaliculatus
## Fi.10.5.GB        Vertebrate  Vertebrate         Fish    Siganus canaliculatus
##                       Species_ID
## F1w1b                       <NA>
## F1w2b                       <NA>
## F1w3b                       <NA>
## Fi.10.3.GM Siganus_canaliculatus
## Fi.10.4.GB Siganus_canaliculatus
## Fi.10.5.GB Siganus_canaliculatus

##Loading data

Nem_hu_unrarefied <- base::readRDS("/Users/qcvp/R/inputs/data for PHPB comunity/Nem_hu_unrarefied.rds") # Uncut absolute abundance
Nem_hu_unrarefied_rel_whole <- base::readRDS("/Users/qcvp/R/inputs/data for PHPB comunity/Nem_hu_unrarefied_rel_whole.rds") # Uncutted relative abundance against whole community samples 
Nem_hu_rarefied <- base::readRDS("/Users/qcvp/R/inputs/data for PHPB comunity/Nem_hu_rarefied.rds") # Cutted absolute abundance
Nem_hu_unrarefied_rel_PHPB <- base::readRDS("/Users/qcvp/R/inputs/data for PHPB comunity/Nem_hu_unrarefied_rel_PHPB.rds") # Uncutted relative abundance against PHPB community samples 
physeq_Nem_long_rarefied <- base::readRDS("/Users/qcvp/R/outputs/Decomtaminated/physeq_Nem_long_rarefied.rds") # Uncutted relative abundance against PHPB community samples 

Alpha diversity analysis

Relative bundance of PHPB against whole bacterial community

Plotting

library(ggplot2)

# Extract samples reads and add metadata
PHPB_rel_whole <- as.data.frame(rowSums(Nem_hu_unrarefied_rel_whole@otu_table@.Data))
## Loading required package: phyloseq
PHPB_rel_whole <- cbind(PHPB_rel_whole, physeq_Nem_long_rarefied@sam_data)
colnames(PHPB_rel_whole)[colnames(PHPB_rel_whole) == "rowSums(Nem_hu_unrarefied_rel_whole@otu_table@.Data)"] <- "Abundance"
PHPB_rel_whole$Date = factor(PHPB_rel_whole$Date , levels = c("Nov_2022", "Jul_2023", "Dec_2023")) #rearrange date

PHPB_rel_whole$Biosample = factor(PHPB_rel_whole$Biosample , levels = c("Environment", "Animals","Human", "Control")) # rearrange date


p1 <- ggplot(PHPB_rel_whole, aes(x = Biosample, y = Abundance, color = Biosample)) + 
  geom_jitter(position = position_jitter(width = .20), alpha = 0.5, size = 3, aes(colour = Biosample), show.legend = TRUE) + theme_bw() +
  geom_boxplot(aes(colour = Biosample), alpha=0.1, outlier.colour = NA) + 
  scale_y_continuous(limits=c(0, 0.95)) +
   theme(axis.title.x = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size = 13),
        strip.text = element_text(size = 13),
        axis.title.y = element_text(size = 15),
        axis.text.y = element_text(size=13),
        axis.text.x = element_text(size = 10, angle = 45, hjust = 1)) + 
  scale_colour_brewer(palette="Set1") +
  ylab("Relative Abundance") +   
    scale_colour_manual(breaks =c("Nov_22","Jul_23","Dec_23","Dec_21"), 
                      values = c("pink","purple2","brown","green")) +
  scale_colour_manual(breaks =c("Human","Animals","Environment","Control"), 
                      values = c("red","orange","blue","green")) 
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p1

Statistical test whole data

#Parametric test - p<0.05 = non-parametric, p>0.05 = parametric
PHPB_rel_whole |>
  dplyr::select(Abundance) |>
  indices_normality(nrow = 2, ncol = 3)

stats::kruskal.test(Abundance ~ Biosample, data = PHPB_rel_whole)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by Biosample
## Kruskal-Wallis chi-squared = 26.677, df = 3, p-value = 6.879e-06
stats::kruskal.test(Abundance ~ Date, data = PHPB_rel_whole)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by Date
## Kruskal-Wallis chi-squared = 6.5863, df = 2, p-value = 0.03714
FSA::dunnTest(Abundance ~ Biosample, data = PHPB_rel_whole, method = "bh")
## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Benjamini-Hochberg method.
##              Comparison         Z      P.unadj        P.adj
## 1     Animals - Control -2.649197 8.068333e-03 0.0121024997
## 2 Animals - Environment -2.029643 4.239286e-02 0.0508714308
## 3 Control - Environment  0.660496 5.089356e-01 0.5089356035
## 4       Animals - Human  2.748823 5.980964e-03 0.0119619276
## 5       Control - Human  3.898323 9.686121e-05 0.0005811672
## 6   Environment - Human  3.423778 6.175715e-04 0.0018527144
FSA::dunnTest(Abundance ~ Date, data = PHPB_rel_whole, method = "bh")
## Warning: Some rows deleted from 'x' and 'g' because missing data.
## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Benjamini-Hochberg method.
##            Comparison         Z    P.unadj     P.adj
## 1 Dec_2023 - Jul_2023  2.566341 0.01027777 0.0308333
## 2 Dec_2023 - Nov_2022  1.314114 0.18880794 0.1888079
## 3 Jul_2023 - Nov_2022 -1.379936 0.16760625 0.2514094

Richness & Shannon $ LCBD index

Calculate indices

library(microbiome)
## 
## microbiome R package (microbiome.github.com)
##     
## 
## 
##  Copyright (C) 2011-2022 Leo Lahti, 
##     Sudarshan Shetty et al. <microbiome.github.io>
## 
## Attaching package: 'microbiome'
## The following object is masked from 'package:ggplot2':
## 
##     alpha
## The following object is masked from 'package:base':
## 
##     transform
library(adespatial)
## Registered S3 methods overwritten by 'adegraphics':
##   method         from
##   biplot.dudi    ade4
##   kplot.foucart  ade4
##   kplot.mcoa     ade4
##   kplot.mfa      ade4
##   kplot.pta      ade4
##   kplot.sepan    ade4
##   kplot.statis   ade4
##   scatter.coa    ade4
##   scatter.dudi   ade4
##   scatter.nipals ade4
##   scatter.pco    ade4
##   score.acm      ade4
##   score.mix      ade4
##   score.pca      ade4
##   screeplot.dudi ade4
## Registered S3 method overwritten by 'spdep':
##   method   from
##   plot.mst ape
## Registered S3 methods overwritten by 'adespatial':
##   method             from       
##   plot.multispati    adegraphics
##   print.multispati   ade4       
##   summary.multispati ade4
OTU<- Nem_hu_unrarefied@otu_table
LBCD <- beta.div(OTU, method = "hellinger", sqrt.D = FALSE, samp = TRUE, nperm = 999, clock = TRUE)
## Time for computation = 3.208000  sec
LCBD <- data.frame(LBCD$LCBD)

alpha_indices_PHPB <- microbiome::alpha(
  Nem_hu_rarefied,
  index = c("observed", "diversity_gini_simpson",
            "diversity_shannon", "evenness_pielou",
            "dominance_relative"))
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
#Creating data frame with indices included
alpha_indices_PHPB <- phyloseq::sample_data(alpha_indices_PHPB)

#Merge with phyloseq object
Nem_hu_rarefied<- phyloseq::merge_phyloseq(Nem_hu_rarefied, alpha_indices_PHPB)

#Change sample_data as a data frame
Indices_data_PHPB <- data.frame(sample_data(Nem_hu_rarefied))

#Rearrange time
Indices_data_PHPB$Date = factor(Indices_data_PHPB$Date , levels = c("Nov_2022", "Jul_2023", "Dec_2023", "Control"))

#Insert LCBD data 
Indices_data_PHPB$LCBD <- LCBD$LBCD.LCBD
Graph
library(gridExtra)
Indices_data_PHPB$Biosample = factor(Indices_data_PHPB$Biosample , levels = c("Environment", "Animals","Human", "Control")) # rearrange Biosample
#Richness
p2 <- ggplot(Indices_data_PHPB, aes(x = Biosample, y = observed, color = Biosample)) + 
  geom_jitter(position = position_jitter(width = .20), alpha = 0.5, size = 3, aes(colour = Biosample), show.legend = TRUE) + theme_bw() +
  geom_boxplot(aes(colour = Biosample), alpha=0.1, outlier.colour = NA) + 
      scale_y_continuous(limits=c(0, 36)) +
   theme(axis.title.x = element_blank(),
        legend.position = "none", #Remove legends
        legend.title = element_blank(),
        legend.text = element_text(size = 13),
        strip.text = element_text(size = 13),
        axis.title.y = element_text(size = 15),
        axis.text.y = element_text(size=13),
        axis.text.x = element_text(size = 10, angle = 45, hjust = 1)) + 
  ylab("PHPB Species richness")+
  scale_colour_brewer(palette="Set1") +
  labs(color ="Biosample") +
    scale_colour_manual(breaks =c("Nov_22","Jul_23","Dec_23","Dec_21"), 
                      values = c("pink","purple2","brown","green")) +
  scale_colour_manual(breaks =c("Human","Animals","Environment","Control"), 
                      values = c("red","orange","blue","green")) 
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
#----------------------------------------------------------------------------------------------------------------------------------------
#Shannon index
p3 <- ggplot(Indices_data_PHPB, aes(x = Biosample, y = diversity_shannon, color = Biosample)) + 
  geom_jitter(position = position_jitter(width = .20), alpha = 0.5, size = 3, aes(colour = Biosample), show.legend = TRUE) + theme_bw() +
  geom_boxplot(aes(colour = Biosample), alpha=0.1, outlier.colour = NA) + 
    scale_y_continuous(limits=c(0, 3.2)) +
   theme(axis.title.x = element_blank(),
       legend.position = "none", #Remove legends
        legend.title = element_blank(),
        legend.text = element_text(size = 13),
        strip.text = element_text(size = 13),
        axis.title.y = element_text(size = 15),
        axis.text.y = element_text(size=13),
        axis.text.x = element_text(size = 10, angle = 45, hjust = 1)) + 
  ylab("PHPB Shannon index")+
  scale_colour_brewer(palette="Set1") +
  labs(color ="Biosample") +
  scale_colour_manual(breaks =c("Nov_22","Jul_23","Dec_23","Dec_21"), 
                      values = c("pink","purple2","brown","green")) +
  scale_colour_manual(breaks =c("Human","Animals","Environment","Control"), 
                      values = c("red","orange","blue","green")) 
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
#----------------------------------------------------------------------------------------------------------------------------------------
#LCBD index
p2.1 <- ggplot(Indices_data_PHPB, aes(x = Biosample, y =  LCBD, color = Biosample)) + 
  geom_jitter(position = position_jitter(width = .20), alpha = 0.5, size = 3, aes(colour = Biosample), show.legend = TRUE) + theme_bw() +
  geom_boxplot(aes(colour = Biosample), alpha=0.1, outlier.colour = NA) + 
   theme(axis.title.x = element_blank(),
        legend.position = "none", #Remove legends
        legend.title = element_blank(),
        legend.text = element_text(size = 13),
        strip.text = element_text(size = 13),
        axis.title.y = element_text(size = 15),
        axis.text.y = element_text(size=13),
        axis.text.x = element_text(size = 10, angle = 45, hjust = 1)) + 
  ylab("LCBD indices")+
  labs(color ="Biosample") +
  scale_colour_manual(breaks =c("Human","Animals","Environment","Control"), 
                      values = c("red","orange","blue","green")) 
p2.1

#Combine plots
g2 <- grid.arrange(p2, p3, p2.1, nrow = 1)
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

g2
## TableGrob (1 x 3) "arrange": 3 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (1-1,3-3) arrange gtable[layout]
Statistical test for whole data

Select indices to test & run normality check

Indices_data_PHPB |>
  dplyr::select(observed,
                diversity_gini_simpson,
                diversity_shannon,
                evenness_pielou, dominance_relative, LCBD) |>
  indices_normality(nrow = 2, ncol = 3)

#p<0.05 = non-parametric, p>0.05 = parametric

Kruskal-Wallis: non-parametric & at least 3 groups

#Richness - Kruskal wallis rank sum test for p-value (<0.05 = reject null hypothesis)
stats::kruskal.test(observed ~ Biosample, data = Indices_data_PHPB)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  observed by Biosample
## Kruskal-Wallis chi-squared = 63.456, df = 3, p-value = 1.073e-13
stats::kruskal.test(observed ~ Date, data = Indices_data_PHPB)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  observed by Date
## Kruskal-Wallis chi-squared = 19.994, df = 3, p-value = 0.0001702
#Richness -DuNem test reveals which one is significantly different
FSA::dunnTest(observed ~ Biosample, data = Indices_data_PHPB, method = "bh")
##              Comparison         Z      P.unadj        P.adj
## 1     Animals - Control -5.545131 2.937346e-08 8.812038e-08
## 2 Animals - Environment -5.319609 1.039907e-07 2.079813e-07
## 3 Control - Environment  0.620922 5.346510e-01 5.346510e-01
## 4       Animals - Human -5.597206 2.178334e-08 1.307001e-07
## 5       Control - Human  3.137649 1.703085e-03 2.554628e-03
## 6   Environment - Human  2.628119 8.585847e-03 1.030302e-02
FSA::dunnTest(observed ~ Date, data = Indices_data_PHPB, method = "bh")
##            Comparison         Z      P.unadj        P.adj
## 1  Control - Dec_2023 3.4660326 5.281993e-04 1.056399e-03
## 2  Control - Jul_2023 3.7511924 1.759956e-04 5.279867e-04
## 3 Dec_2023 - Jul_2023 0.5656317 5.716442e-01 5.716442e-01
## 4  Control - Nov_2022 4.3974498 1.095302e-05 6.571812e-05
## 5 Dec_2023 - Nov_2022 1.7750292 7.589306e-02 1.138396e-01
## 6 Jul_2023 - Nov_2022 1.1627589 2.449273e-01 2.939128e-01
#----------------------------------------------------------------------------------------------------------------------------------------
#Shannon index - Kruskal wallis rank sum test for p-value (<0.05 = reject null hypothesis)
stats::kruskal.test(diversity_shannon ~ Biosample, data = Indices_data_PHPB) 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  diversity_shannon by Biosample
## Kruskal-Wallis chi-squared = 39.821, df = 3, p-value = 1.163e-08
stats::kruskal.test(diversity_shannon ~ Date, data = Indices_data_PHPB) 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  diversity_shannon by Date
## Kruskal-Wallis chi-squared = 17.903, df = 3, p-value = 0.0004605
#Shannon index - DuNem test reveals which one is significantly different
FSA::dunnTest(diversity_shannon ~ Biosample, data = Indices_data_PHPB, method = "bh")
##              Comparison         Z      P.unadj        P.adj
## 1     Animals - Control -4.927537 8.327247e-07 4.996348e-06
## 2 Animals - Environment -3.209812 1.328219e-03 2.656438e-03
## 3 Control - Environment  1.630440 1.030086e-01 1.236103e-01
## 4       Animals - Human -4.612983 3.969306e-06 1.190792e-05
## 5       Control - Human  2.947837 3.200062e-03 4.800093e-03
## 6   Environment - Human  0.973301 3.304037e-01 3.304037e-01
FSA::dunnTest(diversity_shannon ~ Date, data = Indices_data_PHPB, method = "bh")
##            Comparison         Z      P.unadj        P.adj
## 1  Control - Dec_2023 3.1971353 1.387998e-03 0.0027759960
## 2  Control - Jul_2023 3.4753731 5.101433e-04 0.0015304300
## 3 Dec_2023 - Jul_2023 0.5508011 5.817701e-01 0.5817700525
## 4  Control - Nov_2022 4.1379256 3.504599e-05 0.0002102759
## 5 Dec_2023 - Nov_2022 1.7997903 7.189376e-02 0.1078406379
## 6 Jul_2023 - Nov_2022 1.2026858 2.290979e-01 0.2749175069
#----------------------------------------------------------------------------------------------------------------------------------------
#LCBD index - Kruskal wallis rank sum test for p-value (<0.05 = reject null hypothesis)
stats::kruskal.test(LCBD ~ Biosample, data = Indices_data_PHPB) 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  LCBD by Biosample
## Kruskal-Wallis chi-squared = 43.823, df = 3, p-value = 1.645e-09
stats::kruskal.test(LCBD ~ Date, data = Indices_data_PHPB) 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  LCBD by Date
## Kruskal-Wallis chi-squared = 32.306, df = 3, p-value = 4.511e-07
#LCBD index - DuNem test reveals which one is significantly different
FSA::dunnTest(LCBD ~ Biosample, data = Indices_data_PHPB, method = "bh")
##              Comparison          Z      P.unadj        P.adj
## 1     Animals - Control  5.2430505 1.579433e-07 9.476599e-07
## 2 Animals - Environment  2.6998301 6.937489e-03 1.040623e-02
## 3 Control - Environment -2.2434968 2.486480e-02 2.983775e-02
## 4       Animals - Human  5.0522684 4.365937e-07 1.309781e-06
## 5       Control - Human -3.0729152 2.119787e-03 4.239574e-03
## 6   Environment - Human -0.2376326 8.121661e-01 8.121661e-01
FSA::dunnTest(LCBD ~ Date, data = Indices_data_PHPB, method = "bh")
##            Comparison         Z      P.unadj        P.adj
## 1  Control - Dec_2023 -2.730754 6.318954e-03 9.478431e-03
## 2  Control - Jul_2023 -4.760506 1.931086e-06 1.158651e-05
## 3 Dec_2023 - Jul_2023 -3.895476 9.800613e-05 1.960123e-04
## 4  Control - Nov_2022 -4.129380 3.637423e-05 1.091227e-04
## 5 Dec_2023 - Nov_2022 -2.721582 6.497022e-03 7.796426e-03
## 6 Jul_2023 - Nov_2022  1.376793 1.685763e-01 1.685763e-01

Wilconxon rank test: parametric

#Richness
ggpubr::compare_means(observed ~ Biosample, Indices_data_PHPB, method = "wilcox.test")
## # A tibble: 6 × 8
##   .y.      group1      group2              p      p.adj p.format p.signif method
##   <chr>    <chr>       <chr>           <dbl>      <dbl> <chr>    <chr>    <chr> 
## 1 observed Environment Animals 0.00000624       3.10e-5 6.2e-06  ****     Wilco…
## 2 observed Environment Human   0.000573         1.1 e-3 0.00057  ***      Wilco…
## 3 observed Environment Control 0.0331           3.3 e-2 0.03307  *        Wilco…
## 4 observed Animals     Human   0.00000000136    8.2 e-9 1.4e-09  ****     Wilco…
## 5 observed Animals     Control 0.0000217        8.70e-5 2.2e-05  ****     Wilco…
## 6 observed Human       Control 0.000105         3.2 e-4 0.00011  ***      Wilco…
ggpubr::compare_means(observed ~ Date, Indices_data_PHPB, method = "wilcox.test")
## # A tibble: 6 × 8
##   .y.      group1   group2           p   p.adj p.format p.signif method  
##   <chr>    <chr>    <chr>        <dbl>   <dbl> <chr>    <chr>    <chr>   
## 1 observed Nov_2022 Jul_2023 0.205     0.41    0.20548  ns       Wilcoxon
## 2 observed Nov_2022 Dec_2023 0.0742    0.22    0.07424  ns       Wilcoxon
## 3 observed Nov_2022 Control  0.0000692 0.00042 6.9e-05  ****     Wilcoxon
## 4 observed Jul_2023 Dec_2023 0.506     0.51    0.50619  ns       Wilcoxon
## 5 observed Jul_2023 Control  0.000155  0.00065 0.00016  ***      Wilcoxon
## 6 observed Dec_2023 Control  0.000129  0.00065 0.00013  ***      Wilcoxon
#-------------------------------------------------------------------------------------------------------------------------------------------
#Shannon index
ggpubr::compare_means(diversity_shannon ~ Biosample, Indices_data_PHPB, method = "wilcox.test")
## # A tibble: 6 × 8
##   .y.               group1      group2        p   p.adj p.format p.signif method
##   <chr>             <chr>       <chr>     <dbl>   <dbl> <chr>    <chr>    <chr> 
## 1 diversity_shannon Environment Animals 3.36e-3 1   e-2 0.00336  **       Wilco…
## 2 diversity_shannon Environment Human   1.98e-1 2   e-1 0.19805  ns       Wilco…
## 3 diversity_shannon Environment Control 1.15e-2 2.3 e-2 0.01154  *        Wilco…
## 4 diversity_shannon Animals     Human   1.04e-6 6.20e-6 1.0e-06  ****     Wilco…
## 5 diversity_shannon Animals     Control 9.45e-5 4.7 e-4 9.4e-05  ****     Wilco…
## 6 diversity_shannon Human       Control 3.60e-4 1.4 e-3 0.00036  ***      Wilco…
ggpubr::compare_means(diversity_shannon ~ Date, Indices_data_PHPB, method = "wilcox.test")
## # A tibble: 6 × 8
##   .y.               group1   group2           p   p.adj p.format p.signif method
##   <chr>             <chr>    <chr>        <dbl>   <dbl> <chr>    <chr>    <chr> 
## 1 diversity_shannon Nov_2022 Jul_2023 0.167     0.33    0.16655  ns       Wilco…
## 2 diversity_shannon Nov_2022 Dec_2023 0.0871    0.26    0.08705  ns       Wilco…
## 3 diversity_shannon Nov_2022 Control  0.000151  0.00075 0.00015  ***      Wilco…
## 4 diversity_shannon Jul_2023 Dec_2023 0.490     0.49    0.49030  ns       Wilco…
## 5 diversity_shannon Jul_2023 Control  0.0000296 0.00018 3e-05    ****     Wilco…
## 6 diversity_shannon Dec_2023 Control  0.000287  0.0011  0.00029  ***      Wilco…
#-------------------------------------------------------------------------------------------------------------------------------------------
#LCBD index
ggpubr::compare_means(LCBD ~ Biosample, Indices_data_PHPB, method = "wilcox.test")
## # A tibble: 6 × 8
##   .y.   group1      group2            p     p.adj p.format p.signif method  
##   <chr> <chr>       <chr>         <dbl>     <dbl> <chr>    <chr>    <chr>   
## 1 LCBD  Environment Animals 0.00317     0.0063    0.00317  **       Wilcoxon
## 2 LCBD  Environment Human   0.862       0.86      0.86222  ns       Wilcoxon
## 3 LCBD  Environment Control 0.000175    0.0007    0.00017  ***      Wilcoxon
## 4 LCBD  Animals     Human   0.000000183 0.0000011 1.8e-07  ****     Wilcoxon
## 5 LCBD  Animals     Control 0.0000386   0.00019   3.9e-05  ****     Wilcoxon
## 6 LCBD  Human       Control 0.000189    0.0007    0.00019  ***      Wilcoxon
ggpubr::compare_means(LCBD ~ Date, Indices_data_PHPB, method = "wilcox.test")
## # A tibble: 6 × 8
##   .y.   group1   group2              p      p.adj p.format p.signif method  
##   <chr> <chr>    <chr>           <dbl>      <dbl> <chr>    <chr>    <chr>   
## 1 LCBD  Nov_2022 Jul_2023 0.145        0.15       0.14539  ns       Wilcoxon
## 2 LCBD  Nov_2022 Dec_2023 0.00478      0.0096     0.00478  **       Wilcoxon
## 3 LCBD  Nov_2022 Control  0.0000591    0.00027    5.9e-05  ****     Wilcoxon
## 4 LCBD  Jul_2023 Dec_2023 0.0000548    0.00027    5.5e-05  ****     Wilcoxon
## 5 LCBD  Jul_2023 Control  0.0000000200 0.00000012 2.0e-08  ****     Wilcoxon
## 6 LCBD  Dec_2023 Control  0.000334     0.001      0.00033  ***      Wilcoxon

Beta diversity analysis

Bacterial composition analysis

Prepare data for bacterial composition analysis

library("dplyr")
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#Melting unrarefied Absolute Abundance
Melted_Nem_hu_unrarefied_abs <- Nem_hu_unrarefied %>%
  phyloseq::psmelt() %>%                                         # Melt to long format
  filter(Abundance > 0) %>%                         # Filter out absent abundance taxa reads
  arrange(Species) 

#Recalculate relative abundance among PHPB for bacterial composition analysis
Nem_hu_unrarefied_rel_PHPB <- phyloseq::transform_sample_counts(Nem_hu_unrarefied, function(x) x / sum(x)) #The relative abundance above was the relative abundance of PHPB within whole bacterial community so inorder to investigate the PHPB composition, we have recalculate the relative abundance for PHPB within itself in each sample only
saveRDS(Nem_hu_unrarefied_rel_PHPB,file="/Users/qcvp/R/inputs/data for PHPB comunity/Nem_hu_unrarefied_rel_PHPB.rds")

#Melting unrarefied Relative Abundance
Melted_Nem_hu_unrarefied_rel <- Nem_hu_unrarefied_rel_PHPB %>%
  phyloseq::psmelt() %>%                                         # Melt to long format
  filter(Abundance > 0) %>%                         # Filter out low abundance taxa reads
  arrange(Species)  

Extract ASV list for Venn diagram

#Combine taxonomy (Family + Genus + Species)
Melted_Nem_hu_unrarefied_abs <- Melted_Nem_hu_unrarefied_abs %>%
                                mutate(taxcom = paste(Family, Genus, Species, sep =";"))
#Subset samples occur in each Biosample
animal_sample <- base::subset(Melted_Nem_hu_unrarefied_abs, Biosample == "Animals")
Envi_sample <- base::subset(Melted_Nem_hu_unrarefied_abs, Biosample == "Environment")
Human_sample <- base::subset(Melted_Nem_hu_unrarefied_abs, Biosample == "Human")

#Extract list of ASV occur in sample types
list_tax_all <- Reduce(intersect, list(animal_sample$taxcom, Envi_sample$taxcom, Human_sample$taxcom)) #Species occur in all biosample types
list_tax_EA <- setdiff(intersect(Envi_sample$taxcom,animal_sample$taxcom), list_tax_all) #Species occur in both Environment and animal but not in Human
list_tax_EH <- setdiff(intersect(Envi_sample$taxcom,Human_sample$taxcom), list_tax_all) #Species occur in both Environment and human but not in animal
list_tax_AH <- setdiff(intersect(animal_sample$taxcom,Human_sample$taxcom), list_tax_all) #Species occur in both human and animal but not in environment
list_tax_A <- Reduce(setdiff, list(animal_sample$taxcom, list_tax_EA, list_tax_AH, list_tax_all)) #Species occur in only Animal
list_tax_A
##  [1] "Comamonadaceae;uncultured;Burkholderiales"          
##  [2] "Xanthomonadaceae;Xanthomonas;albilineans"           
##  [3] "Pseudomonadaceae;Pseudomonas;alcaligenes"           
##  [4] "Shewanellaceae;Shewanella;algae"                    
##  [5] "Prevotellaceae;Prevotella;amnii"                    
##  [6] "Peptostreptococcaceae;Peptostreptococcus;anaerobius"
##  [7] "Enterococcaceae;Enterococcus;cecorum"               
##  [8] "Xanthobacteraceae;Afipia;clevelandensis"            
##  [9] "Bacillaceae;Bacillus;coagulans"                     
## [10] "Lactobacillaceae;Weissella;confusa"                 
## [11] "Prevotellaceae;Prevotella;copri"                    
## [12] "Rhodobacteraceae;Roseobacter;denitrificans"         
## [13] "Corynebacteriaceae;Corynebacterium;durum"           
## [14] "Carnobacteriaceae;Granulicatella;elegans"           
## [15] "Porphyromonadaceae;Porphyromonas;endodontalis"      
## [16] "Streptococcaceae;Streptococcus;equinus"             
## [17] "Halomonadaceae;Pseudomonas;fluorescens"             
## [18] "Corynebacteriaceae;Corynebacterium;glucuronolyticum"
## [19] "Corynebacteriaceae;Corynebacterium;jeikeium"        
## [20] "Moraxellaceae;Acinetobacter;johnsonii"              
## [21] "Lachnospiraceae;Cellulosilyticum;lentocellum"       
## [22] "Rhodobacteraceae;Paracoccus;marinus"                
## [23] "Spirochaetaceae;Treponema;medium"                   
## [24] "Cellvibrionaceae;Cellvibrio;mixtus"                 
## [25] "Comamonadaceae;Variovorax;paradoxus"                
## [26] "Vibrionaceae;Vibrio;parahaemolyticus"               
## [27] "Pseudomonadaceae;Pseudomonas;putida"                
## [28] "Brevibacteriaceae;Brevibacterium;ravenspurgense"    
## [29] "Lactobacillaceae;Lactobacillus;rhamnosus"           
## [30] "Micrococcaceae;Kocuria;rhizophila"                  
## [31] "Corynebacteriaceae;Corynebacterium;riegelii"        
## [32] "Moraxellaceae;Psychrobacter;sanguinis"              
## [33] "Sphingomonadaceae;Sphingomonas;sanxanigenens"       
## [34] "Family XI;Peptoniphilus;senegalensis"               
## [35] "Planococcaceae;Lysinibacillus;sphaericus"           
## [36] "Corynebacteriaceae;Corynebacterium;tuscaniense"     
## [37] "Leptotrichiaceae;Leptotrichia;wadei"                
## [38] "Pseudomonadaceae;Pseudomonas;xanthomarina"
list_tax_H <- Reduce(setdiff, list(Human_sample$taxcom, list_tax_EH, list_tax_AH, list_tax_all)) #Species occur in only Human
list_tax_H
##  [1] "Rhodocyclaceae;uncultured;Sterolibacterium"       
##  [2] "Corynebacteriaceae;Corynebacterium;accolens"      
##  [3] "Propionibacteriaceae;Cutibacterium;acnes"         
##  [4] "Oxalobacteraceae;Massilia;aerilata"               
##  [5] "Enterobacteriaceae;Klebsiella;aerogenes"          
##  [6] "Moritellaceae;Paramoritella;alkaliphila"          
##  [7] "Rhizobiaceae;Aureimonas;altamirensis"             
##  [8] "Enterobacteriaceae;Enterobacter;asburiae"         
##  [9] "Alteromonadaceae;Alteromonas;australica"          
## [10] "Comamonadaceae;uncultured;bacterium"              
## [11] "Alcaligenaceae;uncultured;bacterium"              
## [12] "Methylophilaceae;uncultured;bacterium"            
## [13] "Moraxellaceae;Acinetobacter;baumannii"            
## [14] "Comamonadaceae;uncultured;beta"                   
## [15] "Azospirillaceae;Azospirillum;brasilense"          
## [16] "Bacteroidaceae;Bacteroides;caccae"                
## [17] "Bacillaceae;Bacillus;circulans"                   
## [18] "Bacteroidaceae;Bacteroides;coprocola"             
## [19] "Moraxellaceae;Psychrobacter;cryohalolentis"       
## [20] "Aerococcaceae;Abiotrophia;defectiva"              
## [21] "Prevotellaceae;Prevotella;disiens"                
## [22] "Enterobacteriaceae;Salmonella;enterica"           
## [23] "Bacillaceae;Staphylococcus;epidermidis"           
## [24] "Alcaligenaceae;Alcaligenes;faecalis"              
## [25] "Neisseriaceae;Neisseria;flavescens"               
## [26] "Lachnospiraceae;[Ruminococcus];gnavus"            
## [27] "Propionibacteriaceae;Propionibacterium;granulosum"
## [28] "Pasteurellaceae;Mannheimia;haemolytica"           
## [29] "Family XI;Peptoniphilus;harei"                    
## [30] "Prevotellaceae;Prevotella;histicola"              
## [31] "Corynebacteriaceae;Corynebacterium;imitans"       
## [32] "Selenomonadaceae;Selenomonas;infelix"             
## [33] "Lactobacillaceae;Lactobacillus;johnsonii"         
## [34] "Spirochaetaceae;Treponema;lecithinolyticum"       
## [35] "Micrococcaceae;Micrococcus;lylae"                 
## [36] "Family XI;Finegoldia;magna"                       
## [37] "Dietziaceae;Dietzia;maris"                        
## [38] "Family XI;Fenollaria;massiliensis"                
## [39] "Actinomycetaceae;Actinomyces;massiliensis"        
## [40] "Burkholderiaceae;Cupriavidus;metallidurans"       
## [41] "Family XI;Parvimonas;micra"                       
## [42] "Dermacoccaceae;Dermacoccus;nishinomiyaensis"      
## [43] "Xanthomonadaceae;Stenotrophomonas;nitritireducens"
## [44] "Moraxellaceae;Psychrobacter;nivimaris"            
## [45] "Fusobacteriaceae;Fusobacterium;nucleatum"         
## [46] "Actinomycetaceae;Schaalia;odontolytica"           
## [47] "Oxalobacteraceae;Undibacterium;oligocarboniphilum"
## [48] "Enterococcaceae;Raoultella;ornithinolytica"       
## [49] "Corynebacteriaceae;Corynebacterium;otitidis"      
## [50] "Carnobacteriaceae;Alloiococcus;otitis"            
## [51] "Spirochaetaceae;Treponema;pallidum"               
## [52] "Lactobacillaceae;Weissella;paramesenteroides"     
## [53] "Sphingomonadaceae;Sphingomonas;paucimobilis"      
## [54] "Lactobacillaceae;Pediococcus;pentosaceus"         
## [55] "Staphylococcaceae;Staphylococcus;pettenkoferi"    
## [56] "Microbacteriaceae;Microbacterium;phyllosphaerae"  
## [57] "Ruminococcaceae;Faecalibacterium;prausnitzii"     
## [58] "Streptococcaceae;Streptococcus;pyogenes"          
## [59] "Streptococcaceae;Lactococcus;raffinolactis"       
## [60] "Yersiniaceae;Yersinia;ruckeri"                    
## [61] "Prevotellaceae;Prevotella;salivae"                
## [62] "Leptotrichiaceae;Sneathia;sanguinegens"           
## [63] "Gemellaceae;Gemella;sanguinis"                    
## [64] "Pasteurellaceae;Aggregatibacter;segnis"           
## [65] "Rickettsiaceae;Rickettsia;slovaca"                
## [66] "Flavobacteriaceae;Capnocytophaga;sputigena"       
## [67] "Prevotellaceae;Alloprevotella;tannerae"           
## [68] "Oxalobacteraceae;Massilia;timonae"                
## [69] "Rhizobiaceae;Agrobacterium;tumefaciens"           
## [70] "Corynebacteriaceae;Corynebacterium;urealyticum"   
## [71] "Moraxellaceae;Acinetobacter;variabilis"           
## [72] "Streptomycetaceae;Streptomyces;violaceoruber"     
## [73] "Pseudomonadaceae;Pseudomonas;viridiflava"         
## [74] "Morganellaceae;Proteus;vulgaris"                  
## [75] "Lachnospiraceae;Blautia;wexlerae"                 
## [76] "Rhodobacteraceae;Paracoccus;yeei"
list_tax_E <- Reduce(setdiff, list(Envi_sample$taxcom, list_tax_EH, list_tax_EA, list_tax_all)) #Species occur in only Environment
list_tax_E
##  [1] "Pasteurellaceae;Terrahaemophilus;aromaticivorans"     
##  [2] "Aeromonadaceae;Shewanella;baltica"                    
##  [3] "Micrococcaceae;Rothia;dentocariosa"                   
##  [4] "Prevotellaceae;Prevotella;intermedia"                 
##  [5] "Alcaligenaceae;Achromobacter;marplatensis"            
##  [6] "Comamonadaceae;Diaphorobacter;nitroreducens"          
##  [7] "Xanthomonadaceae;Xanthomonas;oryzae"                  
##  [8] "Fusobacteriaceae;Fusobacterium;periodonticum"         
##  [9] "Carnobacteriaceae;Dolosigranulum;pigrum"              
## [10] "Xanthomonadaceae;Stenotrophomonas;rhizophila"         
## [11] "Clostridiaceae;Clostridium;saccharoperbutylacetonicum"
## [12] "Sphingomonadaceae;Sphingomonas;sanguinis"             
## [13] "Anaerovoracaceae;[Eubacterium];saphenum"              
## [14] "Streptococcaceae;Streptococcus;sinensis"              
## [15] "Enterobacteriaceae;Pseudescherichia;vulneris"         
## [16] "Sphingomonadaceae;Sphingomonas;wittichii"             
## [17] "Pseudomonadaceae;Pseudomonas;yamanorum"               
## [18] "Pseudomonadaceae;Pseudomonas;zhaodongensis"
#Checking if every taxonomy have been extracted
length(unique(Melted_Nem_hu_unrarefied_abs$taxcom)) - length(list_tax_E) + length(list_tax_H) + length(list_tax_A) + length(list_tax_all) + length(list_tax_EA) + length(list_tax_EH) + length(list_tax_AH)
## [1] 383

Distribution of pathogenic Taxonomies (venn diagram)

x <- list(A = animal_sample$taxcom, B = Envi_sample$taxcom, C = Human_sample$taxcom) # Combine list of Species of each Biosample
p4 <- ggVennDiagram::ggVennDiagram(x, 
              label = "count",) + # Adding label
              theme(legend.position = "none") + # Remove lengend
              scale_fill_gradient(low = "#F4FAFE", high = "#4981BF")
p4

ggsave("/Users/qcvp/R/Plots/Venn_diagram_PHPB.png", plot = p4, width = 11.5, height = 7)

Checking Relative abundance within pathobiome of each PHPB Classes in each Biosample

Total

#Calculate errors
physeq_PHPB_long_summary <- Melted_Nem_hu_unrarefied_rel %>%
   dplyr::group_by(Family, Sample) %>%
  dplyr::summarise(
    Sum = sum(Abundance, na.rm = TRUE))  %>% #Calculate sum abundance in a sample
  dplyr::ungroup() %>%
  dplyr::group_by(Family) %>%
  dplyr::summarise(
    Mean = mean(Sum, na.rm = TRUE))  %>% #Calculate mean abundance of families
  dplyr::arrange(desc(Mean))  %>% # Sort abundances 
  dplyr::slice_head(n = 15)  #Extracting top 15 highest abundance Families 
## `summarise()` has grouped output by 'Family'. You can override using the
## `.groups` argument.
#Plotting
ggplot(physeq_PHPB_long_summary, aes(x = reorder(Family, -Mean), y = Mean)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_minimal() +
  theme(legend.text = element_text(size = 15),
        legend.position = "top",
        plot.title = element_text(size = 20, face = "bold"),
        legend.title = element_text(size = 18),
        axis.title.x = element_blank(),
        axis.title.y = element_text(size = 18),
        axis.text.y = element_text(size = 15),
        strip.text = element_text(size = 15),
        axis.text.x = element_text(angle = 45, colour = "black", vjust = 1, hjust = 1, size = 15)) +
  labs(title = "Top abundance Family", y = "Relative abundance mean") +
  facet_grid( scales = "free")

Animal

animal_PHPB_long <- subset(Melted_Nem_hu_unrarefied_rel, Biosample == "Animals")

#Calculate errors
animal_PHPB_long_summary <- animal_PHPB_long %>%
  dplyr::group_by(Family, Sample) %>%
  dplyr::summarise(
    Sum = sum(Abundance, na.rm = TRUE))  %>% #Calculate sum abundance in a sample
  dplyr::ungroup() %>%
  dplyr::group_by(Family) %>%
  dplyr::summarise(
    Mean = mean(Sum, na.rm = TRUE))  %>% #Calculate mean abundance of families
  dplyr::arrange(desc(Mean))  %>% # Sort abundances 
  dplyr::slice_head(n = 15)  #Extracting top 15 highest abundance Families 
## `summarise()` has grouped output by 'Family'. You can override using the
## `.groups` argument.
#plotting
ggplot(animal_PHPB_long_summary, aes(x = reorder(Family, -Mean), y = Mean)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_minimal() +
  theme(legend.text = element_text(size = 15),
        legend.position = "top",
        plot.title = element_text(size = 20, face = "bold"),
        legend.title = element_text(size = 18),
        axis.title.x = element_blank(),
        axis.title.y = element_text(size = 18),
        axis.text.y = element_text(size = 15),
        strip.text = element_text(size = 15),
        axis.text.x = element_text(angle = 45, colour = "black", vjust = 1, hjust = 1, size = 15)) +
  labs(title = "Top abundance Family in Animal", y = "Relative abundance mean") +
  facet_grid( scales = "free")

Environment

envi_PHPB_long <- subset(Melted_Nem_hu_unrarefied_rel, Biosample == "Environment")
#Calculate errors
envi_PHPB_long_summary <- envi_PHPB_long %>%
  dplyr::group_by(Family, Sample) %>%
  dplyr::summarise(
    Sum = sum(Abundance, na.rm = TRUE))  %>% #Calculate sum abundance in a sample
  dplyr::ungroup() %>%
  dplyr::group_by(Family) %>%
  dplyr::summarise(
    Mean = mean(Sum, na.rm = TRUE))  %>% #Calculate mean abundance of families
  dplyr::arrange(desc(Mean))  %>% # Sort abundances 
  dplyr::slice_head(n = 15)   #Extracting top 15 highest abundance Families 
## `summarise()` has grouped output by 'Family'. You can override using the
## `.groups` argument.
#Plotting
ggplot(envi_PHPB_long_summary, aes(x = reorder(Family, -Mean), y = Mean)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_minimal() +
  theme(legend.text = element_text(size = 15),
        legend.position = "top",
        plot.title = element_text(size = 20, face = "bold"),
        legend.title = element_text(size = 18),
        axis.title.x = element_blank(),
        axis.title.y = element_text(size = 18),
        axis.text.y = element_text(size = 15),
        strip.text = element_text(size = 15),
        axis.text.x = element_text(angle = 45, colour = "black", vjust = 1, hjust = 1, size = 15)) +
  labs(title = "Top abundance Family in Environment", y = "Relative abundance mean") +
  facet_grid( scales = "free")

Human

human_PHPB_long <- subset(Melted_Nem_hu_unrarefied_rel, Biosample == "Human")
#Calculate means
human_PHPB_long_summary <- human_PHPB_long %>%
  dplyr::group_by(Family, Sample) %>%
  dplyr::summarise(
    Sum = sum(Abundance, na.rm = TRUE))  %>% #Calculate sum abundance in a sample
  dplyr::ungroup() %>%
  dplyr::group_by(Family) %>%
  dplyr::summarise(
    Mean = mean(Sum, na.rm = TRUE))  %>% #Calculate mean abundance of families
  dplyr::arrange(desc(Mean))  %>% # Sort abundances 
  dplyr::slice_head(n = 15) #Extracting top 15 highest abundance Families 
## `summarise()` has grouped output by 'Family'. You can override using the
## `.groups` argument.
#Plotting
ggplot(human_PHPB_long_summary, aes(x = reorder(Family, -Mean), y = Mean)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_minimal() +
  theme(legend.text = element_text(size = 15),
        legend.position = "top",
        plot.title = element_text(size = 20, face = "bold"),
        legend.title = element_text(size = 18),
        axis.title.x = element_blank(),
        axis.title.y = element_text(size = 18),
        axis.text.y = element_text(size = 15),
        strip.text = element_text(size = 15),
        axis.text.x = element_text(angle = 45, colour = "black", vjust = 1, hjust = 1, size = 15)) +
  labs(title = "Top abundance Family in Human", y = "Relative abundance mean") +
  facet_grid( scales = "free")

Extract most abundance Family

#Extract most abundant Family (5) names in each Biosample
most_abun_Family_animal <- dplyr::slice_head(animal_PHPB_long_summary, n = 15)$Family
most_abun_Family_envi <- dplyr::slice_head(envi_PHPB_long_summary, n = 15)$Family
most_abun_Family_human <- dplyr::slice_head(human_PHPB_long_summary, n = 15)$Family
most_abun_Family <- unique(c(most_abun_Family_animal, most_abun_Family_envi, most_abun_Family_human)) #Combine most abundant Families 
most_abun_Family
##  [1] "Neisseriaceae"      "Vibrionaceae"       "Xanthomonadaceae"  
##  [4] "Beijerinckiaceae"   "Shewanellaceae"     "Comamonadaceae"    
##  [7] "Sphingomonadaceae"  "Rhizobiaceae"       "Pseudomonadaceae"  
## [10] "Rhodobacteraceae"   "Lactobacillaceae"   "Streptococcaceae"  
## [13] "Planococcaceae"     "Moraxellaceae"      "Bacillaceae"       
## [16] "Burkholderiaceae"   "Halomonadaceae"     "Caulobacteraceae"  
## [19] "Erwiniaceae"        "Alcaligenaceae"     "Xanthobacteraceae" 
## [22] "Corynebacteriaceae" "Oxalobacteraceae"   "Rickettsiaceae"
#Modify family name (By keeping the family names of most abundant family while change the other in to "Other")
physeq_PHPB_long_topFamily <- Melted_Nem_hu_unrarefied_rel %>%
                              dplyr::mutate(Family = ifelse(Family %in% most_abun_Family, Family, "Other"))

Bacterial composition in each samples (bar plot)

physeq_PHPB_long_topFamily$Biosample = factor(physeq_PHPB_long_topFamily$Biosample , levels = c("Animals", "Human", "Environment", "Control")) # rearrange date

# Reorder 'Family' factor so that "Other" is last
physeq_PHPB_long_topFamily$Family <- factor(physeq_PHPB_long_topFamily$Family, 
                                              levels = c(setdiff(unique(physeq_PHPB_long_topFamily$Family), "Other"), "Other"))


# Set up custom color
custom_colors <- setNames(c(
  "#14232c", "#446196", "#6e89af", "#97a1a8", "#9f7878", "#a25551",
  "#b3272b", "#b7232c", "#7f1d1a", "#782c19", "#7c5b24", "#757235", "#706a2a",
  "#9a6f2a", "#a9792d", "#b86b2d","#b74b2d", "#c17d37", "#726654", "#3f3f44", "#2c3d56",
  "#876f9d", "#95599b", "#95169b"
), unique(physeq_PHPB_long_topFamily$Family[physeq_PHPB_long_topFamily$Family != "Other"])) #Prepare color for most abundance families
custom_colors <- c(custom_colors, "Other" = "gray") # Add color for other families

#Plotting
p5 <- ggplot(physeq_PHPB_long_topFamily , aes(x = Biosample, fill = Family)) + 
  geom_bar(position ="fill")  + 
  scale_fill_manual(values = custom_colors) +
  ylab("Pathobiome bacterial composition (%)") +
  scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
  facet_grid( ~ Biosample + Date, scales = "free", space = "free") +
  theme_bw() +
  theme(legend.text = element_text(face = "italic")) +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        panel.background = element_blank(),  #remove background
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank())  #remove minor-grid labels
p5

ggsave("/Users/qcvp/R/Plots/PHPB_barplot_composition.png", plot = p5, width = 11.5, height = 7)

Other

Total ASV, family

whole_ASVs <- phyloseq::taxa_sums(Nem_hu_unrarefied) 
whole_ASVs <- subset(whole_ASVs, whole_ASVs > 0) #remove ASV with 0 abundance
length(whole_ASVs)
## [1] 290
length(unique(Melted_Nem_hu_unrarefied_abs$Family))
## [1] 65

Calculation of % Family reads / total reads in pathobiome

animal_abs_PHPB <- subset(Melted_Nem_hu_unrarefied_abs, Biosample == "Animals")
envi_abs_PHPB <- subset(Melted_Nem_hu_unrarefied_abs, Biosample == "Environment")
human_abs_PHPB <- subset(Melted_Nem_hu_unrarefied_abs, Biosample == "Human")
#Calculate % of Family in all samples 
Melted_Nem_hu_unrarefied_abs %>%
  group_by(Family) %>%
  summarise(perc_read = sum(Abundance)/sum(Melted_Nem_hu_unrarefied_abs$Abundance)) %>%
  arrange(desc (perc_read))
## # A tibble: 65 × 2
##    Family            perc_read
##    <chr>                 <dbl>
##  1 Vibrionaceae         0.351 
##  2 Beijerinckiaceae     0.105 
##  3 Shewanellaceae       0.0820
##  4 Sphingomonadaceae    0.0627
##  5 Burkholderiaceae     0.0620
##  6 Halomonadaceae       0.0582
##  7 Moraxellaceae        0.0523
##  8 Pseudomonadaceae     0.0414
##  9 Oxalobacteraceae     0.0346
## 10 Bacillaceae          0.0312
## # ℹ 55 more rows
# For Animal sample
animal_abs_PHPB %>%
  group_by(Family) %>%
  summarise(perc_read = sum(Abundance)/sum(animal_abs_PHPB$Abundance)) %>%
  arrange(desc (perc_read))
## # A tibble: 44 × 2
##    Family            perc_read
##    <chr>                 <dbl>
##  1 Vibrionaceae        0.529  
##  2 Shewanellaceae      0.144  
##  3 Beijerinckiaceae    0.120  
##  4 Sphingomonadaceae   0.0858 
##  5 Pseudomonadaceae    0.0385 
##  6 Xanthomonadaceae    0.0265 
##  7 Moraxellaceae       0.0138 
##  8 Halomonadaceae      0.00574
##  9 Comamonadaceae      0.00571
## 10 Burkholderiaceae    0.00493
## # ℹ 34 more rows
# For Environment sample
envi_abs_PHPB %>%
  group_by(Family) %>%
  summarise(perc_read = sum(Abundance)/sum(envi_abs_PHPB$Abundance)) %>%
  arrange(desc (perc_read))
## # A tibble: 33 × 2
##    Family            perc_read
##    <chr>                 <dbl>
##  1 Vibrionaceae        0.361  
##  2 Burkholderiaceae    0.237  
##  3 Sphingomonadaceae   0.123  
##  4 Streptococcaceae    0.0849 
##  5 Lactobacillaceae    0.0810 
##  6 Halomonadaceae      0.0459 
##  7 Comamonadaceae      0.0151 
##  8 Moraxellaceae       0.00960
##  9 Rhizobiaceae        0.00898
## 10 Alcaligenaceae      0.00766
## # ℹ 23 more rows
# For Human sample
human_abs_PHPB %>%
  group_by(Family) %>%
  summarise(perc_read = sum(Abundance)/sum(human_abs_PHPB$Abundance)) %>%
  arrange(desc (perc_read))
## # A tibble: 55 × 2
##    Family            perc_read
##    <chr>                 <dbl>
##  1 Halomonadaceae       0.151 
##  2 Moraxellaceae        0.133 
##  3 Beijerinckiaceae     0.111 
##  4 Burkholderiaceae     0.110 
##  5 Oxalobacteraceae     0.104 
##  6 Bacillaceae          0.0924
##  7 Xanthobacteraceae    0.0719
##  8 Pseudomonadaceae     0.0595
##  9 Vibrionaceae         0.0408
## 10 Rhizobiaceae         0.0276
## # ℹ 45 more rows
#Extract list of Family occur in sample types
list_fam_all <- Reduce(intersect, list(animal_abs_PHPB$Family, envi_abs_PHPB$Family, human_abs_PHPB$Family)) #Species occur in all biosample types
list_fam_EA <- setdiff(intersect(envi_abs_PHPB$Family,animal_abs_PHPB$Family), list_fam_all) #Species occur in both Environment and animal but not in Human
list_fam_EH <- setdiff(intersect(envi_abs_PHPB$Family,human_abs_PHPB$Family), list_fam_all) #Species occur in both Environment and human but not in animal
list_fam_AH <- setdiff(intersect(animal_abs_PHPB$Family,human_abs_PHPB$Family), list_fam_all) #Species occur in both human and animal but not in environment
list_fam_A <- Reduce(setdiff, list(animal_abs_PHPB$Family, list_fam_EA, list_fam_AH, list_fam_all)) #Species occur in only Animal
list_fam_A
## [1] "Peptostreptococcaceae" "Porphyromonadaceae"    "Cellvibrionaceae"     
## [4] "Brevibacteriaceae"     "Planococcaceae"
list_fam_H <- Reduce(setdiff, list(human_abs_PHPB$Family, list_fam_EH, list_fam_AH, list_fam_all)) #Species occur in only Human
list_fam_H
##  [1] "Rhodocyclaceae"    "Methylophilaceae"  "Azospirillaceae"  
##  [4] "Aerococcaceae"     "Selenomonadaceae"  "Dietziaceae"      
##  [7] "Microbacteriaceae" "Ruminococcaceae"   "Yersiniaceae"     
## [10] "Gemellaceae"       "Rickettsiaceae"    "Flavobacteriaceae"
## [13] "Streptomycetaceae" "Morganellaceae"
list_fam_E <- Reduce(setdiff, list(envi_abs_PHPB$Family, list_fam_EH, list_fam_EA, list_fam_all)) #Species occur in only Environment
list_fam_E
## [1] "Aeromonadaceae"   "Clostridiaceae"   "Anaerovoracaceae"

Calculation of % Family / richness in pathobiome

tax <-  as.data.frame(Nem_hu_unrarefied@tax_table)
tax <- subset(tax, Family %in% unique(Melted_Nem_hu_unrarefied_abs$Family))
#Calculate % of Family in all samples 
tax %>%
  mutate(count =1) %>%
  group_by(Family) %>%
  summarise(perc_read = sum(count)/nrow(tax)) %>%
  arrange(desc (perc_read))
## # A tibble: 65 × 2
##    Family             perc_read
##    <chr>                  <dbl>
##  1 Pseudomonadaceae      0.0706
##  2 Moraxellaceae         0.0570
##  3 Comamonadaceae        0.0529
##  4 Corynebacteriaceae    0.0529
##  5 Lactobacillaceae      0.0488
##  6 Vibrionaceae          0.0407
##  7 Prevotellaceae        0.0366
##  8 Bacillaceae           0.0312
##  9 Micrococcaceae        0.0299
## 10 Staphylococcaceae     0.0299
## # ℹ 55 more rows

PCOA

Whole data with ASV PCOA

Making data for PCoA (with out control samples for statistic test)

Nem_hu_unrarefied_PCOA <- phyloseq::subset_samples(Nem_hu_unrarefied, !sample_names(Nem_hu_unrarefied) %in% c("Control.1", "Control.2","Control.3", "Control.4", "Control.5", "Control.6", "Control.7"))
otu.bray3 <- vegan::vegdist(Nem_hu_unrarefied_PCOA@otu_table@.Data, diag = TRUE, method = "bray") # computes dissimilarity indices (bray)
pcoa.sub3 <- ape::pcoa(otu.bray3) #computes principal coordinate decomposition

pcoa_coord3 <- pcoa.sub3$vectors[,1:2] # Extract coordination of PCOA plot
samp_data_P <- data.frame(phyloseq::sample_data(Nem_hu_unrarefied_PCOA)) #remove control sample

hull_PHPB3 <- cbind(pcoa_coord3, samp_data_P) # Combine metadata with PCOA coordination

Making data for PCoA (with control samples for PCOA plot)

otu.bray3.5 <- vegan::vegdist(Nem_hu_unrarefied@otu_table@.Data, diag = TRUE, method = "bray") # computes dissimilarity indices (bray)
pcoa.sub3.5 <- ape::pcoa(otu.bray3.5) #computes principal coordinate decomposition

pcoa_coord3.5 <- pcoa.sub3.5$vectors[,1:2] # Extract coordination of PCOA plot
samp_data_P <- data.frame(phyloseq::sample_data(Nem_hu_unrarefied)) #remove control sample

hull_PHPB3.5 <- cbind(pcoa_coord3.5, samp_data_P) # Combine metadata with PCOA coordination

Compare distance between biosamples with Wilcoxon rank sum test

# Extract sample names
animal_samp_data <- subset(samp_data_P, Biosample == "Animals")
envi_samp_data <- subset(samp_data_P, Biosample == "Environment")
human_samp_data <- subset(samp_data_P, Biosample == "Human")

# Melting distance matrix
dm3_long <- as.matrix(otu.bray3) %>% 
            as.data.frame()  %>%
            tibble::rownames_to_column("Sample.name")  %>%
            tidyr::gather(key = "Sample.name2", value = "Distance", -Sample.name)

# Characterize distances
dm3_long_biosample<- dm3_long %>%
        dplyr::mutate(Biosample_1=dplyr::case_when(
          Sample.name %in% rownames(animal_samp_data) ~ "Animals",
          Sample.name %in% rownames(envi_samp_data) ~ "Environment",
          Sample.name %in% rownames(human_samp_data) ~ "Human")) %>% # Adding Biosample information
        dplyr::mutate(Biosample_2=dplyr::case_when(
          Sample.name2 %in% rownames(animal_samp_data) ~ "Animals",
          Sample.name2 %in% rownames(envi_samp_data) ~ "Environment",
          Sample.name2 %in% rownames(human_samp_data) ~ "Human")) %>% # Adding Biosample information
        dplyr::mutate(Groups = paste(Biosample_1, Biosample_2, sep = "_to_")) %>% # Classify distances with Biosample types
        subset(Groups == c("Animals_to_Environment", "Human_to_Environment", "Animals_to_Human")) # Remove duplicate (ex: Animal_to_Human and Human_to_Animal)
## Warning in Groups == c("Animals_to_Environment", "Human_to_Environment", :
## longer object length is not a multiple of shorter object length
#Count number of observation
sum(dm3_long_biosample$Groups == "Animals_to_Environment")
## [1] 174
sum(dm3_long_biosample$Groups == "Animals_to_Human")
## [1] 1450
sum(dm3_long_biosample$Groups == "Human_to_Environment")
## [1] 225
#Wilcoxon rank sum test
ggpubr::compare_means(Distance ~ Groups,
                            dm3_long_biosample,
                            method = "wilcox.test")
## # A tibble: 3 × 8
##   .y.      group1              group2        p    p.adj p.format p.signif method
##   <chr>    <chr>               <chr>     <dbl>    <dbl> <chr>    <chr>    <chr> 
## 1 Distance Animals_to_Environ… Human… 1.20e- 4 2.4 e- 4 0.00012  ***      Wilco…
## 2 Distance Animals_to_Environ… Anima… 1.98e-15 5.90e-15 2e-15    ****     Wilco…
## 3 Distance Human_to_Environme… Anima… 5.40e- 4 5.4 e- 4 0.00054  ***      Wilco…
#plotting
p6 <- ggplot(dm3_long_biosample, aes(x = Groups, y = Distance, colour = Groups)) +
  theme_bw() +
  geom_boxplot(aes(colour = Groups), alpha=0.1, outlier.colour = NA) + 
   theme(axis.title.x = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size = 13),
        strip.text = element_text(size = 18),
        axis.title.y = element_text(size = 20),
        axis.text.y = element_text(size=20),
        axis.text.x = element_blank()) + 
  labs(y = "Bray-Curtis Distance") +
  scale_y_continuous(limits = c(0.95, 1.005))
p6
## Warning: Removed 226 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

ggsave("/Users/qcvp/R/Plots/PHPB_Distance_BetweenBiosample_ASV.png", plot = p6, width = 11, height = 6)
## Warning: Removed 226 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Compare distance between Animal and human from same and different farms with Wilcoxon rank sum test

# Extract sample farm information in farms with borh animal and human samples
farm3_samp_data <- subset(samp_data_P, Station == "Farm_03")
farm4_samp_data <- subset(samp_data_P, Station == "Farm_04")
farm5_samp_data <- subset(samp_data_P, Station == "Farm_05")
farm6_samp_data <- subset(samp_data_P, Station == "Farm_06")
farm10_samp_data <- subset(samp_data_P, Station == "Farm_10")

# Characterize distances
dm3_long_farm<- dm3_long_biosample %>%
        subset(Groups == c("Animals_to_Human")) %>% #keep only animals to human distances
        dplyr::mutate(farm_1=dplyr::case_when(
          Sample.name %in% rownames(farm3_samp_data) ~ "Farm3",
          Sample.name %in% rownames(farm4_samp_data) ~ "Farm4",
          Sample.name %in% rownames(farm5_samp_data) ~ "Farm5",
          Sample.name %in% rownames(farm6_samp_data) ~ "Farm6",
          Sample.name %in% rownames(farm10_samp_data) ~ "Farm10")) %>%  #Adding Farm infomation
        dplyr::mutate(farm_2=dplyr::case_when(
          Sample.name2 %in% rownames(farm3_samp_data) ~ "Farm3",
          Sample.name2 %in% rownames(farm4_samp_data) ~ "Farm4",
          Sample.name2 %in% rownames(farm5_samp_data) ~ "Farm5",
          Sample.name2 %in% rownames(farm6_samp_data) ~ "Farm6",
          Sample.name2 %in% rownames(farm10_samp_data) ~ "Farm10")) %>% #Adding Farm infomation
        dplyr::mutate(Groups = ifelse(farm_1 == farm_2, "Samefarm", "Difffarm")) %>% # Classify distances between human and animal from same or different farms
        subset(Groups != "NA") # Remove unclassified distances (belong to other farms)

#Count number of observation
sum(dm3_long_farm$Groups == "Samefarm")
## [1] 95
sum(dm3_long_farm$Groups == "Difffarm")
## [1] 365
#Wilcoxon rank sum test
ggpubr::compare_means(Distance ~ Groups,
                            dm3_long_farm,
                            method = "wilcox.test")
## # A tibble: 1 × 8
##   .y.      group1   group2       p p.adj p.format p.signif method  
##   <chr>    <chr>    <chr>    <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Distance Difffarm Samefarm 0.674  0.67 0.67     ns       Wilcoxon
#plotting
p7 <- ggplot(dm3_long_farm, aes(x = Groups, y = Distance, colour = Groups)) +
  theme_bw() +
  geom_boxplot(aes(colour = Groups), alpha=0.1, outlier.colour = NA) + 
   theme(axis.title.x = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size = 13),
        strip.text = element_text(size = 18),
        axis.title.y = element_text(size = 20),
        axis.text.y = element_text(size=20),
        axis.text.x = element_blank()) + 
  labs(y = "Bray-Curtis Distance") +
  scale_y_continuous(limits = c(0.98, 1.005))
p7
## Warning: Removed 114 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

ggsave("/Users/qcvp/R/Plots/PHPB_Distance_Same_DiffFarm_ASV.png", plot = p7, width = 11, height = 6)
## Warning: Removed 114 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

PERMANOVA Calculation

#PERMANOVA
vegan::adonis2(otu.bray3 ~ Biosample, data = hull_PHPB3, permutations = 1000, method = "bray")
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 1000
## 
## vegan::adonis2(formula = otu.bray3 ~ Biosample, data = hull_PHPB3, permutations = 1000, method = "bray")
##           Df SumOfSqs      R2      F   Pr(>F)    
## Model      2    8.387 0.13423 10.775 0.000999 ***
## Residual 139   54.096 0.86577                    
## Total    141   62.483 1.00000                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vegan::adonis2(otu.bray3 ~ Date, data = hull_PHPB3, permutations = 1000, method = "bray")
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 1000
## 
## vegan::adonis2(formula = otu.bray3 ~ Date, data = hull_PHPB3, permutations = 1000, method = "bray")
##           Df SumOfSqs      R2      F   Pr(>F)    
## Model      2    1.883 0.03013 2.1591 0.000999 ***
## Residual 139   60.601 0.96987                    
## Total    141   62.483 1.00000                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Pairwise PERMANOVA 
pairwise.adonis2(otu.bray3~ Biosample,data=hull_PHPB3, nperms = 1000)
## $parent_call
## [1] "otu.bray3 ~ Biosample , strata = Null , permutations 999"
## 
## $Environment_vs_Animals
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   2.6992 0.09406 6.7486  0.001 ***
## Residual 65  25.9975 0.90594                  
## Total    66  28.6967 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $Environment_vs_Human
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1    3.732 0.11355 10.504  0.001 ***
## Residual 82   29.136 0.88645                  
## Total    83   32.868 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $Animals_vs_Human
##           Df SumOfSqs      R2      F Pr(>F)    
## Model      1    5.238 0.08985 12.932  0.001 ***
## Residual 131   53.059 0.91015                  
## Total    132   58.297 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## attr(,"class")
## [1] "pwadstrata" "list"
pairwise.adonis2(otu.bray3~ Date,data=hull_PHPB3, nperms = 1000)
## $parent_call
## [1] "otu.bray3 ~ Date , strata = Null , permutations 999"
## 
## $Nov_2022_vs_Jul_2023
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1    0.771 0.01777 1.7185  0.022 *
## Residual 95   42.640 0.98223                
## Total    96   43.411 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $Nov_2022_vs_Dec_2023
##          Df SumOfSqs      R2     F Pr(>F)   
## Model     1    1.012 0.02366 2.351  0.003 **
## Residual 97   41.752 0.97634                
## Total    98   42.764 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $Jul_2023_vs_Dec_2023
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1    1.049 0.02771 2.4512  0.003 **
## Residual 86   36.809 0.97229                 
## Total    87   37.858 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## attr(,"class")
## [1] "pwadstrata" "list"

PCoA plot

hull_PHPB3.5$Biosample = factor(hull_PHPB3.5$Biosample , levels = c("Environment", "Animals", "Human","Control"))
hull_PHPB3.5$Date = factor(hull_PHPB3.5$Date , levels = c("Nov_2022", "Jul_2023", "Dec_2023","Control"))

p8 <- ggplot() + 
  geom_hline(yintercept=0, colour="lightgrey", linetype = 2) + 
  geom_vline(xintercept=0, colour="lightgrey", linetype = 2) +
  geom_point(data = hull_PHPB3.5, aes(x=Axis.1, y=Axis.2, color = Biosample, shape = Date), alpha = 0.7, size = 5) +
   geom_point(data = hull_PHPB3.5, aes(x=Axis.1, y=Axis.2), colour = "white", size = 0.7) +
  xlab(paste("PCo1 (", round(pcoa.sub3$values$Relative_eig[1]*100, 1), "%)")) +
  ylab(paste("PCo2 (", round(pcoa.sub3$values$Relative_eig[2]*100, 1), "%)")) +
  theme_bw() +
  scale_shape_manual(values = c(16, 17, 15, 18)) +
  coord_equal() +
  stat_ellipse(data = hull_PHPB3,aes(x=Axis.1, y=Axis.2, group = Biosample, color = Biosample), level = 0.95) +
  labs(color = "Sample types") +
  theme(axis.title.x = element_text(size=14), # remove x-axis labels
        axis.title.y = element_text(size=14), # remove y-axis labels
             legend.position = "right",
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank(),  #remove minor-grid labels
        plot.background = element_blank()) + 
  scale_colour_manual(breaks =c("Human","Animals","Environment","Control"), 
                      values = c("red","orange","blue","green")) 
p8
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

ggsave("/Users/qcvp/R/Plots/PHPB_ASV_PCOA_plots.png", plot = p8)
## Saving 7 x 5 in image
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

Variability comparision

#Biosample
bdc5 <- vegan::betadisper(otu.bray3,hull_PHPB3$Biosample)
vegan::permutest(bdc5, pairwise=TRUE)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##            Df  Sum Sq Mean Sq      F N.Perm Pr(>F)    
## Groups      2 0.88966 0.44483 65.018    999  0.001 ***
## Residuals 139 0.95099 0.00684                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
##                Animals Environment Human
## Animals                 1.0000e-03 0.002
## Environment 1.2299e-15             0.001
## Human       6.9781e-04  2.2587e-14
#Date
bdc6 <- vegan::betadisper(otu.bray3,hull_PHPB3$Date)
vegan::permutest(bdc6, pairwise=TRUE)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##            Df  Sum Sq   Mean Sq      F N.Perm Pr(>F)   
## Groups      2 0.03575 0.0178746 6.5276    999  0.005 **
## Residuals 139 0.38063 0.0027383                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
##           Dec_2023  Jul_2023 Nov_2022
## Dec_2023           0.0150000    0.006
## Jul_2023 0.0123828              0.804
## Nov_2022 0.0031422 0.8044831

Whole data with Taxonomy PCoA

Making data for PCoA (without control samples for statistical tests)

physeq_Nem_long_genus2 <- phyloseq::tax_glom(Nem_hu_unrarefied, "Species") #extract genus taxonomy

physeq_Nem_long_genus2_PCOA <- phyloseq::subset_samples(physeq_Nem_long_genus2, !sample_names(physeq_Nem_long_genus2) %in% c("Control.1", "Control.2","Control.3", "Control.4", "Control.5", "Control.6", "Control.7"))

otu.bray4 <- vegan::vegdist(physeq_Nem_long_genus2_PCOA@otu_table@.Data, method = "bray", na.rm = TRUE) #Calculate distace matrix
pcoa.sub4 <- ape::pcoa(otu.bray4)

pcoa_coord4 <- pcoa.sub4$vectors[,1:2] # Extract PCOA coordination
samp_data_P <- data.frame(phyloseq::sample_data(physeq_Nem_long_genus2_PCOA))

hull_PHPB4 <- cbind(pcoa_coord4, samp_data_P) #Combind metadata and PCOA coordination

Making data for PCoA (with control samples for PCOA plot)

otu.bray4.5 <- vegan::vegdist(physeq_Nem_long_genus2@otu_table@.Data, method = "bray", na.rm = TRUE) #Calculate distace matrix
pcoa.sub4.5 <- ape::pcoa(otu.bray4.5)

pcoa_coord4.5 <- pcoa.sub4.5$vectors[,1:2] # Extract PCOA coordination
samp_data_P <- data.frame(phyloseq::sample_data(physeq_Nem_long_genus2))

hull_PHPB4.5 <- cbind(pcoa_coord4.5, samp_data_P) #Combind metadata and PCOA coordination

Compare distance between biosamples with Wilcoxon rank sum test

#Melting distance matrix
dm4_long <- as.matrix(otu.bray4) %>%
       as.data.frame()   %>%
       tibble::rownames_to_column("Sample.name") %>%
       tidyr::gather(key = "Sample.name2", value = "Distance", -Sample.name)

#Characterize distances
dm4_long_biosample<- dm4_long %>%
        dplyr::mutate(Biosample_1=dplyr::case_when(
          Sample.name %in% rownames(animal_samp_data) ~ "Animals",
          Sample.name %in% rownames(envi_samp_data) ~ "Environment",
          Sample.name %in% rownames(human_samp_data) ~ "Human")) %>% # Adding Biosample information
        dplyr::mutate(Biosample_2=dplyr::case_when(
          Sample.name2 %in% rownames(animal_samp_data) ~ "Animals",
          Sample.name2 %in% rownames(envi_samp_data) ~ "Environment",
          Sample.name2 %in% rownames(human_samp_data) ~ "Human")) %>% # Adding Biosample information
        dplyr::mutate(Groups = paste(Biosample_1, Biosample_2, sep = "_to_")) %>% # Classify distances with Biosample types
        subset(Groups == c("Animals_to_Environment", "Human_to_Environment", "Animals_to_Human")) # Remove duplicate (ex: Animal_to_Human and Human_to_Animal)
## Warning in Groups == c("Animals_to_Environment", "Human_to_Environment", :
## longer object length is not a multiple of shorter object length
#Count number of observation
sum(dm4_long_biosample$Groups == "Animals_to_Environment")
## [1] 174
sum(dm4_long_biosample$Groups == "Animals_to_Human")
## [1] 1450
sum(dm4_long_biosample$Groups == "Human_to_Environment")
## [1] 225
#Wilcoxon rank sum test
ggpubr::compare_means(Distance ~ Groups,
                            dm4_long_biosample,
                            method = "wilcox.test")
## # A tibble: 3 × 8
##   .y.      group1              group2        p    p.adj p.format p.signif method
##   <chr>    <chr>               <chr>     <dbl>    <dbl> <chr>    <chr>    <chr> 
## 1 Distance Animals_to_Environ… Human… 2.90e- 3 2.9 e- 3 0.0029   **       Wilco…
## 2 Distance Animals_to_Environ… Anima… 5.36e-16 1.10e-15 5.4e-16  ****     Wilco…
## 3 Distance Human_to_Environme… Anima… 2.88e-41 8.60e-41 < 2e-16  ****     Wilco…
#Plotting
p9 <- ggplot(dm4_long_biosample, aes(x = Groups, y = Distance, colour = Groups)) +
  theme_bw() +
  geom_boxplot(aes(colour = Groups), alpha=0.1, outlier.colour = NA) + 
   theme(axis.title.x = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size = 13),
        strip.text = element_text(size = 13),
        axis.title.y = element_text(size = 15),
        axis.text.y = element_text(size=13),
        axis.text.x = element_text(size = 10, angle = 45, hjust = 1)) + 
  labs(y = "Bray-Curtis Distance") +
   scale_colour_manual(breaks =c("Animals_to_Environment","Animals_to_Human","Human_to_Environment"), 
                      values = c("gray","gray","gray"))
  scale_y_continuous(limits = c(0.95, 1.005))
## <ScaleContinuousPosition>
##  Range:  
##  Limits: 0.95 --    1
p9

Compare distance between Animal and human from same and different farms with Wilcoxon rank sum test

# Characterize distances
dm4_long_farm<- dm4_long_biosample %>%
        subset(Groups == c("Animals_to_Human")) %>% #kept only animals to human distances
        dplyr::mutate(farm_1=dplyr::case_when(
          Sample.name %in% rownames(farm3_samp_data) ~ "Farm3",
          Sample.name %in% rownames(farm4_samp_data) ~ "Farm4",
          Sample.name %in% rownames(farm5_samp_data) ~ "Farm5",
          Sample.name %in% rownames(farm6_samp_data) ~ "Farm6",
          Sample.name %in% rownames(farm10_samp_data) ~ "Farm10")) %>% 
        dplyr::mutate(farm_2=dplyr::case_when(
          Sample.name2 %in% rownames(farm3_samp_data) ~ "Farm3",
          Sample.name2 %in% rownames(farm4_samp_data) ~ "Farm4",
          Sample.name2 %in% rownames(farm5_samp_data) ~ "Farm5",
          Sample.name2 %in% rownames(farm6_samp_data) ~ "Farm6",
          Sample.name2 %in% rownames(farm10_samp_data) ~ "Farm10")) %>% 
        dplyr::mutate(Groups = ifelse(farm_1 == farm_2, "Samefarm", "Difffarm")) %>% # Classify distances between human and animal from same or different farms
        subset(Groups != "NA") # Remove unclassified distances (belong to other farms)

#Wilcoxon rank sum test
ggpubr::compare_means(Distance ~ Groups,
                            dm4_long_farm,
                            method = "wilcox.test")
## # A tibble: 1 × 8
##   .y.      group1   group2       p p.adj p.format p.signif method  
##   <chr>    <chr>    <chr>    <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 Distance Difffarm Samefarm 0.720  0.72 0.72     ns       Wilcoxon
#Count number of observation
sum(dm4_long_farm$Groups == "Samefarm")
## [1] 95
sum(dm4_long_farm$Groups == "Difffarm")
## [1] 365
#Plotting
p10 <- ggplot(dm4_long_farm, aes(x = Groups, y = Distance, colour = Groups)) +
  theme_bw() +
  geom_boxplot(aes(colour = Groups), alpha=0.1, outlier.colour = NA) + 
   theme(axis.title.x = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size = 13),
        strip.text = element_text(size = 13),
        axis.title.y = element_text(size = 15),
        axis.text.y = element_text(size=13),
        axis.text.x = element_blank()) + 
  labs(y = "Bray-Curtis Distance") +
  scale_y_continuous(limits = c(0.98, 1.005))
p10
## Warning: Removed 151 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Combine 2 distances plot
g5 <- grid.arrange(p9, p10,nrow = 1)
## Warning: Removed 151 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

PERMANOVA Calculation

vegan::adonis2(otu.bray4 ~ Biosample, data = hull_PHPB4, permutations = 1000, method = "bray")
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 1000
## 
## vegan::adonis2(formula = otu.bray4 ~ Biosample, data = hull_PHPB4, permutations = 1000, method = "bray")
##           Df SumOfSqs     R2     F   Pr(>F)    
## Model      2    8.180 0.1331 10.67 0.000999 ***
## Residual 139   53.279 0.8669                   
## Total    141   61.458 1.0000                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vegan::adonis2(otu.bray4 ~ Date, data = hull_PHPB4, permutations = 1000, method = "bray")
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 1000
## 
## vegan::adonis2(formula = otu.bray4 ~ Date, data = hull_PHPB4, permutations = 1000, method = "bray")
##           Df SumOfSqs     R2      F   Pr(>F)    
## Model      2    1.911 0.0311 2.2309 0.000999 ***
## Residual 139   59.547 0.9689                    
## Total    141   61.458 1.0000                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Pairwise PERMANOVA 
pairwise.adonis2(otu.bray4~ Biosample,data=hull_PHPB4, nperms = 1000)
## $parent_call
## [1] "otu.bray4 ~ Biosample , strata = Null , permutations 999"
## 
## $Environment_vs_Animals
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1    2.644 0.09405 6.7476  0.001 ***
## Residual 65   25.470 0.90595                  
## Total    66   28.114 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $Environment_vs_Human
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1    3.296 0.10257 9.3722  0.001 ***
## Residual 82   28.837 0.89743                  
## Total    83   32.133 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $Animals_vs_Human
##           Df SumOfSqs      R2      F Pr(>F)    
## Model      1     5.32 0.09241 13.338  0.001 ***
## Residual 131    52.25 0.90759                  
## Total    132    57.57 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## attr(,"class")
## [1] "pwadstrata" "list"
pairwise.adonis2(otu.bray4~ Date,data=hull_PHPB4, nperms = 1000)
## $parent_call
## [1] "otu.bray4 ~ Date , strata = Null , permutations 999"
## 
## $Nov_2022_vs_Jul_2023
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1    0.802 0.01877 1.8176  0.017 *
## Residual 95   41.935 0.98123                
## Total    96   42.737 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $Nov_2022_vs_Dec_2023
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1    0.972 0.02314 2.2981  0.003 **
## Residual 97   41.015 0.97686                 
## Total    98   41.987 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $Jul_2023_vs_Dec_2023
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1    1.107 0.02971 2.6334  0.002 **
## Residual 86   36.144 0.97029                 
## Total    87   37.251 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## attr(,"class")
## [1] "pwadstrata" "list"

PCoA plot

hull_PHPB4.5$Biosample = factor(hull_PHPB4.5$Biosample , levels = c("Environment", "Animals", "Human","Control"))
hull_PHPB4.5$Date = factor(hull_PHPB4.5$Date , levels = c("Nov_2022", "Jul_2023", "Dec_2023","Control"))

p11 <- ggplot() + 
  geom_hline(yintercept=0, colour="lightgrey", linetype = 2) + 
  geom_vline(xintercept=0, colour="lightgrey", linetype = 2) +
  geom_point(data = hull_PHPB4.5, aes(x=Axis.1, y=Axis.2, color = Biosample, shape = Date), alpha = 0.7, size = 5) +
  geom_point(data = hull_PHPB4.5, aes(x=Axis.1, y=Axis.2), colour = "white", size = 0.7) +
  xlab(paste("PCo1 (", round(pcoa.sub4$values$Relative_eig[1]*100, 1), "%)")) +
  ylab(paste("PCo2 (", round(pcoa.sub4$values$Relative_eig[2]*100, 1), "%)")) +
  theme_bw() +
  coord_equal() +
  labs(color = "Sample types") +
  stat_ellipse(data = hull_PHPB3,aes(x=Axis.1, y=Axis.2, group = Biosample, color = Biosample), level = 0.95) +
  scale_colour_brewer(palette="Set1") +
    scale_shape_manual(values = c(16, 17, 15, 18)) +
  theme(axis.title.x = element_text(size=14), # remove x-axis labels
        legend.position = "none",
        axis.title.y = element_text(size=14), # remove y-axis labels
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank(),  #remove minor-grid labels
        plot.background = element_blank()) +
  scale_colour_manual(breaks =c("Human","Animals","Environment","Control"), 
                      values = c("red","orange","blue","green")) 
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p11
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

ggsave("/Users/qcvp/R/Plots/PHPB_Taxonomy_PCOA_plots.png", plot = p11)
## Saving 7 x 5 in image
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

Variability comparision

#Biosample
bdc7 <- vegan::betadisper(otu.bray4,hull_PHPB4$Biosample)
vegan::permutest(bdc7, pairwise=TRUE)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##            Df  Sum Sq Mean Sq      F N.Perm Pr(>F)    
## Groups      2 0.85642 0.42821 61.463    999  0.001 ***
## Residuals 139 0.96841 0.00697                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
##                Animals Environment Human
## Animals                 1.0000e-03 0.003
## Environment 5.2901e-15             0.001
## Human       2.2440e-03  2.7783e-14
#Date
bdc8 <- vegan::betadisper(otu.bray4,hull_PHPB4$Date)
vegan::permutest(bdc8, pairwise=TRUE)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##            Df  Sum Sq   Mean Sq     F N.Perm Pr(>F)   
## Groups      2 0.03727 0.0186328 6.232    999  0.003 **
## Residuals 139 0.41559 0.0029899                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
##           Dec_2023  Jul_2023 Nov_2022
## Dec_2023           0.0080000    0.005
## Jul_2023 0.0126127              0.826
## Nov_2022 0.0039234 0.8229743

PCOA for Human and Animal only

Prepare data

#Extract OTU table of only Animal and Human
sample_data <- Nem_hu_unrarefied@sam_data #Extract sample data
animal_sample_name <- row.names(animal_samp_data) #Extract name
human_sample_name <- row.names(human_samp_data) #Extract name

#Extract data from OTU table - ASV data
animal_otu_table_ASV <- subset(Nem_hu_unrarefied@otu_table@.Data, rownames(Nem_hu_unrarefied@otu_table@.Data) %in% animal_sample_name)
human_otu_table_ASV <- subset(Nem_hu_unrarefied@otu_table@.Data, rownames(Nem_hu_unrarefied@otu_table@.Data) %in% human_sample_name)

#Extract data from OTU table - taxonomy data
animal_otu_table_tax <- subset(physeq_Nem_long_genus2@otu_table@.Data, rownames(Nem_hu_unrarefied@otu_table@.Data) %in% animal_sample_name)
human_otu_table_tax <- subset(physeq_Nem_long_genus2@otu_table@.Data, rownames(Nem_hu_unrarefied@otu_table@.Data) %in% human_sample_name)

Compare distance of Human Skin vs Animal/ Human Gut vs Animal with Wilcoxon rank sum test - ASV

#Extract sample data
human_skin_sample <- subset(human_samp_data, Biosample_type =="Skin")
human_gut_sample <- subset(human_samp_data, Biosample_type =="Gut")

#Characterize distances
dm3_long_animal_human <- subset(dm3_long_biosample, Groups == "Animals_to_Human")
dm3_long_animal_skingut <- dm3_long_animal_human %>%
                            dplyr::mutate(biosample_type=dplyr::case_when(
                           Sample.name2 %in% rownames(human_skin_sample) ~ "Skin",
                           Sample.name2 %in% rownames(human_gut_sample) ~ "Gut")) %>% 
                          dplyr::mutate(Groups2 = paste(Biosample_1, biosample_type, sep = "_to_"))

#Count number of observation
sum(dm3_long_animal_skingut$Groups2 == "Animals_to_Skin")
## [1] 715
sum(dm3_long_animal_skingut$Groups2 == "Animals_to_Gut")
## [1] 735
#Wilcoxon rank sum test
ggpubr::compare_means(Distance ~ Groups2,
                            dm3_long_animal_skingut,
                            method = "wilcox.test")
## # A tibble: 1 × 8
##   .y.      group1         group2             p    p.adj p.format p.signif method
##   <chr>    <chr>          <chr>          <dbl>    <dbl> <chr>    <chr>    <chr> 
## 1 Distance Animals_to_Gut Animals_to… 9.50e-41 9.50e-41 <2e-16   ****     Wilco…
#Plotting
p13 <- ggplot(dm3_long_animal_skingut, aes(x = Groups, y = Distance, colour = Groups)) +
  theme_bw() +
  geom_boxplot(aes(colour = Groups2), alpha=0.1, outlier.colour = NA) + 
   theme(axis.title.x = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size = 13),
        strip.text = element_text(size = 13),
        axis.title.y = element_text(size = 15),
        axis.text.y = element_text(size=13),
        axis.text.x = element_blank()) + 
  labs(y = "Bray-Curtis Distance") +
  scale_y_continuous(limits = c(0.95, 1.005))
p13
## Warning: Removed 148 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Compare distance of Lobster vs Human/ Fish vs Human with Wilcoxon rank sum test - ASV

#Extract sample data
animal_lobster_sample <- subset(animal_samp_data, Host_subtype =="Lobster")
animal_fish_sample <- subset(animal_samp_data, Host_subtype =="Fish")

#Characterize distances
dm3_long_animal_human <- subset(dm3_long_biosample, Groups == "Animals_to_Human")
dm3_long_lobfish_human <- dm3_long_animal_human %>%
                            dplyr::mutate(host_type=dplyr::case_when(
                           Sample.name %in% rownames(animal_lobster_sample) ~ "Lobster",
                           Sample.name %in% rownames(animal_fish_sample) ~ "Fish")) %>% 
                          dplyr::mutate(Groups2 = paste(Biosample_2, host_type, sep = "_to_"))

#Count number of observation
sum(dm3_long_lobfish_human $Groups2 == "Human_to_Lobster")
## [1] 475
sum(dm3_long_lobfish_human $Groups2 == "Human_to_Fish")
## [1] 975
#Wilcoxon rank sum test
ggpubr::compare_means(Distance ~ Groups2,
                            dm3_long_lobfish_human,
                            method = "wilcox.test")
## # A tibble: 1 × 8
##   .y.      group1        group2              p    p.adj p.format p.signif method
##   <chr>    <chr>         <chr>           <dbl>    <dbl> <chr>    <chr>    <chr> 
## 1 Distance Human_to_Fish Human_to_Lo… 3.86e-11 3.90e-11 3.9e-11  ****     Wilco…
#Plotting
p14 <- ggplot(dm3_long_lobfish_human, aes(x = Groups, y = Distance, colour = Groups)) +
  theme_bw() +
  geom_boxplot(aes(colour = Groups2), alpha=0.1, outlier.colour = NA) + 
   theme(axis.title.x = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size = 13),
        strip.text = element_text(size = 13),
        axis.title.y = element_text(size = 15),
        axis.text.y = element_text(size=13),
        axis.text.x = element_blank()) + 
  labs(y = "Bray-Curtis Distance") +
  scale_y_continuous(limits = c(0.95, 1.005))
p14
## Warning: Removed 148 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Compare distance of Human Skin vs Animal/ Human Gut vs Animal with Wilcoxon rank sum test - Taxonomy data

#Extract sample data
human_skin_sample <- subset(human_samp_data, Biosample_type =="Skin")
human_gut_sample <- subset(human_samp_data, Biosample_type =="Gut")

#Characterize distances
dm4_long_animal_human <- subset(dm4_long_biosample, Groups == "Animals_to_Human")
dm4_long_animal_skingut <- dm4_long_animal_human %>%
                            dplyr::mutate(biosample_type=dplyr::case_when(
                           Sample.name2 %in% rownames(human_skin_sample) ~ "Skin",
                           Sample.name2 %in% rownames(human_gut_sample) ~ "Gut")) %>% 
                          dplyr::mutate(Groups2 = paste(Biosample_1, biosample_type, sep = "_to_"))

#Count number of observation
sum(dm4_long_animal_skingut$Groups2 == "Animals_to_Skin")
## [1] 715
sum(dm4_long_animal_skingut$Groups2 == "Animals_to_Gut")
## [1] 735
#Wilcoxon rank sum test
ggpubr::compare_means(Distance ~ Groups2,
                            dm4_long_animal_skingut,
                            method = "wilcox.test")
## # A tibble: 1 × 8
##   .y.      group1         group2                p p.adj p.format p.signif method
##   <chr>    <chr>          <chr>             <dbl> <dbl> <chr>    <chr>    <chr> 
## 1 Distance Animals_to_Gut Animals_to_Sk… 2.00e-34 2e-34 <2e-16   ****     Wilco…
#Plotting
p15 <- ggplot(dm4_long_animal_skingut, aes(x = Groups, y = Distance, colour = Groups)) +
  theme_bw() +
  geom_boxplot(aes(colour = Groups2), alpha=0.1, outlier.colour = NA) + 
   theme(axis.title.x = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size = 13),
        strip.text = element_text(size = 13),
        axis.title.y = element_text(size = 15),
        axis.text.y = element_text(size=13),
        axis.text.x = element_blank()) + 
  labs(y = "Bray-Curtis Distance") +
  scale_y_continuous(limits = c(0.95, 1.005))
p15
## Warning: Removed 192 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Compare distance of Lobster vs Human/ Fish vs Human with Wilcoxon rank sum test - Taxonomy data

#Extract sample data
animal_lobster_sample <- subset(animal_samp_data, Host_subtype =="Lobster")
animal_fish_sample <- subset(animal_samp_data, Host_subtype =="Fish")

#Characterize distances
dm4_long_animal_human <- subset(dm4_long_biosample, Groups == "Animals_to_Human")
dm4_long_lobfish_human <- dm4_long_animal_human %>%
                            dplyr::mutate(host_type=dplyr::case_when(
                           Sample.name %in% rownames(animal_lobster_sample) ~ "Lobster",
                           Sample.name %in% rownames(animal_fish_sample) ~ "Fish")) %>% 
                          dplyr::mutate(Groups2 = paste(Biosample_2, host_type, sep = "_to_"))

#Count number of observation
sum(dm4_long_lobfish_human $Groups == "Human_to_Lobster")
## [1] 0
sum(dm4_long_lobfish_human $Groups == "Human_to_Fish")
## [1] 0
#Wilcoxon rank sum test
ggpubr::compare_means(Distance ~ Groups2,
                            dm4_long_lobfish_human,
                            method = "wilcox.test")
## # A tibble: 1 × 8
##   .y.      group1        group2               p   p.adj p.format p.signif method
##   <chr>    <chr>         <chr>            <dbl>   <dbl> <chr>    <chr>    <chr> 
## 1 Distance Human_to_Fish Human_to_Lob… 2.81e-26 2.8e-26 <2e-16   ****     Wilco…
#Plotting
p16 <- ggplot(dm4_long_lobfish_human, aes(x = Groups, y = Distance, colour = Groups)) +
  theme_bw() +
  geom_boxplot(aes(colour = Groups2), alpha=0.1, outlier.colour = NA) + 
   theme(axis.title.x = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size = 13),
        strip.text = element_text(size = 13),
        axis.title.y = element_text(size = 15),
        axis.text.y = element_text(size=13),
        axis.text.x = element_blank()) + 
  labs(y = "Bray-Curtis Distance") +
  scale_y_continuous(limits = c(0.95, 1.005))
p16
## Warning: Removed 192 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Calculate distances

#ASV data - Animal
otu.bray5 <- vegan::vegdist(animal_otu_table_ASV, diag = TRUE, method = "bray") # computes dissimilarity indices (bray)
pcoa.sub5 <- ape::pcoa(otu.bray5) #computes principal coordinate decomposition
pcoa_coord5 <- pcoa.sub5$vectors[,1:2] # Extract coordination of PCOA plot
hull_PHPB5 <- cbind(pcoa_coord5, animal_samp_data) # Combine metadata with PCOA coordination

#ASV data - Human
otu.bray6  <- vegan::vegdist(human_otu_table_ASV, diag = TRUE, method = "bray") # computes dissimilarity indices (bray)
pcoa.sub6 <- ape::pcoa(otu.bray6) #computes principal coordinate decomposition
pcoa_coord6 <- pcoa.sub6$vectors[,1:2] # Extract coordination of PCOA plot
hull_PHPB6 <- cbind(pcoa_coord6, human_samp_data) # Combine metadata with PCOA coordination

#Taxonomy data - Animal
otu.bray7 <- vegan::vegdist(animal_otu_table_tax, diag = TRUE, method = "bray") # computes dissimilarity indices (bray)
pcoa.sub7 <- ape::pcoa(otu.bray7) #computes principal coordinate decomposition
pcoa_coord7 <- pcoa.sub7$vectors[,1:2] # Extract coordination of PCOA plot
hull_PHPB7 <- cbind(pcoa_coord7, animal_samp_data) # Combine metadata with PCOA coordination

#Taxonomy data - human
otu.bray8 <- vegan::vegdist(human_otu_table_tax, diag = TRUE, method = "bray") # computes dissimilarity indices (bray)
pcoa.sub8 <- ape::pcoa(otu.bray8) #computes principal coordinate decomposition
pcoa_coord8 <- pcoa.sub8$vectors[,1:2] # Extract coordination of PCOA plot
hull_PHPB8 <- cbind(pcoa_coord8, human_samp_data) # Combine metadata with PCOA coordination

PERMANOVA Calculation

#Prepare data
hull_PHPB3_animal <- subset(hull_PHPB3, Biosample == "Animals")
hull_PHPB3_human <- subset(hull_PHPB3, Biosample == "Human")
hull_PHPB4_animal <- subset(hull_PHPB4, Biosample == "Animals")
hull_PHPB4_human <- subset(hull_PHPB4, Biosample == "Human")
#ASV data
vegan::adonis2(otu.bray5 ~ Host_subtype, data = hull_PHPB3_animal, permutations = 1000, method = "bray") #Fish vs Lobster
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 1000
## 
## vegan::adonis2(formula = otu.bray5 ~ Host_subtype, data = hull_PHPB3_animal, permutations = 1000, method = "bray")
##          Df SumOfSqs      R2      F   Pr(>F)    
## Model     1   3.7605 0.15066 9.9336 0.000999 ***
## Residual 56  21.1999 0.84934                    
## Total    57  24.9604 1.00000                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vegan::adonis2(otu.bray6 ~ Biosample_type, data = hull_PHPB3_human, permutations = 1000, method = "bray") #Gut vs skin
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 1000
## 
## vegan::adonis2(formula = otu.bray6 ~ Biosample_type, data = hull_PHPB3_human, permutations = 1000, method = "bray")
##          Df SumOfSqs      R2      F   Pr(>F)    
## Model     1    2.491 0.08865 7.1011 0.000999 ***
## Residual 73   25.608 0.91135                    
## Total    74   28.099 1.00000                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Taxonomy data
vegan::adonis2(otu.bray7 ~ Host_subtype, data = hull_PHPB4_animal, permutations = 1000, method = "bray") #Fish vs Lobster
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 1000
## 
## vegan::adonis2(formula = otu.bray7 ~ Host_subtype, data = hull_PHPB4_animal, permutations = 1000, method = "bray")
##          Df SumOfSqs      R2     F   Pr(>F)    
## Model     1   3.6273 0.14841 9.759 0.000999 ***
## Residual 56  20.8142 0.85159                   
## Total    57  24.4414 1.00000                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vegan::adonis2(otu.bray8 ~ Biosample_type, data = hull_PHPB4_human, permutations = 1000, method = "bray") #Gut vs skin
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 1000
## 
## vegan::adonis2(formula = otu.bray8 ~ Biosample_type, data = hull_PHPB4_human, permutations = 1000, method = "bray")
##          Df SumOfSqs      R2      F   Pr(>F)    
## Model     1   2.4596 0.08845 7.0833 0.000999 ***
## Residual 73  25.3489 0.91155                    
## Total    74  27.8086 1.00000                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Plotting (Supplementary)

#ASV - Animal
pa <- ggplot() + 
  geom_hline(yintercept=0, colour="lightgrey", linetype = 2) + 
  geom_vline(xintercept=0, colour="lightgrey", linetype = 2) +
  geom_point(data = hull_PHPB5, aes(x=Axis.1, y=Axis.2, color = Host_subtype), alpha = 0.7, size = 5) +
  geom_point(data = hull_PHPB5, aes(x=Axis.1, y=Axis.2), colour = "white", size = 0.7) +
  xlab(paste("PCo1 (", round(pcoa.sub4$values$Relative_eig[1]*100, 1), "%)")) +
  ylab(paste("PCo2 (", round(pcoa.sub4$values$Relative_eig[2]*100, 1), "%)")) +
  theme_bw() +
  stat_ellipse(data = hull_PHPB3,aes(x=Axis.1, y=Axis.2, group = Biosample, color = Biosample), level = 0.95) +
  coord_equal() +
  labs(color = "Sample types") +
  scale_colour_brewer(palette="Set1") +
  theme(axis.title.x = element_text(size=14), # remove x-axis labels
        legend.position = "right",
        axis.title.y = element_text(size=14), # remove y-axis labels
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank(),  #remove minor-grid labels
        plot.background = element_blank())
pa
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

#ASV - Human
pb <- ggplot() + 
  geom_hline(yintercept=0, colour="lightgrey", linetype = 2) + 
  geom_vline(xintercept=0, colour="lightgrey", linetype = 2) +
  geom_point(data = hull_PHPB6, aes(x=Axis.1, y=Axis.2, color = Biosample_type), alpha = 0.7, size = 5) +
  geom_point(data = hull_PHPB6, aes(x=Axis.1, y=Axis.2), colour = "white", size = 0.7) +
  xlab(paste("PCo1 (", round(pcoa.sub4$values$Relative_eig[1]*100, 1), "%)")) +
  ylab(paste("PCo2 (", round(pcoa.sub4$values$Relative_eig[2]*100, 1), "%)")) +
  theme_bw() +
  stat_ellipse(data = hull_PHPB3,aes(x=Axis.1, y=Axis.2, group = Biosample, color = Biosample), level = 0.95) +
  coord_equal() +
  labs(color = "Sample types") +
  scale_colour_brewer(palette="Set2") +
  theme(axis.title.x = element_text(size=14), # remove x-axis labels
        legend.position = "right",
        axis.title.y = element_text(size=14), # remove y-axis labels
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank(),  #remove minor-grid labels
        plot.background = element_blank())
pb
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

#Tax - animal
pc <- ggplot() + 
  geom_hline(yintercept=0, colour="lightgrey", linetype = 2) + 
  geom_vline(xintercept=0, colour="lightgrey", linetype = 2) +
  geom_point(data = hull_PHPB7, aes(x=Axis.1, y=Axis.2, color = Host_subtype), alpha = 0.7, size = 5) +
  geom_point(data = hull_PHPB7, aes(x=Axis.1, y=Axis.2), colour = "white", size = 0.7) +
  xlab(paste("PCo1 (", round(pcoa.sub4$values$Relative_eig[1]*100, 1), "%)")) +
  ylab(paste("PCo2 (", round(pcoa.sub4$values$Relative_eig[2]*100, 1), "%)")) +
  stat_ellipse(data = hull_PHPB3,aes(x=Axis.1, y=Axis.2, group = Biosample, color = Biosample), level = 0.95) +
  theme_bw() +
  coord_equal() +
  labs(color = "Sample types") +
  scale_colour_brewer(palette="Set1") +
  theme(axis.title.x = element_text(size=14), # remove x-axis labels
        legend.position = "right",
        axis.title.y = element_text(size=14), # remove y-axis labels
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank(),  #remove minor-grid labels
        plot.background = element_blank())
pc
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

#Tax - human
pd <- ggplot() + 
  geom_hline(yintercept=0, colour="lightgrey", linetype = 2) + 
  geom_vline(xintercept=0, colour="lightgrey", linetype = 2) +
  geom_point(data = hull_PHPB8, aes(x=Axis.1, y=Axis.2, color = Biosample_type), alpha = 0.7, size = 5) +
  geom_point(data = hull_PHPB8, aes(x=Axis.1, y=Axis.2), colour = "white", size = 0.7) +
  xlab(paste("PCo1 (", round(pcoa.sub4$values$Relative_eig[1]*100, 1), "%)")) +
  ylab(paste("PCo2 (", round(pcoa.sub4$values$Relative_eig[2]*100, 1), "%)")) +
  stat_ellipse(data = hull_PHPB3,aes(x=Axis.1, y=Axis.2, group = Biosample, color = Biosample), level = 0.95) +
  theme_bw() +
  coord_equal() +
  labs(color = "Sample types") +
  scale_colour_brewer(palette="Set2") +
  theme(axis.title.x = element_text(size=14), # remove x-axis labels
        legend.position = "right",
        axis.title.y = element_text(size=14), # remove y-axis labels
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank(),  #remove minor-grid labels
        plot.background = element_blank())
pd
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

ggsave("/Users/qcvp/R/Plots/PHBP_PCOA_Fish_Lobster_vs_Human_Taxonomydata.png", plot = pc, width = 9, height = 9)
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
ggsave("/Users/qcvp/R/Plots/PHPB_PCOA_Skin_Gut_vs_Animals_Taxonomydata.png", plot = pd, width = 9, height = 9)
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

Core ASV

Dispersion vs Occurence plot

#Agglomerate taxa to specices
Nem_hu_unrarefied_species <- tax_glom(Nem_hu_unrarefied, taxrank = "Species")
Melted_Nem_hu_unrarefied_species <- Nem_hu_unrarefied_species %>%
  phyloseq::psmelt() %>%                                         # Melt to long format
  filter(Abundance > 0) %>%                         # Filter out low abundance taxa reads
  arrange(Species)  

#Abundance vs occurence plot
Nem_otu_table2 <- Nem_hu_unrarefied_species@otu_table@.Data
abuoccplot_otu2 <- labdsv::abuocc(Nem_otu_table2)
## Registered S3 method overwritten by 'labdsv':
##   method       from
##   summary.dist ade4

## Press return for next plot

## Press return for next plot

## Do you want to identify individual species? Y/N : 
## Press return for next plot

## Do you want to identify individual plots? Y/N :
#Sub_objects of abuocc objects
utils::str(abuoccplot_otu2)
## List of 3
##  $ spc.plt: Named int [1:149] 27 29 23 7 5 7 20 6 5 10 ...
##   ..- attr(*, "names")= chr [1:149] "F1w1b" "F1w2b" "F1w3b" "Fi.10.3.GM" ...
##  $ plt.spc: Named int [1:456] 97 51 51 63 76 60 9 21 19 22 ...
##   ..- attr(*, "names")= chr [1:456] "ASV3" "ASV5" "ASV6" "ASV12" ...
##  $ mean   : Named num [1:456] 678 606 4283 447 692 ...
##   ..- attr(*, "names")= chr [1:456] "ASV3" "ASV5" "ASV6" "ASV12" ...
##  - attr(*, "call")= language labdsv::abuocc(comm = Nem_otu_table2)
##  - attr(*, "comm")= chr [1:3698] "structure(list(ASV3 = c(6776, 6430, 4914, 0, 0, 72, 926, 2014, " "0, 3756, 700, 388, 201, 407, 1366, 0, 0, 470, 271, 0, 0, 6, 268, " "1371, 636, 36, 28, 0, 2617, 3304, 5118, 1374, 11, 0, 413, 1378, " "46, 99, 27, 542, 16, 19, 680, 347, 1821, 1934, 3368, 4010, 0, " ...
##  - attr(*, "timestamp")= chr "Mon May 26 22:41:40 2025"
##  - attr(*, "class")= chr "abuocc"
#Transform spc.plt vector into table in order to calculate specific richness
Nem_richness_otu2 <- data.frame(abuoccplot_otu2$spc.plt)
head(Nem_richness_otu2,1)
##       abuoccplot_otu2.spc.plt
## F1w1b                      27
#Occurrence of each OTU
Nem_otu_occurence2 <- data.frame(abuoccplot_otu2$plt.spc)
head(Nem_otu_occurence2,1)
##      abuoccplot_otu2.plt.spc
## ASV3                      97
Nem_mean.abun_otu2 <- colSums(Nem_otu_table2)/Nem_otu_occurence2
head(Nem_mean.abun_otu2,1)
##      abuoccplot_otu2.plt.spc
## ASV3                678.4536
Nem_square_otu2 <- Nem_otu_table2^2
Nem_ss_otu2 <- data.frame(colSums(Nem_square_otu2))
head(Nem_ss_otu2,1)
##      colSums.Nem_square_otu2.
## ASV3                225387192
#Variance calculation
Nem_variance_otu2=Nem_ss_otu2/Nem_otu_occurence2-Nem_mean.abun_otu2^2
head(Nem_variance_otu2,1)
##      colSums.Nem_square_otu2.
## ASV3                  1863280
Nem_disp_otu2 <- (Nem_variance_otu2/Nem_mean.abun_otu2)*Nem_otu_occurence2
head(Nem_disp_otu2,1)
##      colSums.Nem_square_otu2.
## ASV3                 266397.2

Extract Tax and ID names

#Tax 
PHPB_bacteria_ASV_TAX <- data.frame(Nem_hu_unrarefied_species@tax_table)
ID <- rownames(PHPB_bacteria_ASV_TAX )
PHPB_bacteria_ASV_TAX <- cbind(ID=ID, PHPB_bacteria_ASV_TAX)

IC calculation for Poisson distribution using Chi square distribution (value and formula within Zar p574)

Nem_poisic_otu2 = epitools::pois.exact(Nem_otu_occurence2, conf.level = 0.95)

Nem_dstat_otu2 <- cbind(Nem_mean.abun_otu2, Nem_disp_otu2, Nem_otu_occurence2, Nem_poisic_otu2)
names(Nem_dstat_otu2) <- c("average","disp", "occurence", "x", "pt", "rate", "lower", "upper", "prob")

# ...selection of core OTUs----------------------------
Nem_core_otu2 <- Nem_dstat_otu2[Nem_dstat_otu2$disp > Nem_dstat_otu2$upper,]
Nem_core_otu2 <- na.omit(Nem_core_otu2)

Nem_core_otu2$Species <- PHPB_bacteria_ASV_TAX[rownames(PHPB_bacteria_ASV_TAX) %in% row.names(Nem_core_otu2),8]
Nem_core_otu2$Genus <- PHPB_bacteria_ASV_TAX[rownames(PHPB_bacteria_ASV_TAX) %in% row.names(Nem_core_otu2),7]
Nem_core_otu2$Family <- PHPB_bacteria_ASV_TAX[rownames(PHPB_bacteria_ASV_TAX) %in% row.names(Nem_core_otu2),6]
Nem_core_otu2$Phylum <- PHPB_bacteria_ASV_TAX[rownames(PHPB_bacteria_ASV_TAX) %in% row.names(Nem_core_otu2),3]

Nem_core_otu2 <- Nem_core_otu2 %>%
                dplyr::arrange(desc(occurence)) # Sort abundances 
Nem_core_otu2
##              average         disp occurence  x pt rate      lower      upper
## ASV3      678.453608 2.663972e+05        97 97  1   97 78.6604409 118.331774
## ASV17     692.355263 2.058814e+05        76 76  1   76 59.8793494  95.125333
## ASV112    196.765625 2.180195e+04        64 64  1   64 49.2877952  81.726569
## ASV12     446.825397 1.056666e+05        63 63  1   63 48.4109285  80.604366
## ASV115    170.721311 7.555731e+03        61 61  1   61 46.6601620  78.357041
## ASV18     789.316667 1.759163e+05        60 60  1   60 45.7863210  77.231877
## ASV253    111.600000 1.828859e+04        55 55  1   55 41.4335424  71.590090
## ASV5      606.098039 1.392515e+05        51 51  1   51 37.9728494  67.055575
## ASV6     4283.215686 1.219347e+06        51 51  1   51 37.9728494  67.055575
## ASV1604    15.612245 6.990431e+02        49 49  1   49 36.2504686  64.780597
## ASV38     467.826087 6.407597e+04        46 46  1   46 33.6777792  61.357554
## ASV51     619.622222 6.540742e+04        45 45  1   45 32.8233066  60.213540
## ASV47     534.850000 1.949549e+05        40 40  1   40 28.5765864  54.468643
## ASV61     761.270270 7.960393e+04        37 37  1   37 26.0514137  50.999627
## ASV66     402.771429 8.945238e+04        35 35  1   35 24.3787837  48.676532
## ASV545     53.558824 1.121839e+03        34 34  1   34 23.5459879  47.511601
## ASV102    246.812500 6.103124e+04        32 32  1   32 21.8879760  45.174476
## ASV121    487.666667 2.620746e+05        30 30  1   30 20.2408636  42.826851
## ASV49    1073.551724 1.029999e+05        29 29  1   29 19.4217851  41.648816
## ASV210    141.620690 2.596959e+04        29 29  1   29 19.4217851  41.648816
## ASV278    219.758621 7.575208e+03        29 29  1   29 19.4217851  41.648816
## ASV271    324.800000 3.718885e+04        25 25  1   25 16.1786837  36.904934
## ASV422    396.583333 6.888055e+04        24 24  1   24 15.3772578  35.710098
## ASV33    2168.000000 2.470477e+05        22 22  1   22 13.7872546  33.308265
## ASV21    3371.285714 5.394161e+05        21 21  1   21 12.9993564  32.100730
## ASV257     67.650000 5.443142e+03        20 20  1   20 12.2165428  30.888378
## ASV731     68.900000 1.884235e+03        20 20  1   20 12.2165428  30.888378
## ASV26     213.105263 5.757163e+03        19 19  1   19 11.4392691  29.670855
## ASV176    521.736842 1.528575e+04        19 19  1   19 11.4392691  29.670855
## ASV2925    21.736842 1.252513e+03        19 19  1   19 11.4392691  29.670855
## ASV496    178.705882 4.197084e+03        17 17  1   17  9.9031251  27.218647
## ASV547    190.647059 7.705788e+03        17 17  1   17  9.9031251  27.218647
## ASV891     85.176471 1.239315e+03        17 17  1   17  9.9031251  27.218647
## ASV5767     9.875000 1.885316e+02        16 16  1   16  9.1453783  25.982998
## ASV8753     5.875000 2.423404e+02        16 16  1   16  9.1453783  25.982998
## ASV119    229.666667 2.338003e+03        15 15  1   15  8.3953855  24.740220
## ASV243     93.400000 2.296163e+03        15 15  1   15  8.3953855  24.740220
## ASV309    182.866667 7.988092e+03        15 15  1   15  8.3953855  24.740220
## ASV341    140.333333 6.898599e+03        15 15  1   15  8.3953855  24.740220
## ASV1261    99.066667 2.895494e+03        15 15  1   15  8.3953855  24.740220
## ASV427    164.857143 6.544076e+03        14 14  1   14  7.6539338  23.489616
## ASV283    104.000000 2.074788e+03        13 13  1   13  6.9219517  22.230380
## ASV464     42.769231 3.616223e+02        13 13  1   13  6.9219517  22.230380
## ASV188     48.166667 4.668304e+02        12 12  1   12  6.2006035  20.961560
## ASV1587    75.833333 1.667943e+03        12 12  1   12  6.2006035  20.961560
## ASV5665    33.750000 3.104074e+02        12 12  1   12  6.2006035  20.961560
## ASV196     46.636364 9.979025e+02        11 11  1   11  5.4911525  19.682011
## ASV2177    44.090909 1.453113e+02        11 11  1   11  5.4911525  19.682011
## ASV3125    48.727273 2.426160e+03        11 11  1   11  5.4911525  19.682011
## ASV80    2539.400000 9.527418e+04        10 10  1   10  4.7953886  18.390358
## ASV487    334.700000 5.590051e+03        10 10  1   10  4.7953886  18.390358
## ASV19      66.555556 3.100000e+02         9  9  1    9  4.1153809  17.084805
## ASV82      40.555556 4.031562e+02         9  9  1    9  4.1153809  17.084805
## ASV379    152.222222 8.115869e+02         9  9  1    9  4.1153809  17.084805
## ASV582    229.777778 4.467671e+03         9  9  1    9  4.1153809  17.084805
## ASV1027   288.777778 8.993537e+03         9  9  1    9  4.1153809  17.084805
## ASV160   1702.875000 8.345799e+04         8  8  1    8  3.4538314  15.763189
## ASV413    232.750000 1.395435e+03         8  8  1    8  3.4538314  15.763189
## ASV4337    48.625000 4.679871e+02         8  8  1    8  3.4538314  15.763189
## ASV4558    59.000000 1.303186e+03         8  8  1    8  3.4538314  15.763189
## ASV6157    25.875000 1.129614e+02         8  8  1    8  3.4538314  15.763189
## ASV741     30.714286 9.290233e+01         7  7  1    7  2.8143575  14.422675
## ASV2586    73.857143 4.575706e+02         7  7  1    7  2.8143575  14.422675
## ASV2      296.428571 9.032386e+02         7  7  1    7  2.8143575  14.422675
## ASV504    108.833333 1.162772e+02         6  6  1    6  2.2018911  13.059479
## ASV1240    37.500000 1.796000e+01         6  6  1    6  2.2018911  13.059479
## ASV2373    40.666667 3.645574e+02         6  6  1    6  2.2018911  13.059479
## ASV2858    29.833333 7.759218e+01         6  6  1    6  2.2018911  13.059479
## ASV2922   100.333333 2.198266e+03         6  6  1    6  2.2018911  13.059479
## ASV7679    33.666667 1.070891e+02         6  6  1    6  2.2018911  13.059479
## ASV13312   20.000000 1.673000e+02         6  6  1    6  2.2018911  13.059479
## ASV600     23.200000 7.106897e+01         5  5  1    5  1.6234857  11.668322
## ASV2087    55.600000 1.191223e+02         5  5  1    5  1.6234857  11.668322
## ASV2242   175.800000 3.018589e+02         5  5  1    5  1.6234857  11.668322
## ASV2567    43.200000 1.620556e+02         5  5  1    5  1.6234857  11.668322
## ASV3730    49.000000 3.065306e+01         5  5  1    5  1.6234857  11.668322
## ASV7237    34.400000 1.206163e+02         5  5  1    5  1.6234857  11.668322
## ASV10154   21.200000 8.249057e+01         5  5  1    5  1.6234857  11.668322
## ASV15646    3.000000 1.266667e+01         5  5  1    5  1.6234857  11.668322
## ASV224    105.000000 8.875619e+02         4  4  1    4  1.0898892  10.241589
## ASV593    138.000000 1.477478e+03         4  4  1    4  1.0898892  10.241589
## ASV1122    60.250000 5.053527e+01         4  4  1    4  1.0898892  10.241589
## ASV1223    64.250000 1.311245e+02         4  4  1    4  1.0898892  10.241589
## ASV2757   107.500000 2.016465e+02         4  4  1    4  1.0898892  10.241589
## ASV3091     4.500000 1.355556e+01         4  4  1    4  1.0898892  10.241589
## ASV3235     3.500000 1.628571e+01         4  4  1    4  1.0898892  10.241589
## ASV3353    83.000000 2.572530e+02         4  4  1    4  1.0898892  10.241589
## ASV3974     8.000000 4.825000e+01         4  4  1    4  1.0898892  10.241589
## ASV6858    18.500000 3.724324e+01         4  4  1    4  1.0898892  10.241589
## ASV52      57.500000 5.459130e+01         4  4  1    4  1.0898892  10.241589
## ASV603     36.666667 6.885455e+01         3  3  1    3  0.6186712   8.767277
## ASV1255    49.666667 4.285906e+01         3  3  1    3  0.6186712   8.767277
## ASV1424   169.000000 6.427574e+02         3  3  1    3  0.6186712   8.767277
## ASV1435    39.333333 1.065932e+02         3  3  1    3  0.6186712   8.767277
## ASV3460    63.333333 8.780000e+01         3  3  1    3  0.6186712   8.767277
## ASV3940    47.000000 4.685106e+01         3  3  1    3  0.6186712   8.767277
## ASV4186    60.666667 2.392857e+02         3  3  1    3  0.6186712   8.767277
## ASV5760    81.333333 8.130328e+01         3  3  1    3  0.6186712   8.767277
## ASV6567    30.333333 2.507692e+01         3  3  1    3  0.6186712   8.767277
## ASV10153   31.000000 2.135484e+01         3  3  1    3  0.6186712   8.767277
## ASV10862   39.000000 1.126154e+02         3  3  1    3  0.6186712   8.767277
## ASV13872    4.666667 1.300000e+01         3  3  1    3  0.6186712   8.767277
## ASV23938    8.666667 9.307692e+00         3  3  1    3  0.6186712   8.767277
## ASV699    110.500000 2.170181e+02         2  2  1    2  0.2422036   7.224693
## ASV1357   775.000000 4.465961e+02         2  2  1    2  0.2422036   7.224693
## ASV2952    16.000000 8.000000e+00         2  2  1    2  0.2422036   7.224693
## ASV5170   118.000000 1.084746e+02         2  2  1    2  0.2422036   7.224693
## ASV5292   135.000000 1.423704e+01         2  2  1    2  0.2422036   7.224693
## ASV8801    76.000000 7.115789e+01         2  2  1    2  0.2422036   7.224693
## ASV10489   37.000000 5.535135e+01         2  2  1    2  0.2422036   7.224693
## ASV11109   32.000000 3.306250e+01         2  2  1    2  0.2422036   7.224693
## ASV12110   39.500000 4.406329e+01         2  2  1    2  0.2422036   7.224693
## ASV15618   26.500000 1.375472e+01         2  2  1    2  0.2422036   7.224693
##          prob              Species              Genus                Family
## ASV3     0.95           vulnificus             Vibrio          Vibrionaceae
## ASV17    0.95             otitidis        Pseudomonas        Halomonadaceae
## ASV112   0.95           wuhouensis      Acinetobacter         Moraxellaceae
## ASV12    0.95              syzygii          Ralstonia      Burkholderiaceae
## ASV115   0.95           oleivorans      Acinetobacter         Moraxellaceae
## ASV18    0.95        radiotolerans   Methylobacterium      Beijerinckiaceae
## ASV253   0.95              tandoii      Acinetobacter         Moraxellaceae
## ASV5     0.95         psychrolutea       Sphingomonas     Sphingomonadaceae
## ASV6     0.95         tasmaniensis             Vibrio          Vibrionaceae
## ASV1604  0.95                iners      Lactobacillus      Lactobacillaceae
## ASV38    0.95             broomeae             Afipia     Xanthobacteraceae
## ASV51    0.95             fungorum   Paraburkholderia      Burkholderiaceae
## ASV47    0.95             damselae     Photobacterium          Vibrionaceae
## ASV61    0.95              pumilus           Bacillus           Bacillaceae
## ASV66    0.95         solanacearum          Ralstonia      Pseudomonadaceae
## ASV545   0.95          vesicularis      Brevundimonas      Caulobacteraceae
## ASV102   0.95           venetianus      Acinetobacter         Moraxellaceae
## ASV121   0.95             stutzeri        Pseudomonas      Pseudomonadaceae
## ASV49    0.95          saxobsidens      Herminiimonas      Oxalobacteraceae
## ASV210   0.95              lwoffii      Acinetobacter         Moraxellaceae
## ASV278   0.95             diminuta      Brevundimonas      Caulobacteraceae
## ASV271   0.95            sanguinis      Streptococcus      Streptococcaceae
## ASV422   0.95              harveyi             Vibrio          Vibrionaceae
## ASV33    0.95          rhodesianum      Methylorubrum      Beijerinckiaceae
## ASV21    0.95                algae         Shewanella        Shewanellaceae
## ASV257   0.95     nigripulchritudo             Vibrio          Vibrionaceae
## ASV731   0.95          maltophilia   Stenotrophomonas      Xanthomonadaceae
## ASV26    0.95                sakei      Lactobacillus      Lactobacillaceae
## ASV176   0.95                 loti      Mesorhizobium          Rhizobiaceae
## ASV2925  0.95            bacterium         uncultured         Neisseriaceae
## ASV496   0.95         resinovorans        Pseudomonas      Pseudomonadaceae
## ASV547   0.95             pealeana         Shewanella        Shewanellaceae
## ASV891   0.95           flavescens          Neisseria         Neisseriaceae
## ASV5767  0.95 pseudodiphtheriticum    Corynebacterium    Corynebacteriaceae
## ASV8753  0.95          sedentarius         Kytococcus        Dermacoccaceae
## ASV119   0.95             carnosum        Leuconostoc      Lactobacillaceae
## ASV243   0.95     cellulosilyticum          Rhizobium          Rhizobiaceae
## ASV309   0.95           turicensis        Siccibacter           Erwiniaceae
## ASV341   0.95         miyukkimchii        Leuconostoc      Lactobacillaceae
## ASV1261  0.95         testosteroni          Comamonas        Comamonadaceae
## ASV427   0.95       melaninogenica         Prevotella        Prevotellaceae
## ASV283   0.95                  sp.      Aquabacterium        Comamonadaceae
## ASV464   0.95           macginleyi    Corynebacterium    Corynebacteriaceae
## ASV188   0.95         aestuarianus             Vibrio          Vibrionaceae
## ASV1587  0.95   zeaxanthinifaciens         Paracoccus      Rhodobacteraceae
## ASV5665  0.95            seifertii      Acinetobacter         Moraxellaceae
## ASV196   0.95              marinus         Paracoccus      Rhodobacteraceae
## ASV2177  0.95          alkaliphila      Paramoritella         Moritellaceae
## ASV3125  0.95      lipophiloflavum    Corynebacterium    Corynebacteriaceae
## ASV80    0.95        sanxanigenens       Sphingomonas     Sphingomonadaceae
## ASV487   0.95            nivimaris      Psychrobacter         Moraxellaceae
## ASV19    0.95     parahaemolyticus             Vibrio          Vibrionaceae
## ASV82    0.95              warneri     Staphylococcus     Staphylococcaceae
## ASV379   0.95        tsuruhatensis            Delftia        Comamonadaceae
## ASV582   0.95            baumannii      Acinetobacter         Moraxellaceae
## ASV1027  0.95           rhizophila   Stenotrophomonas      Pseudomonadaceae
## ASV160   0.95          albilineans        Xanthomonas      Xanthomonadaceae
## ASV413   0.95              equinus      Streptococcus      Streptococcaceae
## ASV4337  0.95              parvula        Veillonella       Veillonellaceae
## ASV4558  0.95               sonnei           Shigella    Enterobacteriaceae
## ASV6157  0.95              ruckeri           Yersinia          Yersiniaceae
## ASV741   0.95      aromaticivorans   Terrahaemophilus       Pasteurellaceae
## ASV2586  0.95              veronii        Pseudomonas      Pseudomonadaceae
## ASV2     0.95         mediterranea        Alteromonas      Alteromonadaceae
## ASV504   0.95         marplatensis      Achromobacter        Alcaligenaceae
## ASV1240  0.95           rhizophila   Stenotrophomonas      Xanthomonadaceae
## ASV2373  0.95        parasanguinis      Streptococcus      Streptococcaceae
## ASV2858  0.95        rhizosphaerae        Pseudomonas      Pseudomonadaceae
## ASV2922  0.95         pettenkoferi     Staphylococcus     Staphylococcaceae
## ASV7679  0.95                acnes  Propionibacterium  Propionibacteriaceae
## ASV13312 0.95               luteus        Micrococcus        Micrococcaceae
## ASV600   0.95          tumefaciens      Agrobacterium          Rhizobiaceae
## ASV2087  0.95             accolens    Corynebacterium    Corynebacteriaceae
## ASV2242  0.95              slovaca         Rickettsia        Rickettsiaceae
## ASV2567  0.95         tasmaniensis             Vibrio        Shewanellaceae
## ASV3730  0.95       parainfluenzae        Haemophilus       Pasteurellaceae
## ASV7237  0.95          tuscaniense    Corynebacterium    Corynebacteriaceae
## ASV10154 0.95          urealyticum    Corynebacterium    Corynebacteriaceae
## ASV15646 0.95           australica        Alteromonas      Alteromonadaceae
## ASV224   0.95                lylae        Micrococcus        Micrococcaceae
## ASV593   0.95   tuberculostearicum    Corynebacterium    Corynebacteriaceae
## ASV1122  0.95               vagans            Pantoea           Erwiniaceae
## ASV1223  0.95              xylosus     Staphylococcus     Staphylococcaceae
## ASV2757  0.95               putida        Pseudomonas      Pseudomonadaceae
## ASV3091  0.95              confusa          Weissella      Lactobacillaceae
## ASV3235  0.95             viscosus        Actinomyces      Actinomycetaceae
## ASV3353  0.95         massiliensis         Fenollaria             Family XI
## ASV3974  0.95                harei      Peptoniphilus             Family XI
## ASV6858  0.95           cambriense        Varibaculum      Actinomycetaceae
## ASV52    0.95             marcusii         Paracoccus      Rhodobacteraceae
## ASV603   0.95            macleodii        Alteromonas      Alteromonadaceae
## ASV1255  0.95              gelidum        Leuconostoc      Lactobacillaceae
## ASV1424  0.95                magna         Finegoldia             Family XI
## ASV1435  0.95       clevelandensis         Lawsonella    Corynebacteriaceae
## ASV3460  0.95                  sp.      Paramoritella         Moritellaceae
## ASV3940  0.95        oryzihabitans        Pseudomonas      Pseudomonadaceae
## ASV4186  0.95      nitritireducens   Stenotrophomonas      Xanthomonadaceae
## ASV5760  0.95               otitis       Alloiococcus     Carnobacteriaceae
## ASV6567  0.95             enterica         Salmonella    Enterobacteriaceae
## ASV10153 0.95           brasilense       Azospirillum       Azospirillaceae
## ASV10862 0.95  Atelocyanobacterium         Candidatus        Microcystaceae
## ASV13872 0.95          haemolytica         Mannheimia       Pasteurellaceae
## ASV23938 0.95                micra         Parvimonas             Family XI
## ASV699   0.95         paucimobilis       Sphingomonas     Sphingomonadaceae
## ASV1357  0.95         xanthomarina        Pseudomonas      Pseudomonadaceae
## ASV2952  0.95             adiacens     Granulicatella     Carnobacteriaceae
## ASV5170  0.95          alcaligenes        Pseudomonas      Pseudomonadaceae
## ASV5292  0.95           anaerobius Peptostreptococcus Peptostreptococcaceae
## ASV8801  0.95           variabilis      Acinetobacter         Moraxellaceae
## ASV10489 0.95          prausnitzii   Faecalibacterium       Ruminococcaceae
## ASV11109 0.95            bacterium         uncultured      Methylophilaceae
## ASV12110 0.95            bacterium         uncultured        Alcaligenaceae
## ASV15618 0.95             vulgatus        Bacteroides        Bacteroidaceae
##                    Phylum
## ASV3       Proteobacteria
## ASV17      Proteobacteria
## ASV112     Proteobacteria
## ASV12      Proteobacteria
## ASV115     Proteobacteria
## ASV18      Proteobacteria
## ASV253     Proteobacteria
## ASV5       Proteobacteria
## ASV6       Proteobacteria
## ASV1604        Firmicutes
## ASV38      Proteobacteria
## ASV51      Proteobacteria
## ASV47      Proteobacteria
## ASV61          Firmicutes
## ASV66      Proteobacteria
## ASV545     Proteobacteria
## ASV102     Proteobacteria
## ASV121     Proteobacteria
## ASV49      Proteobacteria
## ASV210     Proteobacteria
## ASV278     Proteobacteria
## ASV271         Firmicutes
## ASV422     Proteobacteria
## ASV33      Proteobacteria
## ASV21      Proteobacteria
## ASV257     Proteobacteria
## ASV731     Proteobacteria
## ASV26          Firmicutes
## ASV176     Proteobacteria
## ASV2925    Proteobacteria
## ASV496     Proteobacteria
## ASV547     Proteobacteria
## ASV891     Proteobacteria
## ASV5767  Actinobacteriota
## ASV8753  Actinobacteriota
## ASV119         Firmicutes
## ASV243     Proteobacteria
## ASV309     Proteobacteria
## ASV341         Firmicutes
## ASV1261    Proteobacteria
## ASV427       Bacteroidota
## ASV283     Proteobacteria
## ASV464   Actinobacteriota
## ASV188     Proteobacteria
## ASV1587    Proteobacteria
## ASV5665    Proteobacteria
## ASV196     Proteobacteria
## ASV2177    Proteobacteria
## ASV3125  Actinobacteriota
## ASV80      Proteobacteria
## ASV487     Proteobacteria
## ASV19      Proteobacteria
## ASV82          Firmicutes
## ASV379     Proteobacteria
## ASV582     Proteobacteria
## ASV1027    Proteobacteria
## ASV160     Proteobacteria
## ASV413         Firmicutes
## ASV4337        Firmicutes
## ASV4558    Proteobacteria
## ASV6157    Proteobacteria
## ASV741     Proteobacteria
## ASV2586    Proteobacteria
## ASV2       Proteobacteria
## ASV504     Proteobacteria
## ASV1240    Proteobacteria
## ASV2373        Firmicutes
## ASV2858    Proteobacteria
## ASV2922        Firmicutes
## ASV7679  Actinobacteriota
## ASV13312 Actinobacteriota
## ASV600     Proteobacteria
## ASV2087  Actinobacteriota
## ASV2242    Proteobacteria
## ASV2567    Proteobacteria
## ASV3730    Proteobacteria
## ASV7237  Actinobacteriota
## ASV10154 Actinobacteriota
## ASV15646   Proteobacteria
## ASV224   Actinobacteriota
## ASV593   Actinobacteriota
## ASV1122    Proteobacteria
## ASV1223        Firmicutes
## ASV2757    Proteobacteria
## ASV3091        Firmicutes
## ASV3235  Actinobacteriota
## ASV3353        Firmicutes
## ASV3974        Firmicutes
## ASV6858  Actinobacteriota
## ASV52      Proteobacteria
## ASV603     Proteobacteria
## ASV1255        Firmicutes
## ASV1424        Firmicutes
## ASV1435  Actinobacteriota
## ASV3460    Proteobacteria
## ASV3940    Proteobacteria
## ASV4186    Proteobacteria
## ASV5760        Firmicutes
## ASV6567    Proteobacteria
## ASV10153   Proteobacteria
## ASV10862    Cyanobacteria
## ASV13872   Proteobacteria
## ASV23938       Firmicutes
## ASV699     Proteobacteria
## ASV1357    Proteobacteria
## ASV2952        Firmicutes
## ASV5170    Proteobacteria
## ASV5292        Firmicutes
## ASV8801    Proteobacteria
## ASV10489       Firmicutes
## ASV11109   Proteobacteria
## ASV12110   Proteobacteria
## ASV15618     Bacteroidota

Abundance VS Occurrence and Index of dispersion plots

#Plot index of dispersion
index_disp_otu2 <- ggplot(Nem_dstat_otu2, aes(x=occurence, y=log10(disp))) + 
  geom_point(size = 5, shape = 21, col="black", alpha=0.75) + 
  geom_line(col="gray", linetype = 4, size = 2, aes(x=occurence, y=log10(upper))) + theme_bw() +
  theme(axis.text=element_text(size=18), axis.title=element_text(size=18,face="bold")) +
  labs(y="Index of dispersion (Log 10)")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
p23 <- index_disp_otu2 + geom_text(data=Nem_core_otu2, aes(x=occurence, y=log10(disp), label=rownames(Nem_core_otu2)), size = 0
                                   , alpha=0.75) 
p23
## Warning: Removed 245 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggsave("/Users/qcvp/R/Plots/PHPB_Index_of_dispersion_plots.png", plot = p23, width = 9, height = 9)
## Warning: Removed 245 rows containing missing values or values outside the scale range
## (`geom_point()`).

Distribution of high occurence ASV

# Subset most occurence ASV (>30 Occurence)
ASV3 <- subset(Melted_Nem_hu_unrarefied_species, OTU == "ASV3")
ASV17 <- subset(Melted_Nem_hu_unrarefied_species, OTU == "ASV17")
ASV112 <- subset(Melted_Nem_hu_unrarefied_species, OTU == "ASV112")
ASV12 <- subset(Melted_Nem_hu_unrarefied_species, OTU == "ASV12")
ASV115 <- subset(Melted_Nem_hu_unrarefied_species, OTU == "ASV115")
ASV18 <- subset(Melted_Nem_hu_unrarefied_species, OTU == "ASV18")
ASV253 <- subset(Melted_Nem_hu_unrarefied_species, OTU == "ASV253")
ASV5 <- subset(Melted_Nem_hu_unrarefied_species, OTU == "ASV5")
# plotting piechart for the distribution of each ASV
p23.1 <- ggplot(ASV3, aes(x= OTU, y=Abundance, fill=Biosample)) +
  geom_bar(stat="identity", width=1) +
  coord_polar(theta = "y") +
  theme_void()+   
  scale_fill_manual(breaks =c("Human","Animals","Environment","Control"), 
                      values = c("red","orange","blue","green")) 
p23.1

p23.2 <- ggplot(ASV17, aes(x= OTU, y=Abundance, fill=Biosample)) +
  geom_bar(stat="identity", width=1) +
  coord_polar(theta = "y") +
  theme_void()+   
  scale_fill_manual(breaks =c("Human","Animals","Environment","Control"), 
                      values = c("red","orange","blue","green")) 
p23.2

p23.3 <- ggplot(ASV112, aes(x= OTU, y=Abundance, fill=Biosample)) +
  geom_bar(stat="identity", width=1) +
  coord_polar(theta = "y") +
  theme_void()+   
  scale_fill_manual(breaks =c("Human","Animals","Environment","Control"), 
                      values = c("red","orange","blue","green")) 
p23.3

p23.4 <- ggplot(ASV12, aes(x= OTU, y=Abundance, fill=Biosample)) +
  geom_bar(stat="identity", width=1) +
  coord_polar(theta = "y") +
  theme_void()+   
  scale_fill_manual(breaks =c("Human","Animals","Environment","Control"), 
                      values = c("red","orange","blue","green")) 
p23.4

p23.6 <- ggplot(ASV115, aes(x= OTU, y=Abundance, fill=Biosample)) +
  geom_bar(stat="identity", width=1) +
  coord_polar(theta = "y") +
  theme_void()+   
  scale_fill_manual(breaks =c("Human","Animals","Environment","Control"), 
                      values = c("red","orange","blue","green")) 
p23.6

p23.7 <- ggplot(ASV18, aes(x= OTU, y=Abundance, fill=Biosample)) +
  geom_bar(stat="identity", width=1) +
  coord_polar(theta = "y") +
  theme_void() +
  scale_fill_manual(breaks =c("Human","Animals","Environment","Control"), 
                      values = c("red","orange","blue","green")) 
p23.7

p23.9 <- ggplot(ASV253, aes(x= OTU, y=Abundance, fill=Biosample)) +
  geom_bar(stat="identity", width=1) +
  coord_polar(theta = "y") +
  theme_void() +
    scale_fill_manual(breaks =c("Human","Animals","Environment","Control"), 
                      values = c("red","orange","blue","green")) 
p23.9

p23.10 <- ggplot(ASV5, aes(x= OTU, y=Abundance, fill=Biosample)) +
  geom_bar(stat="identity", width=1) +
  coord_polar(theta = "y") +
  theme_void() +
    scale_fill_manual(breaks =c("Human","Animals","Environment","Control"), 
                      values = c("red","orange","blue","green")) 
p23.10 

Composition of Classes within core OTU list

Nem_core_otu2_long <- Nem_core_otu2 %>%
            dplyr::mutate(value = 1) %>%
            dplyr::group_by(Family) %>%
            dplyr::summarise(RA = sum(value)/nrow(Nem_core_otu2)) %>% #Calculate % of each Families among all pathobiome list
            dplyr::arrange(desc(RA))
Nem_core_otu2_long
## # A tibble: 43 × 2
##    Family                 RA
##    <chr>               <dbl>
##  1 Pseudomonadaceae   0.0885
##  2 Moraxellaceae      0.0796
##  3 Corynebacteriaceae 0.0708
##  4 Vibrionaceae       0.0619
##  5 Lactobacillaceae   0.0531
##  6 Family XI          0.0354
##  7 Xanthomonadaceae   0.0354
##  8 Alteromonadaceae   0.0265
##  9 Comamonadaceae     0.0265
## 10 Pasteurellaceae    0.0265
## # ℹ 33 more rows

LEFse

Running Linear discriminant analysis Effect Size (LEFse)

mm_lefse2 <- microbiomeMarker::run_lefse(Nem_hu_unrarefied, norm = "CPM",
                                        wilcoxon_cutoff = 0.01,
                                        group = "Biosample",
                                        taxa_rank = "none",
                                        kw_cutoff = 0.01,
                                        multigrp_strat = TRUE,
                                        lda_cutoff = 4)
## Registered S3 method overwritten by 'gplots':
##   method         from     
##   reorder.factor DescTools

Plotting

PHPB_bacteria_ASV_TAX <- data.frame(Nem_hu_unrarefied@tax_table)
ID <- rownames(PHPB_bacteria_ASV_TAX )
PHPB_bacteria_ASV_TAX <- cbind(ID=ID, PHPB_bacteria_ASV_TAX)
p_LDAsc2 <- microbiomeMarker::plot_ef_bar(mm_lefse2)
y_labs2 <- ggplot2::ggplot_build(p_LDAsc2)$layout$panel_params[[1]]$y$get_labels()
p_abd2 <- microbiomeMarker::plot_abundance(mm_lefse2, group = "Biosample") +
  ggplot2::scale_y_discrete(limits = y_labs2)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
g4 <- gridExtra::grid.arrange(p_LDAsc2, p_abd2, nrow = 1)

ggsave("/Users/qcvp/R/Plots/LEFSE_PHPB.png", plot = g4, width = 11.5, height = 7) #Saving plot

Taxonomy of core biomarker

biomarkertaxa<- subset(PHPB_bacteria_ASV_TAX, rownames(PHPB_bacteria_ASV_TAX) %in% mm_lefse2@marker_table$feature)
biomarkertaxa 
##            ID  Kingdom         Phylum               Class            Order
## ASV3     ASV3 Bacteria Proteobacteria Gammaproteobacteria Enterobacterales
## ASV5     ASV5 Bacteria Proteobacteria Alphaproteobacteria Sphingomonadales
## ASV6     ASV6 Bacteria Proteobacteria Gammaproteobacteria Enterobacterales
## ASV17   ASV17 Bacteria Proteobacteria Gammaproteobacteria  Pseudomonadales
## ASV20   ASV20 Bacteria Proteobacteria Gammaproteobacteria  Pseudomonadales
## ASV33   ASV33 Bacteria Proteobacteria Alphaproteobacteria      Rhizobiales
## ASV38   ASV38 Bacteria Proteobacteria Alphaproteobacteria      Rhizobiales
## ASV49   ASV49 Bacteria Proteobacteria Gammaproteobacteria  Burkholderiales
## ASV51   ASV51 Bacteria Proteobacteria Gammaproteobacteria  Burkholderiales
## ASV61   ASV61 Bacteria     Firmicutes             Bacilli       Bacillales
## ASV66   ASV66 Bacteria Proteobacteria Gammaproteobacteria  Pseudomonadales
## ASV112 ASV112 Bacteria Proteobacteria Gammaproteobacteria  Pseudomonadales
## ASV115 ASV115 Bacteria Proteobacteria Gammaproteobacteria  Pseudomonadales
## ASV253 ASV253 Bacteria Proteobacteria Gammaproteobacteria  Pseudomonadales
## ASV278 ASV278 Bacteria Proteobacteria Alphaproteobacteria  Caulobacterales
##                   Family            Genus      Species
## ASV3        Vibrionaceae           Vibrio   vulnificus
## ASV5   Sphingomonadaceae     Sphingomonas psychrolutea
## ASV6        Vibrionaceae           Vibrio tasmaniensis
## ASV17     Halomonadaceae      Pseudomonas     otitidis
## ASV20     Halomonadaceae      Pseudomonas     otitidis
## ASV33   Beijerinckiaceae    Methylorubrum  rhodesianum
## ASV38  Xanthobacteraceae           Afipia     broomeae
## ASV49   Oxalobacteraceae    Herminiimonas  saxobsidens
## ASV51   Burkholderiaceae Paraburkholderia     fungorum
## ASV61        Bacillaceae         Bacillus      pumilus
## ASV66   Pseudomonadaceae        Ralstonia solanacearum
## ASV112     Moraxellaceae    Acinetobacter   wuhouensis
## ASV115     Moraxellaceae    Acinetobacter   oleivorans
## ASV253     Moraxellaceae    Acinetobacter      tandoii
## ASV278  Caulobacteraceae    Brevundimonas     diminuta