Before analyzing, run the following codes:
# ANCOMBC function
source("/Users/qcvp/R/custom script/ANCOMBC.R")
# Pairwise PERMANOVA
source("/Users/qcvp/R/custom script/pairwise.adonis2.R")
# Indices normality
source("/Users/qcvp/R/custom script/Indices_normality.R")
context <- utils::read.table("/Users/qcvp/R/outputs/dada2_total/asv_table/metadata_longitudinal.txt", header = TRUE, row.names = 1, sep="\t")
rownames(context) <- base::gsub("-",".",rownames(context))
head(context)
## Sequencing Date Transect Station Biosample Biosample_type
## F1w1b HM-W-1 Nov_2022 Fish_Farm HM Environment Water
## F1w2b HM-W-2 Nov_2022 Fish_Farm HM Environment Water
## F1w3b HM-W-3 Nov_2022 Fish_Farm HM Environment Water
## Fi.10.3.GM Fi-10-3 Jul_2023 Hon_Mieu Farm_10 Animals Gut
## Fi.10.4.GB Fi-10-4 Jul_2023 Hon_Mieu Farm_10 Animals Gut
## Fi.10.5.GB Fi-10-5 Jul_2023 Hon_Mieu Farm_10 Animals Gut
## Biosample_cluster Host_type Host_subtype Host_genus Host_species
## F1w1b Water Environment Water <NA> <NA>
## F1w2b Water Environment Water <NA> <NA>
## F1w3b Water Environment Water <NA> <NA>
## Fi.10.3.GM Vertebrate Vertebrate Fish Siganus canaliculatus
## Fi.10.4.GB Vertebrate Vertebrate Fish Siganus canaliculatus
## Fi.10.5.GB Vertebrate Vertebrate Fish Siganus canaliculatus
## Species_ID
## F1w1b <NA>
## F1w2b <NA>
## F1w3b <NA>
## Fi.10.3.GM Siganus_canaliculatus
## Fi.10.4.GB Siganus_canaliculatus
## Fi.10.5.GB Siganus_canaliculatus
##Loading data
Nem_hu_unrarefied <- base::readRDS("/Users/qcvp/R/inputs/data for PHPB comunity/Nem_hu_unrarefied.rds") # Uncut absolute abundance
Nem_hu_unrarefied_rel_whole <- base::readRDS("/Users/qcvp/R/inputs/data for PHPB comunity/Nem_hu_unrarefied_rel_whole.rds") # Uncutted relative abundance against whole community samples
Nem_hu_rarefied <- base::readRDS("/Users/qcvp/R/inputs/data for PHPB comunity/Nem_hu_rarefied.rds") # Cutted absolute abundance
Nem_hu_unrarefied_rel_PHPB <- base::readRDS("/Users/qcvp/R/inputs/data for PHPB comunity/Nem_hu_unrarefied_rel_PHPB.rds") # Uncutted relative abundance against PHPB community samples
physeq_Nem_long_rarefied <- base::readRDS("/Users/qcvp/R/outputs/Decomtaminated/physeq_Nem_long_rarefied.rds") # Uncutted relative abundance against PHPB community samples
Plotting
library(ggplot2)
# Extract samples reads and add metadata
PHPB_rel_whole <- as.data.frame(rowSums(Nem_hu_unrarefied_rel_whole@otu_table@.Data))
## Loading required package: phyloseq
PHPB_rel_whole <- cbind(PHPB_rel_whole, physeq_Nem_long_rarefied@sam_data)
colnames(PHPB_rel_whole)[colnames(PHPB_rel_whole) == "rowSums(Nem_hu_unrarefied_rel_whole@otu_table@.Data)"] <- "Abundance"
PHPB_rel_whole$Date = factor(PHPB_rel_whole$Date , levels = c("Nov_2022", "Jul_2023", "Dec_2023")) #rearrange date
PHPB_rel_whole$Biosample = factor(PHPB_rel_whole$Biosample , levels = c("Environment", "Animals","Human", "Control")) # rearrange date
p1 <- ggplot(PHPB_rel_whole, aes(x = Biosample, y = Abundance, color = Biosample)) +
geom_jitter(position = position_jitter(width = .20), alpha = 0.5, size = 3, aes(colour = Biosample), show.legend = TRUE) + theme_bw() +
geom_boxplot(aes(colour = Biosample), alpha=0.1, outlier.colour = NA) +
scale_y_continuous(limits=c(0, 0.95)) +
theme(axis.title.x = element_blank(),
legend.position = "none",
legend.title = element_blank(),
legend.text = element_text(size = 13),
strip.text = element_text(size = 13),
axis.title.y = element_text(size = 15),
axis.text.y = element_text(size=13),
axis.text.x = element_text(size = 10, angle = 45, hjust = 1)) +
scale_colour_brewer(palette="Set1") +
ylab("Relative Abundance") +
scale_colour_manual(breaks =c("Nov_22","Jul_23","Dec_23","Dec_21"),
values = c("pink","purple2","brown","green")) +
scale_colour_manual(breaks =c("Human","Animals","Environment","Control"),
values = c("red","orange","blue","green"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p1
#Parametric test - p<0.05 = non-parametric, p>0.05 = parametric
PHPB_rel_whole |>
dplyr::select(Abundance) |>
indices_normality(nrow = 2, ncol = 3)
stats::kruskal.test(Abundance ~ Biosample, data = PHPB_rel_whole)
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by Biosample
## Kruskal-Wallis chi-squared = 26.677, df = 3, p-value = 6.879e-06
stats::kruskal.test(Abundance ~ Date, data = PHPB_rel_whole)
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by Date
## Kruskal-Wallis chi-squared = 6.5863, df = 2, p-value = 0.03714
FSA::dunnTest(Abundance ~ Biosample, data = PHPB_rel_whole, method = "bh")
## Dunn (1964) Kruskal-Wallis multiple comparison
## p-values adjusted with the Benjamini-Hochberg method.
## Comparison Z P.unadj P.adj
## 1 Animals - Control -2.649197 8.068333e-03 0.0121024997
## 2 Animals - Environment -2.029643 4.239286e-02 0.0508714308
## 3 Control - Environment 0.660496 5.089356e-01 0.5089356035
## 4 Animals - Human 2.748823 5.980964e-03 0.0119619276
## 5 Control - Human 3.898323 9.686121e-05 0.0005811672
## 6 Environment - Human 3.423778 6.175715e-04 0.0018527144
FSA::dunnTest(Abundance ~ Date, data = PHPB_rel_whole, method = "bh")
## Warning: Some rows deleted from 'x' and 'g' because missing data.
## Dunn (1964) Kruskal-Wallis multiple comparison
## p-values adjusted with the Benjamini-Hochberg method.
## Comparison Z P.unadj P.adj
## 1 Dec_2023 - Jul_2023 2.566341 0.01027777 0.0308333
## 2 Dec_2023 - Nov_2022 1.314114 0.18880794 0.1888079
## 3 Jul_2023 - Nov_2022 -1.379936 0.16760625 0.2514094
Calculate indices
library(microbiome)
##
## microbiome R package (microbiome.github.com)
##
##
##
## Copyright (C) 2011-2022 Leo Lahti,
## Sudarshan Shetty et al. <microbiome.github.io>
##
## Attaching package: 'microbiome'
## The following object is masked from 'package:ggplot2':
##
## alpha
## The following object is masked from 'package:base':
##
## transform
library(adespatial)
## Registered S3 methods overwritten by 'adegraphics':
## method from
## biplot.dudi ade4
## kplot.foucart ade4
## kplot.mcoa ade4
## kplot.mfa ade4
## kplot.pta ade4
## kplot.sepan ade4
## kplot.statis ade4
## scatter.coa ade4
## scatter.dudi ade4
## scatter.nipals ade4
## scatter.pco ade4
## score.acm ade4
## score.mix ade4
## score.pca ade4
## screeplot.dudi ade4
## Registered S3 method overwritten by 'spdep':
## method from
## plot.mst ape
## Registered S3 methods overwritten by 'adespatial':
## method from
## plot.multispati adegraphics
## print.multispati ade4
## summary.multispati ade4
OTU<- Nem_hu_unrarefied@otu_table
LBCD <- beta.div(OTU, method = "hellinger", sqrt.D = FALSE, samp = TRUE, nperm = 999, clock = TRUE)
## Time for computation = 3.208000 sec
LCBD <- data.frame(LBCD$LCBD)
alpha_indices_PHPB <- microbiome::alpha(
Nem_hu_rarefied,
index = c("observed", "diversity_gini_simpson",
"diversity_shannon", "evenness_pielou",
"dominance_relative"))
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
#Creating data frame with indices included
alpha_indices_PHPB <- phyloseq::sample_data(alpha_indices_PHPB)
#Merge with phyloseq object
Nem_hu_rarefied<- phyloseq::merge_phyloseq(Nem_hu_rarefied, alpha_indices_PHPB)
#Change sample_data as a data frame
Indices_data_PHPB <- data.frame(sample_data(Nem_hu_rarefied))
#Rearrange time
Indices_data_PHPB$Date = factor(Indices_data_PHPB$Date , levels = c("Nov_2022", "Jul_2023", "Dec_2023", "Control"))
#Insert LCBD data
Indices_data_PHPB$LCBD <- LCBD$LBCD.LCBD
library(gridExtra)
Indices_data_PHPB$Biosample = factor(Indices_data_PHPB$Biosample , levels = c("Environment", "Animals","Human", "Control")) # rearrange Biosample
#Richness
p2 <- ggplot(Indices_data_PHPB, aes(x = Biosample, y = observed, color = Biosample)) +
geom_jitter(position = position_jitter(width = .20), alpha = 0.5, size = 3, aes(colour = Biosample), show.legend = TRUE) + theme_bw() +
geom_boxplot(aes(colour = Biosample), alpha=0.1, outlier.colour = NA) +
scale_y_continuous(limits=c(0, 36)) +
theme(axis.title.x = element_blank(),
legend.position = "none", #Remove legends
legend.title = element_blank(),
legend.text = element_text(size = 13),
strip.text = element_text(size = 13),
axis.title.y = element_text(size = 15),
axis.text.y = element_text(size=13),
axis.text.x = element_text(size = 10, angle = 45, hjust = 1)) +
ylab("PHPB Species richness")+
scale_colour_brewer(palette="Set1") +
labs(color ="Biosample") +
scale_colour_manual(breaks =c("Nov_22","Jul_23","Dec_23","Dec_21"),
values = c("pink","purple2","brown","green")) +
scale_colour_manual(breaks =c("Human","Animals","Environment","Control"),
values = c("red","orange","blue","green"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
#----------------------------------------------------------------------------------------------------------------------------------------
#Shannon index
p3 <- ggplot(Indices_data_PHPB, aes(x = Biosample, y = diversity_shannon, color = Biosample)) +
geom_jitter(position = position_jitter(width = .20), alpha = 0.5, size = 3, aes(colour = Biosample), show.legend = TRUE) + theme_bw() +
geom_boxplot(aes(colour = Biosample), alpha=0.1, outlier.colour = NA) +
scale_y_continuous(limits=c(0, 3.2)) +
theme(axis.title.x = element_blank(),
legend.position = "none", #Remove legends
legend.title = element_blank(),
legend.text = element_text(size = 13),
strip.text = element_text(size = 13),
axis.title.y = element_text(size = 15),
axis.text.y = element_text(size=13),
axis.text.x = element_text(size = 10, angle = 45, hjust = 1)) +
ylab("PHPB Shannon index")+
scale_colour_brewer(palette="Set1") +
labs(color ="Biosample") +
scale_colour_manual(breaks =c("Nov_22","Jul_23","Dec_23","Dec_21"),
values = c("pink","purple2","brown","green")) +
scale_colour_manual(breaks =c("Human","Animals","Environment","Control"),
values = c("red","orange","blue","green"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
#----------------------------------------------------------------------------------------------------------------------------------------
#LCBD index
p2.1 <- ggplot(Indices_data_PHPB, aes(x = Biosample, y = LCBD, color = Biosample)) +
geom_jitter(position = position_jitter(width = .20), alpha = 0.5, size = 3, aes(colour = Biosample), show.legend = TRUE) + theme_bw() +
geom_boxplot(aes(colour = Biosample), alpha=0.1, outlier.colour = NA) +
theme(axis.title.x = element_blank(),
legend.position = "none", #Remove legends
legend.title = element_blank(),
legend.text = element_text(size = 13),
strip.text = element_text(size = 13),
axis.title.y = element_text(size = 15),
axis.text.y = element_text(size=13),
axis.text.x = element_text(size = 10, angle = 45, hjust = 1)) +
ylab("LCBD indices")+
labs(color ="Biosample") +
scale_colour_manual(breaks =c("Human","Animals","Environment","Control"),
values = c("red","orange","blue","green"))
p2.1
#Combine plots
g2 <- grid.arrange(p2, p3, p2.1, nrow = 1)
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
g2
## TableGrob (1 x 3) "arrange": 3 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (1-1,3-3) arrange gtable[layout]
Select indices to test & run normality check
Indices_data_PHPB |>
dplyr::select(observed,
diversity_gini_simpson,
diversity_shannon,
evenness_pielou, dominance_relative, LCBD) |>
indices_normality(nrow = 2, ncol = 3)
#p<0.05 = non-parametric, p>0.05 = parametric
Kruskal-Wallis: non-parametric & at least 3 groups
#Richness - Kruskal wallis rank sum test for p-value (<0.05 = reject null hypothesis)
stats::kruskal.test(observed ~ Biosample, data = Indices_data_PHPB)
##
## Kruskal-Wallis rank sum test
##
## data: observed by Biosample
## Kruskal-Wallis chi-squared = 63.456, df = 3, p-value = 1.073e-13
stats::kruskal.test(observed ~ Date, data = Indices_data_PHPB)
##
## Kruskal-Wallis rank sum test
##
## data: observed by Date
## Kruskal-Wallis chi-squared = 19.994, df = 3, p-value = 0.0001702
#Richness -DuNem test reveals which one is significantly different
FSA::dunnTest(observed ~ Biosample, data = Indices_data_PHPB, method = "bh")
## Comparison Z P.unadj P.adj
## 1 Animals - Control -5.545131 2.937346e-08 8.812038e-08
## 2 Animals - Environment -5.319609 1.039907e-07 2.079813e-07
## 3 Control - Environment 0.620922 5.346510e-01 5.346510e-01
## 4 Animals - Human -5.597206 2.178334e-08 1.307001e-07
## 5 Control - Human 3.137649 1.703085e-03 2.554628e-03
## 6 Environment - Human 2.628119 8.585847e-03 1.030302e-02
FSA::dunnTest(observed ~ Date, data = Indices_data_PHPB, method = "bh")
## Comparison Z P.unadj P.adj
## 1 Control - Dec_2023 3.4660326 5.281993e-04 1.056399e-03
## 2 Control - Jul_2023 3.7511924 1.759956e-04 5.279867e-04
## 3 Dec_2023 - Jul_2023 0.5656317 5.716442e-01 5.716442e-01
## 4 Control - Nov_2022 4.3974498 1.095302e-05 6.571812e-05
## 5 Dec_2023 - Nov_2022 1.7750292 7.589306e-02 1.138396e-01
## 6 Jul_2023 - Nov_2022 1.1627589 2.449273e-01 2.939128e-01
#----------------------------------------------------------------------------------------------------------------------------------------
#Shannon index - Kruskal wallis rank sum test for p-value (<0.05 = reject null hypothesis)
stats::kruskal.test(diversity_shannon ~ Biosample, data = Indices_data_PHPB)
##
## Kruskal-Wallis rank sum test
##
## data: diversity_shannon by Biosample
## Kruskal-Wallis chi-squared = 39.821, df = 3, p-value = 1.163e-08
stats::kruskal.test(diversity_shannon ~ Date, data = Indices_data_PHPB)
##
## Kruskal-Wallis rank sum test
##
## data: diversity_shannon by Date
## Kruskal-Wallis chi-squared = 17.903, df = 3, p-value = 0.0004605
#Shannon index - DuNem test reveals which one is significantly different
FSA::dunnTest(diversity_shannon ~ Biosample, data = Indices_data_PHPB, method = "bh")
## Comparison Z P.unadj P.adj
## 1 Animals - Control -4.927537 8.327247e-07 4.996348e-06
## 2 Animals - Environment -3.209812 1.328219e-03 2.656438e-03
## 3 Control - Environment 1.630440 1.030086e-01 1.236103e-01
## 4 Animals - Human -4.612983 3.969306e-06 1.190792e-05
## 5 Control - Human 2.947837 3.200062e-03 4.800093e-03
## 6 Environment - Human 0.973301 3.304037e-01 3.304037e-01
FSA::dunnTest(diversity_shannon ~ Date, data = Indices_data_PHPB, method = "bh")
## Comparison Z P.unadj P.adj
## 1 Control - Dec_2023 3.1971353 1.387998e-03 0.0027759960
## 2 Control - Jul_2023 3.4753731 5.101433e-04 0.0015304300
## 3 Dec_2023 - Jul_2023 0.5508011 5.817701e-01 0.5817700525
## 4 Control - Nov_2022 4.1379256 3.504599e-05 0.0002102759
## 5 Dec_2023 - Nov_2022 1.7997903 7.189376e-02 0.1078406379
## 6 Jul_2023 - Nov_2022 1.2026858 2.290979e-01 0.2749175069
#----------------------------------------------------------------------------------------------------------------------------------------
#LCBD index - Kruskal wallis rank sum test for p-value (<0.05 = reject null hypothesis)
stats::kruskal.test(LCBD ~ Biosample, data = Indices_data_PHPB)
##
## Kruskal-Wallis rank sum test
##
## data: LCBD by Biosample
## Kruskal-Wallis chi-squared = 43.823, df = 3, p-value = 1.645e-09
stats::kruskal.test(LCBD ~ Date, data = Indices_data_PHPB)
##
## Kruskal-Wallis rank sum test
##
## data: LCBD by Date
## Kruskal-Wallis chi-squared = 32.306, df = 3, p-value = 4.511e-07
#LCBD index - DuNem test reveals which one is significantly different
FSA::dunnTest(LCBD ~ Biosample, data = Indices_data_PHPB, method = "bh")
## Comparison Z P.unadj P.adj
## 1 Animals - Control 5.2430505 1.579433e-07 9.476599e-07
## 2 Animals - Environment 2.6998301 6.937489e-03 1.040623e-02
## 3 Control - Environment -2.2434968 2.486480e-02 2.983775e-02
## 4 Animals - Human 5.0522684 4.365937e-07 1.309781e-06
## 5 Control - Human -3.0729152 2.119787e-03 4.239574e-03
## 6 Environment - Human -0.2376326 8.121661e-01 8.121661e-01
FSA::dunnTest(LCBD ~ Date, data = Indices_data_PHPB, method = "bh")
## Comparison Z P.unadj P.adj
## 1 Control - Dec_2023 -2.730754 6.318954e-03 9.478431e-03
## 2 Control - Jul_2023 -4.760506 1.931086e-06 1.158651e-05
## 3 Dec_2023 - Jul_2023 -3.895476 9.800613e-05 1.960123e-04
## 4 Control - Nov_2022 -4.129380 3.637423e-05 1.091227e-04
## 5 Dec_2023 - Nov_2022 -2.721582 6.497022e-03 7.796426e-03
## 6 Jul_2023 - Nov_2022 1.376793 1.685763e-01 1.685763e-01
Wilconxon rank test: parametric
#Richness
ggpubr::compare_means(observed ~ Biosample, Indices_data_PHPB, method = "wilcox.test")
## # A tibble: 6 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 observed Environment Animals 0.00000624 3.10e-5 6.2e-06 **** Wilco…
## 2 observed Environment Human 0.000573 1.1 e-3 0.00057 *** Wilco…
## 3 observed Environment Control 0.0331 3.3 e-2 0.03307 * Wilco…
## 4 observed Animals Human 0.00000000136 8.2 e-9 1.4e-09 **** Wilco…
## 5 observed Animals Control 0.0000217 8.70e-5 2.2e-05 **** Wilco…
## 6 observed Human Control 0.000105 3.2 e-4 0.00011 *** Wilco…
ggpubr::compare_means(observed ~ Date, Indices_data_PHPB, method = "wilcox.test")
## # A tibble: 6 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 observed Nov_2022 Jul_2023 0.205 0.41 0.20548 ns Wilcoxon
## 2 observed Nov_2022 Dec_2023 0.0742 0.22 0.07424 ns Wilcoxon
## 3 observed Nov_2022 Control 0.0000692 0.00042 6.9e-05 **** Wilcoxon
## 4 observed Jul_2023 Dec_2023 0.506 0.51 0.50619 ns Wilcoxon
## 5 observed Jul_2023 Control 0.000155 0.00065 0.00016 *** Wilcoxon
## 6 observed Dec_2023 Control 0.000129 0.00065 0.00013 *** Wilcoxon
#-------------------------------------------------------------------------------------------------------------------------------------------
#Shannon index
ggpubr::compare_means(diversity_shannon ~ Biosample, Indices_data_PHPB, method = "wilcox.test")
## # A tibble: 6 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 diversity_shannon Environment Animals 3.36e-3 1 e-2 0.00336 ** Wilco…
## 2 diversity_shannon Environment Human 1.98e-1 2 e-1 0.19805 ns Wilco…
## 3 diversity_shannon Environment Control 1.15e-2 2.3 e-2 0.01154 * Wilco…
## 4 diversity_shannon Animals Human 1.04e-6 6.20e-6 1.0e-06 **** Wilco…
## 5 diversity_shannon Animals Control 9.45e-5 4.7 e-4 9.4e-05 **** Wilco…
## 6 diversity_shannon Human Control 3.60e-4 1.4 e-3 0.00036 *** Wilco…
ggpubr::compare_means(diversity_shannon ~ Date, Indices_data_PHPB, method = "wilcox.test")
## # A tibble: 6 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 diversity_shannon Nov_2022 Jul_2023 0.167 0.33 0.16655 ns Wilco…
## 2 diversity_shannon Nov_2022 Dec_2023 0.0871 0.26 0.08705 ns Wilco…
## 3 diversity_shannon Nov_2022 Control 0.000151 0.00075 0.00015 *** Wilco…
## 4 diversity_shannon Jul_2023 Dec_2023 0.490 0.49 0.49030 ns Wilco…
## 5 diversity_shannon Jul_2023 Control 0.0000296 0.00018 3e-05 **** Wilco…
## 6 diversity_shannon Dec_2023 Control 0.000287 0.0011 0.00029 *** Wilco…
#-------------------------------------------------------------------------------------------------------------------------------------------
#LCBD index
ggpubr::compare_means(LCBD ~ Biosample, Indices_data_PHPB, method = "wilcox.test")
## # A tibble: 6 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 LCBD Environment Animals 0.00317 0.0063 0.00317 ** Wilcoxon
## 2 LCBD Environment Human 0.862 0.86 0.86222 ns Wilcoxon
## 3 LCBD Environment Control 0.000175 0.0007 0.00017 *** Wilcoxon
## 4 LCBD Animals Human 0.000000183 0.0000011 1.8e-07 **** Wilcoxon
## 5 LCBD Animals Control 0.0000386 0.00019 3.9e-05 **** Wilcoxon
## 6 LCBD Human Control 0.000189 0.0007 0.00019 *** Wilcoxon
ggpubr::compare_means(LCBD ~ Date, Indices_data_PHPB, method = "wilcox.test")
## # A tibble: 6 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 LCBD Nov_2022 Jul_2023 0.145 0.15 0.14539 ns Wilcoxon
## 2 LCBD Nov_2022 Dec_2023 0.00478 0.0096 0.00478 ** Wilcoxon
## 3 LCBD Nov_2022 Control 0.0000591 0.00027 5.9e-05 **** Wilcoxon
## 4 LCBD Jul_2023 Dec_2023 0.0000548 0.00027 5.5e-05 **** Wilcoxon
## 5 LCBD Jul_2023 Control 0.0000000200 0.00000012 2.0e-08 **** Wilcoxon
## 6 LCBD Dec_2023 Control 0.000334 0.001 0.00033 *** Wilcoxon
Prepare data for bacterial composition analysis
library("dplyr")
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#Melting unrarefied Absolute Abundance
Melted_Nem_hu_unrarefied_abs <- Nem_hu_unrarefied %>%
phyloseq::psmelt() %>% # Melt to long format
filter(Abundance > 0) %>% # Filter out absent abundance taxa reads
arrange(Species)
#Recalculate relative abundance among PHPB for bacterial composition analysis
Nem_hu_unrarefied_rel_PHPB <- phyloseq::transform_sample_counts(Nem_hu_unrarefied, function(x) x / sum(x)) #The relative abundance above was the relative abundance of PHPB within whole bacterial community so inorder to investigate the PHPB composition, we have recalculate the relative abundance for PHPB within itself in each sample only
saveRDS(Nem_hu_unrarefied_rel_PHPB,file="/Users/qcvp/R/inputs/data for PHPB comunity/Nem_hu_unrarefied_rel_PHPB.rds")
#Melting unrarefied Relative Abundance
Melted_Nem_hu_unrarefied_rel <- Nem_hu_unrarefied_rel_PHPB %>%
phyloseq::psmelt() %>% # Melt to long format
filter(Abundance > 0) %>% # Filter out low abundance taxa reads
arrange(Species)
Extract ASV list for Venn diagram
#Combine taxonomy (Family + Genus + Species)
Melted_Nem_hu_unrarefied_abs <- Melted_Nem_hu_unrarefied_abs %>%
mutate(taxcom = paste(Family, Genus, Species, sep =";"))
#Subset samples occur in each Biosample
animal_sample <- base::subset(Melted_Nem_hu_unrarefied_abs, Biosample == "Animals")
Envi_sample <- base::subset(Melted_Nem_hu_unrarefied_abs, Biosample == "Environment")
Human_sample <- base::subset(Melted_Nem_hu_unrarefied_abs, Biosample == "Human")
#Extract list of ASV occur in sample types
list_tax_all <- Reduce(intersect, list(animal_sample$taxcom, Envi_sample$taxcom, Human_sample$taxcom)) #Species occur in all biosample types
list_tax_EA <- setdiff(intersect(Envi_sample$taxcom,animal_sample$taxcom), list_tax_all) #Species occur in both Environment and animal but not in Human
list_tax_EH <- setdiff(intersect(Envi_sample$taxcom,Human_sample$taxcom), list_tax_all) #Species occur in both Environment and human but not in animal
list_tax_AH <- setdiff(intersect(animal_sample$taxcom,Human_sample$taxcom), list_tax_all) #Species occur in both human and animal but not in environment
list_tax_A <- Reduce(setdiff, list(animal_sample$taxcom, list_tax_EA, list_tax_AH, list_tax_all)) #Species occur in only Animal
list_tax_A
## [1] "Comamonadaceae;uncultured;Burkholderiales"
## [2] "Xanthomonadaceae;Xanthomonas;albilineans"
## [3] "Pseudomonadaceae;Pseudomonas;alcaligenes"
## [4] "Shewanellaceae;Shewanella;algae"
## [5] "Prevotellaceae;Prevotella;amnii"
## [6] "Peptostreptococcaceae;Peptostreptococcus;anaerobius"
## [7] "Enterococcaceae;Enterococcus;cecorum"
## [8] "Xanthobacteraceae;Afipia;clevelandensis"
## [9] "Bacillaceae;Bacillus;coagulans"
## [10] "Lactobacillaceae;Weissella;confusa"
## [11] "Prevotellaceae;Prevotella;copri"
## [12] "Rhodobacteraceae;Roseobacter;denitrificans"
## [13] "Corynebacteriaceae;Corynebacterium;durum"
## [14] "Carnobacteriaceae;Granulicatella;elegans"
## [15] "Porphyromonadaceae;Porphyromonas;endodontalis"
## [16] "Streptococcaceae;Streptococcus;equinus"
## [17] "Halomonadaceae;Pseudomonas;fluorescens"
## [18] "Corynebacteriaceae;Corynebacterium;glucuronolyticum"
## [19] "Corynebacteriaceae;Corynebacterium;jeikeium"
## [20] "Moraxellaceae;Acinetobacter;johnsonii"
## [21] "Lachnospiraceae;Cellulosilyticum;lentocellum"
## [22] "Rhodobacteraceae;Paracoccus;marinus"
## [23] "Spirochaetaceae;Treponema;medium"
## [24] "Cellvibrionaceae;Cellvibrio;mixtus"
## [25] "Comamonadaceae;Variovorax;paradoxus"
## [26] "Vibrionaceae;Vibrio;parahaemolyticus"
## [27] "Pseudomonadaceae;Pseudomonas;putida"
## [28] "Brevibacteriaceae;Brevibacterium;ravenspurgense"
## [29] "Lactobacillaceae;Lactobacillus;rhamnosus"
## [30] "Micrococcaceae;Kocuria;rhizophila"
## [31] "Corynebacteriaceae;Corynebacterium;riegelii"
## [32] "Moraxellaceae;Psychrobacter;sanguinis"
## [33] "Sphingomonadaceae;Sphingomonas;sanxanigenens"
## [34] "Family XI;Peptoniphilus;senegalensis"
## [35] "Planococcaceae;Lysinibacillus;sphaericus"
## [36] "Corynebacteriaceae;Corynebacterium;tuscaniense"
## [37] "Leptotrichiaceae;Leptotrichia;wadei"
## [38] "Pseudomonadaceae;Pseudomonas;xanthomarina"
list_tax_H <- Reduce(setdiff, list(Human_sample$taxcom, list_tax_EH, list_tax_AH, list_tax_all)) #Species occur in only Human
list_tax_H
## [1] "Rhodocyclaceae;uncultured;Sterolibacterium"
## [2] "Corynebacteriaceae;Corynebacterium;accolens"
## [3] "Propionibacteriaceae;Cutibacterium;acnes"
## [4] "Oxalobacteraceae;Massilia;aerilata"
## [5] "Enterobacteriaceae;Klebsiella;aerogenes"
## [6] "Moritellaceae;Paramoritella;alkaliphila"
## [7] "Rhizobiaceae;Aureimonas;altamirensis"
## [8] "Enterobacteriaceae;Enterobacter;asburiae"
## [9] "Alteromonadaceae;Alteromonas;australica"
## [10] "Comamonadaceae;uncultured;bacterium"
## [11] "Alcaligenaceae;uncultured;bacterium"
## [12] "Methylophilaceae;uncultured;bacterium"
## [13] "Moraxellaceae;Acinetobacter;baumannii"
## [14] "Comamonadaceae;uncultured;beta"
## [15] "Azospirillaceae;Azospirillum;brasilense"
## [16] "Bacteroidaceae;Bacteroides;caccae"
## [17] "Bacillaceae;Bacillus;circulans"
## [18] "Bacteroidaceae;Bacteroides;coprocola"
## [19] "Moraxellaceae;Psychrobacter;cryohalolentis"
## [20] "Aerococcaceae;Abiotrophia;defectiva"
## [21] "Prevotellaceae;Prevotella;disiens"
## [22] "Enterobacteriaceae;Salmonella;enterica"
## [23] "Bacillaceae;Staphylococcus;epidermidis"
## [24] "Alcaligenaceae;Alcaligenes;faecalis"
## [25] "Neisseriaceae;Neisseria;flavescens"
## [26] "Lachnospiraceae;[Ruminococcus];gnavus"
## [27] "Propionibacteriaceae;Propionibacterium;granulosum"
## [28] "Pasteurellaceae;Mannheimia;haemolytica"
## [29] "Family XI;Peptoniphilus;harei"
## [30] "Prevotellaceae;Prevotella;histicola"
## [31] "Corynebacteriaceae;Corynebacterium;imitans"
## [32] "Selenomonadaceae;Selenomonas;infelix"
## [33] "Lactobacillaceae;Lactobacillus;johnsonii"
## [34] "Spirochaetaceae;Treponema;lecithinolyticum"
## [35] "Micrococcaceae;Micrococcus;lylae"
## [36] "Family XI;Finegoldia;magna"
## [37] "Dietziaceae;Dietzia;maris"
## [38] "Family XI;Fenollaria;massiliensis"
## [39] "Actinomycetaceae;Actinomyces;massiliensis"
## [40] "Burkholderiaceae;Cupriavidus;metallidurans"
## [41] "Family XI;Parvimonas;micra"
## [42] "Dermacoccaceae;Dermacoccus;nishinomiyaensis"
## [43] "Xanthomonadaceae;Stenotrophomonas;nitritireducens"
## [44] "Moraxellaceae;Psychrobacter;nivimaris"
## [45] "Fusobacteriaceae;Fusobacterium;nucleatum"
## [46] "Actinomycetaceae;Schaalia;odontolytica"
## [47] "Oxalobacteraceae;Undibacterium;oligocarboniphilum"
## [48] "Enterococcaceae;Raoultella;ornithinolytica"
## [49] "Corynebacteriaceae;Corynebacterium;otitidis"
## [50] "Carnobacteriaceae;Alloiococcus;otitis"
## [51] "Spirochaetaceae;Treponema;pallidum"
## [52] "Lactobacillaceae;Weissella;paramesenteroides"
## [53] "Sphingomonadaceae;Sphingomonas;paucimobilis"
## [54] "Lactobacillaceae;Pediococcus;pentosaceus"
## [55] "Staphylococcaceae;Staphylococcus;pettenkoferi"
## [56] "Microbacteriaceae;Microbacterium;phyllosphaerae"
## [57] "Ruminococcaceae;Faecalibacterium;prausnitzii"
## [58] "Streptococcaceae;Streptococcus;pyogenes"
## [59] "Streptococcaceae;Lactococcus;raffinolactis"
## [60] "Yersiniaceae;Yersinia;ruckeri"
## [61] "Prevotellaceae;Prevotella;salivae"
## [62] "Leptotrichiaceae;Sneathia;sanguinegens"
## [63] "Gemellaceae;Gemella;sanguinis"
## [64] "Pasteurellaceae;Aggregatibacter;segnis"
## [65] "Rickettsiaceae;Rickettsia;slovaca"
## [66] "Flavobacteriaceae;Capnocytophaga;sputigena"
## [67] "Prevotellaceae;Alloprevotella;tannerae"
## [68] "Oxalobacteraceae;Massilia;timonae"
## [69] "Rhizobiaceae;Agrobacterium;tumefaciens"
## [70] "Corynebacteriaceae;Corynebacterium;urealyticum"
## [71] "Moraxellaceae;Acinetobacter;variabilis"
## [72] "Streptomycetaceae;Streptomyces;violaceoruber"
## [73] "Pseudomonadaceae;Pseudomonas;viridiflava"
## [74] "Morganellaceae;Proteus;vulgaris"
## [75] "Lachnospiraceae;Blautia;wexlerae"
## [76] "Rhodobacteraceae;Paracoccus;yeei"
list_tax_E <- Reduce(setdiff, list(Envi_sample$taxcom, list_tax_EH, list_tax_EA, list_tax_all)) #Species occur in only Environment
list_tax_E
## [1] "Pasteurellaceae;Terrahaemophilus;aromaticivorans"
## [2] "Aeromonadaceae;Shewanella;baltica"
## [3] "Micrococcaceae;Rothia;dentocariosa"
## [4] "Prevotellaceae;Prevotella;intermedia"
## [5] "Alcaligenaceae;Achromobacter;marplatensis"
## [6] "Comamonadaceae;Diaphorobacter;nitroreducens"
## [7] "Xanthomonadaceae;Xanthomonas;oryzae"
## [8] "Fusobacteriaceae;Fusobacterium;periodonticum"
## [9] "Carnobacteriaceae;Dolosigranulum;pigrum"
## [10] "Xanthomonadaceae;Stenotrophomonas;rhizophila"
## [11] "Clostridiaceae;Clostridium;saccharoperbutylacetonicum"
## [12] "Sphingomonadaceae;Sphingomonas;sanguinis"
## [13] "Anaerovoracaceae;[Eubacterium];saphenum"
## [14] "Streptococcaceae;Streptococcus;sinensis"
## [15] "Enterobacteriaceae;Pseudescherichia;vulneris"
## [16] "Sphingomonadaceae;Sphingomonas;wittichii"
## [17] "Pseudomonadaceae;Pseudomonas;yamanorum"
## [18] "Pseudomonadaceae;Pseudomonas;zhaodongensis"
#Checking if every taxonomy have been extracted
length(unique(Melted_Nem_hu_unrarefied_abs$taxcom)) - length(list_tax_E) + length(list_tax_H) + length(list_tax_A) + length(list_tax_all) + length(list_tax_EA) + length(list_tax_EH) + length(list_tax_AH)
## [1] 383
x <- list(A = animal_sample$taxcom, B = Envi_sample$taxcom, C = Human_sample$taxcom) # Combine list of Species of each Biosample
p4 <- ggVennDiagram::ggVennDiagram(x,
label = "count",) + # Adding label
theme(legend.position = "none") + # Remove lengend
scale_fill_gradient(low = "#F4FAFE", high = "#4981BF")
p4
ggsave("/Users/qcvp/R/Plots/Venn_diagram_PHPB.png", plot = p4, width = 11.5, height = 7)
Total
#Calculate errors
physeq_PHPB_long_summary <- Melted_Nem_hu_unrarefied_rel %>%
dplyr::group_by(Family, Sample) %>%
dplyr::summarise(
Sum = sum(Abundance, na.rm = TRUE)) %>% #Calculate sum abundance in a sample
dplyr::ungroup() %>%
dplyr::group_by(Family) %>%
dplyr::summarise(
Mean = mean(Sum, na.rm = TRUE)) %>% #Calculate mean abundance of families
dplyr::arrange(desc(Mean)) %>% # Sort abundances
dplyr::slice_head(n = 15) #Extracting top 15 highest abundance Families
## `summarise()` has grouped output by 'Family'. You can override using the
## `.groups` argument.
#Plotting
ggplot(physeq_PHPB_long_summary, aes(x = reorder(Family, -Mean), y = Mean)) +
geom_bar(stat = "identity", position = "dodge") +
theme_minimal() +
theme(legend.text = element_text(size = 15),
legend.position = "top",
plot.title = element_text(size = 20, face = "bold"),
legend.title = element_text(size = 18),
axis.title.x = element_blank(),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 15),
strip.text = element_text(size = 15),
axis.text.x = element_text(angle = 45, colour = "black", vjust = 1, hjust = 1, size = 15)) +
labs(title = "Top abundance Family", y = "Relative abundance mean") +
facet_grid( scales = "free")
Animal
animal_PHPB_long <- subset(Melted_Nem_hu_unrarefied_rel, Biosample == "Animals")
#Calculate errors
animal_PHPB_long_summary <- animal_PHPB_long %>%
dplyr::group_by(Family, Sample) %>%
dplyr::summarise(
Sum = sum(Abundance, na.rm = TRUE)) %>% #Calculate sum abundance in a sample
dplyr::ungroup() %>%
dplyr::group_by(Family) %>%
dplyr::summarise(
Mean = mean(Sum, na.rm = TRUE)) %>% #Calculate mean abundance of families
dplyr::arrange(desc(Mean)) %>% # Sort abundances
dplyr::slice_head(n = 15) #Extracting top 15 highest abundance Families
## `summarise()` has grouped output by 'Family'. You can override using the
## `.groups` argument.
#plotting
ggplot(animal_PHPB_long_summary, aes(x = reorder(Family, -Mean), y = Mean)) +
geom_bar(stat = "identity", position = "dodge") +
theme_minimal() +
theme(legend.text = element_text(size = 15),
legend.position = "top",
plot.title = element_text(size = 20, face = "bold"),
legend.title = element_text(size = 18),
axis.title.x = element_blank(),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 15),
strip.text = element_text(size = 15),
axis.text.x = element_text(angle = 45, colour = "black", vjust = 1, hjust = 1, size = 15)) +
labs(title = "Top abundance Family in Animal", y = "Relative abundance mean") +
facet_grid( scales = "free")
Environment
envi_PHPB_long <- subset(Melted_Nem_hu_unrarefied_rel, Biosample == "Environment")
#Calculate errors
envi_PHPB_long_summary <- envi_PHPB_long %>%
dplyr::group_by(Family, Sample) %>%
dplyr::summarise(
Sum = sum(Abundance, na.rm = TRUE)) %>% #Calculate sum abundance in a sample
dplyr::ungroup() %>%
dplyr::group_by(Family) %>%
dplyr::summarise(
Mean = mean(Sum, na.rm = TRUE)) %>% #Calculate mean abundance of families
dplyr::arrange(desc(Mean)) %>% # Sort abundances
dplyr::slice_head(n = 15) #Extracting top 15 highest abundance Families
## `summarise()` has grouped output by 'Family'. You can override using the
## `.groups` argument.
#Plotting
ggplot(envi_PHPB_long_summary, aes(x = reorder(Family, -Mean), y = Mean)) +
geom_bar(stat = "identity", position = "dodge") +
theme_minimal() +
theme(legend.text = element_text(size = 15),
legend.position = "top",
plot.title = element_text(size = 20, face = "bold"),
legend.title = element_text(size = 18),
axis.title.x = element_blank(),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 15),
strip.text = element_text(size = 15),
axis.text.x = element_text(angle = 45, colour = "black", vjust = 1, hjust = 1, size = 15)) +
labs(title = "Top abundance Family in Environment", y = "Relative abundance mean") +
facet_grid( scales = "free")
Human
human_PHPB_long <- subset(Melted_Nem_hu_unrarefied_rel, Biosample == "Human")
#Calculate means
human_PHPB_long_summary <- human_PHPB_long %>%
dplyr::group_by(Family, Sample) %>%
dplyr::summarise(
Sum = sum(Abundance, na.rm = TRUE)) %>% #Calculate sum abundance in a sample
dplyr::ungroup() %>%
dplyr::group_by(Family) %>%
dplyr::summarise(
Mean = mean(Sum, na.rm = TRUE)) %>% #Calculate mean abundance of families
dplyr::arrange(desc(Mean)) %>% # Sort abundances
dplyr::slice_head(n = 15) #Extracting top 15 highest abundance Families
## `summarise()` has grouped output by 'Family'. You can override using the
## `.groups` argument.
#Plotting
ggplot(human_PHPB_long_summary, aes(x = reorder(Family, -Mean), y = Mean)) +
geom_bar(stat = "identity", position = "dodge") +
theme_minimal() +
theme(legend.text = element_text(size = 15),
legend.position = "top",
plot.title = element_text(size = 20, face = "bold"),
legend.title = element_text(size = 18),
axis.title.x = element_blank(),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 15),
strip.text = element_text(size = 15),
axis.text.x = element_text(angle = 45, colour = "black", vjust = 1, hjust = 1, size = 15)) +
labs(title = "Top abundance Family in Human", y = "Relative abundance mean") +
facet_grid( scales = "free")
Extract most abundance Family
#Extract most abundant Family (5) names in each Biosample
most_abun_Family_animal <- dplyr::slice_head(animal_PHPB_long_summary, n = 15)$Family
most_abun_Family_envi <- dplyr::slice_head(envi_PHPB_long_summary, n = 15)$Family
most_abun_Family_human <- dplyr::slice_head(human_PHPB_long_summary, n = 15)$Family
most_abun_Family <- unique(c(most_abun_Family_animal, most_abun_Family_envi, most_abun_Family_human)) #Combine most abundant Families
most_abun_Family
## [1] "Neisseriaceae" "Vibrionaceae" "Xanthomonadaceae"
## [4] "Beijerinckiaceae" "Shewanellaceae" "Comamonadaceae"
## [7] "Sphingomonadaceae" "Rhizobiaceae" "Pseudomonadaceae"
## [10] "Rhodobacteraceae" "Lactobacillaceae" "Streptococcaceae"
## [13] "Planococcaceae" "Moraxellaceae" "Bacillaceae"
## [16] "Burkholderiaceae" "Halomonadaceae" "Caulobacteraceae"
## [19] "Erwiniaceae" "Alcaligenaceae" "Xanthobacteraceae"
## [22] "Corynebacteriaceae" "Oxalobacteraceae" "Rickettsiaceae"
#Modify family name (By keeping the family names of most abundant family while change the other in to "Other")
physeq_PHPB_long_topFamily <- Melted_Nem_hu_unrarefied_rel %>%
dplyr::mutate(Family = ifelse(Family %in% most_abun_Family, Family, "Other"))
physeq_PHPB_long_topFamily$Biosample = factor(physeq_PHPB_long_topFamily$Biosample , levels = c("Animals", "Human", "Environment", "Control")) # rearrange date
# Reorder 'Family' factor so that "Other" is last
physeq_PHPB_long_topFamily$Family <- factor(physeq_PHPB_long_topFamily$Family,
levels = c(setdiff(unique(physeq_PHPB_long_topFamily$Family), "Other"), "Other"))
# Set up custom color
custom_colors <- setNames(c(
"#14232c", "#446196", "#6e89af", "#97a1a8", "#9f7878", "#a25551",
"#b3272b", "#b7232c", "#7f1d1a", "#782c19", "#7c5b24", "#757235", "#706a2a",
"#9a6f2a", "#a9792d", "#b86b2d","#b74b2d", "#c17d37", "#726654", "#3f3f44", "#2c3d56",
"#876f9d", "#95599b", "#95169b"
), unique(physeq_PHPB_long_topFamily$Family[physeq_PHPB_long_topFamily$Family != "Other"])) #Prepare color for most abundance families
custom_colors <- c(custom_colors, "Other" = "gray") # Add color for other families
#Plotting
p5 <- ggplot(physeq_PHPB_long_topFamily , aes(x = Biosample, fill = Family)) +
geom_bar(position ="fill") +
scale_fill_manual(values = custom_colors) +
ylab("Pathobiome bacterial composition (%)") +
scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
facet_grid( ~ Biosample + Date, scales = "free", space = "free") +
theme_bw() +
theme(legend.text = element_text(face = "italic")) +
theme(axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
panel.background = element_blank(), #remove background
panel.grid.major = element_blank(), #remove major-grid labels
panel.grid.minor = element_blank()) #remove minor-grid labels
p5
ggsave("/Users/qcvp/R/Plots/PHPB_barplot_composition.png", plot = p5, width = 11.5, height = 7)
Total ASV, family
whole_ASVs <- phyloseq::taxa_sums(Nem_hu_unrarefied)
whole_ASVs <- subset(whole_ASVs, whole_ASVs > 0) #remove ASV with 0 abundance
length(whole_ASVs)
## [1] 290
length(unique(Melted_Nem_hu_unrarefied_abs$Family))
## [1] 65
Calculation of % Family reads / total reads in pathobiome
animal_abs_PHPB <- subset(Melted_Nem_hu_unrarefied_abs, Biosample == "Animals")
envi_abs_PHPB <- subset(Melted_Nem_hu_unrarefied_abs, Biosample == "Environment")
human_abs_PHPB <- subset(Melted_Nem_hu_unrarefied_abs, Biosample == "Human")
#Calculate % of Family in all samples
Melted_Nem_hu_unrarefied_abs %>%
group_by(Family) %>%
summarise(perc_read = sum(Abundance)/sum(Melted_Nem_hu_unrarefied_abs$Abundance)) %>%
arrange(desc (perc_read))
## # A tibble: 65 × 2
## Family perc_read
## <chr> <dbl>
## 1 Vibrionaceae 0.351
## 2 Beijerinckiaceae 0.105
## 3 Shewanellaceae 0.0820
## 4 Sphingomonadaceae 0.0627
## 5 Burkholderiaceae 0.0620
## 6 Halomonadaceae 0.0582
## 7 Moraxellaceae 0.0523
## 8 Pseudomonadaceae 0.0414
## 9 Oxalobacteraceae 0.0346
## 10 Bacillaceae 0.0312
## # ℹ 55 more rows
# For Animal sample
animal_abs_PHPB %>%
group_by(Family) %>%
summarise(perc_read = sum(Abundance)/sum(animal_abs_PHPB$Abundance)) %>%
arrange(desc (perc_read))
## # A tibble: 44 × 2
## Family perc_read
## <chr> <dbl>
## 1 Vibrionaceae 0.529
## 2 Shewanellaceae 0.144
## 3 Beijerinckiaceae 0.120
## 4 Sphingomonadaceae 0.0858
## 5 Pseudomonadaceae 0.0385
## 6 Xanthomonadaceae 0.0265
## 7 Moraxellaceae 0.0138
## 8 Halomonadaceae 0.00574
## 9 Comamonadaceae 0.00571
## 10 Burkholderiaceae 0.00493
## # ℹ 34 more rows
# For Environment sample
envi_abs_PHPB %>%
group_by(Family) %>%
summarise(perc_read = sum(Abundance)/sum(envi_abs_PHPB$Abundance)) %>%
arrange(desc (perc_read))
## # A tibble: 33 × 2
## Family perc_read
## <chr> <dbl>
## 1 Vibrionaceae 0.361
## 2 Burkholderiaceae 0.237
## 3 Sphingomonadaceae 0.123
## 4 Streptococcaceae 0.0849
## 5 Lactobacillaceae 0.0810
## 6 Halomonadaceae 0.0459
## 7 Comamonadaceae 0.0151
## 8 Moraxellaceae 0.00960
## 9 Rhizobiaceae 0.00898
## 10 Alcaligenaceae 0.00766
## # ℹ 23 more rows
# For Human sample
human_abs_PHPB %>%
group_by(Family) %>%
summarise(perc_read = sum(Abundance)/sum(human_abs_PHPB$Abundance)) %>%
arrange(desc (perc_read))
## # A tibble: 55 × 2
## Family perc_read
## <chr> <dbl>
## 1 Halomonadaceae 0.151
## 2 Moraxellaceae 0.133
## 3 Beijerinckiaceae 0.111
## 4 Burkholderiaceae 0.110
## 5 Oxalobacteraceae 0.104
## 6 Bacillaceae 0.0924
## 7 Xanthobacteraceae 0.0719
## 8 Pseudomonadaceae 0.0595
## 9 Vibrionaceae 0.0408
## 10 Rhizobiaceae 0.0276
## # ℹ 45 more rows
#Extract list of Family occur in sample types
list_fam_all <- Reduce(intersect, list(animal_abs_PHPB$Family, envi_abs_PHPB$Family, human_abs_PHPB$Family)) #Species occur in all biosample types
list_fam_EA <- setdiff(intersect(envi_abs_PHPB$Family,animal_abs_PHPB$Family), list_fam_all) #Species occur in both Environment and animal but not in Human
list_fam_EH <- setdiff(intersect(envi_abs_PHPB$Family,human_abs_PHPB$Family), list_fam_all) #Species occur in both Environment and human but not in animal
list_fam_AH <- setdiff(intersect(animal_abs_PHPB$Family,human_abs_PHPB$Family), list_fam_all) #Species occur in both human and animal but not in environment
list_fam_A <- Reduce(setdiff, list(animal_abs_PHPB$Family, list_fam_EA, list_fam_AH, list_fam_all)) #Species occur in only Animal
list_fam_A
## [1] "Peptostreptococcaceae" "Porphyromonadaceae" "Cellvibrionaceae"
## [4] "Brevibacteriaceae" "Planococcaceae"
list_fam_H <- Reduce(setdiff, list(human_abs_PHPB$Family, list_fam_EH, list_fam_AH, list_fam_all)) #Species occur in only Human
list_fam_H
## [1] "Rhodocyclaceae" "Methylophilaceae" "Azospirillaceae"
## [4] "Aerococcaceae" "Selenomonadaceae" "Dietziaceae"
## [7] "Microbacteriaceae" "Ruminococcaceae" "Yersiniaceae"
## [10] "Gemellaceae" "Rickettsiaceae" "Flavobacteriaceae"
## [13] "Streptomycetaceae" "Morganellaceae"
list_fam_E <- Reduce(setdiff, list(envi_abs_PHPB$Family, list_fam_EH, list_fam_EA, list_fam_all)) #Species occur in only Environment
list_fam_E
## [1] "Aeromonadaceae" "Clostridiaceae" "Anaerovoracaceae"
Calculation of % Family / richness in pathobiome
tax <- as.data.frame(Nem_hu_unrarefied@tax_table)
tax <- subset(tax, Family %in% unique(Melted_Nem_hu_unrarefied_abs$Family))
#Calculate % of Family in all samples
tax %>%
mutate(count =1) %>%
group_by(Family) %>%
summarise(perc_read = sum(count)/nrow(tax)) %>%
arrange(desc (perc_read))
## # A tibble: 65 × 2
## Family perc_read
## <chr> <dbl>
## 1 Pseudomonadaceae 0.0706
## 2 Moraxellaceae 0.0570
## 3 Comamonadaceae 0.0529
## 4 Corynebacteriaceae 0.0529
## 5 Lactobacillaceae 0.0488
## 6 Vibrionaceae 0.0407
## 7 Prevotellaceae 0.0366
## 8 Bacillaceae 0.0312
## 9 Micrococcaceae 0.0299
## 10 Staphylococcaceae 0.0299
## # ℹ 55 more rows
Making data for PCoA (with out control samples for statistic test)
Nem_hu_unrarefied_PCOA <- phyloseq::subset_samples(Nem_hu_unrarefied, !sample_names(Nem_hu_unrarefied) %in% c("Control.1", "Control.2","Control.3", "Control.4", "Control.5", "Control.6", "Control.7"))
otu.bray3 <- vegan::vegdist(Nem_hu_unrarefied_PCOA@otu_table@.Data, diag = TRUE, method = "bray") # computes dissimilarity indices (bray)
pcoa.sub3 <- ape::pcoa(otu.bray3) #computes principal coordinate decomposition
pcoa_coord3 <- pcoa.sub3$vectors[,1:2] # Extract coordination of PCOA plot
samp_data_P <- data.frame(phyloseq::sample_data(Nem_hu_unrarefied_PCOA)) #remove control sample
hull_PHPB3 <- cbind(pcoa_coord3, samp_data_P) # Combine metadata with PCOA coordination
Making data for PCoA (with control samples for PCOA plot)
otu.bray3.5 <- vegan::vegdist(Nem_hu_unrarefied@otu_table@.Data, diag = TRUE, method = "bray") # computes dissimilarity indices (bray)
pcoa.sub3.5 <- ape::pcoa(otu.bray3.5) #computes principal coordinate decomposition
pcoa_coord3.5 <- pcoa.sub3.5$vectors[,1:2] # Extract coordination of PCOA plot
samp_data_P <- data.frame(phyloseq::sample_data(Nem_hu_unrarefied)) #remove control sample
hull_PHPB3.5 <- cbind(pcoa_coord3.5, samp_data_P) # Combine metadata with PCOA coordination
Compare distance between biosamples with Wilcoxon rank sum test
# Extract sample names
animal_samp_data <- subset(samp_data_P, Biosample == "Animals")
envi_samp_data <- subset(samp_data_P, Biosample == "Environment")
human_samp_data <- subset(samp_data_P, Biosample == "Human")
# Melting distance matrix
dm3_long <- as.matrix(otu.bray3) %>%
as.data.frame() %>%
tibble::rownames_to_column("Sample.name") %>%
tidyr::gather(key = "Sample.name2", value = "Distance", -Sample.name)
# Characterize distances
dm3_long_biosample<- dm3_long %>%
dplyr::mutate(Biosample_1=dplyr::case_when(
Sample.name %in% rownames(animal_samp_data) ~ "Animals",
Sample.name %in% rownames(envi_samp_data) ~ "Environment",
Sample.name %in% rownames(human_samp_data) ~ "Human")) %>% # Adding Biosample information
dplyr::mutate(Biosample_2=dplyr::case_when(
Sample.name2 %in% rownames(animal_samp_data) ~ "Animals",
Sample.name2 %in% rownames(envi_samp_data) ~ "Environment",
Sample.name2 %in% rownames(human_samp_data) ~ "Human")) %>% # Adding Biosample information
dplyr::mutate(Groups = paste(Biosample_1, Biosample_2, sep = "_to_")) %>% # Classify distances with Biosample types
subset(Groups == c("Animals_to_Environment", "Human_to_Environment", "Animals_to_Human")) # Remove duplicate (ex: Animal_to_Human and Human_to_Animal)
## Warning in Groups == c("Animals_to_Environment", "Human_to_Environment", :
## longer object length is not a multiple of shorter object length
#Count number of observation
sum(dm3_long_biosample$Groups == "Animals_to_Environment")
## [1] 174
sum(dm3_long_biosample$Groups == "Animals_to_Human")
## [1] 1450
sum(dm3_long_biosample$Groups == "Human_to_Environment")
## [1] 225
#Wilcoxon rank sum test
ggpubr::compare_means(Distance ~ Groups,
dm3_long_biosample,
method = "wilcox.test")
## # A tibble: 3 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 Distance Animals_to_Environ… Human… 1.20e- 4 2.4 e- 4 0.00012 *** Wilco…
## 2 Distance Animals_to_Environ… Anima… 1.98e-15 5.90e-15 2e-15 **** Wilco…
## 3 Distance Human_to_Environme… Anima… 5.40e- 4 5.4 e- 4 0.00054 *** Wilco…
#plotting
p6 <- ggplot(dm3_long_biosample, aes(x = Groups, y = Distance, colour = Groups)) +
theme_bw() +
geom_boxplot(aes(colour = Groups), alpha=0.1, outlier.colour = NA) +
theme(axis.title.x = element_blank(),
legend.position = "none",
legend.title = element_blank(),
legend.text = element_text(size = 13),
strip.text = element_text(size = 18),
axis.title.y = element_text(size = 20),
axis.text.y = element_text(size=20),
axis.text.x = element_blank()) +
labs(y = "Bray-Curtis Distance") +
scale_y_continuous(limits = c(0.95, 1.005))
p6
## Warning: Removed 226 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
ggsave("/Users/qcvp/R/Plots/PHPB_Distance_BetweenBiosample_ASV.png", plot = p6, width = 11, height = 6)
## Warning: Removed 226 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
Compare distance between Animal and human from same and different farms with Wilcoxon rank sum test
# Extract sample farm information in farms with borh animal and human samples
farm3_samp_data <- subset(samp_data_P, Station == "Farm_03")
farm4_samp_data <- subset(samp_data_P, Station == "Farm_04")
farm5_samp_data <- subset(samp_data_P, Station == "Farm_05")
farm6_samp_data <- subset(samp_data_P, Station == "Farm_06")
farm10_samp_data <- subset(samp_data_P, Station == "Farm_10")
# Characterize distances
dm3_long_farm<- dm3_long_biosample %>%
subset(Groups == c("Animals_to_Human")) %>% #keep only animals to human distances
dplyr::mutate(farm_1=dplyr::case_when(
Sample.name %in% rownames(farm3_samp_data) ~ "Farm3",
Sample.name %in% rownames(farm4_samp_data) ~ "Farm4",
Sample.name %in% rownames(farm5_samp_data) ~ "Farm5",
Sample.name %in% rownames(farm6_samp_data) ~ "Farm6",
Sample.name %in% rownames(farm10_samp_data) ~ "Farm10")) %>% #Adding Farm infomation
dplyr::mutate(farm_2=dplyr::case_when(
Sample.name2 %in% rownames(farm3_samp_data) ~ "Farm3",
Sample.name2 %in% rownames(farm4_samp_data) ~ "Farm4",
Sample.name2 %in% rownames(farm5_samp_data) ~ "Farm5",
Sample.name2 %in% rownames(farm6_samp_data) ~ "Farm6",
Sample.name2 %in% rownames(farm10_samp_data) ~ "Farm10")) %>% #Adding Farm infomation
dplyr::mutate(Groups = ifelse(farm_1 == farm_2, "Samefarm", "Difffarm")) %>% # Classify distances between human and animal from same or different farms
subset(Groups != "NA") # Remove unclassified distances (belong to other farms)
#Count number of observation
sum(dm3_long_farm$Groups == "Samefarm")
## [1] 95
sum(dm3_long_farm$Groups == "Difffarm")
## [1] 365
#Wilcoxon rank sum test
ggpubr::compare_means(Distance ~ Groups,
dm3_long_farm,
method = "wilcox.test")
## # A tibble: 1 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 Distance Difffarm Samefarm 0.674 0.67 0.67 ns Wilcoxon
#plotting
p7 <- ggplot(dm3_long_farm, aes(x = Groups, y = Distance, colour = Groups)) +
theme_bw() +
geom_boxplot(aes(colour = Groups), alpha=0.1, outlier.colour = NA) +
theme(axis.title.x = element_blank(),
legend.position = "none",
legend.title = element_blank(),
legend.text = element_text(size = 13),
strip.text = element_text(size = 18),
axis.title.y = element_text(size = 20),
axis.text.y = element_text(size=20),
axis.text.x = element_blank()) +
labs(y = "Bray-Curtis Distance") +
scale_y_continuous(limits = c(0.98, 1.005))
p7
## Warning: Removed 114 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
ggsave("/Users/qcvp/R/Plots/PHPB_Distance_Same_DiffFarm_ASV.png", plot = p7, width = 11, height = 6)
## Warning: Removed 114 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
PERMANOVA Calculation
#PERMANOVA
vegan::adonis2(otu.bray3 ~ Biosample, data = hull_PHPB3, permutations = 1000, method = "bray")
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 1000
##
## vegan::adonis2(formula = otu.bray3 ~ Biosample, data = hull_PHPB3, permutations = 1000, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 2 8.387 0.13423 10.775 0.000999 ***
## Residual 139 54.096 0.86577
## Total 141 62.483 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vegan::adonis2(otu.bray3 ~ Date, data = hull_PHPB3, permutations = 1000, method = "bray")
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 1000
##
## vegan::adonis2(formula = otu.bray3 ~ Date, data = hull_PHPB3, permutations = 1000, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 2 1.883 0.03013 2.1591 0.000999 ***
## Residual 139 60.601 0.96987
## Total 141 62.483 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Pairwise PERMANOVA
pairwise.adonis2(otu.bray3~ Biosample,data=hull_PHPB3, nperms = 1000)
## $parent_call
## [1] "otu.bray3 ~ Biosample , strata = Null , permutations 999"
##
## $Environment_vs_Animals
## Df SumOfSqs R2 F Pr(>F)
## Model 1 2.6992 0.09406 6.7486 0.001 ***
## Residual 65 25.9975 0.90594
## Total 66 28.6967 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Environment_vs_Human
## Df SumOfSqs R2 F Pr(>F)
## Model 1 3.732 0.11355 10.504 0.001 ***
## Residual 82 29.136 0.88645
## Total 83 32.868 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Animals_vs_Human
## Df SumOfSqs R2 F Pr(>F)
## Model 1 5.238 0.08985 12.932 0.001 ***
## Residual 131 53.059 0.91015
## Total 132 58.297 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## attr(,"class")
## [1] "pwadstrata" "list"
pairwise.adonis2(otu.bray3~ Date,data=hull_PHPB3, nperms = 1000)
## $parent_call
## [1] "otu.bray3 ~ Date , strata = Null , permutations 999"
##
## $Nov_2022_vs_Jul_2023
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.771 0.01777 1.7185 0.022 *
## Residual 95 42.640 0.98223
## Total 96 43.411 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Nov_2022_vs_Dec_2023
## Df SumOfSqs R2 F Pr(>F)
## Model 1 1.012 0.02366 2.351 0.003 **
## Residual 97 41.752 0.97634
## Total 98 42.764 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Jul_2023_vs_Dec_2023
## Df SumOfSqs R2 F Pr(>F)
## Model 1 1.049 0.02771 2.4512 0.003 **
## Residual 86 36.809 0.97229
## Total 87 37.858 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## attr(,"class")
## [1] "pwadstrata" "list"
PCoA plot
hull_PHPB3.5$Biosample = factor(hull_PHPB3.5$Biosample , levels = c("Environment", "Animals", "Human","Control"))
hull_PHPB3.5$Date = factor(hull_PHPB3.5$Date , levels = c("Nov_2022", "Jul_2023", "Dec_2023","Control"))
p8 <- ggplot() +
geom_hline(yintercept=0, colour="lightgrey", linetype = 2) +
geom_vline(xintercept=0, colour="lightgrey", linetype = 2) +
geom_point(data = hull_PHPB3.5, aes(x=Axis.1, y=Axis.2, color = Biosample, shape = Date), alpha = 0.7, size = 5) +
geom_point(data = hull_PHPB3.5, aes(x=Axis.1, y=Axis.2), colour = "white", size = 0.7) +
xlab(paste("PCo1 (", round(pcoa.sub3$values$Relative_eig[1]*100, 1), "%)")) +
ylab(paste("PCo2 (", round(pcoa.sub3$values$Relative_eig[2]*100, 1), "%)")) +
theme_bw() +
scale_shape_manual(values = c(16, 17, 15, 18)) +
coord_equal() +
stat_ellipse(data = hull_PHPB3,aes(x=Axis.1, y=Axis.2, group = Biosample, color = Biosample), level = 0.95) +
labs(color = "Sample types") +
theme(axis.title.x = element_text(size=14), # remove x-axis labels
axis.title.y = element_text(size=14), # remove y-axis labels
legend.position = "right",
panel.background = element_blank(),
panel.grid.major = element_blank(), #remove major-grid labels
panel.grid.minor = element_blank(), #remove minor-grid labels
plot.background = element_blank()) +
scale_colour_manual(breaks =c("Human","Animals","Environment","Control"),
values = c("red","orange","blue","green"))
p8
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
ggsave("/Users/qcvp/R/Plots/PHPB_ASV_PCOA_plots.png", plot = p8)
## Saving 7 x 5 in image
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
Variability comparision
#Biosample
bdc5 <- vegan::betadisper(otu.bray3,hull_PHPB3$Biosample)
vegan::permutest(bdc5, pairwise=TRUE)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 2 0.88966 0.44483 65.018 999 0.001 ***
## Residuals 139 0.95099 0.00684
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
## Animals Environment Human
## Animals 1.0000e-03 0.002
## Environment 1.2299e-15 0.001
## Human 6.9781e-04 2.2587e-14
#Date
bdc6 <- vegan::betadisper(otu.bray3,hull_PHPB3$Date)
vegan::permutest(bdc6, pairwise=TRUE)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 2 0.03575 0.0178746 6.5276 999 0.005 **
## Residuals 139 0.38063 0.0027383
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
## Dec_2023 Jul_2023 Nov_2022
## Dec_2023 0.0150000 0.006
## Jul_2023 0.0123828 0.804
## Nov_2022 0.0031422 0.8044831
Making data for PCoA (without control samples for statistical tests)
physeq_Nem_long_genus2 <- phyloseq::tax_glom(Nem_hu_unrarefied, "Species") #extract genus taxonomy
physeq_Nem_long_genus2_PCOA <- phyloseq::subset_samples(physeq_Nem_long_genus2, !sample_names(physeq_Nem_long_genus2) %in% c("Control.1", "Control.2","Control.3", "Control.4", "Control.5", "Control.6", "Control.7"))
otu.bray4 <- vegan::vegdist(physeq_Nem_long_genus2_PCOA@otu_table@.Data, method = "bray", na.rm = TRUE) #Calculate distace matrix
pcoa.sub4 <- ape::pcoa(otu.bray4)
pcoa_coord4 <- pcoa.sub4$vectors[,1:2] # Extract PCOA coordination
samp_data_P <- data.frame(phyloseq::sample_data(physeq_Nem_long_genus2_PCOA))
hull_PHPB4 <- cbind(pcoa_coord4, samp_data_P) #Combind metadata and PCOA coordination
Making data for PCoA (with control samples for PCOA plot)
otu.bray4.5 <- vegan::vegdist(physeq_Nem_long_genus2@otu_table@.Data, method = "bray", na.rm = TRUE) #Calculate distace matrix
pcoa.sub4.5 <- ape::pcoa(otu.bray4.5)
pcoa_coord4.5 <- pcoa.sub4.5$vectors[,1:2] # Extract PCOA coordination
samp_data_P <- data.frame(phyloseq::sample_data(physeq_Nem_long_genus2))
hull_PHPB4.5 <- cbind(pcoa_coord4.5, samp_data_P) #Combind metadata and PCOA coordination
Compare distance between biosamples with Wilcoxon rank sum test
#Melting distance matrix
dm4_long <- as.matrix(otu.bray4) %>%
as.data.frame() %>%
tibble::rownames_to_column("Sample.name") %>%
tidyr::gather(key = "Sample.name2", value = "Distance", -Sample.name)
#Characterize distances
dm4_long_biosample<- dm4_long %>%
dplyr::mutate(Biosample_1=dplyr::case_when(
Sample.name %in% rownames(animal_samp_data) ~ "Animals",
Sample.name %in% rownames(envi_samp_data) ~ "Environment",
Sample.name %in% rownames(human_samp_data) ~ "Human")) %>% # Adding Biosample information
dplyr::mutate(Biosample_2=dplyr::case_when(
Sample.name2 %in% rownames(animal_samp_data) ~ "Animals",
Sample.name2 %in% rownames(envi_samp_data) ~ "Environment",
Sample.name2 %in% rownames(human_samp_data) ~ "Human")) %>% # Adding Biosample information
dplyr::mutate(Groups = paste(Biosample_1, Biosample_2, sep = "_to_")) %>% # Classify distances with Biosample types
subset(Groups == c("Animals_to_Environment", "Human_to_Environment", "Animals_to_Human")) # Remove duplicate (ex: Animal_to_Human and Human_to_Animal)
## Warning in Groups == c("Animals_to_Environment", "Human_to_Environment", :
## longer object length is not a multiple of shorter object length
#Count number of observation
sum(dm4_long_biosample$Groups == "Animals_to_Environment")
## [1] 174
sum(dm4_long_biosample$Groups == "Animals_to_Human")
## [1] 1450
sum(dm4_long_biosample$Groups == "Human_to_Environment")
## [1] 225
#Wilcoxon rank sum test
ggpubr::compare_means(Distance ~ Groups,
dm4_long_biosample,
method = "wilcox.test")
## # A tibble: 3 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 Distance Animals_to_Environ… Human… 2.90e- 3 2.9 e- 3 0.0029 ** Wilco…
## 2 Distance Animals_to_Environ… Anima… 5.36e-16 1.10e-15 5.4e-16 **** Wilco…
## 3 Distance Human_to_Environme… Anima… 2.88e-41 8.60e-41 < 2e-16 **** Wilco…
#Plotting
p9 <- ggplot(dm4_long_biosample, aes(x = Groups, y = Distance, colour = Groups)) +
theme_bw() +
geom_boxplot(aes(colour = Groups), alpha=0.1, outlier.colour = NA) +
theme(axis.title.x = element_blank(),
legend.position = "none",
legend.title = element_blank(),
legend.text = element_text(size = 13),
strip.text = element_text(size = 13),
axis.title.y = element_text(size = 15),
axis.text.y = element_text(size=13),
axis.text.x = element_text(size = 10, angle = 45, hjust = 1)) +
labs(y = "Bray-Curtis Distance") +
scale_colour_manual(breaks =c("Animals_to_Environment","Animals_to_Human","Human_to_Environment"),
values = c("gray","gray","gray"))
scale_y_continuous(limits = c(0.95, 1.005))
## <ScaleContinuousPosition>
## Range:
## Limits: 0.95 -- 1
p9
Compare distance between Animal and human from same and different farms with Wilcoxon rank sum test
# Characterize distances
dm4_long_farm<- dm4_long_biosample %>%
subset(Groups == c("Animals_to_Human")) %>% #kept only animals to human distances
dplyr::mutate(farm_1=dplyr::case_when(
Sample.name %in% rownames(farm3_samp_data) ~ "Farm3",
Sample.name %in% rownames(farm4_samp_data) ~ "Farm4",
Sample.name %in% rownames(farm5_samp_data) ~ "Farm5",
Sample.name %in% rownames(farm6_samp_data) ~ "Farm6",
Sample.name %in% rownames(farm10_samp_data) ~ "Farm10")) %>%
dplyr::mutate(farm_2=dplyr::case_when(
Sample.name2 %in% rownames(farm3_samp_data) ~ "Farm3",
Sample.name2 %in% rownames(farm4_samp_data) ~ "Farm4",
Sample.name2 %in% rownames(farm5_samp_data) ~ "Farm5",
Sample.name2 %in% rownames(farm6_samp_data) ~ "Farm6",
Sample.name2 %in% rownames(farm10_samp_data) ~ "Farm10")) %>%
dplyr::mutate(Groups = ifelse(farm_1 == farm_2, "Samefarm", "Difffarm")) %>% # Classify distances between human and animal from same or different farms
subset(Groups != "NA") # Remove unclassified distances (belong to other farms)
#Wilcoxon rank sum test
ggpubr::compare_means(Distance ~ Groups,
dm4_long_farm,
method = "wilcox.test")
## # A tibble: 1 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 Distance Difffarm Samefarm 0.720 0.72 0.72 ns Wilcoxon
#Count number of observation
sum(dm4_long_farm$Groups == "Samefarm")
## [1] 95
sum(dm4_long_farm$Groups == "Difffarm")
## [1] 365
#Plotting
p10 <- ggplot(dm4_long_farm, aes(x = Groups, y = Distance, colour = Groups)) +
theme_bw() +
geom_boxplot(aes(colour = Groups), alpha=0.1, outlier.colour = NA) +
theme(axis.title.x = element_blank(),
legend.position = "none",
legend.title = element_blank(),
legend.text = element_text(size = 13),
strip.text = element_text(size = 13),
axis.title.y = element_text(size = 15),
axis.text.y = element_text(size=13),
axis.text.x = element_blank()) +
labs(y = "Bray-Curtis Distance") +
scale_y_continuous(limits = c(0.98, 1.005))
p10
## Warning: Removed 151 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Combine 2 distances plot
g5 <- grid.arrange(p9, p10,nrow = 1)
## Warning: Removed 151 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
PERMANOVA Calculation
vegan::adonis2(otu.bray4 ~ Biosample, data = hull_PHPB4, permutations = 1000, method = "bray")
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 1000
##
## vegan::adonis2(formula = otu.bray4 ~ Biosample, data = hull_PHPB4, permutations = 1000, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 2 8.180 0.1331 10.67 0.000999 ***
## Residual 139 53.279 0.8669
## Total 141 61.458 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vegan::adonis2(otu.bray4 ~ Date, data = hull_PHPB4, permutations = 1000, method = "bray")
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 1000
##
## vegan::adonis2(formula = otu.bray4 ~ Date, data = hull_PHPB4, permutations = 1000, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 2 1.911 0.0311 2.2309 0.000999 ***
## Residual 139 59.547 0.9689
## Total 141 61.458 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Pairwise PERMANOVA
pairwise.adonis2(otu.bray4~ Biosample,data=hull_PHPB4, nperms = 1000)
## $parent_call
## [1] "otu.bray4 ~ Biosample , strata = Null , permutations 999"
##
## $Environment_vs_Animals
## Df SumOfSqs R2 F Pr(>F)
## Model 1 2.644 0.09405 6.7476 0.001 ***
## Residual 65 25.470 0.90595
## Total 66 28.114 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Environment_vs_Human
## Df SumOfSqs R2 F Pr(>F)
## Model 1 3.296 0.10257 9.3722 0.001 ***
## Residual 82 28.837 0.89743
## Total 83 32.133 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Animals_vs_Human
## Df SumOfSqs R2 F Pr(>F)
## Model 1 5.32 0.09241 13.338 0.001 ***
## Residual 131 52.25 0.90759
## Total 132 57.57 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## attr(,"class")
## [1] "pwadstrata" "list"
pairwise.adonis2(otu.bray4~ Date,data=hull_PHPB4, nperms = 1000)
## $parent_call
## [1] "otu.bray4 ~ Date , strata = Null , permutations 999"
##
## $Nov_2022_vs_Jul_2023
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.802 0.01877 1.8176 0.017 *
## Residual 95 41.935 0.98123
## Total 96 42.737 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Nov_2022_vs_Dec_2023
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.972 0.02314 2.2981 0.003 **
## Residual 97 41.015 0.97686
## Total 98 41.987 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Jul_2023_vs_Dec_2023
## Df SumOfSqs R2 F Pr(>F)
## Model 1 1.107 0.02971 2.6334 0.002 **
## Residual 86 36.144 0.97029
## Total 87 37.251 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## attr(,"class")
## [1] "pwadstrata" "list"
PCoA plot
hull_PHPB4.5$Biosample = factor(hull_PHPB4.5$Biosample , levels = c("Environment", "Animals", "Human","Control"))
hull_PHPB4.5$Date = factor(hull_PHPB4.5$Date , levels = c("Nov_2022", "Jul_2023", "Dec_2023","Control"))
p11 <- ggplot() +
geom_hline(yintercept=0, colour="lightgrey", linetype = 2) +
geom_vline(xintercept=0, colour="lightgrey", linetype = 2) +
geom_point(data = hull_PHPB4.5, aes(x=Axis.1, y=Axis.2, color = Biosample, shape = Date), alpha = 0.7, size = 5) +
geom_point(data = hull_PHPB4.5, aes(x=Axis.1, y=Axis.2), colour = "white", size = 0.7) +
xlab(paste("PCo1 (", round(pcoa.sub4$values$Relative_eig[1]*100, 1), "%)")) +
ylab(paste("PCo2 (", round(pcoa.sub4$values$Relative_eig[2]*100, 1), "%)")) +
theme_bw() +
coord_equal() +
labs(color = "Sample types") +
stat_ellipse(data = hull_PHPB3,aes(x=Axis.1, y=Axis.2, group = Biosample, color = Biosample), level = 0.95) +
scale_colour_brewer(palette="Set1") +
scale_shape_manual(values = c(16, 17, 15, 18)) +
theme(axis.title.x = element_text(size=14), # remove x-axis labels
legend.position = "none",
axis.title.y = element_text(size=14), # remove y-axis labels
panel.background = element_blank(),
panel.grid.major = element_blank(), #remove major-grid labels
panel.grid.minor = element_blank(), #remove minor-grid labels
plot.background = element_blank()) +
scale_colour_manual(breaks =c("Human","Animals","Environment","Control"),
values = c("red","orange","blue","green"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p11
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
ggsave("/Users/qcvp/R/Plots/PHPB_Taxonomy_PCOA_plots.png", plot = p11)
## Saving 7 x 5 in image
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
Variability comparision
#Biosample
bdc7 <- vegan::betadisper(otu.bray4,hull_PHPB4$Biosample)
vegan::permutest(bdc7, pairwise=TRUE)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 2 0.85642 0.42821 61.463 999 0.001 ***
## Residuals 139 0.96841 0.00697
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
## Animals Environment Human
## Animals 1.0000e-03 0.003
## Environment 5.2901e-15 0.001
## Human 2.2440e-03 2.7783e-14
#Date
bdc8 <- vegan::betadisper(otu.bray4,hull_PHPB4$Date)
vegan::permutest(bdc8, pairwise=TRUE)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 2 0.03727 0.0186328 6.232 999 0.003 **
## Residuals 139 0.41559 0.0029899
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
## Dec_2023 Jul_2023 Nov_2022
## Dec_2023 0.0080000 0.005
## Jul_2023 0.0126127 0.826
## Nov_2022 0.0039234 0.8229743
Prepare data
#Extract OTU table of only Animal and Human
sample_data <- Nem_hu_unrarefied@sam_data #Extract sample data
animal_sample_name <- row.names(animal_samp_data) #Extract name
human_sample_name <- row.names(human_samp_data) #Extract name
#Extract data from OTU table - ASV data
animal_otu_table_ASV <- subset(Nem_hu_unrarefied@otu_table@.Data, rownames(Nem_hu_unrarefied@otu_table@.Data) %in% animal_sample_name)
human_otu_table_ASV <- subset(Nem_hu_unrarefied@otu_table@.Data, rownames(Nem_hu_unrarefied@otu_table@.Data) %in% human_sample_name)
#Extract data from OTU table - taxonomy data
animal_otu_table_tax <- subset(physeq_Nem_long_genus2@otu_table@.Data, rownames(Nem_hu_unrarefied@otu_table@.Data) %in% animal_sample_name)
human_otu_table_tax <- subset(physeq_Nem_long_genus2@otu_table@.Data, rownames(Nem_hu_unrarefied@otu_table@.Data) %in% human_sample_name)
Compare distance of Human Skin vs Animal/ Human Gut vs Animal with Wilcoxon rank sum test - ASV
#Extract sample data
human_skin_sample <- subset(human_samp_data, Biosample_type =="Skin")
human_gut_sample <- subset(human_samp_data, Biosample_type =="Gut")
#Characterize distances
dm3_long_animal_human <- subset(dm3_long_biosample, Groups == "Animals_to_Human")
dm3_long_animal_skingut <- dm3_long_animal_human %>%
dplyr::mutate(biosample_type=dplyr::case_when(
Sample.name2 %in% rownames(human_skin_sample) ~ "Skin",
Sample.name2 %in% rownames(human_gut_sample) ~ "Gut")) %>%
dplyr::mutate(Groups2 = paste(Biosample_1, biosample_type, sep = "_to_"))
#Count number of observation
sum(dm3_long_animal_skingut$Groups2 == "Animals_to_Skin")
## [1] 715
sum(dm3_long_animal_skingut$Groups2 == "Animals_to_Gut")
## [1] 735
#Wilcoxon rank sum test
ggpubr::compare_means(Distance ~ Groups2,
dm3_long_animal_skingut,
method = "wilcox.test")
## # A tibble: 1 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 Distance Animals_to_Gut Animals_to… 9.50e-41 9.50e-41 <2e-16 **** Wilco…
#Plotting
p13 <- ggplot(dm3_long_animal_skingut, aes(x = Groups, y = Distance, colour = Groups)) +
theme_bw() +
geom_boxplot(aes(colour = Groups2), alpha=0.1, outlier.colour = NA) +
theme(axis.title.x = element_blank(),
legend.position = "none",
legend.title = element_blank(),
legend.text = element_text(size = 13),
strip.text = element_text(size = 13),
axis.title.y = element_text(size = 15),
axis.text.y = element_text(size=13),
axis.text.x = element_blank()) +
labs(y = "Bray-Curtis Distance") +
scale_y_continuous(limits = c(0.95, 1.005))
p13
## Warning: Removed 148 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
Compare distance of Lobster vs Human/ Fish vs Human with Wilcoxon rank sum test - ASV
#Extract sample data
animal_lobster_sample <- subset(animal_samp_data, Host_subtype =="Lobster")
animal_fish_sample <- subset(animal_samp_data, Host_subtype =="Fish")
#Characterize distances
dm3_long_animal_human <- subset(dm3_long_biosample, Groups == "Animals_to_Human")
dm3_long_lobfish_human <- dm3_long_animal_human %>%
dplyr::mutate(host_type=dplyr::case_when(
Sample.name %in% rownames(animal_lobster_sample) ~ "Lobster",
Sample.name %in% rownames(animal_fish_sample) ~ "Fish")) %>%
dplyr::mutate(Groups2 = paste(Biosample_2, host_type, sep = "_to_"))
#Count number of observation
sum(dm3_long_lobfish_human $Groups2 == "Human_to_Lobster")
## [1] 475
sum(dm3_long_lobfish_human $Groups2 == "Human_to_Fish")
## [1] 975
#Wilcoxon rank sum test
ggpubr::compare_means(Distance ~ Groups2,
dm3_long_lobfish_human,
method = "wilcox.test")
## # A tibble: 1 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 Distance Human_to_Fish Human_to_Lo… 3.86e-11 3.90e-11 3.9e-11 **** Wilco…
#Plotting
p14 <- ggplot(dm3_long_lobfish_human, aes(x = Groups, y = Distance, colour = Groups)) +
theme_bw() +
geom_boxplot(aes(colour = Groups2), alpha=0.1, outlier.colour = NA) +
theme(axis.title.x = element_blank(),
legend.position = "none",
legend.title = element_blank(),
legend.text = element_text(size = 13),
strip.text = element_text(size = 13),
axis.title.y = element_text(size = 15),
axis.text.y = element_text(size=13),
axis.text.x = element_blank()) +
labs(y = "Bray-Curtis Distance") +
scale_y_continuous(limits = c(0.95, 1.005))
p14
## Warning: Removed 148 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
Compare distance of Human Skin vs Animal/ Human Gut vs Animal with Wilcoxon rank sum test - Taxonomy data
#Extract sample data
human_skin_sample <- subset(human_samp_data, Biosample_type =="Skin")
human_gut_sample <- subset(human_samp_data, Biosample_type =="Gut")
#Characterize distances
dm4_long_animal_human <- subset(dm4_long_biosample, Groups == "Animals_to_Human")
dm4_long_animal_skingut <- dm4_long_animal_human %>%
dplyr::mutate(biosample_type=dplyr::case_when(
Sample.name2 %in% rownames(human_skin_sample) ~ "Skin",
Sample.name2 %in% rownames(human_gut_sample) ~ "Gut")) %>%
dplyr::mutate(Groups2 = paste(Biosample_1, biosample_type, sep = "_to_"))
#Count number of observation
sum(dm4_long_animal_skingut$Groups2 == "Animals_to_Skin")
## [1] 715
sum(dm4_long_animal_skingut$Groups2 == "Animals_to_Gut")
## [1] 735
#Wilcoxon rank sum test
ggpubr::compare_means(Distance ~ Groups2,
dm4_long_animal_skingut,
method = "wilcox.test")
## # A tibble: 1 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 Distance Animals_to_Gut Animals_to_Sk… 2.00e-34 2e-34 <2e-16 **** Wilco…
#Plotting
p15 <- ggplot(dm4_long_animal_skingut, aes(x = Groups, y = Distance, colour = Groups)) +
theme_bw() +
geom_boxplot(aes(colour = Groups2), alpha=0.1, outlier.colour = NA) +
theme(axis.title.x = element_blank(),
legend.position = "none",
legend.title = element_blank(),
legend.text = element_text(size = 13),
strip.text = element_text(size = 13),
axis.title.y = element_text(size = 15),
axis.text.y = element_text(size=13),
axis.text.x = element_blank()) +
labs(y = "Bray-Curtis Distance") +
scale_y_continuous(limits = c(0.95, 1.005))
p15
## Warning: Removed 192 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
Compare distance of Lobster vs Human/ Fish vs Human with Wilcoxon rank sum test - Taxonomy data
#Extract sample data
animal_lobster_sample <- subset(animal_samp_data, Host_subtype =="Lobster")
animal_fish_sample <- subset(animal_samp_data, Host_subtype =="Fish")
#Characterize distances
dm4_long_animal_human <- subset(dm4_long_biosample, Groups == "Animals_to_Human")
dm4_long_lobfish_human <- dm4_long_animal_human %>%
dplyr::mutate(host_type=dplyr::case_when(
Sample.name %in% rownames(animal_lobster_sample) ~ "Lobster",
Sample.name %in% rownames(animal_fish_sample) ~ "Fish")) %>%
dplyr::mutate(Groups2 = paste(Biosample_2, host_type, sep = "_to_"))
#Count number of observation
sum(dm4_long_lobfish_human $Groups == "Human_to_Lobster")
## [1] 0
sum(dm4_long_lobfish_human $Groups == "Human_to_Fish")
## [1] 0
#Wilcoxon rank sum test
ggpubr::compare_means(Distance ~ Groups2,
dm4_long_lobfish_human,
method = "wilcox.test")
## # A tibble: 1 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 Distance Human_to_Fish Human_to_Lob… 2.81e-26 2.8e-26 <2e-16 **** Wilco…
#Plotting
p16 <- ggplot(dm4_long_lobfish_human, aes(x = Groups, y = Distance, colour = Groups)) +
theme_bw() +
geom_boxplot(aes(colour = Groups2), alpha=0.1, outlier.colour = NA) +
theme(axis.title.x = element_blank(),
legend.position = "none",
legend.title = element_blank(),
legend.text = element_text(size = 13),
strip.text = element_text(size = 13),
axis.title.y = element_text(size = 15),
axis.text.y = element_text(size=13),
axis.text.x = element_blank()) +
labs(y = "Bray-Curtis Distance") +
scale_y_continuous(limits = c(0.95, 1.005))
p16
## Warning: Removed 192 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
Calculate distances
#ASV data - Animal
otu.bray5 <- vegan::vegdist(animal_otu_table_ASV, diag = TRUE, method = "bray") # computes dissimilarity indices (bray)
pcoa.sub5 <- ape::pcoa(otu.bray5) #computes principal coordinate decomposition
pcoa_coord5 <- pcoa.sub5$vectors[,1:2] # Extract coordination of PCOA plot
hull_PHPB5 <- cbind(pcoa_coord5, animal_samp_data) # Combine metadata with PCOA coordination
#ASV data - Human
otu.bray6 <- vegan::vegdist(human_otu_table_ASV, diag = TRUE, method = "bray") # computes dissimilarity indices (bray)
pcoa.sub6 <- ape::pcoa(otu.bray6) #computes principal coordinate decomposition
pcoa_coord6 <- pcoa.sub6$vectors[,1:2] # Extract coordination of PCOA plot
hull_PHPB6 <- cbind(pcoa_coord6, human_samp_data) # Combine metadata with PCOA coordination
#Taxonomy data - Animal
otu.bray7 <- vegan::vegdist(animal_otu_table_tax, diag = TRUE, method = "bray") # computes dissimilarity indices (bray)
pcoa.sub7 <- ape::pcoa(otu.bray7) #computes principal coordinate decomposition
pcoa_coord7 <- pcoa.sub7$vectors[,1:2] # Extract coordination of PCOA plot
hull_PHPB7 <- cbind(pcoa_coord7, animal_samp_data) # Combine metadata with PCOA coordination
#Taxonomy data - human
otu.bray8 <- vegan::vegdist(human_otu_table_tax, diag = TRUE, method = "bray") # computes dissimilarity indices (bray)
pcoa.sub8 <- ape::pcoa(otu.bray8) #computes principal coordinate decomposition
pcoa_coord8 <- pcoa.sub8$vectors[,1:2] # Extract coordination of PCOA plot
hull_PHPB8 <- cbind(pcoa_coord8, human_samp_data) # Combine metadata with PCOA coordination
PERMANOVA Calculation
#Prepare data
hull_PHPB3_animal <- subset(hull_PHPB3, Biosample == "Animals")
hull_PHPB3_human <- subset(hull_PHPB3, Biosample == "Human")
hull_PHPB4_animal <- subset(hull_PHPB4, Biosample == "Animals")
hull_PHPB4_human <- subset(hull_PHPB4, Biosample == "Human")
#ASV data
vegan::adonis2(otu.bray5 ~ Host_subtype, data = hull_PHPB3_animal, permutations = 1000, method = "bray") #Fish vs Lobster
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 1000
##
## vegan::adonis2(formula = otu.bray5 ~ Host_subtype, data = hull_PHPB3_animal, permutations = 1000, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 1 3.7605 0.15066 9.9336 0.000999 ***
## Residual 56 21.1999 0.84934
## Total 57 24.9604 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vegan::adonis2(otu.bray6 ~ Biosample_type, data = hull_PHPB3_human, permutations = 1000, method = "bray") #Gut vs skin
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 1000
##
## vegan::adonis2(formula = otu.bray6 ~ Biosample_type, data = hull_PHPB3_human, permutations = 1000, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 1 2.491 0.08865 7.1011 0.000999 ***
## Residual 73 25.608 0.91135
## Total 74 28.099 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Taxonomy data
vegan::adonis2(otu.bray7 ~ Host_subtype, data = hull_PHPB4_animal, permutations = 1000, method = "bray") #Fish vs Lobster
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 1000
##
## vegan::adonis2(formula = otu.bray7 ~ Host_subtype, data = hull_PHPB4_animal, permutations = 1000, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 1 3.6273 0.14841 9.759 0.000999 ***
## Residual 56 20.8142 0.85159
## Total 57 24.4414 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vegan::adonis2(otu.bray8 ~ Biosample_type, data = hull_PHPB4_human, permutations = 1000, method = "bray") #Gut vs skin
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 1000
##
## vegan::adonis2(formula = otu.bray8 ~ Biosample_type, data = hull_PHPB4_human, permutations = 1000, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 1 2.4596 0.08845 7.0833 0.000999 ***
## Residual 73 25.3489 0.91155
## Total 74 27.8086 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Plotting (Supplementary)
#ASV - Animal
pa <- ggplot() +
geom_hline(yintercept=0, colour="lightgrey", linetype = 2) +
geom_vline(xintercept=0, colour="lightgrey", linetype = 2) +
geom_point(data = hull_PHPB5, aes(x=Axis.1, y=Axis.2, color = Host_subtype), alpha = 0.7, size = 5) +
geom_point(data = hull_PHPB5, aes(x=Axis.1, y=Axis.2), colour = "white", size = 0.7) +
xlab(paste("PCo1 (", round(pcoa.sub4$values$Relative_eig[1]*100, 1), "%)")) +
ylab(paste("PCo2 (", round(pcoa.sub4$values$Relative_eig[2]*100, 1), "%)")) +
theme_bw() +
stat_ellipse(data = hull_PHPB3,aes(x=Axis.1, y=Axis.2, group = Biosample, color = Biosample), level = 0.95) +
coord_equal() +
labs(color = "Sample types") +
scale_colour_brewer(palette="Set1") +
theme(axis.title.x = element_text(size=14), # remove x-axis labels
legend.position = "right",
axis.title.y = element_text(size=14), # remove y-axis labels
panel.background = element_blank(),
panel.grid.major = element_blank(), #remove major-grid labels
panel.grid.minor = element_blank(), #remove minor-grid labels
plot.background = element_blank())
pa
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
#ASV - Human
pb <- ggplot() +
geom_hline(yintercept=0, colour="lightgrey", linetype = 2) +
geom_vline(xintercept=0, colour="lightgrey", linetype = 2) +
geom_point(data = hull_PHPB6, aes(x=Axis.1, y=Axis.2, color = Biosample_type), alpha = 0.7, size = 5) +
geom_point(data = hull_PHPB6, aes(x=Axis.1, y=Axis.2), colour = "white", size = 0.7) +
xlab(paste("PCo1 (", round(pcoa.sub4$values$Relative_eig[1]*100, 1), "%)")) +
ylab(paste("PCo2 (", round(pcoa.sub4$values$Relative_eig[2]*100, 1), "%)")) +
theme_bw() +
stat_ellipse(data = hull_PHPB3,aes(x=Axis.1, y=Axis.2, group = Biosample, color = Biosample), level = 0.95) +
coord_equal() +
labs(color = "Sample types") +
scale_colour_brewer(palette="Set2") +
theme(axis.title.x = element_text(size=14), # remove x-axis labels
legend.position = "right",
axis.title.y = element_text(size=14), # remove y-axis labels
panel.background = element_blank(),
panel.grid.major = element_blank(), #remove major-grid labels
panel.grid.minor = element_blank(), #remove minor-grid labels
plot.background = element_blank())
pb
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
#Tax - animal
pc <- ggplot() +
geom_hline(yintercept=0, colour="lightgrey", linetype = 2) +
geom_vline(xintercept=0, colour="lightgrey", linetype = 2) +
geom_point(data = hull_PHPB7, aes(x=Axis.1, y=Axis.2, color = Host_subtype), alpha = 0.7, size = 5) +
geom_point(data = hull_PHPB7, aes(x=Axis.1, y=Axis.2), colour = "white", size = 0.7) +
xlab(paste("PCo1 (", round(pcoa.sub4$values$Relative_eig[1]*100, 1), "%)")) +
ylab(paste("PCo2 (", round(pcoa.sub4$values$Relative_eig[2]*100, 1), "%)")) +
stat_ellipse(data = hull_PHPB3,aes(x=Axis.1, y=Axis.2, group = Biosample, color = Biosample), level = 0.95) +
theme_bw() +
coord_equal() +
labs(color = "Sample types") +
scale_colour_brewer(palette="Set1") +
theme(axis.title.x = element_text(size=14), # remove x-axis labels
legend.position = "right",
axis.title.y = element_text(size=14), # remove y-axis labels
panel.background = element_blank(),
panel.grid.major = element_blank(), #remove major-grid labels
panel.grid.minor = element_blank(), #remove minor-grid labels
plot.background = element_blank())
pc
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
#Tax - human
pd <- ggplot() +
geom_hline(yintercept=0, colour="lightgrey", linetype = 2) +
geom_vline(xintercept=0, colour="lightgrey", linetype = 2) +
geom_point(data = hull_PHPB8, aes(x=Axis.1, y=Axis.2, color = Biosample_type), alpha = 0.7, size = 5) +
geom_point(data = hull_PHPB8, aes(x=Axis.1, y=Axis.2), colour = "white", size = 0.7) +
xlab(paste("PCo1 (", round(pcoa.sub4$values$Relative_eig[1]*100, 1), "%)")) +
ylab(paste("PCo2 (", round(pcoa.sub4$values$Relative_eig[2]*100, 1), "%)")) +
stat_ellipse(data = hull_PHPB3,aes(x=Axis.1, y=Axis.2, group = Biosample, color = Biosample), level = 0.95) +
theme_bw() +
coord_equal() +
labs(color = "Sample types") +
scale_colour_brewer(palette="Set2") +
theme(axis.title.x = element_text(size=14), # remove x-axis labels
legend.position = "right",
axis.title.y = element_text(size=14), # remove y-axis labels
panel.background = element_blank(),
panel.grid.major = element_blank(), #remove major-grid labels
panel.grid.minor = element_blank(), #remove minor-grid labels
plot.background = element_blank())
pd
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
ggsave("/Users/qcvp/R/Plots/PHBP_PCOA_Fish_Lobster_vs_Human_Taxonomydata.png", plot = pc, width = 9, height = 9)
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
ggsave("/Users/qcvp/R/Plots/PHPB_PCOA_Skin_Gut_vs_Animals_Taxonomydata.png", plot = pd, width = 9, height = 9)
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
Dispersion vs Occurence plot
#Agglomerate taxa to specices
Nem_hu_unrarefied_species <- tax_glom(Nem_hu_unrarefied, taxrank = "Species")
Melted_Nem_hu_unrarefied_species <- Nem_hu_unrarefied_species %>%
phyloseq::psmelt() %>% # Melt to long format
filter(Abundance > 0) %>% # Filter out low abundance taxa reads
arrange(Species)
#Abundance vs occurence plot
Nem_otu_table2 <- Nem_hu_unrarefied_species@otu_table@.Data
abuoccplot_otu2 <- labdsv::abuocc(Nem_otu_table2)
## Registered S3 method overwritten by 'labdsv':
## method from
## summary.dist ade4
## Press return for next plot
## Press return for next plot
## Do you want to identify individual species? Y/N :
## Press return for next plot
## Do you want to identify individual plots? Y/N :
#Sub_objects of abuocc objects
utils::str(abuoccplot_otu2)
## List of 3
## $ spc.plt: Named int [1:149] 27 29 23 7 5 7 20 6 5 10 ...
## ..- attr(*, "names")= chr [1:149] "F1w1b" "F1w2b" "F1w3b" "Fi.10.3.GM" ...
## $ plt.spc: Named int [1:456] 97 51 51 63 76 60 9 21 19 22 ...
## ..- attr(*, "names")= chr [1:456] "ASV3" "ASV5" "ASV6" "ASV12" ...
## $ mean : Named num [1:456] 678 606 4283 447 692 ...
## ..- attr(*, "names")= chr [1:456] "ASV3" "ASV5" "ASV6" "ASV12" ...
## - attr(*, "call")= language labdsv::abuocc(comm = Nem_otu_table2)
## - attr(*, "comm")= chr [1:3698] "structure(list(ASV3 = c(6776, 6430, 4914, 0, 0, 72, 926, 2014, " "0, 3756, 700, 388, 201, 407, 1366, 0, 0, 470, 271, 0, 0, 6, 268, " "1371, 636, 36, 28, 0, 2617, 3304, 5118, 1374, 11, 0, 413, 1378, " "46, 99, 27, 542, 16, 19, 680, 347, 1821, 1934, 3368, 4010, 0, " ...
## - attr(*, "timestamp")= chr "Mon May 26 22:41:40 2025"
## - attr(*, "class")= chr "abuocc"
#Transform spc.plt vector into table in order to calculate specific richness
Nem_richness_otu2 <- data.frame(abuoccplot_otu2$spc.plt)
head(Nem_richness_otu2,1)
## abuoccplot_otu2.spc.plt
## F1w1b 27
#Occurrence of each OTU
Nem_otu_occurence2 <- data.frame(abuoccplot_otu2$plt.spc)
head(Nem_otu_occurence2,1)
## abuoccplot_otu2.plt.spc
## ASV3 97
Nem_mean.abun_otu2 <- colSums(Nem_otu_table2)/Nem_otu_occurence2
head(Nem_mean.abun_otu2,1)
## abuoccplot_otu2.plt.spc
## ASV3 678.4536
Nem_square_otu2 <- Nem_otu_table2^2
Nem_ss_otu2 <- data.frame(colSums(Nem_square_otu2))
head(Nem_ss_otu2,1)
## colSums.Nem_square_otu2.
## ASV3 225387192
#Variance calculation
Nem_variance_otu2=Nem_ss_otu2/Nem_otu_occurence2-Nem_mean.abun_otu2^2
head(Nem_variance_otu2,1)
## colSums.Nem_square_otu2.
## ASV3 1863280
Nem_disp_otu2 <- (Nem_variance_otu2/Nem_mean.abun_otu2)*Nem_otu_occurence2
head(Nem_disp_otu2,1)
## colSums.Nem_square_otu2.
## ASV3 266397.2
Extract Tax and ID names
#Tax
PHPB_bacteria_ASV_TAX <- data.frame(Nem_hu_unrarefied_species@tax_table)
ID <- rownames(PHPB_bacteria_ASV_TAX )
PHPB_bacteria_ASV_TAX <- cbind(ID=ID, PHPB_bacteria_ASV_TAX)
IC calculation for Poisson distribution using Chi square distribution (value and formula within Zar p574)
Nem_poisic_otu2 = epitools::pois.exact(Nem_otu_occurence2, conf.level = 0.95)
Nem_dstat_otu2 <- cbind(Nem_mean.abun_otu2, Nem_disp_otu2, Nem_otu_occurence2, Nem_poisic_otu2)
names(Nem_dstat_otu2) <- c("average","disp", "occurence", "x", "pt", "rate", "lower", "upper", "prob")
# ...selection of core OTUs----------------------------
Nem_core_otu2 <- Nem_dstat_otu2[Nem_dstat_otu2$disp > Nem_dstat_otu2$upper,]
Nem_core_otu2 <- na.omit(Nem_core_otu2)
Nem_core_otu2$Species <- PHPB_bacteria_ASV_TAX[rownames(PHPB_bacteria_ASV_TAX) %in% row.names(Nem_core_otu2),8]
Nem_core_otu2$Genus <- PHPB_bacteria_ASV_TAX[rownames(PHPB_bacteria_ASV_TAX) %in% row.names(Nem_core_otu2),7]
Nem_core_otu2$Family <- PHPB_bacteria_ASV_TAX[rownames(PHPB_bacteria_ASV_TAX) %in% row.names(Nem_core_otu2),6]
Nem_core_otu2$Phylum <- PHPB_bacteria_ASV_TAX[rownames(PHPB_bacteria_ASV_TAX) %in% row.names(Nem_core_otu2),3]
Nem_core_otu2 <- Nem_core_otu2 %>%
dplyr::arrange(desc(occurence)) # Sort abundances
Nem_core_otu2
## average disp occurence x pt rate lower upper
## ASV3 678.453608 2.663972e+05 97 97 1 97 78.6604409 118.331774
## ASV17 692.355263 2.058814e+05 76 76 1 76 59.8793494 95.125333
## ASV112 196.765625 2.180195e+04 64 64 1 64 49.2877952 81.726569
## ASV12 446.825397 1.056666e+05 63 63 1 63 48.4109285 80.604366
## ASV115 170.721311 7.555731e+03 61 61 1 61 46.6601620 78.357041
## ASV18 789.316667 1.759163e+05 60 60 1 60 45.7863210 77.231877
## ASV253 111.600000 1.828859e+04 55 55 1 55 41.4335424 71.590090
## ASV5 606.098039 1.392515e+05 51 51 1 51 37.9728494 67.055575
## ASV6 4283.215686 1.219347e+06 51 51 1 51 37.9728494 67.055575
## ASV1604 15.612245 6.990431e+02 49 49 1 49 36.2504686 64.780597
## ASV38 467.826087 6.407597e+04 46 46 1 46 33.6777792 61.357554
## ASV51 619.622222 6.540742e+04 45 45 1 45 32.8233066 60.213540
## ASV47 534.850000 1.949549e+05 40 40 1 40 28.5765864 54.468643
## ASV61 761.270270 7.960393e+04 37 37 1 37 26.0514137 50.999627
## ASV66 402.771429 8.945238e+04 35 35 1 35 24.3787837 48.676532
## ASV545 53.558824 1.121839e+03 34 34 1 34 23.5459879 47.511601
## ASV102 246.812500 6.103124e+04 32 32 1 32 21.8879760 45.174476
## ASV121 487.666667 2.620746e+05 30 30 1 30 20.2408636 42.826851
## ASV49 1073.551724 1.029999e+05 29 29 1 29 19.4217851 41.648816
## ASV210 141.620690 2.596959e+04 29 29 1 29 19.4217851 41.648816
## ASV278 219.758621 7.575208e+03 29 29 1 29 19.4217851 41.648816
## ASV271 324.800000 3.718885e+04 25 25 1 25 16.1786837 36.904934
## ASV422 396.583333 6.888055e+04 24 24 1 24 15.3772578 35.710098
## ASV33 2168.000000 2.470477e+05 22 22 1 22 13.7872546 33.308265
## ASV21 3371.285714 5.394161e+05 21 21 1 21 12.9993564 32.100730
## ASV257 67.650000 5.443142e+03 20 20 1 20 12.2165428 30.888378
## ASV731 68.900000 1.884235e+03 20 20 1 20 12.2165428 30.888378
## ASV26 213.105263 5.757163e+03 19 19 1 19 11.4392691 29.670855
## ASV176 521.736842 1.528575e+04 19 19 1 19 11.4392691 29.670855
## ASV2925 21.736842 1.252513e+03 19 19 1 19 11.4392691 29.670855
## ASV496 178.705882 4.197084e+03 17 17 1 17 9.9031251 27.218647
## ASV547 190.647059 7.705788e+03 17 17 1 17 9.9031251 27.218647
## ASV891 85.176471 1.239315e+03 17 17 1 17 9.9031251 27.218647
## ASV5767 9.875000 1.885316e+02 16 16 1 16 9.1453783 25.982998
## ASV8753 5.875000 2.423404e+02 16 16 1 16 9.1453783 25.982998
## ASV119 229.666667 2.338003e+03 15 15 1 15 8.3953855 24.740220
## ASV243 93.400000 2.296163e+03 15 15 1 15 8.3953855 24.740220
## ASV309 182.866667 7.988092e+03 15 15 1 15 8.3953855 24.740220
## ASV341 140.333333 6.898599e+03 15 15 1 15 8.3953855 24.740220
## ASV1261 99.066667 2.895494e+03 15 15 1 15 8.3953855 24.740220
## ASV427 164.857143 6.544076e+03 14 14 1 14 7.6539338 23.489616
## ASV283 104.000000 2.074788e+03 13 13 1 13 6.9219517 22.230380
## ASV464 42.769231 3.616223e+02 13 13 1 13 6.9219517 22.230380
## ASV188 48.166667 4.668304e+02 12 12 1 12 6.2006035 20.961560
## ASV1587 75.833333 1.667943e+03 12 12 1 12 6.2006035 20.961560
## ASV5665 33.750000 3.104074e+02 12 12 1 12 6.2006035 20.961560
## ASV196 46.636364 9.979025e+02 11 11 1 11 5.4911525 19.682011
## ASV2177 44.090909 1.453113e+02 11 11 1 11 5.4911525 19.682011
## ASV3125 48.727273 2.426160e+03 11 11 1 11 5.4911525 19.682011
## ASV80 2539.400000 9.527418e+04 10 10 1 10 4.7953886 18.390358
## ASV487 334.700000 5.590051e+03 10 10 1 10 4.7953886 18.390358
## ASV19 66.555556 3.100000e+02 9 9 1 9 4.1153809 17.084805
## ASV82 40.555556 4.031562e+02 9 9 1 9 4.1153809 17.084805
## ASV379 152.222222 8.115869e+02 9 9 1 9 4.1153809 17.084805
## ASV582 229.777778 4.467671e+03 9 9 1 9 4.1153809 17.084805
## ASV1027 288.777778 8.993537e+03 9 9 1 9 4.1153809 17.084805
## ASV160 1702.875000 8.345799e+04 8 8 1 8 3.4538314 15.763189
## ASV413 232.750000 1.395435e+03 8 8 1 8 3.4538314 15.763189
## ASV4337 48.625000 4.679871e+02 8 8 1 8 3.4538314 15.763189
## ASV4558 59.000000 1.303186e+03 8 8 1 8 3.4538314 15.763189
## ASV6157 25.875000 1.129614e+02 8 8 1 8 3.4538314 15.763189
## ASV741 30.714286 9.290233e+01 7 7 1 7 2.8143575 14.422675
## ASV2586 73.857143 4.575706e+02 7 7 1 7 2.8143575 14.422675
## ASV2 296.428571 9.032386e+02 7 7 1 7 2.8143575 14.422675
## ASV504 108.833333 1.162772e+02 6 6 1 6 2.2018911 13.059479
## ASV1240 37.500000 1.796000e+01 6 6 1 6 2.2018911 13.059479
## ASV2373 40.666667 3.645574e+02 6 6 1 6 2.2018911 13.059479
## ASV2858 29.833333 7.759218e+01 6 6 1 6 2.2018911 13.059479
## ASV2922 100.333333 2.198266e+03 6 6 1 6 2.2018911 13.059479
## ASV7679 33.666667 1.070891e+02 6 6 1 6 2.2018911 13.059479
## ASV13312 20.000000 1.673000e+02 6 6 1 6 2.2018911 13.059479
## ASV600 23.200000 7.106897e+01 5 5 1 5 1.6234857 11.668322
## ASV2087 55.600000 1.191223e+02 5 5 1 5 1.6234857 11.668322
## ASV2242 175.800000 3.018589e+02 5 5 1 5 1.6234857 11.668322
## ASV2567 43.200000 1.620556e+02 5 5 1 5 1.6234857 11.668322
## ASV3730 49.000000 3.065306e+01 5 5 1 5 1.6234857 11.668322
## ASV7237 34.400000 1.206163e+02 5 5 1 5 1.6234857 11.668322
## ASV10154 21.200000 8.249057e+01 5 5 1 5 1.6234857 11.668322
## ASV15646 3.000000 1.266667e+01 5 5 1 5 1.6234857 11.668322
## ASV224 105.000000 8.875619e+02 4 4 1 4 1.0898892 10.241589
## ASV593 138.000000 1.477478e+03 4 4 1 4 1.0898892 10.241589
## ASV1122 60.250000 5.053527e+01 4 4 1 4 1.0898892 10.241589
## ASV1223 64.250000 1.311245e+02 4 4 1 4 1.0898892 10.241589
## ASV2757 107.500000 2.016465e+02 4 4 1 4 1.0898892 10.241589
## ASV3091 4.500000 1.355556e+01 4 4 1 4 1.0898892 10.241589
## ASV3235 3.500000 1.628571e+01 4 4 1 4 1.0898892 10.241589
## ASV3353 83.000000 2.572530e+02 4 4 1 4 1.0898892 10.241589
## ASV3974 8.000000 4.825000e+01 4 4 1 4 1.0898892 10.241589
## ASV6858 18.500000 3.724324e+01 4 4 1 4 1.0898892 10.241589
## ASV52 57.500000 5.459130e+01 4 4 1 4 1.0898892 10.241589
## ASV603 36.666667 6.885455e+01 3 3 1 3 0.6186712 8.767277
## ASV1255 49.666667 4.285906e+01 3 3 1 3 0.6186712 8.767277
## ASV1424 169.000000 6.427574e+02 3 3 1 3 0.6186712 8.767277
## ASV1435 39.333333 1.065932e+02 3 3 1 3 0.6186712 8.767277
## ASV3460 63.333333 8.780000e+01 3 3 1 3 0.6186712 8.767277
## ASV3940 47.000000 4.685106e+01 3 3 1 3 0.6186712 8.767277
## ASV4186 60.666667 2.392857e+02 3 3 1 3 0.6186712 8.767277
## ASV5760 81.333333 8.130328e+01 3 3 1 3 0.6186712 8.767277
## ASV6567 30.333333 2.507692e+01 3 3 1 3 0.6186712 8.767277
## ASV10153 31.000000 2.135484e+01 3 3 1 3 0.6186712 8.767277
## ASV10862 39.000000 1.126154e+02 3 3 1 3 0.6186712 8.767277
## ASV13872 4.666667 1.300000e+01 3 3 1 3 0.6186712 8.767277
## ASV23938 8.666667 9.307692e+00 3 3 1 3 0.6186712 8.767277
## ASV699 110.500000 2.170181e+02 2 2 1 2 0.2422036 7.224693
## ASV1357 775.000000 4.465961e+02 2 2 1 2 0.2422036 7.224693
## ASV2952 16.000000 8.000000e+00 2 2 1 2 0.2422036 7.224693
## ASV5170 118.000000 1.084746e+02 2 2 1 2 0.2422036 7.224693
## ASV5292 135.000000 1.423704e+01 2 2 1 2 0.2422036 7.224693
## ASV8801 76.000000 7.115789e+01 2 2 1 2 0.2422036 7.224693
## ASV10489 37.000000 5.535135e+01 2 2 1 2 0.2422036 7.224693
## ASV11109 32.000000 3.306250e+01 2 2 1 2 0.2422036 7.224693
## ASV12110 39.500000 4.406329e+01 2 2 1 2 0.2422036 7.224693
## ASV15618 26.500000 1.375472e+01 2 2 1 2 0.2422036 7.224693
## prob Species Genus Family
## ASV3 0.95 vulnificus Vibrio Vibrionaceae
## ASV17 0.95 otitidis Pseudomonas Halomonadaceae
## ASV112 0.95 wuhouensis Acinetobacter Moraxellaceae
## ASV12 0.95 syzygii Ralstonia Burkholderiaceae
## ASV115 0.95 oleivorans Acinetobacter Moraxellaceae
## ASV18 0.95 radiotolerans Methylobacterium Beijerinckiaceae
## ASV253 0.95 tandoii Acinetobacter Moraxellaceae
## ASV5 0.95 psychrolutea Sphingomonas Sphingomonadaceae
## ASV6 0.95 tasmaniensis Vibrio Vibrionaceae
## ASV1604 0.95 iners Lactobacillus Lactobacillaceae
## ASV38 0.95 broomeae Afipia Xanthobacteraceae
## ASV51 0.95 fungorum Paraburkholderia Burkholderiaceae
## ASV47 0.95 damselae Photobacterium Vibrionaceae
## ASV61 0.95 pumilus Bacillus Bacillaceae
## ASV66 0.95 solanacearum Ralstonia Pseudomonadaceae
## ASV545 0.95 vesicularis Brevundimonas Caulobacteraceae
## ASV102 0.95 venetianus Acinetobacter Moraxellaceae
## ASV121 0.95 stutzeri Pseudomonas Pseudomonadaceae
## ASV49 0.95 saxobsidens Herminiimonas Oxalobacteraceae
## ASV210 0.95 lwoffii Acinetobacter Moraxellaceae
## ASV278 0.95 diminuta Brevundimonas Caulobacteraceae
## ASV271 0.95 sanguinis Streptococcus Streptococcaceae
## ASV422 0.95 harveyi Vibrio Vibrionaceae
## ASV33 0.95 rhodesianum Methylorubrum Beijerinckiaceae
## ASV21 0.95 algae Shewanella Shewanellaceae
## ASV257 0.95 nigripulchritudo Vibrio Vibrionaceae
## ASV731 0.95 maltophilia Stenotrophomonas Xanthomonadaceae
## ASV26 0.95 sakei Lactobacillus Lactobacillaceae
## ASV176 0.95 loti Mesorhizobium Rhizobiaceae
## ASV2925 0.95 bacterium uncultured Neisseriaceae
## ASV496 0.95 resinovorans Pseudomonas Pseudomonadaceae
## ASV547 0.95 pealeana Shewanella Shewanellaceae
## ASV891 0.95 flavescens Neisseria Neisseriaceae
## ASV5767 0.95 pseudodiphtheriticum Corynebacterium Corynebacteriaceae
## ASV8753 0.95 sedentarius Kytococcus Dermacoccaceae
## ASV119 0.95 carnosum Leuconostoc Lactobacillaceae
## ASV243 0.95 cellulosilyticum Rhizobium Rhizobiaceae
## ASV309 0.95 turicensis Siccibacter Erwiniaceae
## ASV341 0.95 miyukkimchii Leuconostoc Lactobacillaceae
## ASV1261 0.95 testosteroni Comamonas Comamonadaceae
## ASV427 0.95 melaninogenica Prevotella Prevotellaceae
## ASV283 0.95 sp. Aquabacterium Comamonadaceae
## ASV464 0.95 macginleyi Corynebacterium Corynebacteriaceae
## ASV188 0.95 aestuarianus Vibrio Vibrionaceae
## ASV1587 0.95 zeaxanthinifaciens Paracoccus Rhodobacteraceae
## ASV5665 0.95 seifertii Acinetobacter Moraxellaceae
## ASV196 0.95 marinus Paracoccus Rhodobacteraceae
## ASV2177 0.95 alkaliphila Paramoritella Moritellaceae
## ASV3125 0.95 lipophiloflavum Corynebacterium Corynebacteriaceae
## ASV80 0.95 sanxanigenens Sphingomonas Sphingomonadaceae
## ASV487 0.95 nivimaris Psychrobacter Moraxellaceae
## ASV19 0.95 parahaemolyticus Vibrio Vibrionaceae
## ASV82 0.95 warneri Staphylococcus Staphylococcaceae
## ASV379 0.95 tsuruhatensis Delftia Comamonadaceae
## ASV582 0.95 baumannii Acinetobacter Moraxellaceae
## ASV1027 0.95 rhizophila Stenotrophomonas Pseudomonadaceae
## ASV160 0.95 albilineans Xanthomonas Xanthomonadaceae
## ASV413 0.95 equinus Streptococcus Streptococcaceae
## ASV4337 0.95 parvula Veillonella Veillonellaceae
## ASV4558 0.95 sonnei Shigella Enterobacteriaceae
## ASV6157 0.95 ruckeri Yersinia Yersiniaceae
## ASV741 0.95 aromaticivorans Terrahaemophilus Pasteurellaceae
## ASV2586 0.95 veronii Pseudomonas Pseudomonadaceae
## ASV2 0.95 mediterranea Alteromonas Alteromonadaceae
## ASV504 0.95 marplatensis Achromobacter Alcaligenaceae
## ASV1240 0.95 rhizophila Stenotrophomonas Xanthomonadaceae
## ASV2373 0.95 parasanguinis Streptococcus Streptococcaceae
## ASV2858 0.95 rhizosphaerae Pseudomonas Pseudomonadaceae
## ASV2922 0.95 pettenkoferi Staphylococcus Staphylococcaceae
## ASV7679 0.95 acnes Propionibacterium Propionibacteriaceae
## ASV13312 0.95 luteus Micrococcus Micrococcaceae
## ASV600 0.95 tumefaciens Agrobacterium Rhizobiaceae
## ASV2087 0.95 accolens Corynebacterium Corynebacteriaceae
## ASV2242 0.95 slovaca Rickettsia Rickettsiaceae
## ASV2567 0.95 tasmaniensis Vibrio Shewanellaceae
## ASV3730 0.95 parainfluenzae Haemophilus Pasteurellaceae
## ASV7237 0.95 tuscaniense Corynebacterium Corynebacteriaceae
## ASV10154 0.95 urealyticum Corynebacterium Corynebacteriaceae
## ASV15646 0.95 australica Alteromonas Alteromonadaceae
## ASV224 0.95 lylae Micrococcus Micrococcaceae
## ASV593 0.95 tuberculostearicum Corynebacterium Corynebacteriaceae
## ASV1122 0.95 vagans Pantoea Erwiniaceae
## ASV1223 0.95 xylosus Staphylococcus Staphylococcaceae
## ASV2757 0.95 putida Pseudomonas Pseudomonadaceae
## ASV3091 0.95 confusa Weissella Lactobacillaceae
## ASV3235 0.95 viscosus Actinomyces Actinomycetaceae
## ASV3353 0.95 massiliensis Fenollaria Family XI
## ASV3974 0.95 harei Peptoniphilus Family XI
## ASV6858 0.95 cambriense Varibaculum Actinomycetaceae
## ASV52 0.95 marcusii Paracoccus Rhodobacteraceae
## ASV603 0.95 macleodii Alteromonas Alteromonadaceae
## ASV1255 0.95 gelidum Leuconostoc Lactobacillaceae
## ASV1424 0.95 magna Finegoldia Family XI
## ASV1435 0.95 clevelandensis Lawsonella Corynebacteriaceae
## ASV3460 0.95 sp. Paramoritella Moritellaceae
## ASV3940 0.95 oryzihabitans Pseudomonas Pseudomonadaceae
## ASV4186 0.95 nitritireducens Stenotrophomonas Xanthomonadaceae
## ASV5760 0.95 otitis Alloiococcus Carnobacteriaceae
## ASV6567 0.95 enterica Salmonella Enterobacteriaceae
## ASV10153 0.95 brasilense Azospirillum Azospirillaceae
## ASV10862 0.95 Atelocyanobacterium Candidatus Microcystaceae
## ASV13872 0.95 haemolytica Mannheimia Pasteurellaceae
## ASV23938 0.95 micra Parvimonas Family XI
## ASV699 0.95 paucimobilis Sphingomonas Sphingomonadaceae
## ASV1357 0.95 xanthomarina Pseudomonas Pseudomonadaceae
## ASV2952 0.95 adiacens Granulicatella Carnobacteriaceae
## ASV5170 0.95 alcaligenes Pseudomonas Pseudomonadaceae
## ASV5292 0.95 anaerobius Peptostreptococcus Peptostreptococcaceae
## ASV8801 0.95 variabilis Acinetobacter Moraxellaceae
## ASV10489 0.95 prausnitzii Faecalibacterium Ruminococcaceae
## ASV11109 0.95 bacterium uncultured Methylophilaceae
## ASV12110 0.95 bacterium uncultured Alcaligenaceae
## ASV15618 0.95 vulgatus Bacteroides Bacteroidaceae
## Phylum
## ASV3 Proteobacteria
## ASV17 Proteobacteria
## ASV112 Proteobacteria
## ASV12 Proteobacteria
## ASV115 Proteobacteria
## ASV18 Proteobacteria
## ASV253 Proteobacteria
## ASV5 Proteobacteria
## ASV6 Proteobacteria
## ASV1604 Firmicutes
## ASV38 Proteobacteria
## ASV51 Proteobacteria
## ASV47 Proteobacteria
## ASV61 Firmicutes
## ASV66 Proteobacteria
## ASV545 Proteobacteria
## ASV102 Proteobacteria
## ASV121 Proteobacteria
## ASV49 Proteobacteria
## ASV210 Proteobacteria
## ASV278 Proteobacteria
## ASV271 Firmicutes
## ASV422 Proteobacteria
## ASV33 Proteobacteria
## ASV21 Proteobacteria
## ASV257 Proteobacteria
## ASV731 Proteobacteria
## ASV26 Firmicutes
## ASV176 Proteobacteria
## ASV2925 Proteobacteria
## ASV496 Proteobacteria
## ASV547 Proteobacteria
## ASV891 Proteobacteria
## ASV5767 Actinobacteriota
## ASV8753 Actinobacteriota
## ASV119 Firmicutes
## ASV243 Proteobacteria
## ASV309 Proteobacteria
## ASV341 Firmicutes
## ASV1261 Proteobacteria
## ASV427 Bacteroidota
## ASV283 Proteobacteria
## ASV464 Actinobacteriota
## ASV188 Proteobacteria
## ASV1587 Proteobacteria
## ASV5665 Proteobacteria
## ASV196 Proteobacteria
## ASV2177 Proteobacteria
## ASV3125 Actinobacteriota
## ASV80 Proteobacteria
## ASV487 Proteobacteria
## ASV19 Proteobacteria
## ASV82 Firmicutes
## ASV379 Proteobacteria
## ASV582 Proteobacteria
## ASV1027 Proteobacteria
## ASV160 Proteobacteria
## ASV413 Firmicutes
## ASV4337 Firmicutes
## ASV4558 Proteobacteria
## ASV6157 Proteobacteria
## ASV741 Proteobacteria
## ASV2586 Proteobacteria
## ASV2 Proteobacteria
## ASV504 Proteobacteria
## ASV1240 Proteobacteria
## ASV2373 Firmicutes
## ASV2858 Proteobacteria
## ASV2922 Firmicutes
## ASV7679 Actinobacteriota
## ASV13312 Actinobacteriota
## ASV600 Proteobacteria
## ASV2087 Actinobacteriota
## ASV2242 Proteobacteria
## ASV2567 Proteobacteria
## ASV3730 Proteobacteria
## ASV7237 Actinobacteriota
## ASV10154 Actinobacteriota
## ASV15646 Proteobacteria
## ASV224 Actinobacteriota
## ASV593 Actinobacteriota
## ASV1122 Proteobacteria
## ASV1223 Firmicutes
## ASV2757 Proteobacteria
## ASV3091 Firmicutes
## ASV3235 Actinobacteriota
## ASV3353 Firmicutes
## ASV3974 Firmicutes
## ASV6858 Actinobacteriota
## ASV52 Proteobacteria
## ASV603 Proteobacteria
## ASV1255 Firmicutes
## ASV1424 Firmicutes
## ASV1435 Actinobacteriota
## ASV3460 Proteobacteria
## ASV3940 Proteobacteria
## ASV4186 Proteobacteria
## ASV5760 Firmicutes
## ASV6567 Proteobacteria
## ASV10153 Proteobacteria
## ASV10862 Cyanobacteria
## ASV13872 Proteobacteria
## ASV23938 Firmicutes
## ASV699 Proteobacteria
## ASV1357 Proteobacteria
## ASV2952 Firmicutes
## ASV5170 Proteobacteria
## ASV5292 Firmicutes
## ASV8801 Proteobacteria
## ASV10489 Firmicutes
## ASV11109 Proteobacteria
## ASV12110 Proteobacteria
## ASV15618 Bacteroidota
Abundance VS Occurrence and Index of dispersion plots
#Plot index of dispersion
index_disp_otu2 <- ggplot(Nem_dstat_otu2, aes(x=occurence, y=log10(disp))) +
geom_point(size = 5, shape = 21, col="black", alpha=0.75) +
geom_line(col="gray", linetype = 4, size = 2, aes(x=occurence, y=log10(upper))) + theme_bw() +
theme(axis.text=element_text(size=18), axis.title=element_text(size=18,face="bold")) +
labs(y="Index of dispersion (Log 10)")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
p23 <- index_disp_otu2 + geom_text(data=Nem_core_otu2, aes(x=occurence, y=log10(disp), label=rownames(Nem_core_otu2)), size = 0
, alpha=0.75)
p23
## Warning: Removed 245 rows containing missing values or values outside the scale range
## (`geom_point()`).
ggsave("/Users/qcvp/R/Plots/PHPB_Index_of_dispersion_plots.png", plot = p23, width = 9, height = 9)
## Warning: Removed 245 rows containing missing values or values outside the scale range
## (`geom_point()`).
Distribution of high occurence ASV
# Subset most occurence ASV (>30 Occurence)
ASV3 <- subset(Melted_Nem_hu_unrarefied_species, OTU == "ASV3")
ASV17 <- subset(Melted_Nem_hu_unrarefied_species, OTU == "ASV17")
ASV112 <- subset(Melted_Nem_hu_unrarefied_species, OTU == "ASV112")
ASV12 <- subset(Melted_Nem_hu_unrarefied_species, OTU == "ASV12")
ASV115 <- subset(Melted_Nem_hu_unrarefied_species, OTU == "ASV115")
ASV18 <- subset(Melted_Nem_hu_unrarefied_species, OTU == "ASV18")
ASV253 <- subset(Melted_Nem_hu_unrarefied_species, OTU == "ASV253")
ASV5 <- subset(Melted_Nem_hu_unrarefied_species, OTU == "ASV5")
# plotting piechart for the distribution of each ASV
p23.1 <- ggplot(ASV3, aes(x= OTU, y=Abundance, fill=Biosample)) +
geom_bar(stat="identity", width=1) +
coord_polar(theta = "y") +
theme_void()+
scale_fill_manual(breaks =c("Human","Animals","Environment","Control"),
values = c("red","orange","blue","green"))
p23.1
p23.2 <- ggplot(ASV17, aes(x= OTU, y=Abundance, fill=Biosample)) +
geom_bar(stat="identity", width=1) +
coord_polar(theta = "y") +
theme_void()+
scale_fill_manual(breaks =c("Human","Animals","Environment","Control"),
values = c("red","orange","blue","green"))
p23.2
p23.3 <- ggplot(ASV112, aes(x= OTU, y=Abundance, fill=Biosample)) +
geom_bar(stat="identity", width=1) +
coord_polar(theta = "y") +
theme_void()+
scale_fill_manual(breaks =c("Human","Animals","Environment","Control"),
values = c("red","orange","blue","green"))
p23.3
p23.4 <- ggplot(ASV12, aes(x= OTU, y=Abundance, fill=Biosample)) +
geom_bar(stat="identity", width=1) +
coord_polar(theta = "y") +
theme_void()+
scale_fill_manual(breaks =c("Human","Animals","Environment","Control"),
values = c("red","orange","blue","green"))
p23.4
p23.6 <- ggplot(ASV115, aes(x= OTU, y=Abundance, fill=Biosample)) +
geom_bar(stat="identity", width=1) +
coord_polar(theta = "y") +
theme_void()+
scale_fill_manual(breaks =c("Human","Animals","Environment","Control"),
values = c("red","orange","blue","green"))
p23.6
p23.7 <- ggplot(ASV18, aes(x= OTU, y=Abundance, fill=Biosample)) +
geom_bar(stat="identity", width=1) +
coord_polar(theta = "y") +
theme_void() +
scale_fill_manual(breaks =c("Human","Animals","Environment","Control"),
values = c("red","orange","blue","green"))
p23.7
p23.9 <- ggplot(ASV253, aes(x= OTU, y=Abundance, fill=Biosample)) +
geom_bar(stat="identity", width=1) +
coord_polar(theta = "y") +
theme_void() +
scale_fill_manual(breaks =c("Human","Animals","Environment","Control"),
values = c("red","orange","blue","green"))
p23.9
p23.10 <- ggplot(ASV5, aes(x= OTU, y=Abundance, fill=Biosample)) +
geom_bar(stat="identity", width=1) +
coord_polar(theta = "y") +
theme_void() +
scale_fill_manual(breaks =c("Human","Animals","Environment","Control"),
values = c("red","orange","blue","green"))
p23.10
Composition of Classes within core OTU list
Nem_core_otu2_long <- Nem_core_otu2 %>%
dplyr::mutate(value = 1) %>%
dplyr::group_by(Family) %>%
dplyr::summarise(RA = sum(value)/nrow(Nem_core_otu2)) %>% #Calculate % of each Families among all pathobiome list
dplyr::arrange(desc(RA))
Nem_core_otu2_long
## # A tibble: 43 × 2
## Family RA
## <chr> <dbl>
## 1 Pseudomonadaceae 0.0885
## 2 Moraxellaceae 0.0796
## 3 Corynebacteriaceae 0.0708
## 4 Vibrionaceae 0.0619
## 5 Lactobacillaceae 0.0531
## 6 Family XI 0.0354
## 7 Xanthomonadaceae 0.0354
## 8 Alteromonadaceae 0.0265
## 9 Comamonadaceae 0.0265
## 10 Pasteurellaceae 0.0265
## # ℹ 33 more rows
Running Linear discriminant analysis Effect Size (LEFse)
mm_lefse2 <- microbiomeMarker::run_lefse(Nem_hu_unrarefied, norm = "CPM",
wilcoxon_cutoff = 0.01,
group = "Biosample",
taxa_rank = "none",
kw_cutoff = 0.01,
multigrp_strat = TRUE,
lda_cutoff = 4)
## Registered S3 method overwritten by 'gplots':
## method from
## reorder.factor DescTools
Plotting
PHPB_bacteria_ASV_TAX <- data.frame(Nem_hu_unrarefied@tax_table)
ID <- rownames(PHPB_bacteria_ASV_TAX )
PHPB_bacteria_ASV_TAX <- cbind(ID=ID, PHPB_bacteria_ASV_TAX)
p_LDAsc2 <- microbiomeMarker::plot_ef_bar(mm_lefse2)
y_labs2 <- ggplot2::ggplot_build(p_LDAsc2)$layout$panel_params[[1]]$y$get_labels()
p_abd2 <- microbiomeMarker::plot_abundance(mm_lefse2, group = "Biosample") +
ggplot2::scale_y_discrete(limits = y_labs2)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
g4 <- gridExtra::grid.arrange(p_LDAsc2, p_abd2, nrow = 1)
ggsave("/Users/qcvp/R/Plots/LEFSE_PHPB.png", plot = g4, width = 11.5, height = 7) #Saving plot
Taxonomy of core biomarker
biomarkertaxa<- subset(PHPB_bacteria_ASV_TAX, rownames(PHPB_bacteria_ASV_TAX) %in% mm_lefse2@marker_table$feature)
biomarkertaxa
## ID Kingdom Phylum Class Order
## ASV3 ASV3 Bacteria Proteobacteria Gammaproteobacteria Enterobacterales
## ASV5 ASV5 Bacteria Proteobacteria Alphaproteobacteria Sphingomonadales
## ASV6 ASV6 Bacteria Proteobacteria Gammaproteobacteria Enterobacterales
## ASV17 ASV17 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales
## ASV20 ASV20 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales
## ASV33 ASV33 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales
## ASV38 ASV38 Bacteria Proteobacteria Alphaproteobacteria Rhizobiales
## ASV49 ASV49 Bacteria Proteobacteria Gammaproteobacteria Burkholderiales
## ASV51 ASV51 Bacteria Proteobacteria Gammaproteobacteria Burkholderiales
## ASV61 ASV61 Bacteria Firmicutes Bacilli Bacillales
## ASV66 ASV66 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales
## ASV112 ASV112 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales
## ASV115 ASV115 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales
## ASV253 ASV253 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales
## ASV278 ASV278 Bacteria Proteobacteria Alphaproteobacteria Caulobacterales
## Family Genus Species
## ASV3 Vibrionaceae Vibrio vulnificus
## ASV5 Sphingomonadaceae Sphingomonas psychrolutea
## ASV6 Vibrionaceae Vibrio tasmaniensis
## ASV17 Halomonadaceae Pseudomonas otitidis
## ASV20 Halomonadaceae Pseudomonas otitidis
## ASV33 Beijerinckiaceae Methylorubrum rhodesianum
## ASV38 Xanthobacteraceae Afipia broomeae
## ASV49 Oxalobacteraceae Herminiimonas saxobsidens
## ASV51 Burkholderiaceae Paraburkholderia fungorum
## ASV61 Bacillaceae Bacillus pumilus
## ASV66 Pseudomonadaceae Ralstonia solanacearum
## ASV112 Moraxellaceae Acinetobacter wuhouensis
## ASV115 Moraxellaceae Acinetobacter oleivorans
## ASV253 Moraxellaceae Acinetobacter tandoii
## ASV278 Caulobacteraceae Brevundimonas diminuta