The Kleefstra dataset contains of four groups namely patients (n=22), fathers (n=13), mothers (n=16) and siblings (n=9). The age of the patient group ranges from 4 to 39 years old. Due to this reason, BMI will be made into a categorial variable since BMI for kids is differently scored.
Analysis plan: Alpha and beta with no prevalence check corrected by age, gender, BMI, FamilyID and living conditions for Patients vs Family and intestinal complaints (with the whole dataset). For the patient datasets, doing alpha and beta for CBCL total score, ADOS comparative score and genetics corrected by age, gender and BMI.
For BMI, 7 samples had missing values so the data was imputed with the median. This included four siblings (median=Healthy), one father (median=Healthy) and two patients (median=Overweight).
Taxonomy on genus level with a prevalence check of 50% (looking only at the core taxa) looking for the difference between patients vs family and intestinal complaints corrected for age, gender, BMI, FamilyID and living conditions. For the patient datasets, looking for differences in taxonomy with CBCL total score, ADOS comparative score and genetics corrected for age, gender and BMI.
library(microbiome) # data analysis and visualisation
library(phyloseq) # also the basis of data object. Data analysis and visualisation
#library(microbiomeutilities) # some utility tools
library(ggpubr) # publication quality figures, based on ggplot2
library(DT) # interactive tables in html and markdown
library(data.table) # alternative to data.frame
library(dplyr) # data handling
library(ape) # reading tree
library(tibble) # rownames_to_column
library(picante) # phylogenetic distance
library(vegan) # beta diversity
library(lme4) # mixed models
library(ggplot2) # for nice plots
library(tidyr) # used for unite function
library(ART) # for RAOV test
library(readxl)
library(corrplot)
library(Hmisc)
library(tidyverse)
The biom, meta and tree file are loaded into a phyloseq object. After that, information about the object is given like dimensions of otu table, sample table, tax table and tree. when summarizing the data, you get information about the number of reads (min, max, mean, median), number of samples and the variables available in the meta data.
# metadata.file should be a comma separated text file
dataKleef <- read_phyloseq(otu.file = "kleefstra-feature-table-taxonomy-filtered240-260.biom",
taxonomy.file = NULL,
metadata.file = "meta_kleefstra_microbiomeR_v5_withinpvalues_geneticsadjusted.txt",
type = "biom")
## Time to complete depends on OTU file size
treefile_kleefstra <- read.tree("kleefstra_rootedtree_filtered240-260.nwk")
#Merge into phyloseq object
ps_kleefstra <- merge_phyloseq(dataKleef, treefile_kleefstra)
saveRDS(ps_kleefstra, "ps_kleefstra_final_prev50_v17.rds")
print(ps_kleefstra)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 6907 taxa and 63 samples ]
## sample_data() Sample Data: [ 63 samples by 43 sample variables ]
## tax_table() Taxonomy Table: [ 6907 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 6907 tips and 6876 internal nodes ]
summarize_phyloseq(ps_kleefstra)
## Compositional = NO2
## 1] Min. number of reads = 4891672] Max. number of reads = 8969523] Total number of reads = 404174614] Average number of reads = 6415475] Median number of reads = 6137537] Sparsity = 0.9303283303572866] Any OTU sum to 1 or less? YES8] Number of singletons = 89] Percent of OTUs that are singletons
## (i.e. exactly one read detected across all samples)0.11582452584334710] Number of sample variables are: 43Baseclear_Sample_nrOnderzoeksIDMedication1yes2noGenetic_variant3Genetic_variantGenetic_variant2Intestinal_complaints3Intestinal_complaints7Intestinal_complaintsBuikklachtenWonen1Gemeenschap2ThuisOnderzoeksnummerFamilyRelationFamiliesGroupedFamilyRelation_codePatient0orFamily1Patient0Parents1Siblings3Mozaiek1Niet0AgeMicrobiomeAgeCBCLGenderMale1Female2WeightBMIBMI_catCareOutside1CareFamily2DNANotesCBCLtot5_TotaalTscoreCBCLtot5_InternaliserenTscoreCBCLtot5_ExternaliserenTscoreCBCLtot5_EmotioneelreagerendTscoreCBCLtot5_AngstigDepressiefTscoreCBCLtot5_LichamelijkeklachtenTscoreCBCLtot5_TeruggetrokkenTscoreCBCLtot5_SlaapproblemenTscoreCBCLtot5_AandachtsproblemenTscoreCBCLtot5_AgressiefgedragTscoreCBCLtot5_AffectieveproblemenTscoreCBCLtot5_AngstproblemenTscoreCBCLtot5_PervasieveontwikkelingproblemenTscoreCBCLtot5_ADHDproblemenCBCLtot5_OppOpstproblemenCBCLtot5_Overig2
## [[1]]
## [1] "1] Min. number of reads = 489167"
##
## [[2]]
## [1] "2] Max. number of reads = 896952"
##
## [[3]]
## [1] "3] Total number of reads = 40417461"
##
## [[4]]
## [1] "4] Average number of reads = 641547"
##
## [[5]]
## [1] "5] Median number of reads = 613753"
##
## [[6]]
## [1] "7] Sparsity = 0.930328330357286"
##
## [[7]]
## [1] "6] Any OTU sum to 1 or less? YES"
##
## [[8]]
## [1] "8] Number of singletons = 8"
##
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n (i.e. exactly one read detected across all samples)0.115824525843347"
##
## [[10]]
## [1] "10] Number of sample variables are: 43"
##
## [[11]]
## [1] "Baseclear_Sample_nr"
## [2] "OnderzoeksID"
## [3] "Medication1yes2no"
## [4] "Genetic_variant3"
## [5] "Genetic_variant"
## [6] "Genetic_variant2"
## [7] "Intestinal_complaints3"
## [8] "Intestinal_complaints7"
## [9] "Intestinal_complaints"
## [10] "Buikklachten"
## [11] "Wonen1Gemeenschap2Thuis"
## [12] "Onderzoeksnummer"
## [13] "FamilyRelation"
## [14] "FamiliesGrouped"
## [15] "FamilyRelation_code"
## [16] "Patient0orFamily1"
## [17] "Patient0Parents1Siblings3"
## [18] "Mozaiek1Niet0"
## [19] "AgeMicrobiome"
## [20] "AgeCBCL"
## [21] "GenderMale1Female2"
## [22] "Weight"
## [23] "BMI"
## [24] "BMI_cat"
## [25] "CareOutside1CareFamily2"
## [26] "DNA"
## [27] "Notes"
## [28] "CBCLtot5_TotaalTscore"
## [29] "CBCLtot5_InternaliserenTscore"
## [30] "CBCLtot5_ExternaliserenTscore"
## [31] "CBCLtot5_EmotioneelreagerendTscore"
## [32] "CBCLtot5_AngstigDepressiefTscore"
## [33] "CBCLtot5_LichamelijkeklachtenTscore"
## [34] "CBCLtot5_TeruggetrokkenTscore"
## [35] "CBCLtot5_SlaapproblemenTscore"
## [36] "CBCLtot5_AandachtsproblemenTscore"
## [37] "CBCLtot5_AgressiefgedragTscore"
## [38] "CBCLtot5_AffectieveproblemenTscore"
## [39] "CBCLtot5_AngstproblemenTscore"
## [40] "CBCLtot5_PervasieveontwikkelingproblemenTscore"
## [41] "CBCLtot5_ADHDproblemen"
## [42] "CBCLtot5_OppOpstproblemen"
## [43] "CBCLtot5_Overig"
The quality check for both number of reads and samples. First, samples that are not bacteria will be removed (Archaea, Eukaryota and Unassigned). Then, the number of reads per sample and per OTU will be checked. This could be a reason to remove a sample or OTU later on.
dataKleef<- readRDS("ps_kleefstra_final_prev50_v17.rds")
#Filter out only the non bacteria: Remove archea, eukaryotes, not-assigned bacteria
table(tax_table(dataKleef)[, "Domain"], exclude = NULL)
##
## D_0__Archaea D_0__Bacteria D_0__Eukaryota Unassigned
## 8 6771 1 127
dataKleef_2 <- subset_taxa(dataKleef, !is.na(Domain) & !Domain %in% c("", "Unassigned") & !Domain %in% c("", "D_0__Archaea") & !Domain %in% c("", "D_0__Eukaryota")) # filter out unclassified
table(tax_table(dataKleef_2)[, "Domain"])
##
## D_0__Bacteria
## 6771
sample_data(dataKleef_2)$PatientorFamily <- sample_data(dataKleef_2)$Patient0orFamily1
sample_data(dataKleef_2)$PatientorFamily <- gsub("0", "Patient", sample_data(dataKleef_2)$PatientorFamily)
sample_data(dataKleef_2)$PatientorFamily <- gsub("1", "Family", sample_data(dataKleef_2)$PatientorFamily)
# Check number of reads per sample
total_reads_per_sample <- data.frame(sort(sample_sums(dataKleef_2)))
total_reads_per_sample <- rownames_to_column(total_reads_per_sample)
plot(sort(sample_sums(dataKleef_2)), ylab="Number of reads per sample")
options(scipen=999)
hist(total_reads_per_sample$sort.sample_sums.dataKleef_2.., xlab="Read counts per sample", ylab="Number of samples", main="Number of reads per sample", ylim=c(0,20), labels=TRUE)
asv_df <- otu_table(dataKleef_2)
otus <- data.frame(colSums(asv_df != 0))
otus <- rownames_to_column(otus)
data_per_sample <- merge(total_reads_per_sample, otus, by="rowname")
datatable(data_per_sample, rownames=FALSE, colnames=c("Sample ID", "Total number of reads", "Total number of OTUs"))
# Check number of reads per OTU
reads_per_otu <- data.frame(taxa_sums(dataKleef_2))
reads_per_otu <- rownames_to_column(reads_per_otu)
datatable(reads_per_otu, rownames=FALSE, colnames=c("OTU ID", "Total number of reads"))
otu_tab <- t(abundances(dataKleef_2))
vegan::rarecurve(otu_tab, step = 100, label = TRUE, col = "blue", cex = 0.5, ylab = "OTU richness", xlab = "Number of reads")
plot_richness(dataKleef_2, measures = "Observed")
plot_richness(dataKleef_2, measures = "Shannon")
plot_taxa_prevalence(dataKleef_2, "Phylum", detection=1) + theme(legend.position = "none")
tax_table(dataKleef_2)[, colnames(tax_table(dataKleef_2))] <- gsub(tax_table(dataKleef_2)[, colnames(tax_table(dataKleef_2))],pattern = "[A-Z]_[0-9]__", replacement = "")
print(dataKleef_2)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 6771 taxa and 63 samples ]
## sample_data() Sample Data: [ 63 samples by 44 sample variables ]
## tax_table() Taxonomy Table: [ 6771 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 6771 tips and 6740 internal nodes ]
saveRDS(dataKleef_2, "dataKleef_qc_final_50%_v17.rds")
This section contains the analysis using the whole dataset to test the difference between patients and family members and look at intestinal complaints.
Analysis was done to compare alpha diversity between patients or family members.The ANOVA test is used to test the difference in alpha diversity between the groups. For each test, these variables were taken into account: Age at microbiome collection, BMI in categories (healthy, overweight, obese), Gender, Wonen1gemeenschap2Thuis (living conditions), and for strata a family ID number was used to correct for family effects.
dataKleef_2 <- readRDS("dataKleef_qc_final_50%_v17.rds")
mapping_file <- meta(dataKleef_2) # get the metadata as a separate object
mapping_file <- rownames_to_column(mapping_file)
alpha_div <- alpha(dataKleef_2) #calculate alpha
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
alpha_div <- select(alpha_div, "observed", "diversity_inverse_simpson", "diversity_shannon")
alpha_div <- rownames_to_column(alpha_div)
dataKleef_2_otu_table <- as.data.frame(dataKleef_2@otu_table) # calculate PD
dataKleef_2_tree <- dataKleef_2@phy_tree
dataKleef_2_tree #check if the tree is rooted
##
## Phylogenetic tree with 6771 tips and 6740 internal nodes.
##
## Tip labels:
## f1cd3b78f762cadcffc40bd0e670e0ef, 231d38920cec7821896f473d317012e8, b4a0981afbca77cd1546eb4ba7fef279, b9a412890f37c6d852f3593f99ad3040, 318bebabf38f50ca26f13634e2f997a3, 5d4cfaec2202e0e02881d0309b79ad78, ...
## Node labels:
## root, 0.932, 0.727, 0.738, 0.604, 0.533, ...
##
## Rooted; includes branch lengths.
df_pd <- pd(t(dataKleef_2_otu_table), dataKleef_2_tree, include.root = T) #t(otu_table) transposes the otu table for use in picante
df_pd <- rownames_to_column(df_pd)
alpha_PD <- merge(alpha_div,df_pd, by = "rowname")
meta_alpha_PD <- merge(alpha_PD,mapping_file, by = "rowname") # merge it with metadata
saveRDS(meta_alpha_PD, "alpha50%_kleefstra_final_v17.rds")
#Alpha plots - Patients vs Family
cbp1_familyorpatient <- c("#009E73", "#E69F00")
shannon_aov <- aov(diversity_shannon~Patient0orFamily1+AgeMicrobiome+BMI_cat+GenderMale1Female2+FamiliesGrouped+Wonen1Gemeenschap2Thuis, meta_alpha_PD)
summary(shannon_aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## Patient0orFamily1 1 2.025 2.0252 6.495 0.0136 *
## AgeMicrobiome 1 0.907 0.9072 2.910 0.0937 .
## BMI_cat 2 0.441 0.2206 0.708 0.4973
## GenderMale1Female2 1 0.064 0.0640 0.205 0.6524
## FamiliesGrouped 1 0.010 0.0097 0.031 0.8609
## Wonen1Gemeenschap2Thuis 1 0.021 0.0210 0.067 0.7960
## Residuals 55 17.149 0.3118
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pval_shannon <- round(summary(shannon_aov)[[1]][1,5],digits=4)
meta_alpha_PD$PatientorFamily <- factor(meta_alpha_PD$PatientorFamily, levels=c("Patient", "Family"))
groupsize <- xtabs(~PatientorFamily, data=meta_alpha_PD)
# Mean difference: patient = 0, family members = 1
means <- aggregate(x=meta_alpha_PD$diversity_shannon, by=list(meta_alpha_PD$Patient0orFamily1), FUN=mean)
means
## Group.1 x
## 1 0 3.375495
## 2 1 3.747893
meandiff <- means[1,2] - means[2,2]
meandiff
## [1] -0.3723975
sd <- aggregate(x=meta_alpha_PD$diversity_shannon, by=list(meta_alpha_PD$Patient0orFamily1), FUN=sd)
sd
## Group.1 x
## 1 0 0.5011203
## 2 1 0.5788336
# plot alpha
countnonzero <- aggregate(meta_alpha_PD$diversity_shannon, list(meta_alpha_PD$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplot_shannon <- ggboxplot(meta_alpha_PD, x="PatientorFamily", y="diversity_shannon", fill = "PatientorFamily", palette = cbp1_familyorpatient, title="Shannon diversity index", xlab= FALSE, ylab = FALSE) + theme(panel.border = element_rect(color = "black", size = 1, fill = NA),legend.position = "none")+ geom_text(aes(y=5, x=1.5, label=paste0("P-val = ",pval_shannon)), size=4) + scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ","n=", countnonzero[1:2,3]))
boxplot_shannon
Shannon diversity index is significantly different for patients or families (p=0.0136).
Analysis was done to compare alpha diversity between patients and family members with and withour intestinal complaints. The ANOVA test is used to test the difference in alpha diversity between the groups. These variables were taken into account: Age at microbiome collection, BMI in categories (healthy, overweight, obese), Gender, Wonen1Gemeenschap2Thuis (living conditions), and for strata a family ID number was used to correct for family effects.
# ANOVA to test difference in alpha diversity between patients and family members with or without intestinal complaints
meta_alpha_kleef <- readRDS("alpha50%_kleefstra_final_v17.rds")
meta_alpha_PD_bk <- subset(meta_alpha_PD, OnderzoeksID != "KSMB11003" & OnderzoeksID != "KSMB15001" & OnderzoeksID != "KSMB23" & OnderzoeksID != "KSMB3003" & OnderzoeksID != "KSMB3")
meta_alpha_PD_bk$PatientorFamily <- factor(meta_alpha_PD_bk$PatientorFamily, levels=c("Patient", "Family"))
meta_alpha_PD_bk$Intestinal_complaints <- factor(meta_alpha_PD_bk$Intestinal_complaints, levels=c("No","Yes"))
meta_alpha_PD_bk$Intestinal_complaints3 <- factor(meta_alpha_PD_bk$Intestinal_complaints3, levels=c("Never","Few","Many"))
shannon_aov_bk <- aov(diversity_shannon~Intestinal_complaints+Intestinal_complaints*Patient0orFamily1+AgeMicrobiome+BMI_cat+GenderMale1Female2+FamiliesGrouped+Wonen1Gemeenschap2Thuis, meta_alpha_PD_bk)
summary(shannon_aov_bk)
## Df Sum Sq Mean Sq F value Pr(>F)
## Intestinal_complaints 1 0.235 0.2354 0.681 0.413
## Patient0orFamily1 1 1.451 1.4507 4.196 0.046 *
## AgeMicrobiome 1 1.106 1.1062 3.200 0.080 .
## BMI_cat 2 0.456 0.2282 0.660 0.521
## GenderMale1Female2 1 0.095 0.0954 0.276 0.602
## FamiliesGrouped 1 0.011 0.0113 0.033 0.858
## Wonen1Gemeenschap2Thuis 1 0.003 0.0028 0.008 0.929
## Intestinal_complaints:Patient0orFamily1 1 0.039 0.0390 0.113 0.739
## Residuals 48 16.595 0.3457
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pval_shannon_bk <- round(summary(shannon_aov_bk)[[1]][8,5],digits=4)
pval_bk <- round(summary(shannon_aov_bk)[[1]][1,5], digits=4)
groupsize <- xtabs(~Intestinal_complaints, data=meta_alpha_PD_bk)
# calculate mean difference; patient = 0, family = 1
meansd_bk <- aggregate(diversity_shannon~Patient0orFamily1+Intestinal_complaints, meta_alpha_PD_bk, function(x) c(mean=mean(x), sd=sd(x)))
meansd_bk
## Patient0orFamily1 Intestinal_complaints diversity_shannon.mean
## 1 0 No 3.4195091
## 2 1 No 3.7710079
## 3 0 Yes 3.3814034
## 4 1 Yes 3.7166586
## diversity_shannon.sd
## 1 0.2282881
## 2 0.6935099
## 3 0.5767090
## 4 0.5297649
# plot results in boxplot
countnonzero <- aggregate(meta_alpha_PD_bk$diversity_shannon, list(meta_alpha_PD_bk$Intestinal_complaints), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["No"]],groupsize[["Yes"]])
boxplot_shannon_bk2 <- ggboxplot(meta_alpha_PD_bk, x="Intestinal_complaints", y="diversity_shannon", fill = "PatientorFamily",
title="Shannon diversity index", ylab = FALSE) + xlab("Intestinal complaints") +
theme(legend.position="top",panel.border = element_rect(color = "black", size = 1, fill = NA))+ geom_text(aes(y=5, x=1.5, label=paste0("P-val = ",pval_shannon_bk)), size=4) + scale_x_discrete(labels=paste0(countnonzero[1:3,1]," ","n=", countnonzero[1:3,2])) + scale_fill_manual(name="", labels=c("Patients", "Family members"), values=c(cbp1_familyorpatient))
boxplot_shannon_bk2
This part contains the results of the beta diversity analysis using a CAP plot with the distance measure: Bray-Curtis. Analysis was done to compare Beta diversity between patients and family members. The PERMANOVA is used to test the difference in beta diversity between the groups. These variables were taken into account: Age at microbiome collection, BMI in categories (healthy, overweight, obese), Gender, living conditions, and for strata a family ID number was used to correct for family effects.
dataKleef_2 <- readRDS("dataKleef_qc_final_50%_v17.rds")
dataKleef_2_prev_ra <- microbiome::transform(dataKleef_2, "compositional")
sample_data(dataKleef_2_prev_ra)$PatientorFamily <- factor(sample_data(dataKleef_2_prev_ra)$PatientorFamily, levels=c("Patient", "Family"))
#### PERMANOVA
otu <- dataKleef_2_prev_ra@otu_table
meta <- data.frame(sample_data(dataKleef_2_prev_ra))
set.seed(38698) #reproducible results
perm <- how(nperm=999)
setBlocks(perm) <- with(meta, FamiliesGrouped)
print(adonis2(t(otu) ~ PatientorFamily + AgeMicrobiome + BMI_cat + GenderMale1Female2 + Wonen1Gemeenschap2Thuis, method="bray", na.action = na.pass, data=meta, permutations = perm))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks: with(meta, FamiliesGrouped)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = t(otu) ~ PatientorFamily + AgeMicrobiome + BMI_cat + GenderMale1Female2 + Wonen1Gemeenschap2Thuis, data = meta, permutations = perm, method = "bray", na.action = na.pass)
## Df SumOfSqs R2 F Pr(>F)
## PatientorFamily 1 0.5407 0.03006 1.8782 0.011 *
## AgeMicrobiome 1 0.3256 0.01810 1.1309 0.679
## BMI_cat 2 0.3982 0.02214 0.6916 0.988
## GenderMale1Female2 1 0.3246 0.01805 1.1276 0.319
## Wonen1Gemeenschap2Thuis 1 0.2778 0.01544 0.9648 0.650
## Residual 56 16.1220 0.89622
## Total 62 17.9889 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# CAP analysis
sample_data(dataKleef_2_prev_ra)$PatientorFamily <- factor(sample_data(dataKleef_2_prev_ra)$PatientorFamily, levels=c("Patient", "Family"))
weighted_cap1 <- ordinate(dataKleef_2_prev_ra, "CAP", "bray", formula= ~ dataKleef_2_prev_ra@sam_data$PatientorFamily, weighted=TRUE)
plot_weighted_cap1 <- plot_ordination(dataKleef_2_prev_ra, weighted_cap1, color="PatientorFamily") + ggtitle("CAP - Disease status") + geom_point(size=2) + theme_classic()+theme(legend.position="top") + scale_color_manual(name="",labels=c("Patients", "Family members"),values=cbp1_familyorpatient, aesthetics = "color")
plot_weighted_cap1
The PERMANOVA gives a significant result for a difference in beta diversity between patients and families (p-val = 0.012). The CAP plot does seperate the two groups; patients or families).
Analysis was done to compare Beta diversity for intestinal complaints. The PERMANOVA is used to test the difference in beta diversity between the groups. These variables were taken into account: Age at microbiome collection, BMI in categories (healthy, overweight, obese), Gender, and for strata a family ID number was used to correct for family effects.
dataKleef_2 <- readRDS("dataKleef_qc_final_50%_v17.rds")
dataKleef_2_sub <- subset_samples(dataKleef_2, OnderzoeksID != "KSMB11003" & OnderzoeksID != "KSMB15001" & OnderzoeksID != "KSMB23" & OnderzoeksID != "KSMB3003" & OnderzoeksID != "KSMB3")
dataKleef_2_prev_ra <- microbiome::transform(dataKleef_2_sub, "compositional")
# PERMANOVA
otu_bk <- dataKleef_2_prev_ra@otu_table
meta_bk <- data.frame(sample_data(dataKleef_2_sub))
#meta_bk$Age2 <- (meta_bk$AgeMicrobiome)^2
set.seed(38698) #reproducible results
perm <- how(nperm=999)
setBlocks(perm) <- with(meta_bk, FamiliesGrouped)
print(adonis2(t(otu_bk) ~ Intestinal_complaints + Patient0orFamily1 + AgeMicrobiome + BMI_cat + GenderMale1Female2 + Wonen1Gemeenschap2Thuis + Intestinal_complaints*Patient0orFamily1, method="bray", na.action = na.pass, data=meta_bk, permutations = perm))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks: with(meta_bk, FamiliesGrouped)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = t(otu_bk) ~ Intestinal_complaints + Patient0orFamily1 + AgeMicrobiome + BMI_cat + GenderMale1Female2 + Wonen1Gemeenschap2Thuis + Intestinal_complaints * Patient0orFamily1, data = meta_bk, permutations = perm, method = "bray", na.action = na.pass)
## Df SumOfSqs R2 F Pr(>F)
## Intestinal_complaints 1 0.3023 0.01817 1.0528 0.169
## Patient0orFamily1 1 0.5556 0.03341 1.9350 0.010 **
## AgeMicrobiome 1 0.3676 0.02210 1.2801 0.756
## BMI_cat 2 0.4073 0.02449 0.7092 0.945
## GenderMale1Female2 1 0.3233 0.01944 1.1259 0.322
## Wonen1Gemeenschap2Thuis 1 0.2637 0.01585 0.9183 0.629
## Intestinal_complaints:Patient0orFamily1 1 0.3428 0.02061 1.1937 0.168
## Residual 49 14.0694 0.84593
## Total 57 16.6318 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# CAP analysis
sample_data(dataKleef_2_prev_ra)$PatientorFamily <- factor(sample_data(dataKleef_2_prev_ra)$PatientorFamily, levels=c("Patient", "Family"))
weighted_cap_bp <- ordinate(dataKleef_2_prev_ra, "CAP", "bray", formula= ~ dataKleef_2_prev_ra@sam_data$Intestinal_complaints+dataKleef_2_prev_ra@sam_data$Patient0orFamily1, weighted=TRUE)
plot_weighted_cap_bp <- plot_ordination(dataKleef_2_prev_ra, weighted_cap_bp, shape="Intestinal_complaints", color="PatientorFamily") + scale_color_manual(name="", labels=c("Patients","Family members"),values=cbp1_familyorpatient, aesthetics = "color") + scale_shape_manual(name="", values=c(17,19)) + labs(fill="") + ggtitle("CAP - Disease status * Intestinal complaints") + geom_point(size=2) + theme_classic()+ theme(legend.position="top")
plot_weighted_cap_bp
This part contains the results of the taxonomy analysis on genus level using the aligned rank transform test which is a non-parametric ANOVA.
An ASV table including taxonomic information is being prepared for the analysis. Samples with no data for Age at microbiome collection, BMI in categories (healthy, overweight, obese), Gender and family ID were removed. After the data is made compositional (relative abundance). A prevalence cut-off of 50% is done for genera and a 10% cut-off is done for the samples.
dataKleef_2 <- readRDS("dataKleef_qc_final_50%_v17.rds")
print(dataKleef_2)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 6771 taxa and 63 samples ]
## sample_data() Sample Data: [ 63 samples by 44 sample variables ]
## tax_table() Taxonomy Table: [ 6771 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 6771 tips and 6740 internal nodes ]
ps_overall_genus <- microbiome::aggregate_taxa(dataKleef_2, "Genus")
print(ps_overall_genus)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 417 taxa and 63 samples ]
## sample_data() Sample Data: [ 63 samples by 44 sample variables ]
## tax_table() Taxonomy Table: [ 417 taxa by 7 taxonomic ranks ]
# Make compositional first, then remove taxa with lower than 50% prevalence in the dataset
otu_table_rel <- microbiome::transform(ps_overall_genus, "compositional")
print(otu_table_rel)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 417 taxa and 63 samples ]
## sample_data() Sample Data: [ 63 samples by 44 sample variables ]
## tax_table() Taxonomy Table: [ 417 taxa by 7 taxonomic ranks ]
listprevalence <- prevalence(otu_table_rel@otu_table, sort=TRUE)
plot(listprevalence)
prevalence_cutoff = 0.5
ps_overall_prevalence <- microbiome::core(otu_table_rel, detection = 0, prevalence = prevalence_cutoff)
print(ps_overall_prevalence)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 138 taxa and 63 samples ]
## sample_data() Sample Data: [ 63 samples by 44 sample variables ]
## tax_table() Taxonomy Table: [ 138 taxa by 7 taxonomic ranks ]
# make into a matrix
taxa_table <- data.frame(tax_table(ps_overall_prevalence))
taxa_table <- taxa_table %>% unite(unique,c("Domain","Phylum","Class","Order","Family","Genus"),sep="__",remove=T)
otu_table_rel <- as.data.frame(otu_table(ps_overall_prevalence))
otu_taxa <- merge(otu_table_rel, taxa_table, by="row.names")
out_taxa_keep <- otu_taxa
rownames(otu_taxa) <- otu_taxa$Row.names
otu_taxa$Row.names <- NULL
# put long taxonomic names as row names
otu_taxa_genus <- aggregate(otu_taxa[,-(length(otu_taxa))], by=list(otu_taxa$unique), sum)
# remove samples which were present in fewer than 10% of OTUs
otu_taxa_prev <- otu_taxa_genus[,which(colSums(otu_taxa_genus>0)>=(nrow(otu_taxa_genus)*.1))]
otu_taxa_prev1 <- sweep(otu_taxa_prev[,-1],2,colSums(otu_taxa_prev[,-1]),"/")
rownames(otu_taxa_prev1) <- otu_taxa_prev$Group.1
otu_taxa_prev1 <- as.data.frame(t(otu_taxa_prev1), stringsAsFactors = FALSE)
metadata <- data.frame(sample_data(ps_overall_prevalence))
otu_taxa_meta <- merge(otu_taxa_prev1, metadata, by="row.names")
rownames(otu_taxa_meta) <- otu_taxa_meta$Row.names
otu_taxa_meta$Row.names <- NULL
saveRDS(otu_taxa_meta, "otu_taxa_metaKleef_final_prev50%_v17.rds")
Analysis was done to compare relative abundance per genus between patients or family members. For each test, these variables were taken into account: Age at microbiome collection, BMI in categories (healthy, overweight, obese), Gender, and for strata a family ID number was used to correct for family effects.
In total, 138 genera are tested and no multiple corrected significant taxa were found. One adjusted significant taxa was detected and 16 nominal significant taxa were detected.
otu_taxa_meta_fam <- readRDS("otu_taxa_metaKleef_final_prev50%_v17.rds")
otu_taxa_meta_fam$FamiliesGrouped <- as.factor(otu_taxa_meta_fam$FamiliesGrouped)
colnames(otu_taxa_meta_fam) <- gsub(" ","_",colnames(otu_taxa_meta_fam))
colnames(otu_taxa_meta_fam) <- gsub("-","_",colnames(otu_taxa_meta_fam))
colnames(otu_taxa_meta_fam) <- gsub("\\[","",colnames(otu_taxa_meta_fam))
colnames(otu_taxa_meta_fam) <- gsub("\\]","",colnames(otu_taxa_meta_fam))
# Analysis with ART
results_patientsvsfamily <- data.frame(matrix(nrow=138, ncol=8))
colnames(results_patientsvsfamily) <- c("Genus","pval_PatientorFamily","fval_PatientorFamily", "pval_AgeMicrobiome","pval_BMI_cat","pval_GenderMale1Female2", "pval_FamiliesGrouped", "pval_Wonen1Gemeenschap2Thuis")
for (i in 1:138){
results_patientsvsfamily[i,1] <- colnames(otu_taxa_meta_fam)[i]
otu_taxa_meta_fam$var <- otu_taxa_meta_fam[,i]
fit_art <- aligned.rank.transform(var~Patient0orFamily1+AgeMicrobiome+BMI_cat+GenderMale1Female2+FamiliesGrouped+Wonen1Gemeenschap2Thuis,data=otu_taxa_meta_fam)
results_patientsvsfamily[i,2] <- fit_art$significance[1,4]
results_patientsvsfamily[i,3] <- fit_art$significance[1,3]
results_patientsvsfamily[i,4] <- fit_art$significance[2,4]
results_patientsvsfamily[i,5] <- fit_art$significance[3,4]
results_patientsvsfamily[i,6] <- fit_art$significance[4,4]
results_patientsvsfamily[i,7] <- fit_art$significance[5,4]
results_patientsvsfamily[i,8] <- fit_art$significance[6,4]
}
results_patientsvsfamily$adjust_PatientorFamily <- p.adjust(results_patientsvsfamily$pval_PatientorFamily, method="fdr")
results_patientsvsfamily$adjust_Age <- p.adjust(results_patientsvsfamily$pval_AgeMicrobiome, method="fdr")
results_patientsvsfamily$adjust_BMI_cat <- p.adjust(results_patientsvsfamily$pval_BMI_cat, method="fdr")
results_patientsvsfamily$adjust_Gender <- p.adjust(results_patientsvsfamily$pval_GenderMale1Female2, method="fdr")
results_patientsvsfamily$adjust_FamiliesGrouped <- p.adjust(results_patientsvsfamily$pval_FamiliesGrouped, method="fdr")
results_patientsvsfamily$adjust_Wonen <- p.adjust(results_patientsvsfamily$pval_Wonen1Gemeenschap2Thuis, method="fdr")
datatable(results_patientsvsfamily, rownames=FALSE, colnames=c("Genus", "P-value PatientorFamily", "F-value PatientorFamily", "P-value Age", "P-value BMI", "P-value Gender", "P-value FamilyGrouped","P-value Wonen1Gemeenschap2Thuis", "Adjust. P-value PatientorFamily", "Adjust. P-value Age", "Adjust. P-value BMI", "Adjust. P-value Gender","Adjust. P-value FamilyGrouped","Adjust. P-value Wonen1Gemeenschap2Thuis"))
# Number of FDR corrected results
PF_FDR_Genera <- results_patientsvsfamily$Genus[which(results_patientsvsfamily$adjust_PatientorFamily < 0.05)]
length(PF_FDR_Genera)
## [1] 1
# Mean difference of FDR corrected results: patient = 0, family members = 1
otu_taxa_meta_fam[,1:138] <- otu_taxa_meta_fam[,1:138]*100
means <- aggregate(x=otu_taxa_meta_fam[,PF_FDR_Genera], by=list(otu_taxa_meta_fam$Patient0orFamily1), FUN=mean)
means
## Group.1 x
## 1 0 0.1567622
## 2 1 0.2922127
meandiff <- means[1,2] - means[2,2]
meandiff
## [1] -0.1354505
sd <- aggregate(x=otu_taxa_meta_fam[,PF_FDR_Genera], by=list(otu_taxa_meta_fam$Patient0orFamily1), FUN=sd)
sd
## Group.1 x
## 1 0 0.2027883
## 2 1 0.2360933
means <- aggregate(x=otu_taxa_meta_fam$Bacteria__Actinobacteria__Coriobacteriia__Coriobacteriales__Atopobiaceae__uncultured, by=list(otu_taxa_meta_fam$Patient0orFamily1), FUN=mean)
# Number of nominal results
PatientFamilyGenera <- results_patientsvsfamily$Genus[which(results_patientsvsfamily$pval_PatientorFamily < 0.05)]
length(PatientFamilyGenera)
## [1] 17
otu_taxa_meta_fam$PatientorFamily <- factor(otu_taxa_meta_fam$PatientorFamily, levels=c("Patient", "Family"))
groupsize <- xtabs(~PatientorFamily, data=otu_taxa_meta_fam)
# plot FDR and nominal results
countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[1]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily1 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[1], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[1])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily1
countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[2]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily2 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[2], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[2])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily2
countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[3]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily3 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[3], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[3])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily3
countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[4]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily4 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[4], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[4])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily4
countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[5]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily5 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[5], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[5])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily5
countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[6]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily6 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[6], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[6])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily6
countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[7]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily7 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[7], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[7])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily7
countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[8]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily8 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[8], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[8])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily8
countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[9]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily9 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[9], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[9])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily9
countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[10]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily10 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[10], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[10])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily10
countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[11]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily11 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[11], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[11])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily11
countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[12]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily12 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[12], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[12])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily12
countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[13]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily13 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[13], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[13])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily13
countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[14]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily14 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[14], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[14])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily14
countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[15]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily15 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[15], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[15])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily15
countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[16]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily16 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[16], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[16])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily16
countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[17]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily17 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[17], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[17])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily17
Analysis was done to compare relative abundance per genus between the interaction with intestinal complaints and patients or family members. For each test, these variables were taken into account: Age at microbiome collection, BMI in categories (healthy, overweight, obese), Gender, living conditions and a family ID number was used to correct for family effects.
In total, 138 genera are tested and no multiple corrected significant taxa were found. 10 nominal significant taxa were detected for the interaction between intestinal complaints and patients vs family members. These results were plotted in scatter plot with two linear lines showing the direction for the patients and family members. The results are also plotted in a boxplot showing the sample size per intestinal complaints group.
otu_taxa_meta_fam <- readRDS("otu_taxa_metaKleef_final_prev50%_v17.rds")
colnames(otu_taxa_meta_fam) <- gsub(" ","_",colnames(otu_taxa_meta_fam))
colnames(otu_taxa_meta_fam) <- gsub("-","_",colnames(otu_taxa_meta_fam))
colnames(otu_taxa_meta_fam) <- gsub("\\[","",colnames(otu_taxa_meta_fam))
colnames(otu_taxa_meta_fam) <- gsub("\\]","",colnames(otu_taxa_meta_fam))
otu_taxa_meta_fam_sub <- subset(otu_taxa_meta_fam, OnderzoeksID != "KSMB11003" & OnderzoeksID != "KSMB3" & OnderzoeksID != "KSMB3003" & OnderzoeksID != "KSMB23" & OnderzoeksID != "KSMB15001")
#### intestincal complaints vs genetics
chisq.test(otu_taxa_meta_fam_sub$Genetic_variant2,otu_taxa_meta_fam_sub$Intestinal_complaints)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: otu_taxa_meta_fam_sub$Genetic_variant2 and otu_taxa_meta_fam_sub$Intestinal_complaints
## X-squared = 0.0000000000000000000000000000015226, df = 1, p-value = 1
chisq.test(otu_taxa_meta_fam_sub$Genetic_variant3,otu_taxa_meta_fam_sub$Intestinal_complaints)
##
## Pearson's Chi-squared test
##
## data: otu_taxa_meta_fam_sub$Genetic_variant3 and otu_taxa_meta_fam_sub$Intestinal_complaints
## X-squared = 1.5932, df = 2, p-value = 0.4508
# Analysis using ART
results_buikklachten <- data.frame(matrix(nrow=138, ncol=9))
colnames(results_buikklachten) <- c("Genus","pval_Buikklachten", "pval_AgeMicrobiome","pval_BMI_cat","pval_GenderMale1Female2","pval_FamiliesGrouped","pval_Wonen","pval_interaction", "fval_Buikklachten")
set.seed(2)
for (i in 1:138){
results_buikklachten[i,1] <- colnames(otu_taxa_meta_fam_sub)[i]
otu_taxa_meta_fam_sub$var <- otu_taxa_meta_fam_sub[,i]
fit_art <- aligned.rank.transform(var~Intestinal_complaints+AgeMicrobiome+BMI_cat+GenderMale1Female2+FamiliesGrouped+Wonen1Gemeenschap2Thuis+Intestinal_complaints*Patient0orFamily1,data=otu_taxa_meta_fam_sub)
results_buikklachten[i,2] <- fit_art$significance[1,4]
results_buikklachten[i,3] <- fit_art$significance[2,4]
results_buikklachten[i,4] <- fit_art$significance[3,4]
results_buikklachten[i,5] <- fit_art$significance[4,4]
results_buikklachten[i,6] <- fit_art$significance[5,4]
results_buikklachten[i,7] <- fit_art$significance[6,4]
results_buikklachten[i,8] <- fit_art$significance[8,4]
results_buikklachten[i,9] <- fit_art$significance[8,3]
}
results_buikklachten$adjust_buikklachten <- p.adjust(results_buikklachten$pval_Buikklachten, method="fdr")
results_buikklachten$adjust_Age <- p.adjust(results_buikklachten$pval_AgeMicrobiome, method="fdr")
results_buikklachten$adjust_BMI_cat <- p.adjust(results_buikklachten$pval_BMI_cat, method="fdr")
results_buikklachten$adjust_Gender <- p.adjust(results_buikklachten$pval_GenderMale1Female2, method="fdr")
results_buikklachten$adjust_FamiliesGrouped <- p.adjust(results_buikklachten$pval_FamiliesGrouped, method="fdr")
results_buikklachten$adjust_Wonen <- p.adjust(results_buikklachten$pval_Wonen, method="fdr")
results_buikklachten$adjust_interaction <- p.adjust(results_buikklachten$pval_interaction, method="fdr")
datatable(results_buikklachten, rownames=FALSE, colnames=c("Genus", "P-value Buikklachten", "P-value Age", "P-value BMI", "P-value Gender", "P-value FamiliesGrouped", "P-value Wonen", "P-value Buikklachten*PatientFamily","F-value Buikklachten*PatientorFamily", "Adjust. P-value Buikklachten", "Adjust. P-value Age", "Adjust. P-value BMI", "Adjust. P-value Gender", "Adjust. P-value FamiliesGrouped", "Adjust. P-value Wonen","Adjust. P-value Buikklachten*PatientFamily"))
# number of FDR corrected results
bk_FDR_Genera <- results_buikklachten$Genus[which(results_buikklachten$adjust_interaction < 0.05)]
length(bk_FDR_Genera)
## [1] 1
# Mean difference for FDR corrected results; : patient = 0, family members = 1
otu_taxa_meta_fam_sub[,1:138] <- otu_taxa_meta_fam_sub[,1:138]*100
meandiffsd_bk <- aggregate(Bacteria__Firmicutes__Erysipelotrichia__Erysipelotrichales__Erysipelotrichaceae__Merdibacter~Patient0orFamily1+Intestinal_complaints, otu_taxa_meta_fam_sub, function(x) c(mean=mean(x), sd=sd(x)))
meandiffsd_bk
## Patient0orFamily1 Intestinal_complaints
## 1 0 No
## 2 1 No
## 3 0 Yes
## 4 1 Yes
## Bacteria__Firmicutes__Erysipelotrichia__Erysipelotrichales__Erysipelotrichaceae__Merdibacter.mean
## 1 0.048301464
## 2 0.005513095
## 3 0.001721823
## 4 0.013777342
## Bacteria__Firmicutes__Erysipelotrichia__Erysipelotrichales__Erysipelotrichaceae__Merdibacter.sd
## 1 0.091806208
## 2 0.009110992
## 3 0.002292279
## 4 0.022838511
# number of nominal results
bk_Genera <- results_buikklachten$Genus[which(results_buikklachten$pval_interaction < 0.05)]
length(bk_Genera)
## [1] 17
# plot all significant results
otu_taxa_meta_fam_sub$PatientorFamily <- factor(otu_taxa_meta_fam_sub$PatientorFamily, levels=c("Patient", "Family"))
otu_taxa_meta_fam_sub$Intestinal_complaints <- factor(otu_taxa_meta_fam_sub$Intestinal_complaints, levels=c("No", "Yes"))
otu_taxa_meta_fam_sub$pf_intestinal <- paste(otu_taxa_meta_fam_sub$PatientorFamily,"-",otu_taxa_meta_fam_sub$Intestinal_complaints,sep=" ")
otu_taxa_meta_fam_sub$pf_intestinal <- factor(otu_taxa_meta_fam_sub$pf_intestinal, levels=c("Patient - No","Family - No","Patient - Yes","Family - Yes"))
groupsize <- xtabs(~pf_intestinal, data=otu_taxa_meta_fam_sub)
my_comp <- list(c("Patient - No","Family - No"),c("Patient - Yes","Family - Yes"))
countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[1]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk1 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[1], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[1]))) + ylab("Relative abundance %") + xlab("Disease status - Intestinal complaints") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"),legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3])) + stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk1
countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[2]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk2 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[2], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[2]))) + ylab("Relative abundance %")+ xlab("Disease status - Intestinal complaints") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk2
countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[3]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[3], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[3]))) + ylab("Relative abundance %")+ xlab("Disease status - Intestinal complaints") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk
countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[4]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk4 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[4], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[4]))) + ylab("Relative abundance %")+ xlab("Disease status - Intestinal complaints") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk4
countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[5]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk5 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[5], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[5]))) + ylab("Relative abundance %")+ xlab("Disease status - Intestinal complaints") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk5
countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[6]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk6 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[6], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[6]))) + ylab("Relative abundance %") + xlab("Disease status - Intestinal complaints")+ theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk6
countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[7]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk7 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[7], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[7]))) + ylab("Relative abundance %")+ xlab("Disease status - Intestinal complaints") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk7
countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[8]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk8 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[8], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[8]))) + ylab("Relative abundance %")+ xlab("Disease status - Intestinal complaints") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk8
countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[9]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk9 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[9], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[9]))) + ylab("Relative abundance %") + xlab("Disease status - Intestinal complaints")+ theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk9
countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[10]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk10 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[10], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[10]))) + ylab("Relative abundance %") + xlab("Disease status - Intestinal complaints")+ theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk10
countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[11]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk11 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[11], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[11]))) + ylab("Relative abundance %") + xlab("Disease status - Intestinal complaints")+ theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk11
countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[12]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk12 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[12], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[12]))) + ylab("Relative abundance %") + xlab("Disease status - Intestinal complaints")+ theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk12
countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[13]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk13 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[13], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[13]))) + ylab("Relative abundance %") + xlab("Disease status - Intestinal complaints")+ theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk13
countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[14]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk14 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[14], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[14]))) + ylab("Relative abundance %") + xlab("Disease status - Intestinal complaints")+ theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk14
countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[15]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk15 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[15], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[15]))) + ylab("Relative abundance %")+ xlab("Disease status - Intestinal complaints") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk15
countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[16]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk16 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[16], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[16]))) + ylab("Relative abundance %")+ xlab("Disease status - Intestinal complaints") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk16
countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[17]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk17 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[17], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[17]))) + ylab("Relative abundance %")+ xlab("Disease status - Intestinal complaints") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk17
This section contains the analysis using the patients only dataset to test the difference in symptom severity and genetic variants.
Analysis was done to compare alpha diversity between symptom severity using the CBCL total problems score and ADOS comparative score. For CBCL and ADOS, this will be done using a correlation plot, the non-significant p-values are plotted within the plot.
### CBCL total
alpha_kleef <- readRDS("alpha50%_kleefstra_final_v17.rds")
alpha_kleef_cbcl <- subset(alpha_kleef, PatientorFamily == "Patient" & OnderzoeksID != "KSMB10")
shapiro.test(alpha_kleef_cbcl$CBCLtot5_TotaalTscore)
##
## Shapiro-Wilk normality test
##
## data: alpha_kleef_cbcl$CBCLtot5_TotaalTscore
## W = 0.96474, p-value = 0.5904
shapiro.test(alpha_kleef_cbcl$diversity_shannon)
##
## Shapiro-Wilk normality test
##
## data: alpha_kleef_cbcl$diversity_shannon
## W = 0.97241, p-value = 0.7662
#Correlation matrix with significance levels (p-value)
cor_Bacteria_p <- rcorr(alpha_kleef_cbcl$CBCLtot5_TotaalTscore,alpha_kleef_cbcl$diversity_shannon, type = "pearson")
r_Bacteria_cbcl <- cor_Bacteria_p$r
r_Bacteria_cbcl
## x y
## x 1.0000000 0.1137313
## y 0.1137313 1.0000000
p_Bacteria_cbcl <- cor_Bacteria_p$P
p_Bacteria_cbcl
## x y
## x NA 0.6142973
## y 0.6142973 NA
### ADOS comp
ADOS <- read_excel("ADOS.xlsx")
alpha_ados <- merge(alpha_kleef, ADOS, by.x="Onderzoeksnummer",by.y="OnderzoeksID")
shapiro.test(alpha_ados$ADOS_comp)
##
## Shapiro-Wilk normality test
##
## data: alpha_ados$ADOS_comp
## W = 0.8483, p-value = 0.003169
#Correlation matrix with significance levels (p-value)
cor_Bacteria_p <- rcorr(alpha_ados$ADOS_comp,alpha_ados$diversity_shannon, type = "spearman")
r_Bacteria_ados <- cor_Bacteria_p$r
r_Bacteria_ados
## x y
## x 1.00000000 0.01810729
## y 0.01810729 1.00000000
p_Bacteria_ados <- cor_Bacteria_p$P
p_Bacteria_ados
## x y
## x NA 0.9362537
## y 0.9362537 NA
#Correlation matrix with significance levels (p-value)
cor_Bacteria_p <- rcorr(alpha_ados$CBCLtot5_TotaalTscore, alpha_ados$ADOS_comp, type = "spearman")
r_Bacteria_cbcl_ados <- cor_Bacteria_p$r
r_Bacteria_cbcl_ados
## x y
## x 1.0000000 -0.1131639
## y -0.1131639 1.0000000
p_Bacteria_cbcl_ados <- cor_Bacteria_p$P
p_Bacteria_cbcl_ados
## x y
## x NA 0.6252655
## y 0.6252655 NA
Analysis was done to compare alpha diversity between genetics using two genetic variables, genetics2 (mutation or deletion) and genetics3 (mutation, deletion > 1mb and deletion < 1mb). The ANOVA test is used to test the difference in alpha diversity between the genetic groups. For each test, these variables were taken into account: Age at microbiome collection, BMI in categories (healthy, overweight, obese) and Gender.
meta_alpha_PD <- readRDS("alpha50%_kleefstra_final_v17.rds")
meta_alpha_PD <- subset(meta_alpha_PD, FamilyRelation == "patient" & OnderzoeksID != "KSMB23")
cbp_gen2 <- c("#D55E00","#56B4E9")
cbp_gen3 <- c("#882255","#D55E00","#56B4E9")
meta_alpha_PD$PatientorFamily <- factor(meta_alpha_PD$PatientorFamily, levels=c("Patient", "Family"))
meta_alpha_PD$Genetic_variant3 <- factor(meta_alpha_PD$Genetic_variant3, levels=c("Pathogenic mutation","Deletion < 1MB", "Deletion > 1MB"))
meta_alpha_PD$Genetic_variant2 <- factor(meta_alpha_PD$Genetic_variant2, levels=c("Mutation + deletion < 1MB","Deletion > 1MB"))
## ANOVA test Genetic_variant3
meta_alpha_PD$Genetic_variant3 <- factor(meta_alpha_PD$Genetic_variant3, levels=c("Pathogenic mutation","Deletion < 1MB", "Deletion > 1MB"))
shannon_aov_gen3 <- aov(diversity_shannon~Genetic_variant3+AgeMicrobiome+BMI_cat+GenderMale1Female2, meta_alpha_PD)
summary(shannon_aov_gen3)
## Df Sum Sq Mean Sq F value Pr(>F)
## Genetic_variant3 2 0.030 0.0150 0.049 0.952
## AgeMicrobiome 1 0.153 0.1529 0.501 0.490
## BMI_cat 2 0.704 0.3518 1.153 0.342
## GenderMale1Female2 1 0.046 0.0459 0.151 0.703
## Residuals 15 4.575 0.3050
pval_shannon_gen3 <- round(summary(shannon_aov_gen3)[[1]][1,5],digits=4)
groupsize <- xtabs(~Genetic_variant3, meta_alpha_PD)
my_comp <- list(c("Pathogenic mutation","Deletion < 1MB"),c("Deletion < 1MB","Deletion > 1MB"),c("Pathogenic mutation","Deletion > 1MB"))
# mean difference genetic variant 3
means <- aggregate(x=meta_alpha_PD$diversity_shannon, by=list(meta_alpha_PD$Genetic_variant3), FUN=mean)
means
## Group.1 x
## 1 Pathogenic mutation 3.408572
## 2 Deletion < 1MB 3.322191
## 3 Deletion > 1MB 3.387878
sd <- aggregate(x=meta_alpha_PD$diversity_shannon, by=list(meta_alpha_PD$Genetic_variant3), FUN=sd)
sd
## Group.1 x
## 1 Pathogenic mutation 0.4453270
## 2 Deletion < 1MB 0.6605720
## 3 Deletion > 1MB 0.6095338
# plot alpha diversity genetic variant 3
countnonzero <- aggregate(meta_alpha_PD$diversity_shannon, list(meta_alpha_PD$Genetic_variant3), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Pathogenic mutation"]],groupsize[["Deletion < 1MB"]],groupsize[["Deletion > 1MB"]])
boxplot_shannon_gen3 <- ggboxplot(meta_alpha_PD, x="Genetic_variant3", y="diversity_shannon", fill = "Genetic_variant3", palette = cbp_gen3, title="Shannon diversity index", ylab = FALSE)+ xlab("Genetic variants (3)") + theme(panel.border = element_rect(color = "black", size = 1, fill = NA), legend.position = "none")+ geom_text(aes(y=5, x=0.75, label=paste0("P-val = ",pval_shannon_gen3)), size=4)+ scale_x_discrete(labels=paste0(countnonzero[1:3,1]," ","n=", countnonzero[1:3,3]))+ stat_compare_means(comparisons=my_comp, method="t.test",label="p.signif",paired=F)
boxplot_shannon_gen3
### ANOVA for genetic variant 2: 1) mutation + del < 1mb and 2) del > 1mb
shannon_aov_gen2 <- aov(diversity_shannon~Genetic_variant2+AgeMicrobiome+BMI_cat+GenderMale1Female2, meta_alpha_PD)
summary(shannon_aov_gen2)
## Df Sum Sq Mean Sq F value Pr(>F)
## Genetic_variant2 1 0.000 0.00021 0.001 0.979
## AgeMicrobiome 1 0.127 0.12734 0.420 0.526
## BMI_cat 2 0.502 0.25110 0.829 0.455
## GenderMale1Female2 1 0.030 0.02991 0.099 0.757
## Residuals 16 4.848 0.30301
pval_shannon_gen2 <- round(summary(shannon_aov_gen2)[[1]][1,5],digits=4)
groupsize <- xtabs(~Genetic_variant2, meta_alpha_PD)
# Mean difference genetic variant 2
means <- aggregate(x=meta_alpha_PD$diversity_shannon, by=list(meta_alpha_PD$Genetic_variant2), FUN=mean)
means
## Group.1 x
## 1 Mutation + deletion < 1MB 3.379778
## 2 Deletion > 1MB 3.387878
meandiff <- means[1,2] - means[2,2]
meandiff
## [1] -0.008100053
sd <- aggregate(x=meta_alpha_PD$diversity_shannon, by=list(meta_alpha_PD$Genetic_variant2), FUN=sd)
sd
## Group.1 x
## 1 Mutation + deletion < 1MB 0.5083481
## 2 Deletion > 1MB 0.6095338
# plot alpha diversity genetic variant 2
countnonzero <- aggregate(meta_alpha_PD$diversity_shannon, list(meta_alpha_PD$Genetic_variant2), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Mutation + deletion < 1MB"]],groupsize[["Deletion > 1MB"]])
boxplot_shannon_gen2 <- ggboxplot(meta_alpha_PD, x="Genetic_variant2", y="diversity_shannon", fill = "Genetic_variant2", palette = cbp_gen2, title="Shannon diversity index", ylab = FALSE) + xlab("Genetic variants (2)")+
theme(panel.border = element_rect(color = "black", size = 1, fill = NA), legend.position = "none")+ geom_text(aes(y=5, x=0.75, label=paste0("P-val = ",pval_shannon_gen2)), size=4)+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ","n=", countnonzero[1:2,3]))
boxplot_shannon_gen2
Analysis was done to compare beta diversity between symptom severity using the CBCL total problems score and ADOS comparative score. The PERMANOVA is used to test the difference in beta diversity. These variables were taken into account: Age at microbiome collection, BMI in categories (healthy, overweight, obese) and Gender.
#### CBCL totaal
dataKleef_2 <- readRDS("dataKleef_qc_final_50%_v17.rds")
dataKleef_2_sub <- subset_samples(dataKleef_2, PatientorFamily == "Patient" & OnderzoeksID != "KSMB10")
dataKleef_2_prev_ra <- microbiome::transform(dataKleef_2_sub, "compositional")
# PERMANOVA
otu <- dataKleef_2_prev_ra@otu_table
meta <- data.frame(sample_data(dataKleef_2_sub))
set.seed(3246) #reproducible results
print(adonis2(t(otu) ~ CBCLtot5_TotaalTscore + AgeMicrobiome + BMI_cat + GenderMale1Female2, method="bray", na.action = na.pass, data=meta, permutations = 999))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = t(otu) ~ CBCLtot5_TotaalTscore + AgeMicrobiome + BMI_cat + GenderMale1Female2, data = meta, permutations = 999, method = "bray", na.action = na.pass)
## Df SumOfSqs R2 F Pr(>F)
## CBCLtot5_TotaalTscore 1 0.4953 0.07377 1.6155 0.047 *
## AgeMicrobiome 1 0.2722 0.04054 0.8878 0.567
## BMI_cat 2 0.6255 0.09315 1.0200 0.397
## GenderMale1Female2 1 0.4160 0.06196 1.3570 0.134
## Residual 16 4.9053 0.73058
## Total 21 6.7143 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# CAP analysis
weighted_cap <- ordinate(dataKleef_2_prev_ra, "CAP", "bray", formula= ~ dataKleef_2_prev_ra@sam_data$CBCLtot5_TotaalTscore, weighted=TRUE)
plot_weighted_cap <- plot_ordination(dataKleef_2_prev_ra, weighted_cap, color="CBCLtot5_TotaalTscore") + ggtitle("CAP - CBCL total problems score") + geom_point(size=2) + theme_classic()+ theme(legend.position="top") + scale_color_gradient(name="",low="gray80",high="black")
plot_weighted_cap
#### ADOS comp
dataKleef_2 <- readRDS("dataKleef_qc_final_50%_v17.rds")
dataKleef_2_sub <- subset_samples(dataKleef_2_sub, PatientorFamily == "Patient" & OnderzoeksID != "KSMB22")
dataKleef_2_prev_ra <- microbiome::transform(dataKleef_2_sub, "compositional")
meta <- data.frame(sample_data(dataKleef_2_prev_ra))
ADOS <- read_excel("ADOS.xlsx")
meta_ados <- merge(meta, ADOS,by.x="Onderzoeksnummer",by.y="OnderzoeksID")
sam_new <- sample_data(meta_ados)
sam_new$ADOS_comp <- as.numeric(sam_new$ADOS_comp)
dataKleef_2_prev_ra_ados <- merge_phyloseq(dataKleef_2_prev_ra, sam_new)
dataKleef_2_prev_ra_ados@sam_data$ADOS_comp <- sam_new[["ADOS_comp"]]
# PERMANOVA
otu <- dataKleef_2_prev_ra_ados@otu_table
meta_ados <- data.frame(sample_data(dataKleef_2_prev_ra_ados))
set.seed(38698) #reproducible results
print(adonis2(t(otu) ~ ADOS_comp + AgeMicrobiome + BMI_cat + GenderMale1Female2, method="bray", na.action = na.pass, data=meta_ados, permutations = 999))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = t(otu) ~ ADOS_comp + AgeMicrobiome + BMI_cat + GenderMale1Female2, data = meta_ados, permutations = 999, method = "bray", na.action = na.pass)
## Df SumOfSqs R2 F Pr(>F)
## ADOS_comp 1 0.2322 0.03575 0.6991 0.841
## AgeMicrobiome 1 0.2918 0.04494 0.8787 0.586
## BMI_cat 2 0.6041 0.09303 0.9095 0.614
## GenderMale1Female2 1 0.3844 0.05919 1.1574 0.261
## Residual 15 4.9815 0.76709
## Total 20 6.4940 1.00000
# CAP analysis
weighted_cap <- ordinate(dataKleef_2_prev_ra_ados, "CAP", "bray", formula= ~ dataKleef_2_prev_ra_ados@sam_data$ADOS_comp, weighted=TRUE)
plot_weighted_cap <- plot_ordination(dataKleef_2_prev_ra_ados, weighted_cap, color="ADOS_comp") + ggtitle("CAP - ADOS comparative score") + geom_point(size=2) + theme_classic() +theme(legend.position="top") + scale_color_gradient(name="",low="gray80",high="black")
plot_weighted_cap
Analysis was done to compare Beta diversity for genetics using the variables genetics2 (mutation + deletion < 1MB or deletion > 1MB) and genetics3 (mutation, deletion > 1mb or deletion < 1mb). The PERMANOVA is used to test the difference in beta diversity between the groups. For each test, these variables were taken into account: Age at microbiome collection, BMI in categories (healthy, overweight, obese) and Gender.
dataKleef_2 <- readRDS("dataKleef_qc_final_50%_v17.rds")
dataKleef_2_sub <- subset_samples(dataKleef_2, PatientorFamily == "Patient" & OnderzoeksID != "KSMB23")
dataKleef_2_prev_ra <- microbiome::transform(dataKleef_2_sub, "compositional")
### genetics3
# PERMANOVA
otu_gen3 <- dataKleef_2_prev_ra@otu_table
meta_gen3 <- data.frame(sample_data(dataKleef_2_sub))
set.seed(38698) #reproducible results
print(adonis2(t(otu_gen3) ~ Genetic_variant3 + Patient0orFamily1 + AgeMicrobiome + BMI_cat + GenderMale1Female2, method="bray", na.action = na.pass, data=meta_gen3, permutations = 999))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = t(otu_gen3) ~ Genetic_variant3 + Patient0orFamily1 + AgeMicrobiome + BMI_cat + GenderMale1Female2, data = meta_gen3, permutations = 999, method = "bray", na.action = na.pass)
## Df SumOfSqs R2 F Pr(>F)
## Genetic_variant3 2 0.5044 0.07591 0.8154 0.796
## AgeMicrobiome 1 0.4171 0.06277 1.3486 0.145
## BMI_cat 2 0.6653 0.10013 1.0755 0.332
## GenderMale1Female2 1 0.4185 0.06298 1.3531 0.138
## Residual 15 4.6390 0.69821
## Total 21 6.6441 1.00000
# CAP analysis
sample_data(dataKleef_2_prev_ra)$Genetic_variant3 <- factor(sample_data(dataKleef_2_prev_ra)$Genetic_variant3, levels=c("Pathogenic mutation","Deletion < 1MB", "Deletion > 1MB"))
weighted_cap_gen3 <- ordinate(dataKleef_2_prev_ra, "CAP", "bray", formula= ~ dataKleef_2_prev_ra@sam_data$Genetic_variant3, weighted=TRUE)
plot_weighted_cap_gen3 <- plot_ordination(dataKleef_2_prev_ra, weighted_cap_gen3, color="Genetic_variant3") + ggtitle("CAP - Genetic variant (3)") + geom_point(size=2)+ scale_color_manual(name="",values=cbp_gen3, aesthetics = "color") + theme_classic()+ theme(legend.position="top")
plot_weighted_cap_gen3
### genetics2
# PERMANOVA
otu_gen2 <- dataKleef_2_prev_ra@otu_table
meta_gen2 <- data.frame(sample_data(dataKleef_2_sub))
set.seed(38698) #reproducible results
print(adonis2(t(otu_gen2) ~ Genetic_variant2 + Patient0orFamily1 + AgeMicrobiome + BMI_cat + GenderMale1Female2, method="bray", na.action = na.pass, data=meta_gen2, permutations = 999))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = t(otu_gen2) ~ Genetic_variant2 + Patient0orFamily1 + AgeMicrobiome + BMI_cat + GenderMale1Female2, data = meta_gen2, permutations = 999, method = "bray", na.action = na.pass)
## Df SumOfSqs R2 F Pr(>F)
## Genetic_variant2 1 0.3189 0.04800 1.0430 0.349
## AgeMicrobiome 1 0.3972 0.05978 1.2990 0.164
## BMI_cat 2 0.6302 0.09486 1.0306 0.407
## GenderMale1Female2 1 0.4055 0.06104 1.3263 0.157
## Residual 16 4.8922 0.73633
## Total 21 6.6441 1.00000
# CAP analysis
sample_data(dataKleef_2_prev_ra)$Genetic_variant2 <- factor(sample_data(dataKleef_2_prev_ra)$Genetic_variant2, levels=c("Mutation + deletion < 1MB","Deletion > 1MB"))
weighted_cap_gen2 <- ordinate(dataKleef_2_prev_ra, "CAP", "bray", formula= ~ dataKleef_2_prev_ra@sam_data$Genetic_variant2, weighted=TRUE)
plot_weighted_cap_gen2 <- plot_ordination(dataKleef_2_prev_ra, weighted_cap_gen2, color="Genetic_variant2") + ggtitle("CAP - Genetic variant (2)") + geom_point(size=2)+ scale_color_manual(name="",values=cbp_gen2, aesthetics = "color") + theme_classic()+ theme(legend.position="top")
plot_weighted_cap_gen2
Analysis was done to test differences in relative abundance per genus across total CBCL scores. For each test, these variables were taken into account: Age at microbiome collection, BMI in categories (healthy, overweight, obese) and Gender. #(For analysis with CBCL and ADOS, familygrouped and wonen are not included in this analysis since it only contains patients which don’t live together and do not belong to the same family)
otu_taxa_meta_fam <- readRDS("otu_taxa_metaKleef_final_prev50%_v17.rds")
otu_taxa_meta_fam_pat_na <- subset(otu_taxa_meta_fam, PatientorFamily == "Patient" & OnderzoeksID != "KSMB10")
colnames(otu_taxa_meta_fam_pat_na) <- gsub(" ","_",colnames(otu_taxa_meta_fam_pat_na))
colnames(otu_taxa_meta_fam_pat_na) <- gsub("-","_",colnames(otu_taxa_meta_fam_pat_na))
colnames(otu_taxa_meta_fam_pat_na) <- gsub("\\[","",colnames(otu_taxa_meta_fam_pat_na))
colnames(otu_taxa_meta_fam_pat_na) <- gsub("\\]","",colnames(otu_taxa_meta_fam_pat_na))
######## t.test with intestinal complaints and genetics
otu_cbcl <- subset(otu_taxa_meta_fam_pat_na, Onderzoeksnummer != 23 & Onderzoeksnummer != 3)
otu_cbcl$Genetic_variant2 <- factor(otu_cbcl$Genetic_variant2, levels=c("Mutation + deletion < 1MB","Deletion > 1MB"))
otu_cbcl$Genetic_variant3 <- factor(otu_cbcl$Genetic_variant3, levels=c("Pathogenic mutation","Deletion < 1MB", "Deletion > 1MB"))
shapiro.test(otu_cbcl$CBCLtot5_TotaalTscore)
##
## Shapiro-Wilk normality test
##
## data: otu_cbcl$CBCLtot5_TotaalTscore
## W = 0.9523, p-value = 0.4034
ttest_intest <- t.test(CBCLtot5_TotaalTscore~Intestinal_complaints,data=otu_cbcl)
ttest_intest
##
## Welch Two Sample t-test
##
## data: CBCLtot5_TotaalTscore by Intestinal_complaints
## t = -3.1707, df = 6.3918, p-value = 0.01772
## alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
## 95 percent confidence interval:
## -22.004915 -2.995085
## sample estimates:
## mean in group No mean in group Yes
## 60.25 72.75
boxplot_CBCL_intest <- ggboxplot(otu_cbcl, x="Intestinal_complaints", y="CBCLtot5_TotaalTscore", fill="Intestinal_complaints", title="CBCL total problems score vs Intestinal complaints", ylab = "CBCL total problems score") + xlab("Intestinal complaints") + theme(legend.position="none",panel.border = element_rect(color = "black", size = 1, fill = NA))
boxplot_CBCL_intest
ttest_gen2 <- t.test(CBCLtot5_TotaalTscore~Genetic_variant2,data=otu_cbcl)
ttest_gen2
##
## Welch Two Sample t-test
##
## data: CBCLtot5_TotaalTscore by Genetic_variant2
## t = 0.18064, df = 2.1827, p-value = 0.8721
## alternative hypothesis: true difference in means between group Mutation + deletion < 1MB and group Deletion > 1MB is not equal to 0
## 95 percent confidence interval:
## -39.13110 42.85659
## sample estimates:
## mean in group Mutation + deletion < 1MB mean in group Deletion > 1MB
## 70.52941 68.66667
boxplot_CBCL_gen2 <- ggboxplot(otu_cbcl, x="Genetic_variant2", y="CBCLtot5_TotaalTscore", fill="Genetic_variant2", title="CBCL total problems score vs Genetic variants (2)", ylab = "CBCL total problems score", xlab="Genetic variants (2)") +scale_color_manual(values = cbp_gen2, aesthetics = "fill")+ theme(legend.position="none",panel.border = element_rect(color = "black", size = 1, fill = NA))
boxplot_CBCL_gen2
ttest_gen3 <- aov(CBCLtot5_TotaalTscore~Genetic_variant3,data=otu_cbcl)
summary(ttest_gen3)
## Df Sum Sq Mean Sq F value Pr(>F)
## Genetic_variant3 2 24.6 12.31 0.114 0.893
## Residuals 17 1835.1 107.95
boxplot_CBCL_intest <- ggboxplot(otu_cbcl, x="Genetic_variant3", y="CBCLtot5_TotaalTscore", fill="Genetic_variant3", title="CBCL total problems score vs Genetic variants (3)", ylab = "CBCL total problems score", xlab="Genetic variants (3)") +scale_color_manual(values = cbp_gen3, aesthetics = "fill")+ theme(legend.position="none",panel.border = element_rect(color = "black", size = 1, fill = NA))
boxplot_CBCL_intest
# Analysis with ART
results_CBCL_total <- data.frame(matrix(nrow=138, ncol=6))
colnames(results_CBCL_total) <- c("Genus","pval_CBCL_total","fval_CBCL_total", "pval_AgeMicrobiome","pval_BMI_cat","pval_GenderMale1Female2")
set.seed(2)
for (i in 1:138){
results_CBCL_total[i,1] <- colnames(otu_taxa_meta_fam_pat_na)[i]
otu_taxa_meta_fam_pat_na$var <- otu_taxa_meta_fam_pat_na[,i]
fit_art <- aligned.rank.transform(var~CBCLtot5_TotaalTscore+AgeMicrobiome+BMI_cat+GenderMale1Female2,data=otu_taxa_meta_fam_pat_na)
results_CBCL_total[i,2] <- fit_art$significance[1,4]
results_CBCL_total[i,3] <- fit_art$significance[1,3]
results_CBCL_total[i,4] <- fit_art$significance[2,4]
results_CBCL_total[i,5] <- fit_art$significance[3,4]
results_CBCL_total[i,6] <- fit_art$significance[4,4]
}
results_CBCL_total$adjust_CBCL_total <- p.adjust(results_CBCL_total$pval_CBCL_total, method="fdr")
results_CBCL_total$adjust_Age <- p.adjust(results_CBCL_total$pval_AgeMicrobiome, method="fdr")
results_CBCL_total$adjust_BMI_cat <- p.adjust(results_CBCL_total$pval_BMI_cat, method="fdr")
results_CBCL_total$adjust_Gender <- p.adjust(results_CBCL_total$pval_GenderMale1Female2, method="fdr")
datatable(results_CBCL_total, rownames=FALSE, colnames=c("Genus", "P-value CBCL total", "F-value CBCL total", "P-value Age", "P-value BMI", "P-value Gender", "Adjust. P-value CBCL total", "Adjust. P-value Age", "Adjust. P-value BMI", "Adjust. P-value Gender"))
# number of FDR corrected results
CBCL_FDR_Genera <- results_CBCL_total$Genus[which(results_CBCL_total$adjust_CBCL_total < 0.05)]
length(CBCL_FDR_Genera)
## [1] 2
# number of nominal significant results
CBCLGenera <- results_CBCL_total$Genus[which(results_CBCL_total$pval_CBCL_total < 0.05)]
length(CBCLGenera)
## [1] 9
# Plot all significant results
otu_taxa_meta_fam_pat_na[,1:138] <- otu_taxa_meta_fam_pat_na[,1:138]*100
plotCBCL1 <- ggplot(otu_taxa_meta_fam_pat_na, aes(x=CBCLtot5_TotaalTscore, y=otu_taxa_meta_fam_pat_na[,CBCLGenera[1]])) + geom_point() + geom_smooth(method=lm, se=F) + labs(y="Relative abundance %",x="CBCL total problems score") + theme_classic()+ggtitle(gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",CBCLGenera[1])))+ theme(plot.title = element_text( size=14, face="bold.italic"))
plotCBCL1
plotCBCL2 <- ggplot(otu_taxa_meta_fam_pat_na, aes(x=CBCLtot5_TotaalTscore, y=otu_taxa_meta_fam_pat_na[,CBCLGenera[2]])) + geom_point() + geom_smooth(method=lm, se=F) + labs(y="Relative abundance %",x="CBCL total problems score") + theme_classic()+ggtitle(gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",CBCLGenera[2])))+ theme(plot.title = element_text( size=14, face="bold.italic"))
plotCBCL2
plotCBCL3 <- ggplot(otu_taxa_meta_fam_pat_na, aes(x=CBCLtot5_TotaalTscore, y=otu_taxa_meta_fam_pat_na[,CBCLGenera[3]])) + geom_point() + geom_smooth(method=lm, se=F) + labs(y="Relative abundance %",x="CBCL total problems score") +theme_classic()+ggtitle(gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",CBCLGenera[3])))+ theme(plot.title = element_text( size=14, face="bold.italic"))
plotCBCL3
plotCBCL4 <- ggplot(otu_taxa_meta_fam_pat_na, aes(x=CBCLtot5_TotaalTscore, y=otu_taxa_meta_fam_pat_na[,CBCLGenera[4]])) + geom_point() + geom_smooth(method=lm, se=F) + labs(y="Relative abundance %",x="CBCL total problems score") +ggtitle(gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",CBCLGenera[4])))+ theme_classic()+ theme(plot.title = element_text( size=14, face="bold.italic"))
plotCBCL4
plotCBCL5 <- ggplot(otu_taxa_meta_fam_pat_na, aes(x=CBCLtot5_TotaalTscore, y=otu_taxa_meta_fam_pat_na[,CBCLGenera[5]])) + geom_point() + geom_smooth(method=lm, se=F) + labs(y="Relative abundance %",x="CBCL total problems score") +ggtitle(gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",CBCLGenera[5])))+ theme_classic()+ theme(plot.title = element_text( size=14, face="bold.italic"))
plotCBCL5
plotCBCL6 <- ggplot(otu_taxa_meta_fam_pat_na, aes(x=CBCLtot5_TotaalTscore, y=otu_taxa_meta_fam_pat_na[,CBCLGenera[6]])) + geom_point() + geom_smooth(method=lm, se=F) + labs(y="Relative abundance %",x="CBCL total problems score") +ggtitle(gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",CBCLGenera[6])))+ theme_classic()+ theme(plot.title = element_text( size=14, face="bold.italic"))
plotCBCL6
plotCBCL7 <- ggplot(otu_taxa_meta_fam_pat_na, aes(x=CBCLtot5_TotaalTscore, y=otu_taxa_meta_fam_pat_na[,CBCLGenera[7]])) + geom_point() + geom_smooth(method=lm, se=F) + labs(y="Relative abundance %",x="CBCL total problems score") +ggtitle(gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",CBCLGenera[7])))+ theme_classic()+ theme(plot.title = element_text( size=14, face="bold.italic"))
plotCBCL7
plotCBCL8 <- ggplot(otu_taxa_meta_fam_pat_na, aes(x=CBCLtot5_TotaalTscore, y=otu_taxa_meta_fam_pat_na[,CBCLGenera[8]])) + geom_point() + geom_smooth(method=lm, se=F) + labs(y="Relative abundance %",x="CBCL total problems score") +ggtitle(gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",CBCLGenera[8])))+ theme_classic()+ theme(plot.title = element_text( size=14, face="bold.italic"))
plotCBCL8
plotCBCL9 <- ggplot(otu_taxa_meta_fam_pat_na, aes(x=CBCLtot5_TotaalTscore, y=otu_taxa_meta_fam_pat_na[,CBCLGenera[9]])) + geom_point() + geom_smooth(method=lm, se=F) + labs(y="Relative abundance %",x="CBCL total problems score") +ggtitle(gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",CBCLGenera[9])))+ theme_classic()+ theme(plot.title = element_text( size=14, face="bold.italic"))
plotCBCL9
Analysis was done to compare relative abundance per genus between patients or family members. For each test, these variables were taken into account: Age at microbiome collection, Age squared (to explain linear and non-linear effects of age), BMI in categories (healthy, overweight, obese) and Gender. #(For analysis with CBCL and ADOS, familygrouped and wonen are not included in this analysis since it only contains patients which don’t live together and do not belong to the same family)
otu_taxa_meta_fam <- readRDS("otu_taxa_metaKleef_final_prev50%_v17.rds")
ADOS <- read_excel("ADOS.xlsx")
otu_taxa_meta_fam_ados <- merge(otu_taxa_meta_fam, ADOS, by.x="Onderzoeksnummer",by.y="OnderzoeksID")
otu_taxa_meta_fam_ados <- subset(otu_taxa_meta_fam_ados, OnderzoeksID != "KSMB22")
#otu_taxa_meta_fam_ados$Age2 <- (otu_taxa_meta_fam_ados$AgeMicrobiome)^2
colnames(otu_taxa_meta_fam_ados) <- gsub(" ","_",colnames(otu_taxa_meta_fam_ados))
colnames(otu_taxa_meta_fam_ados) <- gsub("-","_",colnames(otu_taxa_meta_fam_ados))
colnames(otu_taxa_meta_fam_ados) <- gsub("\\[","",colnames(otu_taxa_meta_fam_ados))
colnames(otu_taxa_meta_fam_ados) <- gsub("\\]","",colnames(otu_taxa_meta_fam_ados))
################# non-parametric test with intestinal complaints and genetics
otu_ados <- subset(otu_taxa_meta_fam_ados, Onderzoeksnummer != 23)
otu_ados$Genetic_variant3 <- factor(otu_ados$Genetic_variant3, levels=c("Pathogenic mutation","Deletion < 1MB", "Deletion > 1MB"))
otu_ados$Genetic_variant2 <- factor(otu_ados$Genetic_variant2, levels=c("Mutation + deletion < 1MB","Deletion > 1MB"))
shapiro.test(otu_ados$ADOS_comp)
##
## Shapiro-Wilk normality test
##
## data: otu_ados$ADOS_comp
## W = 0.85646, p-value = 0.005499
ados_int <- subset(otu_ados, Onderzoeksnummer != 3)
ttest_intest <- wilcox.test(ADOS_comp~Intestinal_complaints,data=ados_int)
ttest_intest
##
## Wilcoxon rank sum test with continuity correction
##
## data: ADOS_comp by Intestinal_complaints
## W = 28, p-value = 0.7322
## alternative hypothesis: true location shift is not equal to 0
boxplot_ados_intest <- ggboxplot(ados_int, x="Intestinal_complaints", y="ADOS_comp", fill="Intestinal_complaints", title="ADOS comparative score vs Intestinal complaints", ylab = "ADOS comparative score", xlab="Intestinal complaints") + theme(legend.position="none",panel.border = element_rect(color = "black", size = 1, fill = NA))
boxplot_ados_intest
ttest_gen2 <- wilcox.test(ADOS_comp~Genetic_variant2,data=otu_ados)
ttest_gen2
##
## Wilcoxon rank sum test with continuity correction
##
## data: ADOS_comp by Genetic_variant2
## W = 38.5, p-value = 0.7122
## alternative hypothesis: true location shift is not equal to 0
boxplot_ados_gen2 <- ggboxplot(otu_ados, x="Genetic_variant2", y="ADOS_comp", fill="Genetic_variant2", title="ADOS comparative score vs Genetic variants (2)", ylab = "ADOS comparative score", xlab="Genetic variants (2)") +scale_color_manual(values = cbp_gen2, aesthetics = "fill")+ theme(legend.position="none",panel.border = element_rect(color = "black", size = 1, fill = NA))
boxplot_ados_gen2
ttest_gen3 <- kruskal.test(ADOS_comp~Genetic_variant3,data=otu_ados)
ttest_gen3
##
## Kruskal-Wallis rank sum test
##
## data: ADOS_comp by Genetic_variant3
## Kruskal-Wallis chi-squared = 5.0016, df = 2, p-value = 0.08202
boxplot_ados_gen3 <- ggboxplot(otu_ados, x="Genetic_variant3", y="ADOS_comp", fill="Genetic_variant3", title="ADOS comparative score vs Genetic variants (3)", ylab = "ADOS comparative score") +scale_color_manual(values = cbp_gen3, aesthetics = "fill")+ theme(legend.position="none",panel.border = element_rect(color = "black", size = 1, fill = NA))
boxplot_ados_gen3
# Analysis with ART
results_ADOS_comp <- data.frame(matrix(nrow=139, ncol=6))
colnames(results_ADOS_comp) <- c("Genus","pval_ADOS_comp", "fval_ADOS_comp", "pval_AgeMicrobiome","pval_BMI_cat","pval_GenderMale1Female2")
set.seed(2)
for (i in 2:139){
results_ADOS_comp[i,1] <- colnames(otu_taxa_meta_fam_ados)[i]
otu_taxa_meta_fam_ados$var <- otu_taxa_meta_fam_ados[,i]
fit_art <- aligned.rank.transform(var~ADOS_comp+AgeMicrobiome+BMI_cat+GenderMale1Female2,data=otu_taxa_meta_fam_ados)
results_ADOS_comp[i,2] <- fit_art$significance[1,4]
results_ADOS_comp[i,3] <- fit_art$significance[1,3]
results_ADOS_comp[i,4] <- fit_art$significance[2,4]
results_ADOS_comp[i,5] <- fit_art$significance[3,4]
results_ADOS_comp[i,6] <- fit_art$significance[4,4]
}
results_ADOS_comp$adjust_ADOS_comp <- p.adjust(results_ADOS_comp$pval_ADOS_comp, method="fdr")
results_ADOS_comp$adjust_Age <- p.adjust(results_ADOS_comp$pval_AgeMicrobiome, method="fdr")
results_ADOS_comp$adjust_BMI_cat <- p.adjust(results_ADOS_comp$pval_BMI_cat, method="fdr")
results_ADOS_comp$adjust_Gender <- p.adjust(results_ADOS_comp$pval_GenderMale1Female2, method="fdr")
datatable(results_ADOS_comp, rownames=FALSE, colnames=c("Genus", "P-value ADOS comp", "F-value ADOS comp", "P-value Age", "P-value BMI", "P-value Gender", "Adjust. P-value ADOS comp", "Adjust. P-value Age", "Adjust. P-value BMI", "Adjust. P-value Gender"))
# Plot nominal significant taxa
ADOSGenera <- results_ADOS_comp$Genus[which(results_ADOS_comp$pval_ADOS_comp < 0.05)]
length(ADOSGenera)
## [1] 1
otu_taxa_meta_fam_ados[,2:139] <- otu_taxa_meta_fam_ados[,2:139]*100
plotADOS1 <- ggplot(otu_taxa_meta_fam_ados, aes(x=ADOS_comp, y=otu_taxa_meta_fam_ados[,ADOSGenera[1]])) + geom_point() + geom_smooth(method=lm, se=F) + labs(y="Relative abundance %",x="ADOS comparative score") + theme_classic()+ggtitle(gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",ADOSGenera[1])))+ theme(plot.title = element_text( size=14, face="bold.italic"))
plotADOS1