1 Analysis plan for Kleefstra data

The Kleefstra dataset contains of four groups namely patients (n=22), fathers (n=13), mothers (n=16) and siblings (n=9). The age of the patient group ranges from 4 to 39 years old. Due to this reason, BMI will be made into a categorial variable since BMI for kids is differently scored.

Analysis plan: Alpha and beta with no prevalence check corrected by age, gender, BMI, FamilyID and living conditions for Patients vs Family and intestinal complaints (with the whole dataset). For the patient datasets, doing alpha and beta for CBCL total score, ADOS comparative score and genetics corrected by age, gender and BMI.

For BMI, 7 samples had missing values so the data was imputed with the median. This included four siblings (median=Healthy), one father (median=Healthy) and two patients (median=Overweight).

Taxonomy on genus level with a prevalence check of 50% (looking only at the core taxa) looking for the difference between patients vs family and intestinal complaints corrected for age, gender, BMI, FamilyID and living conditions. For the patient datasets, looking for differences in taxonomy with CBCL total score, ADOS comparative score and genetics corrected for age, gender and BMI.

2 Loading libraries

library(microbiome) # data analysis and visualisation
library(phyloseq) # also the basis of data object. Data analysis and visualisation
#library(microbiomeutilities) # some utility tools
library(ggpubr) # publication quality figures, based on ggplot2
library(DT) # interactive tables in html and markdown
library(data.table) # alternative to data.frame
library(dplyr) # data handling
library(ape) # reading tree
library(tibble) # rownames_to_column
library(picante) # phylogenetic distance
library(vegan) # beta diversity
library(lme4) # mixed models
library(ggplot2) # for nice plots
library(tidyr) # used for unite function
library(ART) # for RAOV test 
library(readxl)
library(corrplot)
library(Hmisc)
library(tidyverse)

3 Read data

The biom, meta and tree file are loaded into a phyloseq object. After that, information about the object is given like dimensions of otu table, sample table, tax table and tree. when summarizing the data, you get information about the number of reads (min, max, mean, median), number of samples and the variables available in the meta data.

# metadata.file should be a comma separated text file
dataKleef <- read_phyloseq(otu.file = "kleefstra-feature-table-taxonomy-filtered240-260.biom", 
                      taxonomy.file = NULL, 
                      metadata.file = "meta_kleefstra_microbiomeR_v5_withinpvalues_geneticsadjusted.txt", 
                      type = "biom")

## Time to complete depends on OTU file size

treefile_kleefstra <- read.tree("kleefstra_rootedtree_filtered240-260.nwk")

#Merge into phyloseq object
ps_kleefstra <- merge_phyloseq(dataKleef, treefile_kleefstra)
saveRDS(ps_kleefstra, "ps_kleefstra_final_prev50_v17.rds")
print(ps_kleefstra)

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 6907 taxa and 63 samples ]
## sample_data() Sample Data:       [ 63 samples by 43 sample variables ]
## tax_table()   Taxonomy Table:    [ 6907 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 6907 tips and 6876 internal nodes ]

summarize_phyloseq(ps_kleefstra)

## Compositional = NO2

## 1] Min. number of reads = 4891672] Max. number of reads = 8969523] Total number of reads = 404174614] Average number of reads = 6415475] Median number of reads = 6137537] Sparsity = 0.9303283303572866] Any OTU sum to 1 or less? YES8] Number of singletons = 89] Percent of OTUs that are singletons 
##         (i.e. exactly one read detected across all samples)0.11582452584334710] Number of sample variables are: 43Baseclear_Sample_nrOnderzoeksIDMedication1yes2noGenetic_variant3Genetic_variantGenetic_variant2Intestinal_complaints3Intestinal_complaints7Intestinal_complaintsBuikklachtenWonen1Gemeenschap2ThuisOnderzoeksnummerFamilyRelationFamiliesGroupedFamilyRelation_codePatient0orFamily1Patient0Parents1Siblings3Mozaiek1Niet0AgeMicrobiomeAgeCBCLGenderMale1Female2WeightBMIBMI_catCareOutside1CareFamily2DNANotesCBCLtot5_TotaalTscoreCBCLtot5_InternaliserenTscoreCBCLtot5_ExternaliserenTscoreCBCLtot5_EmotioneelreagerendTscoreCBCLtot5_AngstigDepressiefTscoreCBCLtot5_LichamelijkeklachtenTscoreCBCLtot5_TeruggetrokkenTscoreCBCLtot5_SlaapproblemenTscoreCBCLtot5_AandachtsproblemenTscoreCBCLtot5_AgressiefgedragTscoreCBCLtot5_AffectieveproblemenTscoreCBCLtot5_AngstproblemenTscoreCBCLtot5_PervasieveontwikkelingproblemenTscoreCBCLtot5_ADHDproblemenCBCLtot5_OppOpstproblemenCBCLtot5_Overig2

## [[1]]
## [1] "1] Min. number of reads = 489167"
## 
## [[2]]
## [1] "2] Max. number of reads = 896952"
## 
## [[3]]
## [1] "3] Total number of reads = 40417461"
## 
## [[4]]
## [1] "4] Average number of reads = 641547"
## 
## [[5]]
## [1] "5] Median number of reads = 613753"
## 
## [[6]]
## [1] "7] Sparsity = 0.930328330357286"
## 
## [[7]]
## [1] "6] Any OTU sum to 1 or less? YES"
## 
## [[8]]
## [1] "8] Number of singletons = 8"
## 
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n        (i.e. exactly one read detected across all samples)0.115824525843347"
## 
## [[10]]
## [1] "10] Number of sample variables are: 43"
## 
## [[11]]
##  [1] "Baseclear_Sample_nr"                           
##  [2] "OnderzoeksID"                                  
##  [3] "Medication1yes2no"                             
##  [4] "Genetic_variant3"                              
##  [5] "Genetic_variant"                               
##  [6] "Genetic_variant2"                              
##  [7] "Intestinal_complaints3"                        
##  [8] "Intestinal_complaints7"                        
##  [9] "Intestinal_complaints"                         
## [10] "Buikklachten"                                  
## [11] "Wonen1Gemeenschap2Thuis"                       
## [12] "Onderzoeksnummer"                              
## [13] "FamilyRelation"                                
## [14] "FamiliesGrouped"                               
## [15] "FamilyRelation_code"                           
## [16] "Patient0orFamily1"                             
## [17] "Patient0Parents1Siblings3"                     
## [18] "Mozaiek1Niet0"                                 
## [19] "AgeMicrobiome"                                 
## [20] "AgeCBCL"                                       
## [21] "GenderMale1Female2"                            
## [22] "Weight"                                        
## [23] "BMI"                                           
## [24] "BMI_cat"                                       
## [25] "CareOutside1CareFamily2"                       
## [26] "DNA"                                           
## [27] "Notes"                                         
## [28] "CBCLtot5_TotaalTscore"                         
## [29] "CBCLtot5_InternaliserenTscore"                 
## [30] "CBCLtot5_ExternaliserenTscore"                 
## [31] "CBCLtot5_EmotioneelreagerendTscore"            
## [32] "CBCLtot5_AngstigDepressiefTscore"              
## [33] "CBCLtot5_LichamelijkeklachtenTscore"           
## [34] "CBCLtot5_TeruggetrokkenTscore"                 
## [35] "CBCLtot5_SlaapproblemenTscore"                 
## [36] "CBCLtot5_AandachtsproblemenTscore"             
## [37] "CBCLtot5_AgressiefgedragTscore"                
## [38] "CBCLtot5_AffectieveproblemenTscore"            
## [39] "CBCLtot5_AngstproblemenTscore"                 
## [40] "CBCLtot5_PervasieveontwikkelingproblemenTscore"
## [41] "CBCLtot5_ADHDproblemen"                        
## [42] "CBCLtot5_OppOpstproblemen"                     
## [43] "CBCLtot5_Overig"

4 QC

The quality check for both number of reads and samples. First, samples that are not bacteria will be removed (Archaea, Eukaryota and Unassigned). Then, the number of reads per sample and per OTU will be checked. This could be a reason to remove a sample or OTU later on.

dataKleef<- readRDS("ps_kleefstra_final_prev50_v17.rds")

 #Filter out only the non bacteria: Remove archea, eukaryotes, not-assigned bacteria 
table(tax_table(dataKleef)[, "Domain"], exclude = NULL)

## 
##   D_0__Archaea  D_0__Bacteria D_0__Eukaryota     Unassigned 
##              8           6771              1            127

dataKleef_2 <- subset_taxa(dataKleef, !is.na(Domain) & !Domain %in% c("", "Unassigned") & !Domain %in% c("", "D_0__Archaea") & !Domain %in% c("", "D_0__Eukaryota")) # filter out unclassified
table(tax_table(dataKleef_2)[, "Domain"])

## 
## D_0__Bacteria 
##          6771

sample_data(dataKleef_2)$PatientorFamily <- sample_data(dataKleef_2)$Patient0orFamily1
sample_data(dataKleef_2)$PatientorFamily <- gsub("0", "Patient", sample_data(dataKleef_2)$PatientorFamily)
sample_data(dataKleef_2)$PatientorFamily <- gsub("1", "Family", sample_data(dataKleef_2)$PatientorFamily)

# Check number of reads per sample
total_reads_per_sample <- data.frame(sort(sample_sums(dataKleef_2)))
total_reads_per_sample <- rownames_to_column(total_reads_per_sample)
plot(sort(sample_sums(dataKleef_2)), ylab="Number of reads per sample")

options(scipen=999)
hist(total_reads_per_sample$sort.sample_sums.dataKleef_2.., xlab="Read counts per sample", ylab="Number of samples", main="Number of reads per sample", ylim=c(0,20), labels=TRUE)

asv_df <- otu_table(dataKleef_2)
otus <- data.frame(colSums(asv_df != 0))
otus <- rownames_to_column(otus)

data_per_sample <- merge(total_reads_per_sample, otus, by="rowname")

datatable(data_per_sample, rownames=FALSE, colnames=c("Sample ID", "Total number of reads", "Total number of OTUs"))

# Check number of reads per OTU
reads_per_otu <- data.frame(taxa_sums(dataKleef_2))
reads_per_otu <- rownames_to_column(reads_per_otu)
datatable(reads_per_otu, rownames=FALSE, colnames=c("OTU ID", "Total number of reads"))

otu_tab <- t(abundances(dataKleef_2))
vegan::rarecurve(otu_tab, step = 100, label = TRUE, col = "blue", cex = 0.5, ylab = "OTU richness", xlab = "Number of reads")

plot_richness(dataKleef_2, measures = "Observed")

plot_richness(dataKleef_2, measures = "Shannon")

plot_taxa_prevalence(dataKleef_2,  "Phylum", detection=1) + theme(legend.position = "none")

tax_table(dataKleef_2)[, colnames(tax_table(dataKleef_2))] <- gsub(tax_table(dataKleef_2)[, colnames(tax_table(dataKleef_2))],pattern = "[A-Z]_[0-9]__", replacement = "")

print(dataKleef_2)

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 6771 taxa and 63 samples ]
## sample_data() Sample Data:       [ 63 samples by 44 sample variables ]
## tax_table()   Taxonomy Table:    [ 6771 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 6771 tips and 6740 internal nodes ]

saveRDS(dataKleef_2, "dataKleef_qc_final_50%_v17.rds")

5 Whole dataset

This section contains the analysis using the whole dataset to test the difference between patients and family members and look at intestinal complaints.

5.1 Alpha diversity patients vs family members

Analysis was done to compare alpha diversity between patients or family members.The ANOVA test is used to test the difference in alpha diversity between the groups. For each test, these variables were taken into account: Age at microbiome collection, BMI in categories (healthy, overweight, obese), Gender, Wonen1gemeenschap2Thuis (living conditions), and for strata a family ID number was used to correct for family effects.

dataKleef_2 <- readRDS("dataKleef_qc_final_50%_v17.rds")


mapping_file <- meta(dataKleef_2) # get the metadata as a separate object
mapping_file <- rownames_to_column(mapping_file)
alpha_div <- alpha(dataKleef_2) #calculate alpha

## Observed richness

## Other forms of richness

## Diversity

## Evenness

## Dominance

## Rarity

alpha_div <- select(alpha_div, "observed", "diversity_inverse_simpson", "diversity_shannon")
alpha_div <- rownames_to_column(alpha_div)

dataKleef_2_otu_table <- as.data.frame(dataKleef_2@otu_table) # calculate PD
dataKleef_2_tree <- dataKleef_2@phy_tree
dataKleef_2_tree #check if the tree is rooted

## 
## Phylogenetic tree with 6771 tips and 6740 internal nodes.
## 
## Tip labels:
##   f1cd3b78f762cadcffc40bd0e670e0ef, 231d38920cec7821896f473d317012e8, b4a0981afbca77cd1546eb4ba7fef279, b9a412890f37c6d852f3593f99ad3040, 318bebabf38f50ca26f13634e2f997a3, 5d4cfaec2202e0e02881d0309b79ad78, ...
## Node labels:
##   root, 0.932, 0.727, 0.738, 0.604, 0.533, ...
## 
## Rooted; includes branch lengths.

df_pd <- pd(t(dataKleef_2_otu_table), dataKleef_2_tree, include.root = T) #t(otu_table) transposes the otu table for use in picante
df_pd <- rownames_to_column(df_pd)
alpha_PD <- merge(alpha_div,df_pd, by = "rowname")

meta_alpha_PD <- merge(alpha_PD,mapping_file, by = "rowname") # merge it with metadata
saveRDS(meta_alpha_PD, "alpha50%_kleefstra_final_v17.rds")



#Alpha plots - Patients vs Family
cbp1_familyorpatient <- c("#009E73", "#E69F00")

shannon_aov <- aov(diversity_shannon~Patient0orFamily1+AgeMicrobiome+BMI_cat+GenderMale1Female2+FamiliesGrouped+Wonen1Gemeenschap2Thuis, meta_alpha_PD)
summary(shannon_aov)

##                         Df Sum Sq Mean Sq F value Pr(>F)  
## Patient0orFamily1        1  2.025  2.0252   6.495 0.0136 *
## AgeMicrobiome            1  0.907  0.9072   2.910 0.0937 .
## BMI_cat                  2  0.441  0.2206   0.708 0.4973  
## GenderMale1Female2       1  0.064  0.0640   0.205 0.6524  
## FamiliesGrouped          1  0.010  0.0097   0.031 0.8609  
## Wonen1Gemeenschap2Thuis  1  0.021  0.0210   0.067 0.7960  
## Residuals               55 17.149  0.3118                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

pval_shannon <- round(summary(shannon_aov)[[1]][1,5],digits=4)
meta_alpha_PD$PatientorFamily <- factor(meta_alpha_PD$PatientorFamily, levels=c("Patient", "Family"))
groupsize <- xtabs(~PatientorFamily, data=meta_alpha_PD)

# Mean difference: patient = 0, family members = 1
means <- aggregate(x=meta_alpha_PD$diversity_shannon, by=list(meta_alpha_PD$Patient0orFamily1), FUN=mean)
means

##   Group.1        x
## 1       0 3.375495
## 2       1 3.747893

meandiff <- means[1,2] - means[2,2]
meandiff

## [1] -0.3723975

sd <- aggregate(x=meta_alpha_PD$diversity_shannon, by=list(meta_alpha_PD$Patient0orFamily1), FUN=sd)
sd

##   Group.1         x
## 1       0 0.5011203
## 2       1 0.5788336

# plot alpha
countnonzero <- aggregate(meta_alpha_PD$diversity_shannon, list(meta_alpha_PD$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplot_shannon <- ggboxplot(meta_alpha_PD, x="PatientorFamily", y="diversity_shannon", fill = "PatientorFamily", palette = cbp1_familyorpatient, title="Shannon diversity index",  xlab= FALSE, ylab = FALSE) + theme(panel.border = element_rect(color = "black", size = 1, fill = NA),legend.position = "none")+ geom_text(aes(y=5, x=1.5, label=paste0("P-val = ",pval_shannon)), size=4) + scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ","n=", countnonzero[1:2,3]))
boxplot_shannon

Shannon diversity index is significantly different for patients or families (p=0.0136).

5.2 Alpha diversity - intestinal complaints

Analysis was done to compare alpha diversity between patients and family members with and withour intestinal complaints. The ANOVA test is used to test the difference in alpha diversity between the groups. These variables were taken into account: Age at microbiome collection, BMI in categories (healthy, overweight, obese), Gender, Wonen1Gemeenschap2Thuis (living conditions), and for strata a family ID number was used to correct for family effects.

# ANOVA to test difference in alpha diversity between patients and family members with or without intestinal complaints
meta_alpha_kleef <- readRDS("alpha50%_kleefstra_final_v17.rds")
meta_alpha_PD_bk <- subset(meta_alpha_PD, OnderzoeksID != "KSMB11003" & OnderzoeksID != "KSMB15001" & OnderzoeksID != "KSMB23" & OnderzoeksID != "KSMB3003" & OnderzoeksID != "KSMB3")

meta_alpha_PD_bk$PatientorFamily <- factor(meta_alpha_PD_bk$PatientorFamily, levels=c("Patient", "Family"))
meta_alpha_PD_bk$Intestinal_complaints <- factor(meta_alpha_PD_bk$Intestinal_complaints, levels=c("No","Yes"))
meta_alpha_PD_bk$Intestinal_complaints3 <- factor(meta_alpha_PD_bk$Intestinal_complaints3, levels=c("Never","Few","Many"))


shannon_aov_bk <- aov(diversity_shannon~Intestinal_complaints+Intestinal_complaints*Patient0orFamily1+AgeMicrobiome+BMI_cat+GenderMale1Female2+FamiliesGrouped+Wonen1Gemeenschap2Thuis, meta_alpha_PD_bk)
summary(shannon_aov_bk)

##                                         Df Sum Sq Mean Sq F value Pr(>F)  
## Intestinal_complaints                    1  0.235  0.2354   0.681  0.413  
## Patient0orFamily1                        1  1.451  1.4507   4.196  0.046 *
## AgeMicrobiome                            1  1.106  1.1062   3.200  0.080 .
## BMI_cat                                  2  0.456  0.2282   0.660  0.521  
## GenderMale1Female2                       1  0.095  0.0954   0.276  0.602  
## FamiliesGrouped                          1  0.011  0.0113   0.033  0.858  
## Wonen1Gemeenschap2Thuis                  1  0.003  0.0028   0.008  0.929  
## Intestinal_complaints:Patient0orFamily1  1  0.039  0.0390   0.113  0.739  
## Residuals                               48 16.595  0.3457                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

pval_shannon_bk <- round(summary(shannon_aov_bk)[[1]][8,5],digits=4)
pval_bk <- round(summary(shannon_aov_bk)[[1]][1,5], digits=4)
groupsize <- xtabs(~Intestinal_complaints, data=meta_alpha_PD_bk)

# calculate mean difference; patient = 0, family = 1
meansd_bk <- aggregate(diversity_shannon~Patient0orFamily1+Intestinal_complaints, meta_alpha_PD_bk, function(x) c(mean=mean(x), sd=sd(x)))
meansd_bk

##   Patient0orFamily1 Intestinal_complaints diversity_shannon.mean
## 1                 0                    No              3.4195091
## 2                 1                    No              3.7710079
## 3                 0                   Yes              3.3814034
## 4                 1                   Yes              3.7166586
##   diversity_shannon.sd
## 1            0.2282881
## 2            0.6935099
## 3            0.5767090
## 4            0.5297649

# plot results in boxplot
countnonzero <- aggregate(meta_alpha_PD_bk$diversity_shannon, list(meta_alpha_PD_bk$Intestinal_complaints), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["No"]],groupsize[["Yes"]])
boxplot_shannon_bk2 <- ggboxplot(meta_alpha_PD_bk, x="Intestinal_complaints", y="diversity_shannon", fill = "PatientorFamily", 
  title="Shannon diversity index", ylab = FALSE) + xlab("Intestinal complaints") +
  theme(legend.position="top",panel.border = element_rect(color = "black", size = 1, fill = NA))+    geom_text(aes(y=5, x=1.5, label=paste0("P-val = ",pval_shannon_bk)), size=4) + scale_x_discrete(labels=paste0(countnonzero[1:3,1]," ","n=", countnonzero[1:3,2])) + scale_fill_manual(name="", labels=c("Patients", "Family members"), values=c(cbp1_familyorpatient))
boxplot_shannon_bk2

5.3 Beta diversity - patients vs family members

This part contains the results of the beta diversity analysis using a CAP plot with the distance measure: Bray-Curtis. Analysis was done to compare Beta diversity between patients and family members. The PERMANOVA is used to test the difference in beta diversity between the groups. These variables were taken into account: Age at microbiome collection, BMI in categories (healthy, overweight, obese), Gender, living conditions, and for strata a family ID number was used to correct for family effects.

dataKleef_2 <- readRDS("dataKleef_qc_final_50%_v17.rds")
dataKleef_2_prev_ra <- microbiome::transform(dataKleef_2, "compositional")

sample_data(dataKleef_2_prev_ra)$PatientorFamily <- factor(sample_data(dataKleef_2_prev_ra)$PatientorFamily, levels=c("Patient", "Family"))


#### PERMANOVA
otu <- dataKleef_2_prev_ra@otu_table
meta <- data.frame(sample_data(dataKleef_2_prev_ra))

set.seed(38698) #reproducible results
perm <- how(nperm=999)
setBlocks(perm) <- with(meta, FamiliesGrouped)
print(adonis2(t(otu) ~ PatientorFamily + AgeMicrobiome + BMI_cat + GenderMale1Female2 + Wonen1Gemeenschap2Thuis, method="bray", na.action = na.pass, data=meta, permutations = perm))

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks:  with(meta, FamiliesGrouped) 
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = t(otu) ~ PatientorFamily + AgeMicrobiome + BMI_cat + GenderMale1Female2 + Wonen1Gemeenschap2Thuis, data = meta, permutations = perm, method = "bray", na.action = na.pass)
##                         Df SumOfSqs      R2      F Pr(>F)  
## PatientorFamily          1   0.5407 0.03006 1.8782  0.011 *
## AgeMicrobiome            1   0.3256 0.01810 1.1309  0.679  
## BMI_cat                  2   0.3982 0.02214 0.6916  0.988  
## GenderMale1Female2       1   0.3246 0.01805 1.1276  0.319  
## Wonen1Gemeenschap2Thuis  1   0.2778 0.01544 0.9648  0.650  
## Residual                56  16.1220 0.89622                
## Total                   62  17.9889 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# CAP analysis
sample_data(dataKleef_2_prev_ra)$PatientorFamily <- factor(sample_data(dataKleef_2_prev_ra)$PatientorFamily, levels=c("Patient", "Family"))

weighted_cap1 <- ordinate(dataKleef_2_prev_ra, "CAP", "bray", formula= ~ dataKleef_2_prev_ra@sam_data$PatientorFamily, weighted=TRUE)
plot_weighted_cap1 <- plot_ordination(dataKleef_2_prev_ra, weighted_cap1, color="PatientorFamily") + ggtitle("CAP - Disease status") + geom_point(size=2) +  theme_classic()+theme(legend.position="top") +  scale_color_manual(name="",labels=c("Patients", "Family members"),values=cbp1_familyorpatient, aesthetics = "color")
plot_weighted_cap1

The PERMANOVA gives a significant result for a difference in beta diversity between patients and families (p-val = 0.012). The CAP plot does seperate the two groups; patients or families).

5.4 Beta diversity - intestinal complaints

Analysis was done to compare Beta diversity for intestinal complaints. The PERMANOVA is used to test the difference in beta diversity between the groups. These variables were taken into account: Age at microbiome collection, BMI in categories (healthy, overweight, obese), Gender, and for strata a family ID number was used to correct for family effects.

dataKleef_2 <- readRDS("dataKleef_qc_final_50%_v17.rds")
dataKleef_2_sub <- subset_samples(dataKleef_2, OnderzoeksID != "KSMB11003" & OnderzoeksID != "KSMB15001" & OnderzoeksID != "KSMB23" & OnderzoeksID != "KSMB3003" & OnderzoeksID != "KSMB3")

dataKleef_2_prev_ra <- microbiome::transform(dataKleef_2_sub, "compositional")

# PERMANOVA
otu_bk <- dataKleef_2_prev_ra@otu_table
meta_bk <- data.frame(sample_data(dataKleef_2_sub))
#meta_bk$Age2 <- (meta_bk$AgeMicrobiome)^2

set.seed(38698) #reproducible results
perm <- how(nperm=999)
setBlocks(perm) <- with(meta_bk, FamiliesGrouped)
print(adonis2(t(otu_bk) ~ Intestinal_complaints + Patient0orFamily1 + AgeMicrobiome + BMI_cat + GenderMale1Female2 + Wonen1Gemeenschap2Thuis + Intestinal_complaints*Patient0orFamily1, method="bray", na.action = na.pass, data=meta_bk, permutations = perm))

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks:  with(meta_bk, FamiliesGrouped) 
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = t(otu_bk) ~ Intestinal_complaints + Patient0orFamily1 + AgeMicrobiome + BMI_cat + GenderMale1Female2 + Wonen1Gemeenschap2Thuis + Intestinal_complaints * Patient0orFamily1, data = meta_bk, permutations = perm, method = "bray", na.action = na.pass)
##                                         Df SumOfSqs      R2      F Pr(>F)   
## Intestinal_complaints                    1   0.3023 0.01817 1.0528  0.169   
## Patient0orFamily1                        1   0.5556 0.03341 1.9350  0.010 **
## AgeMicrobiome                            1   0.3676 0.02210 1.2801  0.756   
## BMI_cat                                  2   0.4073 0.02449 0.7092  0.945   
## GenderMale1Female2                       1   0.3233 0.01944 1.1259  0.322   
## Wonen1Gemeenschap2Thuis                  1   0.2637 0.01585 0.9183  0.629   
## Intestinal_complaints:Patient0orFamily1  1   0.3428 0.02061 1.1937  0.168   
## Residual                                49  14.0694 0.84593                 
## Total                                   57  16.6318 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# CAP analysis
sample_data(dataKleef_2_prev_ra)$PatientorFamily <- factor(sample_data(dataKleef_2_prev_ra)$PatientorFamily, levels=c("Patient", "Family"))

weighted_cap_bp <- ordinate(dataKleef_2_prev_ra, "CAP", "bray", formula= ~ dataKleef_2_prev_ra@sam_data$Intestinal_complaints+dataKleef_2_prev_ra@sam_data$Patient0orFamily1, weighted=TRUE)
plot_weighted_cap_bp <- plot_ordination(dataKleef_2_prev_ra, weighted_cap_bp, shape="Intestinal_complaints", color="PatientorFamily") + scale_color_manual(name="", labels=c("Patients","Family members"),values=cbp1_familyorpatient, aesthetics = "color") + scale_shape_manual(name="", values=c(17,19)) + labs(fill="") + ggtitle("CAP - Disease status * Intestinal complaints")  + geom_point(size=2)  + theme_classic()+ theme(legend.position="top")
plot_weighted_cap_bp

5.5 Taxonomy

This part contains the results of the taxonomy analysis on genus level using the aligned rank transform test which is a non-parametric ANOVA.

5.5.1 Prepare files

An ASV table including taxonomic information is being prepared for the analysis. Samples with no data for Age at microbiome collection, BMI in categories (healthy, overweight, obese), Gender and family ID were removed. After the data is made compositional (relative abundance). A prevalence cut-off of 50% is done for genera and a 10% cut-off is done for the samples.

dataKleef_2 <- readRDS("dataKleef_qc_final_50%_v17.rds")
print(dataKleef_2)

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 6771 taxa and 63 samples ]
## sample_data() Sample Data:       [ 63 samples by 44 sample variables ]
## tax_table()   Taxonomy Table:    [ 6771 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 6771 tips and 6740 internal nodes ]

ps_overall_genus <- microbiome::aggregate_taxa(dataKleef_2, "Genus")
print(ps_overall_genus)

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 417 taxa and 63 samples ]
## sample_data() Sample Data:       [ 63 samples by 44 sample variables ]
## tax_table()   Taxonomy Table:    [ 417 taxa by 7 taxonomic ranks ]

# Make compositional first, then remove taxa with lower than 50% prevalence in the dataset
otu_table_rel <- microbiome::transform(ps_overall_genus, "compositional")
print(otu_table_rel)

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 417 taxa and 63 samples ]
## sample_data() Sample Data:       [ 63 samples by 44 sample variables ]
## tax_table()   Taxonomy Table:    [ 417 taxa by 7 taxonomic ranks ]

listprevalence <- prevalence(otu_table_rel@otu_table, sort=TRUE)
plot(listprevalence)

prevalence_cutoff = 0.5
ps_overall_prevalence <- microbiome::core(otu_table_rel, detection = 0, prevalence = prevalence_cutoff) 
print(ps_overall_prevalence)

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 138 taxa and 63 samples ]
## sample_data() Sample Data:       [ 63 samples by 44 sample variables ]
## tax_table()   Taxonomy Table:    [ 138 taxa by 7 taxonomic ranks ]

# make into a matrix
taxa_table <- data.frame(tax_table(ps_overall_prevalence))
taxa_table <- taxa_table %>% unite(unique,c("Domain","Phylum","Class","Order","Family","Genus"),sep="__",remove=T)
otu_table_rel <- as.data.frame(otu_table(ps_overall_prevalence))
otu_taxa <- merge(otu_table_rel, taxa_table, by="row.names")
out_taxa_keep <- otu_taxa
rownames(otu_taxa) <- otu_taxa$Row.names
otu_taxa$Row.names <- NULL

# put long taxonomic names as row names
otu_taxa_genus <- aggregate(otu_taxa[,-(length(otu_taxa))], by=list(otu_taxa$unique), sum)

# remove samples which were present in fewer than 10% of OTUs
otu_taxa_prev <- otu_taxa_genus[,which(colSums(otu_taxa_genus>0)>=(nrow(otu_taxa_genus)*.1))]
otu_taxa_prev1 <- sweep(otu_taxa_prev[,-1],2,colSums(otu_taxa_prev[,-1]),"/")
rownames(otu_taxa_prev1) <- otu_taxa_prev$Group.1
otu_taxa_prev1 <- as.data.frame(t(otu_taxa_prev1), stringsAsFactors = FALSE)
metadata <- data.frame(sample_data(ps_overall_prevalence))

otu_taxa_meta <- merge(otu_taxa_prev1, metadata, by="row.names")
rownames(otu_taxa_meta) <- otu_taxa_meta$Row.names
otu_taxa_meta$Row.names <- NULL

saveRDS(otu_taxa_meta, "otu_taxa_metaKleef_final_prev50%_v17.rds")

5.5.2 Patients vs family members

In total, 138 genera are tested and no multiple corrected significant taxa were found. One adjusted significant taxa was detected and 16 nominal significant taxa were detected.

otu_taxa_meta_fam <- readRDS("otu_taxa_metaKleef_final_prev50%_v17.rds")
otu_taxa_meta_fam$FamiliesGrouped <- as.factor(otu_taxa_meta_fam$FamiliesGrouped)
colnames(otu_taxa_meta_fam) <- gsub(" ","_",colnames(otu_taxa_meta_fam))
colnames(otu_taxa_meta_fam) <- gsub("-","_",colnames(otu_taxa_meta_fam))
colnames(otu_taxa_meta_fam) <- gsub("\\[","",colnames(otu_taxa_meta_fam))
colnames(otu_taxa_meta_fam) <- gsub("\\]","",colnames(otu_taxa_meta_fam))


# Analysis with ART
results_patientsvsfamily <- data.frame(matrix(nrow=138, ncol=8))
colnames(results_patientsvsfamily) <- c("Genus","pval_PatientorFamily","fval_PatientorFamily", "pval_AgeMicrobiome","pval_BMI_cat","pval_GenderMale1Female2", "pval_FamiliesGrouped", "pval_Wonen1Gemeenschap2Thuis")
for (i in 1:138){
  results_patientsvsfamily[i,1] <- colnames(otu_taxa_meta_fam)[i]
  otu_taxa_meta_fam$var <- otu_taxa_meta_fam[,i]
  fit_art <- aligned.rank.transform(var~Patient0orFamily1+AgeMicrobiome+BMI_cat+GenderMale1Female2+FamiliesGrouped+Wonen1Gemeenschap2Thuis,data=otu_taxa_meta_fam)
  results_patientsvsfamily[i,2] <- fit_art$significance[1,4]
  results_patientsvsfamily[i,3] <- fit_art$significance[1,3]
  results_patientsvsfamily[i,4] <- fit_art$significance[2,4]
  results_patientsvsfamily[i,5] <- fit_art$significance[3,4]
  results_patientsvsfamily[i,6] <- fit_art$significance[4,4]
  results_patientsvsfamily[i,7] <- fit_art$significance[5,4]
  results_patientsvsfamily[i,8] <- fit_art$significance[6,4]
}

results_patientsvsfamily$adjust_PatientorFamily <- p.adjust(results_patientsvsfamily$pval_PatientorFamily, method="fdr")
results_patientsvsfamily$adjust_Age <- p.adjust(results_patientsvsfamily$pval_AgeMicrobiome, method="fdr")
results_patientsvsfamily$adjust_BMI_cat <- p.adjust(results_patientsvsfamily$pval_BMI_cat, method="fdr")
results_patientsvsfamily$adjust_Gender <- p.adjust(results_patientsvsfamily$pval_GenderMale1Female2, method="fdr")
results_patientsvsfamily$adjust_FamiliesGrouped <- p.adjust(results_patientsvsfamily$pval_FamiliesGrouped, method="fdr")
results_patientsvsfamily$adjust_Wonen <- p.adjust(results_patientsvsfamily$pval_Wonen1Gemeenschap2Thuis, method="fdr")


datatable(results_patientsvsfamily, rownames=FALSE, colnames=c("Genus", "P-value PatientorFamily", "F-value PatientorFamily", "P-value Age", "P-value BMI", "P-value Gender", "P-value FamilyGrouped","P-value Wonen1Gemeenschap2Thuis", "Adjust. P-value PatientorFamily", "Adjust. P-value Age", "Adjust. P-value BMI", "Adjust. P-value Gender","Adjust. P-value FamilyGrouped","Adjust. P-value Wonen1Gemeenschap2Thuis"))

# Number of FDR corrected results
PF_FDR_Genera <- results_patientsvsfamily$Genus[which(results_patientsvsfamily$adjust_PatientorFamily < 0.05)]
length(PF_FDR_Genera)

## [1] 1

# Mean difference of FDR corrected results: patient = 0, family members = 1
otu_taxa_meta_fam[,1:138] <- otu_taxa_meta_fam[,1:138]*100
means <- aggregate(x=otu_taxa_meta_fam[,PF_FDR_Genera], by=list(otu_taxa_meta_fam$Patient0orFamily1), FUN=mean)
means

##   Group.1         x
## 1       0 0.1567622
## 2       1 0.2922127

meandiff <- means[1,2] - means[2,2]
meandiff

## [1] -0.1354505

sd <- aggregate(x=otu_taxa_meta_fam[,PF_FDR_Genera], by=list(otu_taxa_meta_fam$Patient0orFamily1), FUN=sd)
sd

##   Group.1         x
## 1       0 0.2027883
## 2       1 0.2360933

means <- aggregate(x=otu_taxa_meta_fam$Bacteria__Actinobacteria__Coriobacteriia__Coriobacteriales__Atopobiaceae__uncultured, by=list(otu_taxa_meta_fam$Patient0orFamily1), FUN=mean)


# Number of nominal results
PatientFamilyGenera <- results_patientsvsfamily$Genus[which(results_patientsvsfamily$pval_PatientorFamily < 0.05)]
length(PatientFamilyGenera)

## [1] 17

otu_taxa_meta_fam$PatientorFamily <- factor(otu_taxa_meta_fam$PatientorFamily, levels=c("Patient", "Family"))
groupsize <- xtabs(~PatientorFamily, data=otu_taxa_meta_fam)

# plot FDR and nominal results
countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[1]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily1 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[1], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[1])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily1

countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[2]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily2 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[2], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[2])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily2

countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[3]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily3 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[3], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[3])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily3

countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[4]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily4 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[4], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[4])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily4

countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[5]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily5 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[5], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[5])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily5

countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[6]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily6 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[6], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[6])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily6

countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[7]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily7 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[7], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[7])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily7

countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[8]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily8 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[8], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[8])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily8

countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[9]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily9 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[9], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[9])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily9

countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[10]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily10 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[10], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[10])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily10

countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[11]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily11 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[11], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[11])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily11

countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[12]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily12 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[12], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[12])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily12

countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[13]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily13 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[13], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[13])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily13

countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[14]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily14 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[14], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[14])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily14

countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[15]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily15 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[15], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[15])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily15

countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[16]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily16 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[16], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[16])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily16

countnonzero <- aggregate(otu_taxa_meta_fam[PatientFamilyGenera[17]], list(otu_taxa_meta_fam$PatientorFamily), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient"]], groupsize[["Family"]])
countnonzero$Group.1 <- c("Patients","Family members")
boxplotPatientorFamily17 <- ggboxplot(otu_taxa_meta_fam, x="PatientorFamily", y=PatientFamilyGenera[17], fill= "PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",PatientFamilyGenera[17])),xlab=FALSE) + theme(legend.position = "none")+ ylab("Relative abundance %") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"))+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ", countnonzero[1:2,2],"/", countnonzero[1:2,3]))
boxplotPatientorFamily17

5.5.3 Intestinal complaints

Analysis was done to compare relative abundance per genus between the interaction with intestinal complaints and patients or family members. For each test, these variables were taken into account: Age at microbiome collection, BMI in categories (healthy, overweight, obese), Gender, living conditions and a family ID number was used to correct for family effects.

In total, 138 genera are tested and no multiple corrected significant taxa were found. 10 nominal significant taxa were detected for the interaction between intestinal complaints and patients vs family members. These results were plotted in scatter plot with two linear lines showing the direction for the patients and family members. The results are also plotted in a boxplot showing the sample size per intestinal complaints group.

otu_taxa_meta_fam <- readRDS("otu_taxa_metaKleef_final_prev50%_v17.rds")
colnames(otu_taxa_meta_fam) <- gsub(" ","_",colnames(otu_taxa_meta_fam))
colnames(otu_taxa_meta_fam) <- gsub("-","_",colnames(otu_taxa_meta_fam))
colnames(otu_taxa_meta_fam) <- gsub("\\[","",colnames(otu_taxa_meta_fam))
colnames(otu_taxa_meta_fam) <- gsub("\\]","",colnames(otu_taxa_meta_fam))

otu_taxa_meta_fam_sub <- subset(otu_taxa_meta_fam, OnderzoeksID != "KSMB11003" & OnderzoeksID != "KSMB3" & OnderzoeksID != "KSMB3003" & OnderzoeksID != "KSMB23" & OnderzoeksID != "KSMB15001")

#### intestincal complaints vs genetics
chisq.test(otu_taxa_meta_fam_sub$Genetic_variant2,otu_taxa_meta_fam_sub$Intestinal_complaints)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  otu_taxa_meta_fam_sub$Genetic_variant2 and otu_taxa_meta_fam_sub$Intestinal_complaints
## X-squared = 0.0000000000000000000000000000015226, df = 1, p-value = 1

chisq.test(otu_taxa_meta_fam_sub$Genetic_variant3,otu_taxa_meta_fam_sub$Intestinal_complaints)

## 
##  Pearson's Chi-squared test
## 
## data:  otu_taxa_meta_fam_sub$Genetic_variant3 and otu_taxa_meta_fam_sub$Intestinal_complaints
## X-squared = 1.5932, df = 2, p-value = 0.4508

# Analysis using ART
results_buikklachten <- data.frame(matrix(nrow=138, ncol=9))
colnames(results_buikklachten) <- c("Genus","pval_Buikklachten", "pval_AgeMicrobiome","pval_BMI_cat","pval_GenderMale1Female2","pval_FamiliesGrouped","pval_Wonen","pval_interaction", "fval_Buikklachten")
set.seed(2)
for (i in 1:138){
  results_buikklachten[i,1] <- colnames(otu_taxa_meta_fam_sub)[i]
  otu_taxa_meta_fam_sub$var <- otu_taxa_meta_fam_sub[,i]
  fit_art <- aligned.rank.transform(var~Intestinal_complaints+AgeMicrobiome+BMI_cat+GenderMale1Female2+FamiliesGrouped+Wonen1Gemeenschap2Thuis+Intestinal_complaints*Patient0orFamily1,data=otu_taxa_meta_fam_sub)
  results_buikklachten[i,2] <- fit_art$significance[1,4]
  results_buikklachten[i,3] <- fit_art$significance[2,4]
  results_buikklachten[i,4] <- fit_art$significance[3,4]
  results_buikklachten[i,5] <- fit_art$significance[4,4]
  results_buikklachten[i,6] <- fit_art$significance[5,4]
  results_buikklachten[i,7] <- fit_art$significance[6,4]
  results_buikklachten[i,8] <- fit_art$significance[8,4]
  results_buikklachten[i,9] <- fit_art$significance[8,3]
}

results_buikklachten$adjust_buikklachten <- p.adjust(results_buikklachten$pval_Buikklachten, method="fdr")
results_buikklachten$adjust_Age <- p.adjust(results_buikklachten$pval_AgeMicrobiome, method="fdr")
results_buikklachten$adjust_BMI_cat <- p.adjust(results_buikklachten$pval_BMI_cat, method="fdr")
results_buikklachten$adjust_Gender <- p.adjust(results_buikklachten$pval_GenderMale1Female2, method="fdr")
results_buikklachten$adjust_FamiliesGrouped <- p.adjust(results_buikklachten$pval_FamiliesGrouped, method="fdr")
results_buikklachten$adjust_Wonen <- p.adjust(results_buikklachten$pval_Wonen, method="fdr")
results_buikklachten$adjust_interaction <- p.adjust(results_buikklachten$pval_interaction, method="fdr")

datatable(results_buikklachten, rownames=FALSE, colnames=c("Genus", "P-value Buikklachten", "P-value Age", "P-value BMI", "P-value Gender", "P-value FamiliesGrouped", "P-value Wonen", "P-value Buikklachten*PatientFamily","F-value Buikklachten*PatientorFamily", "Adjust. P-value Buikklachten", "Adjust. P-value Age", "Adjust. P-value BMI", "Adjust. P-value Gender", "Adjust. P-value FamiliesGrouped", "Adjust. P-value Wonen","Adjust. P-value Buikklachten*PatientFamily"))

# number of FDR corrected results
bk_FDR_Genera <- results_buikklachten$Genus[which(results_buikklachten$adjust_interaction < 0.05)]
length(bk_FDR_Genera)

## [1] 1

# Mean difference for FDR corrected results; : patient = 0, family members = 1
otu_taxa_meta_fam_sub[,1:138] <- otu_taxa_meta_fam_sub[,1:138]*100
meandiffsd_bk <- aggregate(Bacteria__Firmicutes__Erysipelotrichia__Erysipelotrichales__Erysipelotrichaceae__Merdibacter~Patient0orFamily1+Intestinal_complaints, otu_taxa_meta_fam_sub, function(x) c(mean=mean(x), sd=sd(x)))
meandiffsd_bk

##   Patient0orFamily1 Intestinal_complaints
## 1                 0                    No
## 2                 1                    No
## 3                 0                   Yes
## 4                 1                   Yes
##   Bacteria__Firmicutes__Erysipelotrichia__Erysipelotrichales__Erysipelotrichaceae__Merdibacter.mean
## 1                                                                                       0.048301464
## 2                                                                                       0.005513095
## 3                                                                                       0.001721823
## 4                                                                                       0.013777342
##   Bacteria__Firmicutes__Erysipelotrichia__Erysipelotrichales__Erysipelotrichaceae__Merdibacter.sd
## 1                                                                                     0.091806208
## 2                                                                                     0.009110992
## 3                                                                                     0.002292279
## 4                                                                                     0.022838511

# number of nominal results
bk_Genera <- results_buikklachten$Genus[which(results_buikklachten$pval_interaction < 0.05)]
length(bk_Genera)

## [1] 17

# plot all significant results
otu_taxa_meta_fam_sub$PatientorFamily <- factor(otu_taxa_meta_fam_sub$PatientorFamily, levels=c("Patient", "Family"))
otu_taxa_meta_fam_sub$Intestinal_complaints <- factor(otu_taxa_meta_fam_sub$Intestinal_complaints, levels=c("No", "Yes"))
otu_taxa_meta_fam_sub$pf_intestinal <- paste(otu_taxa_meta_fam_sub$PatientorFamily,"-",otu_taxa_meta_fam_sub$Intestinal_complaints,sep=" ")
otu_taxa_meta_fam_sub$pf_intestinal <- factor(otu_taxa_meta_fam_sub$pf_intestinal, levels=c("Patient - No","Family - No","Patient - Yes","Family - Yes"))
groupsize <- xtabs(~pf_intestinal, data=otu_taxa_meta_fam_sub)
my_comp <- list(c("Patient - No","Family - No"),c("Patient - Yes","Family - Yes"))


countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[1]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk1 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[1], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[1]))) + ylab("Relative abundance %") + xlab("Disease status - Intestinal complaints") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"),legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3])) + stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk1

countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[2]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk2 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[2], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[2]))) + ylab("Relative abundance %")+ xlab("Disease status - Intestinal complaints") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk2

countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[3]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[3], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[3]))) + ylab("Relative abundance %")+ xlab("Disease status - Intestinal complaints") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk

countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[4]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk4 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[4], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[4]))) + ylab("Relative abundance %")+ xlab("Disease status - Intestinal complaints") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk4

countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[5]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk5 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[5], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[5]))) + ylab("Relative abundance %")+ xlab("Disease status - Intestinal complaints") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk5

countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[6]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk6 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[6], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[6]))) + ylab("Relative abundance %") + xlab("Disease status - Intestinal complaints")+ theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk6

countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[7]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk7 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[7], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[7]))) + ylab("Relative abundance %")+ xlab("Disease status - Intestinal complaints") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk7

countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[8]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk8 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[8], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[8]))) + ylab("Relative abundance %")+ xlab("Disease status - Intestinal complaints") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk8

countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[9]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk9 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[9], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[9]))) + ylab("Relative abundance %") + xlab("Disease status - Intestinal complaints")+ theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk9

countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[10]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk10 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[10], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[10]))) + ylab("Relative abundance %") + xlab("Disease status - Intestinal complaints")+ theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk10

countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[11]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk11 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[11], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[11]))) + ylab("Relative abundance %") + xlab("Disease status - Intestinal complaints")+ theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk11

countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[12]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk12 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[12], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[12]))) + ylab("Relative abundance %") + xlab("Disease status - Intestinal complaints")+ theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk12

countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[13]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk13 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[13], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[13]))) + ylab("Relative abundance %") + xlab("Disease status - Intestinal complaints")+ theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk13

countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[14]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk14 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[14], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[14]))) + ylab("Relative abundance %") + xlab("Disease status - Intestinal complaints")+ theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk14

countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[15]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk15 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[15], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[15]))) + ylab("Relative abundance %")+ xlab("Disease status - Intestinal complaints") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk15

countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[16]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk16 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[16], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[16]))) + ylab("Relative abundance %")+ xlab("Disease status - Intestinal complaints") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk16

countnonzero <- aggregate(otu_taxa_meta_fam_sub[bk_Genera[17]], list(otu_taxa_meta_fam_sub$pf_intestinal), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Patient - No"]],groupsize[["Family - No"]],groupsize[["Patient - Yes"]],groupsize[["Family - Yes"]])
boxplot_bk17 <- ggboxplot(otu_taxa_meta_fam_sub, x="pf_intestinal", y=bk_Genera[17], fill="PatientorFamily", title=gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",bk_Genera[17]))) + ylab("Relative abundance %")+ xlab("Disease status - Intestinal complaints") + theme(axis.text.y=element_text(angle=90)) + theme(plot.title = element_text( size=14, face="bold.italic"), legend.position = "none")+ scale_color_manual(values = cbp1_familyorpatient, aesthetics = "fill")+ theme(text = element_text(size = 11))+ scale_x_discrete(labels=paste0(countnonzero[1:4,1]," ", countnonzero[1:4,2],"/", countnonzero[1:4,3]))+ stat_compare_means(comparisons=my_comp, method="wilcox.test",label="p.signif",paired=F)
boxplot_bk17

6 Patients only

This section contains the analysis using the patients only dataset to test the difference in symptom severity and genetic variants.

6.1 Alpha diversity - symptom severity

Analysis was done to compare alpha diversity between symptom severity using the CBCL total problems score and ADOS comparative score. For CBCL and ADOS, this will be done using a correlation plot, the non-significant p-values are plotted within the plot.

### CBCL total
alpha_kleef <- readRDS("alpha50%_kleefstra_final_v17.rds")
alpha_kleef_cbcl <- subset(alpha_kleef, PatientorFamily == "Patient" & OnderzoeksID != "KSMB10")

shapiro.test(alpha_kleef_cbcl$CBCLtot5_TotaalTscore)

## 
##  Shapiro-Wilk normality test
## 
## data:  alpha_kleef_cbcl$CBCLtot5_TotaalTscore
## W = 0.96474, p-value = 0.5904

shapiro.test(alpha_kleef_cbcl$diversity_shannon)

## 
##  Shapiro-Wilk normality test
## 
## data:  alpha_kleef_cbcl$diversity_shannon
## W = 0.97241, p-value = 0.7662

#Correlation matrix with significance levels (p-value)
cor_Bacteria_p <- rcorr(alpha_kleef_cbcl$CBCLtot5_TotaalTscore,alpha_kleef_cbcl$diversity_shannon, type = "pearson")
r_Bacteria_cbcl <- cor_Bacteria_p$r
r_Bacteria_cbcl

##           x         y
## x 1.0000000 0.1137313
## y 0.1137313 1.0000000

p_Bacteria_cbcl <- cor_Bacteria_p$P
p_Bacteria_cbcl

##           x         y
## x        NA 0.6142973
## y 0.6142973        NA

### ADOS comp
ADOS <- read_excel("ADOS.xlsx")
alpha_ados <- merge(alpha_kleef, ADOS, by.x="Onderzoeksnummer",by.y="OnderzoeksID")
shapiro.test(alpha_ados$ADOS_comp)

## 
##  Shapiro-Wilk normality test
## 
## data:  alpha_ados$ADOS_comp
## W = 0.8483, p-value = 0.003169

#Correlation matrix with significance levels (p-value)
cor_Bacteria_p <- rcorr(alpha_ados$ADOS_comp,alpha_ados$diversity_shannon, type = "spearman")
r_Bacteria_ados <- cor_Bacteria_p$r
r_Bacteria_ados

##            x          y
## x 1.00000000 0.01810729
## y 0.01810729 1.00000000

p_Bacteria_ados <- cor_Bacteria_p$P
p_Bacteria_ados

##           x         y
## x        NA 0.9362537
## y 0.9362537        NA

#Correlation matrix with significance levels (p-value)
cor_Bacteria_p <- rcorr(alpha_ados$CBCLtot5_TotaalTscore, alpha_ados$ADOS_comp, type = "spearman")
r_Bacteria_cbcl_ados <- cor_Bacteria_p$r
r_Bacteria_cbcl_ados

##            x          y
## x  1.0000000 -0.1131639
## y -0.1131639  1.0000000

p_Bacteria_cbcl_ados <- cor_Bacteria_p$P
p_Bacteria_cbcl_ados

##           x         y
## x        NA 0.6252655
## y 0.6252655        NA

6.2 Alpha diversity - genetics

Analysis was done to compare alpha diversity between genetics using two genetic variables, genetics2 (mutation or deletion) and genetics3 (mutation, deletion > 1mb and deletion < 1mb). The ANOVA test is used to test the difference in alpha diversity between the genetic groups. For each test, these variables were taken into account: Age at microbiome collection, BMI in categories (healthy, overweight, obese) and Gender.

meta_alpha_PD <- readRDS("alpha50%_kleefstra_final_v17.rds")
meta_alpha_PD <- subset(meta_alpha_PD, FamilyRelation == "patient" & OnderzoeksID != "KSMB23")

cbp_gen2 <- c("#D55E00","#56B4E9")
cbp_gen3 <- c("#882255","#D55E00","#56B4E9")

meta_alpha_PD$PatientorFamily <- factor(meta_alpha_PD$PatientorFamily, levels=c("Patient", "Family"))
meta_alpha_PD$Genetic_variant3 <- factor(meta_alpha_PD$Genetic_variant3, levels=c("Pathogenic mutation","Deletion < 1MB", "Deletion > 1MB"))
meta_alpha_PD$Genetic_variant2 <- factor(meta_alpha_PD$Genetic_variant2, levels=c("Mutation + deletion < 1MB","Deletion > 1MB"))

## ANOVA test Genetic_variant3
meta_alpha_PD$Genetic_variant3 <- factor(meta_alpha_PD$Genetic_variant3, levels=c("Pathogenic mutation","Deletion < 1MB", "Deletion > 1MB"))
shannon_aov_gen3 <- aov(diversity_shannon~Genetic_variant3+AgeMicrobiome+BMI_cat+GenderMale1Female2, meta_alpha_PD)
summary(shannon_aov_gen3)

##                    Df Sum Sq Mean Sq F value Pr(>F)
## Genetic_variant3    2  0.030  0.0150   0.049  0.952
## AgeMicrobiome       1  0.153  0.1529   0.501  0.490
## BMI_cat             2  0.704  0.3518   1.153  0.342
## GenderMale1Female2  1  0.046  0.0459   0.151  0.703
## Residuals          15  4.575  0.3050

pval_shannon_gen3 <- round(summary(shannon_aov_gen3)[[1]][1,5],digits=4)
groupsize <- xtabs(~Genetic_variant3, meta_alpha_PD)
my_comp <- list(c("Pathogenic mutation","Deletion < 1MB"),c("Deletion < 1MB","Deletion > 1MB"),c("Pathogenic mutation","Deletion > 1MB"))

# mean difference genetic variant 3
means <- aggregate(x=meta_alpha_PD$diversity_shannon, by=list(meta_alpha_PD$Genetic_variant3), FUN=mean)
means

##               Group.1        x
## 1 Pathogenic mutation 3.408572
## 2      Deletion < 1MB 3.322191
## 3      Deletion > 1MB 3.387878

sd <- aggregate(x=meta_alpha_PD$diversity_shannon, by=list(meta_alpha_PD$Genetic_variant3), FUN=sd)
sd

##               Group.1         x
## 1 Pathogenic mutation 0.4453270
## 2      Deletion < 1MB 0.6605720
## 3      Deletion > 1MB 0.6095338

# plot alpha diversity genetic variant 3
countnonzero <- aggregate(meta_alpha_PD$diversity_shannon, list(meta_alpha_PD$Genetic_variant3), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Pathogenic mutation"]],groupsize[["Deletion < 1MB"]],groupsize[["Deletion > 1MB"]])
boxplot_shannon_gen3 <- ggboxplot(meta_alpha_PD, x="Genetic_variant3", y="diversity_shannon", fill = "Genetic_variant3", palette = cbp_gen3, title="Shannon diversity index", ylab = FALSE)+ xlab("Genetic variants (3)") + theme(panel.border = element_rect(color = "black", size = 1, fill = NA), legend.position = "none")+    geom_text(aes(y=5, x=0.75, label=paste0("P-val = ",pval_shannon_gen3)), size=4)+ scale_x_discrete(labels=paste0(countnonzero[1:3,1]," ","n=", countnonzero[1:3,3]))+ stat_compare_means(comparisons=my_comp, method="t.test",label="p.signif",paired=F)
boxplot_shannon_gen3

### ANOVA for genetic variant 2: 1) mutation + del < 1mb and 2) del > 1mb
shannon_aov_gen2 <- aov(diversity_shannon~Genetic_variant2+AgeMicrobiome+BMI_cat+GenderMale1Female2, meta_alpha_PD)
summary(shannon_aov_gen2)

##                    Df Sum Sq Mean Sq F value Pr(>F)
## Genetic_variant2    1  0.000 0.00021   0.001  0.979
## AgeMicrobiome       1  0.127 0.12734   0.420  0.526
## BMI_cat             2  0.502 0.25110   0.829  0.455
## GenderMale1Female2  1  0.030 0.02991   0.099  0.757
## Residuals          16  4.848 0.30301

pval_shannon_gen2 <- round(summary(shannon_aov_gen2)[[1]][1,5],digits=4)
groupsize <- xtabs(~Genetic_variant2, meta_alpha_PD)

# Mean difference genetic variant 2
means <- aggregate(x=meta_alpha_PD$diversity_shannon, by=list(meta_alpha_PD$Genetic_variant2), FUN=mean)
means

##                     Group.1        x
## 1 Mutation + deletion < 1MB 3.379778
## 2            Deletion > 1MB 3.387878

meandiff <- means[1,2] - means[2,2]
meandiff

## [1] -0.008100053

sd <- aggregate(x=meta_alpha_PD$diversity_shannon, by=list(meta_alpha_PD$Genetic_variant2), FUN=sd)
sd

##                     Group.1         x
## 1 Mutation + deletion < 1MB 0.5083481
## 2            Deletion > 1MB 0.6095338

# plot alpha diversity genetic variant 2
countnonzero <- aggregate(meta_alpha_PD$diversity_shannon, list(meta_alpha_PD$Genetic_variant2), function(c)sum(c!=0))
countnonzero$groupsize <- c(groupsize[["Mutation + deletion < 1MB"]],groupsize[["Deletion > 1MB"]])
boxplot_shannon_gen2 <- ggboxplot(meta_alpha_PD, x="Genetic_variant2", y="diversity_shannon", fill = "Genetic_variant2", palette = cbp_gen2, title="Shannon diversity index", ylab = FALSE) + xlab("Genetic variants (2)")+
  theme(panel.border = element_rect(color = "black", size = 1, fill = NA), legend.position = "none")+    geom_text(aes(y=5, x=0.75, label=paste0("P-val = ",pval_shannon_gen2)), size=4)+ scale_x_discrete(labels=paste0(countnonzero[1:2,1]," ","n=", countnonzero[1:2,3]))
boxplot_shannon_gen2

6.3 Beta diversity - symptom severity

Analysis was done to compare beta diversity between symptom severity using the CBCL total problems score and ADOS comparative score. The PERMANOVA is used to test the difference in beta diversity. These variables were taken into account: Age at microbiome collection, BMI in categories (healthy, overweight, obese) and Gender.

#### CBCL totaal
dataKleef_2 <- readRDS("dataKleef_qc_final_50%_v17.rds")
dataKleef_2_sub <- subset_samples(dataKleef_2, PatientorFamily == "Patient" & OnderzoeksID != "KSMB10")

dataKleef_2_prev_ra <- microbiome::transform(dataKleef_2_sub, "compositional")


# PERMANOVA
otu <- dataKleef_2_prev_ra@otu_table
meta <- data.frame(sample_data(dataKleef_2_sub))

set.seed(3246) #reproducible results
print(adonis2(t(otu) ~ CBCLtot5_TotaalTscore + AgeMicrobiome + BMI_cat + GenderMale1Female2, method="bray", na.action = na.pass, data=meta, permutations = 999))

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = t(otu) ~ CBCLtot5_TotaalTscore + AgeMicrobiome + BMI_cat + GenderMale1Female2, data = meta, permutations = 999, method = "bray", na.action = na.pass)
##                       Df SumOfSqs      R2      F Pr(>F)  
## CBCLtot5_TotaalTscore  1   0.4953 0.07377 1.6155  0.047 *
## AgeMicrobiome          1   0.2722 0.04054 0.8878  0.567  
## BMI_cat                2   0.6255 0.09315 1.0200  0.397  
## GenderMale1Female2     1   0.4160 0.06196 1.3570  0.134  
## Residual              16   4.9053 0.73058                
## Total                 21   6.7143 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# CAP analysis
weighted_cap <- ordinate(dataKleef_2_prev_ra, "CAP", "bray", formula= ~ dataKleef_2_prev_ra@sam_data$CBCLtot5_TotaalTscore, weighted=TRUE)
plot_weighted_cap <- plot_ordination(dataKleef_2_prev_ra, weighted_cap, color="CBCLtot5_TotaalTscore") + ggtitle("CAP - CBCL total problems score") + geom_point(size=2)  + theme_classic()+ theme(legend.position="top") + scale_color_gradient(name="",low="gray80",high="black")
plot_weighted_cap

#### ADOS comp
dataKleef_2 <- readRDS("dataKleef_qc_final_50%_v17.rds")
dataKleef_2_sub <- subset_samples(dataKleef_2_sub, PatientorFamily == "Patient" & OnderzoeksID != "KSMB22")

dataKleef_2_prev_ra <- microbiome::transform(dataKleef_2_sub, "compositional")

meta <- data.frame(sample_data(dataKleef_2_prev_ra))
ADOS <- read_excel("ADOS.xlsx")
meta_ados <- merge(meta, ADOS,by.x="Onderzoeksnummer",by.y="OnderzoeksID")
sam_new <- sample_data(meta_ados)
sam_new$ADOS_comp <- as.numeric(sam_new$ADOS_comp)
dataKleef_2_prev_ra_ados <- merge_phyloseq(dataKleef_2_prev_ra, sam_new)
dataKleef_2_prev_ra_ados@sam_data$ADOS_comp <- sam_new[["ADOS_comp"]]


# PERMANOVA
otu <- dataKleef_2_prev_ra_ados@otu_table
meta_ados <- data.frame(sample_data(dataKleef_2_prev_ra_ados))

set.seed(38698) #reproducible results
print(adonis2(t(otu) ~ ADOS_comp + AgeMicrobiome + BMI_cat + GenderMale1Female2, method="bray", na.action = na.pass, data=meta_ados, permutations = 999))

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = t(otu) ~ ADOS_comp + AgeMicrobiome + BMI_cat + GenderMale1Female2, data = meta_ados, permutations = 999, method = "bray", na.action = na.pass)
##                    Df SumOfSqs      R2      F Pr(>F)
## ADOS_comp           1   0.2322 0.03575 0.6991  0.841
## AgeMicrobiome       1   0.2918 0.04494 0.8787  0.586
## BMI_cat             2   0.6041 0.09303 0.9095  0.614
## GenderMale1Female2  1   0.3844 0.05919 1.1574  0.261
## Residual           15   4.9815 0.76709              
## Total              20   6.4940 1.00000

# CAP analysis
weighted_cap <- ordinate(dataKleef_2_prev_ra_ados, "CAP", "bray", formula= ~ dataKleef_2_prev_ra_ados@sam_data$ADOS_comp, weighted=TRUE)
plot_weighted_cap <- plot_ordination(dataKleef_2_prev_ra_ados, weighted_cap, color="ADOS_comp") + ggtitle("CAP - ADOS comparative score") + geom_point(size=2) +  theme_classic() +theme(legend.position="top") + scale_color_gradient(name="",low="gray80",high="black")
plot_weighted_cap

6.4 Beta diversity - genetics

Analysis was done to compare Beta diversity for genetics using the variables genetics2 (mutation + deletion < 1MB or deletion > 1MB) and genetics3 (mutation, deletion > 1mb or deletion < 1mb). The PERMANOVA is used to test the difference in beta diversity between the groups. For each test, these variables were taken into account: Age at microbiome collection, BMI in categories (healthy, overweight, obese) and Gender.

dataKleef_2 <- readRDS("dataKleef_qc_final_50%_v17.rds")
dataKleef_2_sub <- subset_samples(dataKleef_2, PatientorFamily == "Patient" & OnderzoeksID != "KSMB23")

dataKleef_2_prev_ra <- microbiome::transform(dataKleef_2_sub, "compositional")

### genetics3
# PERMANOVA
otu_gen3 <- dataKleef_2_prev_ra@otu_table
meta_gen3 <- data.frame(sample_data(dataKleef_2_sub))

set.seed(38698) #reproducible results
print(adonis2(t(otu_gen3) ~ Genetic_variant3 + Patient0orFamily1 + AgeMicrobiome + BMI_cat + GenderMale1Female2, method="bray", na.action = na.pass, data=meta_gen3, permutations = 999))

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = t(otu_gen3) ~ Genetic_variant3 + Patient0orFamily1 + AgeMicrobiome + BMI_cat + GenderMale1Female2, data = meta_gen3, permutations = 999, method = "bray", na.action = na.pass)
##                    Df SumOfSqs      R2      F Pr(>F)
## Genetic_variant3    2   0.5044 0.07591 0.8154  0.796
## AgeMicrobiome       1   0.4171 0.06277 1.3486  0.145
## BMI_cat             2   0.6653 0.10013 1.0755  0.332
## GenderMale1Female2  1   0.4185 0.06298 1.3531  0.138
## Residual           15   4.6390 0.69821              
## Total              21   6.6441 1.00000

# CAP analysis
sample_data(dataKleef_2_prev_ra)$Genetic_variant3 <- factor(sample_data(dataKleef_2_prev_ra)$Genetic_variant3, levels=c("Pathogenic mutation","Deletion < 1MB", "Deletion > 1MB"))

weighted_cap_gen3 <- ordinate(dataKleef_2_prev_ra, "CAP", "bray", formula= ~ dataKleef_2_prev_ra@sam_data$Genetic_variant3, weighted=TRUE)
plot_weighted_cap_gen3 <- plot_ordination(dataKleef_2_prev_ra, weighted_cap_gen3, color="Genetic_variant3") + ggtitle("CAP - Genetic variant (3)") + geom_point(size=2)+ scale_color_manual(name="",values=cbp_gen3, aesthetics = "color")  + theme_classic()+ theme(legend.position="top")
plot_weighted_cap_gen3

### genetics2
# PERMANOVA
otu_gen2 <- dataKleef_2_prev_ra@otu_table
meta_gen2 <- data.frame(sample_data(dataKleef_2_sub))

set.seed(38698) #reproducible results
print(adonis2(t(otu_gen2) ~ Genetic_variant2 + Patient0orFamily1 + AgeMicrobiome + BMI_cat + GenderMale1Female2, method="bray", na.action = na.pass, data=meta_gen2, permutations = 999))

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = t(otu_gen2) ~ Genetic_variant2 + Patient0orFamily1 + AgeMicrobiome + BMI_cat + GenderMale1Female2, data = meta_gen2, permutations = 999, method = "bray", na.action = na.pass)
##                    Df SumOfSqs      R2      F Pr(>F)
## Genetic_variant2    1   0.3189 0.04800 1.0430  0.349
## AgeMicrobiome       1   0.3972 0.05978 1.2990  0.164
## BMI_cat             2   0.6302 0.09486 1.0306  0.407
## GenderMale1Female2  1   0.4055 0.06104 1.3263  0.157
## Residual           16   4.8922 0.73633              
## Total              21   6.6441 1.00000

# CAP analysis
sample_data(dataKleef_2_prev_ra)$Genetic_variant2 <- factor(sample_data(dataKleef_2_prev_ra)$Genetic_variant2, levels=c("Mutation + deletion < 1MB","Deletion > 1MB"))

weighted_cap_gen2 <- ordinate(dataKleef_2_prev_ra, "CAP", "bray", formula= ~ dataKleef_2_prev_ra@sam_data$Genetic_variant2, weighted=TRUE)
plot_weighted_cap_gen2 <- plot_ordination(dataKleef_2_prev_ra, weighted_cap_gen2, color="Genetic_variant2") + ggtitle("CAP - Genetic variant (2)") + geom_point(size=2)+ scale_color_manual(name="",values=cbp_gen2, aesthetics = "color")  + theme_classic()+ theme(legend.position="top")
plot_weighted_cap_gen2

6.5 Taxonomy

6.5.1 CBCL total problems score

Analysis was done to test differences in relative abundance per genus across total CBCL scores. For each test, these variables were taken into account: Age at microbiome collection, BMI in categories (healthy, overweight, obese) and Gender. #(For analysis with CBCL and ADOS, familygrouped and wonen are not included in this analysis since it only contains patients which don’t live together and do not belong to the same family)

otu_taxa_meta_fam <- readRDS("otu_taxa_metaKleef_final_prev50%_v17.rds")
otu_taxa_meta_fam_pat_na <- subset(otu_taxa_meta_fam, PatientorFamily == "Patient" & OnderzoeksID != "KSMB10")
colnames(otu_taxa_meta_fam_pat_na) <- gsub(" ","_",colnames(otu_taxa_meta_fam_pat_na))
colnames(otu_taxa_meta_fam_pat_na) <- gsub("-","_",colnames(otu_taxa_meta_fam_pat_na))
colnames(otu_taxa_meta_fam_pat_na) <- gsub("\\[","",colnames(otu_taxa_meta_fam_pat_na))
colnames(otu_taxa_meta_fam_pat_na) <- gsub("\\]","",colnames(otu_taxa_meta_fam_pat_na))


######## t.test with intestinal complaints and genetics
otu_cbcl <- subset(otu_taxa_meta_fam_pat_na, Onderzoeksnummer != 23 & Onderzoeksnummer != 3)
otu_cbcl$Genetic_variant2 <- factor(otu_cbcl$Genetic_variant2, levels=c("Mutation + deletion < 1MB","Deletion > 1MB"))
otu_cbcl$Genetic_variant3 <- factor(otu_cbcl$Genetic_variant3, levels=c("Pathogenic mutation","Deletion < 1MB", "Deletion > 1MB"))
shapiro.test(otu_cbcl$CBCLtot5_TotaalTscore)

## 
##  Shapiro-Wilk normality test
## 
## data:  otu_cbcl$CBCLtot5_TotaalTscore
## W = 0.9523, p-value = 0.4034

ttest_intest <- t.test(CBCLtot5_TotaalTscore~Intestinal_complaints,data=otu_cbcl)
ttest_intest

## 
##  Welch Two Sample t-test
## 
## data:  CBCLtot5_TotaalTscore by Intestinal_complaints
## t = -3.1707, df = 6.3918, p-value = 0.01772
## alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
## 95 percent confidence interval:
##  -22.004915  -2.995085
## sample estimates:
##  mean in group No mean in group Yes 
##             60.25             72.75

boxplot_CBCL_intest <- ggboxplot(otu_cbcl, x="Intestinal_complaints", y="CBCLtot5_TotaalTscore", fill="Intestinal_complaints", title="CBCL total problems score vs Intestinal complaints", ylab = "CBCL total problems score") + xlab("Intestinal complaints") + theme(legend.position="none",panel.border = element_rect(color = "black", size = 1, fill = NA))
boxplot_CBCL_intest

ttest_gen2 <- t.test(CBCLtot5_TotaalTscore~Genetic_variant2,data=otu_cbcl)
ttest_gen2

## 
##  Welch Two Sample t-test
## 
## data:  CBCLtot5_TotaalTscore by Genetic_variant2
## t = 0.18064, df = 2.1827, p-value = 0.8721
## alternative hypothesis: true difference in means between group Mutation + deletion < 1MB and group Deletion > 1MB is not equal to 0
## 95 percent confidence interval:
##  -39.13110  42.85659
## sample estimates:
## mean in group Mutation + deletion < 1MB            mean in group Deletion > 1MB 
##                                70.52941                                68.66667

boxplot_CBCL_gen2 <- ggboxplot(otu_cbcl, x="Genetic_variant2", y="CBCLtot5_TotaalTscore", fill="Genetic_variant2", title="CBCL total problems score vs Genetic variants (2)", ylab = "CBCL total problems score", xlab="Genetic variants (2)") +scale_color_manual(values = cbp_gen2, aesthetics = "fill")+ theme(legend.position="none",panel.border = element_rect(color = "black", size = 1, fill = NA))
boxplot_CBCL_gen2

ttest_gen3 <- aov(CBCLtot5_TotaalTscore~Genetic_variant3,data=otu_cbcl)
summary(ttest_gen3)

##                  Df Sum Sq Mean Sq F value Pr(>F)
## Genetic_variant3  2   24.6   12.31   0.114  0.893
## Residuals        17 1835.1  107.95

boxplot_CBCL_intest <- ggboxplot(otu_cbcl, x="Genetic_variant3", y="CBCLtot5_TotaalTscore", fill="Genetic_variant3", title="CBCL total problems score vs Genetic variants (3)", ylab = "CBCL total problems score", xlab="Genetic variants (3)") +scale_color_manual(values = cbp_gen3, aesthetics = "fill")+ theme(legend.position="none",panel.border = element_rect(color = "black", size = 1, fill = NA))
boxplot_CBCL_intest

# Analysis with ART
results_CBCL_total <- data.frame(matrix(nrow=138, ncol=6))
colnames(results_CBCL_total) <- c("Genus","pval_CBCL_total","fval_CBCL_total", "pval_AgeMicrobiome","pval_BMI_cat","pval_GenderMale1Female2")
set.seed(2)
for (i in 1:138){
  results_CBCL_total[i,1] <- colnames(otu_taxa_meta_fam_pat_na)[i]
  otu_taxa_meta_fam_pat_na$var <- otu_taxa_meta_fam_pat_na[,i]
  fit_art <- aligned.rank.transform(var~CBCLtot5_TotaalTscore+AgeMicrobiome+BMI_cat+GenderMale1Female2,data=otu_taxa_meta_fam_pat_na)
  results_CBCL_total[i,2] <- fit_art$significance[1,4]
  results_CBCL_total[i,3] <- fit_art$significance[1,3]
  results_CBCL_total[i,4] <- fit_art$significance[2,4]
  results_CBCL_total[i,5] <- fit_art$significance[3,4]
  results_CBCL_total[i,6] <- fit_art$significance[4,4]
}

results_CBCL_total$adjust_CBCL_total <- p.adjust(results_CBCL_total$pval_CBCL_total, method="fdr")
results_CBCL_total$adjust_Age <- p.adjust(results_CBCL_total$pval_AgeMicrobiome, method="fdr")
results_CBCL_total$adjust_BMI_cat <- p.adjust(results_CBCL_total$pval_BMI_cat, method="fdr")
results_CBCL_total$adjust_Gender <- p.adjust(results_CBCL_total$pval_GenderMale1Female2, method="fdr")


datatable(results_CBCL_total, rownames=FALSE, colnames=c("Genus", "P-value CBCL total", "F-value CBCL total", "P-value Age", "P-value BMI", "P-value Gender", "Adjust. P-value CBCL total", "Adjust. P-value Age", "Adjust. P-value BMI", "Adjust. P-value Gender"))

# number of FDR corrected results
CBCL_FDR_Genera <- results_CBCL_total$Genus[which(results_CBCL_total$adjust_CBCL_total < 0.05)]
length(CBCL_FDR_Genera)

## [1] 2

# number of nominal significant results
CBCLGenera <- results_CBCL_total$Genus[which(results_CBCL_total$pval_CBCL_total < 0.05)]
length(CBCLGenera)

## [1] 9

# Plot all significant results
otu_taxa_meta_fam_pat_na[,1:138] <- otu_taxa_meta_fam_pat_na[,1:138]*100

plotCBCL1 <- ggplot(otu_taxa_meta_fam_pat_na, aes(x=CBCLtot5_TotaalTscore, y=otu_taxa_meta_fam_pat_na[,CBCLGenera[1]])) + geom_point() + geom_smooth(method=lm, se=F) + labs(y="Relative abundance %",x="CBCL total problems score") + theme_classic()+ggtitle(gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",CBCLGenera[1])))+ theme(plot.title = element_text( size=14, face="bold.italic"))
plotCBCL1

plotCBCL2 <- ggplot(otu_taxa_meta_fam_pat_na, aes(x=CBCLtot5_TotaalTscore, y=otu_taxa_meta_fam_pat_na[,CBCLGenera[2]])) + geom_point() + geom_smooth(method=lm, se=F) + labs(y="Relative abundance %",x="CBCL total problems score") + theme_classic()+ggtitle(gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",CBCLGenera[2])))+ theme(plot.title = element_text( size=14, face="bold.italic"))
plotCBCL2

plotCBCL3 <- ggplot(otu_taxa_meta_fam_pat_na, aes(x=CBCLtot5_TotaalTscore, y=otu_taxa_meta_fam_pat_na[,CBCLGenera[3]])) + geom_point() + geom_smooth(method=lm, se=F) + labs(y="Relative abundance %",x="CBCL total problems score") +theme_classic()+ggtitle(gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",CBCLGenera[3])))+ theme(plot.title = element_text( size=14, face="bold.italic"))
plotCBCL3

plotCBCL4 <- ggplot(otu_taxa_meta_fam_pat_na, aes(x=CBCLtot5_TotaalTscore, y=otu_taxa_meta_fam_pat_na[,CBCLGenera[4]])) + geom_point() + geom_smooth(method=lm, se=F) + labs(y="Relative abundance %",x="CBCL total problems score") +ggtitle(gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",CBCLGenera[4])))+ theme_classic()+ theme(plot.title = element_text( size=14, face="bold.italic"))
plotCBCL4

plotCBCL5 <- ggplot(otu_taxa_meta_fam_pat_na, aes(x=CBCLtot5_TotaalTscore, y=otu_taxa_meta_fam_pat_na[,CBCLGenera[5]])) + geom_point() + geom_smooth(method=lm, se=F) + labs(y="Relative abundance %",x="CBCL total problems score") +ggtitle(gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",CBCLGenera[5])))+ theme_classic()+ theme(plot.title = element_text( size=14, face="bold.italic"))
plotCBCL5

plotCBCL6 <- ggplot(otu_taxa_meta_fam_pat_na, aes(x=CBCLtot5_TotaalTscore, y=otu_taxa_meta_fam_pat_na[,CBCLGenera[6]])) + geom_point() + geom_smooth(method=lm, se=F) + labs(y="Relative abundance %",x="CBCL total problems score") +ggtitle(gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",CBCLGenera[6])))+ theme_classic()+ theme(plot.title = element_text( size=14, face="bold.italic"))
plotCBCL6

plotCBCL7 <- ggplot(otu_taxa_meta_fam_pat_na, aes(x=CBCLtot5_TotaalTscore, y=otu_taxa_meta_fam_pat_na[,CBCLGenera[7]])) + geom_point() + geom_smooth(method=lm, se=F) + labs(y="Relative abundance %",x="CBCL total problems score") +ggtitle(gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",CBCLGenera[7])))+ theme_classic()+ theme(plot.title = element_text( size=14, face="bold.italic"))
plotCBCL7

plotCBCL8 <- ggplot(otu_taxa_meta_fam_pat_na, aes(x=CBCLtot5_TotaalTscore, y=otu_taxa_meta_fam_pat_na[,CBCLGenera[8]])) + geom_point() + geom_smooth(method=lm, se=F) + labs(y="Relative abundance %",x="CBCL total problems score") +ggtitle(gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",CBCLGenera[8])))+ theme_classic()+ theme(plot.title = element_text( size=14, face="bold.italic"))
plotCBCL8

plotCBCL9 <- ggplot(otu_taxa_meta_fam_pat_na, aes(x=CBCLtot5_TotaalTscore, y=otu_taxa_meta_fam_pat_na[,CBCLGenera[9]])) + geom_point() + geom_smooth(method=lm, se=F) + labs(y="Relative abundance %",x="CBCL total problems score") +ggtitle(gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",CBCLGenera[9])))+ theme_classic()+ theme(plot.title = element_text( size=14, face="bold.italic"))
plotCBCL9

6.5.2 ADOS comparative score

Analysis was done to compare relative abundance per genus between patients or family members. For each test, these variables were taken into account: Age at microbiome collection, Age squared (to explain linear and non-linear effects of age), BMI in categories (healthy, overweight, obese) and Gender. #(For analysis with CBCL and ADOS, familygrouped and wonen are not included in this analysis since it only contains patients which don’t live together and do not belong to the same family)

otu_taxa_meta_fam <- readRDS("otu_taxa_metaKleef_final_prev50%_v17.rds")
ADOS <- read_excel("ADOS.xlsx")
otu_taxa_meta_fam_ados <- merge(otu_taxa_meta_fam, ADOS, by.x="Onderzoeksnummer",by.y="OnderzoeksID")
otu_taxa_meta_fam_ados <- subset(otu_taxa_meta_fam_ados, OnderzoeksID != "KSMB22")
#otu_taxa_meta_fam_ados$Age2 <- (otu_taxa_meta_fam_ados$AgeMicrobiome)^2
colnames(otu_taxa_meta_fam_ados) <- gsub(" ","_",colnames(otu_taxa_meta_fam_ados))
colnames(otu_taxa_meta_fam_ados) <- gsub("-","_",colnames(otu_taxa_meta_fam_ados))
colnames(otu_taxa_meta_fam_ados) <- gsub("\\[","",colnames(otu_taxa_meta_fam_ados))
colnames(otu_taxa_meta_fam_ados) <- gsub("\\]","",colnames(otu_taxa_meta_fam_ados))


################# non-parametric test with intestinal complaints and genetics
otu_ados <- subset(otu_taxa_meta_fam_ados, Onderzoeksnummer != 23)
otu_ados$Genetic_variant3 <- factor(otu_ados$Genetic_variant3, levels=c("Pathogenic mutation","Deletion < 1MB", "Deletion > 1MB"))
otu_ados$Genetic_variant2 <- factor(otu_ados$Genetic_variant2, levels=c("Mutation + deletion < 1MB","Deletion > 1MB"))

shapiro.test(otu_ados$ADOS_comp)

## 
##  Shapiro-Wilk normality test
## 
## data:  otu_ados$ADOS_comp
## W = 0.85646, p-value = 0.005499

ados_int <- subset(otu_ados, Onderzoeksnummer != 3)
ttest_intest <- wilcox.test(ADOS_comp~Intestinal_complaints,data=ados_int)
ttest_intest

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  ADOS_comp by Intestinal_complaints
## W = 28, p-value = 0.7322
## alternative hypothesis: true location shift is not equal to 0

boxplot_ados_intest <- ggboxplot(ados_int, x="Intestinal_complaints", y="ADOS_comp", fill="Intestinal_complaints", title="ADOS comparative score vs Intestinal complaints", ylab = "ADOS comparative score", xlab="Intestinal complaints") + theme(legend.position="none",panel.border = element_rect(color = "black", size = 1, fill = NA))
boxplot_ados_intest

ttest_gen2 <- wilcox.test(ADOS_comp~Genetic_variant2,data=otu_ados)
ttest_gen2

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  ADOS_comp by Genetic_variant2
## W = 38.5, p-value = 0.7122
## alternative hypothesis: true location shift is not equal to 0

boxplot_ados_gen2 <- ggboxplot(otu_ados, x="Genetic_variant2", y="ADOS_comp", fill="Genetic_variant2", title="ADOS comparative score vs Genetic variants (2)", ylab = "ADOS comparative score", xlab="Genetic variants (2)") +scale_color_manual(values = cbp_gen2, aesthetics = "fill")+ theme(legend.position="none",panel.border = element_rect(color = "black", size = 1, fill = NA))
boxplot_ados_gen2

ttest_gen3 <- kruskal.test(ADOS_comp~Genetic_variant3,data=otu_ados)
ttest_gen3

## 
##  Kruskal-Wallis rank sum test
## 
## data:  ADOS_comp by Genetic_variant3
## Kruskal-Wallis chi-squared = 5.0016, df = 2, p-value = 0.08202

boxplot_ados_gen3 <- ggboxplot(otu_ados, x="Genetic_variant3", y="ADOS_comp", fill="Genetic_variant3", title="ADOS comparative score vs Genetic variants (3)", ylab = "ADOS comparative score") +scale_color_manual(values = cbp_gen3, aesthetics = "fill")+ theme(legend.position="none",panel.border = element_rect(color = "black", size = 1, fill = NA))
boxplot_ados_gen3

# Analysis with ART
results_ADOS_comp <- data.frame(matrix(nrow=139, ncol=6))
colnames(results_ADOS_comp) <- c("Genus","pval_ADOS_comp", "fval_ADOS_comp", "pval_AgeMicrobiome","pval_BMI_cat","pval_GenderMale1Female2")
set.seed(2)
for (i in 2:139){
  results_ADOS_comp[i,1] <- colnames(otu_taxa_meta_fam_ados)[i]
  otu_taxa_meta_fam_ados$var <- otu_taxa_meta_fam_ados[,i]
  fit_art <- aligned.rank.transform(var~ADOS_comp+AgeMicrobiome+BMI_cat+GenderMale1Female2,data=otu_taxa_meta_fam_ados)
  results_ADOS_comp[i,2] <- fit_art$significance[1,4]
  results_ADOS_comp[i,3] <- fit_art$significance[1,3]
  results_ADOS_comp[i,4] <- fit_art$significance[2,4]
  results_ADOS_comp[i,5] <- fit_art$significance[3,4]
  results_ADOS_comp[i,6] <- fit_art$significance[4,4]
}

results_ADOS_comp$adjust_ADOS_comp <- p.adjust(results_ADOS_comp$pval_ADOS_comp, method="fdr")
results_ADOS_comp$adjust_Age <- p.adjust(results_ADOS_comp$pval_AgeMicrobiome, method="fdr")
results_ADOS_comp$adjust_BMI_cat <- p.adjust(results_ADOS_comp$pval_BMI_cat, method="fdr")
results_ADOS_comp$adjust_Gender <- p.adjust(results_ADOS_comp$pval_GenderMale1Female2, method="fdr")

datatable(results_ADOS_comp, rownames=FALSE, colnames=c("Genus", "P-value ADOS comp", "F-value ADOS comp", "P-value Age", "P-value BMI", "P-value Gender", "Adjust. P-value ADOS comp", "Adjust. P-value Age", "Adjust. P-value BMI", "Adjust. P-value Gender"))

# Plot nominal significant taxa
ADOSGenera <- results_ADOS_comp$Genus[which(results_ADOS_comp$pval_ADOS_comp < 0.05)]
length(ADOSGenera)

## [1] 1

otu_taxa_meta_fam_ados[,2:139] <- otu_taxa_meta_fam_ados[,2:139]*100

plotADOS1 <- ggplot(otu_taxa_meta_fam_ados, aes(x=ADOS_comp, y=otu_taxa_meta_fam_ados[,ADOSGenera[1]])) + geom_point() + geom_smooth(method=lm, se=F) + labs(y="Relative abundance %",x="ADOS comparative score") + theme_classic()+ggtitle(gsub("_"," ",gsub("(.*)__(.*)__","\\2 - ",ADOSGenera[1])))+ theme(plot.title = element_text( size=14, face="bold.italic"))
plotADOS1

Kleefstra analysis final

Mirjam Bloemendaal & Priscilla Vlaming

16 December 2022