Trifolium nodule microbiome analysis script

1) Import required libraries

This section loads the list of requried libraries for plotting and running models

library("RColorBrewer")
library("ggplot2")
library("plyr")
library("vegan")
library("reshape2")
library("ape")
library("phyloseq")
library("knitr")
library("xtable")

2) Import OTU + mapping files

This section loads the list of OTU tables in BIOM format and a mapping table in csv or tab delimited format. Both files are merged to generate a phyloseq file called “comp”

#otu files
otu_all = "../Desktop/Thesis_files/R_files/otus.biom"
m=read.csv("../Desktop/Thesis_files/R_files/nodule_map_new.txt", row.names=1)
file<-import_biom(otu_all)
map = sample_data(m)
comp <-merge_phyloseq(file,map)
head(sample_data(comp))

##               Soil Species Individuals     Plate    Comb Native nodC_clade
## master.96.A9   Mic     Mic          39 master_96     Mic    yes          n
## master.96.G7   Bif     Bar          78 master_96 Bif_Bar     no          x
## master.96.C8   Mac     Bar          65 master_96 Mac_Bar     no          j
## master.48.F4   Bar     Bar          20 master_48     Bar    yes          p
## master.96.B2   Mic     Mac          44 master_96 Mic_Mac     no          n
## master.96.G11  Bif     Mic          87 master_96 Bif_Mic     no          x
##               nod_size   X18s_PD    trna_PD elison_PD Description
## master.96.A9       0.6 0.0000000 0.00000000 0.0000000          F5
## master.96.G7       2.3 0.2271033 0.04134491 0.1994908          A2
## master.96.C8       0.6 0.2594373 0.08283154 0.2993784          B5
## master.48.F4       1.0 0.0000000 0.00000000 0.0000000          C6
## master.96.B2       0.2 0.2371527 0.09260847 0.2573638         F10
## master.96.G11      1.0 0.1928626 0.05857074 0.1574762          C4

colnames(tax_table(comp)) <- c(k = "Kingdom", p = "Phylum", c = "Class", o = "Order", f = "Family", g = "Genus", s = "Species")
#Sample counts
head(taxa_sums(comp))

## 2496608  783451  213370  968854  839247 4339860 
##       4       3      11       1       1       2

#Complete otu mb
comp

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 2394 taxa and 144 samples ]
## sample_data() Sample Data:       [ 144 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 2394 taxa by 7 taxonomic ranks ]

3) Subsetting abundant and rare microbiomes from the complete OTU table. Rare microbiome consists of OTUs NOT falling under the “Rhizobiales” order

Before we subset the complete OTU table we need to process it to remove mislabelled/ badly assigned OTUs. We drop all OTUs that had no hits in the greengenes database. The cleaned OTU table is stored in “mb_all”

#removing low quality taxa matches and normalize
mb_all= subset_taxa(comp, Kingdom!="None" & Kingdom!="NOHIT" )
mb_all

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1314 taxa and 144 samples ]
## sample_data() Sample Data:       [ 144 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 1314 taxa by 7 taxonomic ranks ]

#Dropped 1080 OTUs

4) Begin normalisation

Our OTU tables are distributed with a highly abundant fraction and highly variable albeit less abundantly distributed rare fraction. This code block separates the complete OTU table into abundant and rare OTU tables. We normalise all reads to sample number and library size. Also a distance matrix using bray curtis distances were generated. At the end if thus step we had 3 OTU tables. An abundant OTU table (mb_rhiz) comprising of all OTUs belonging to the Rhizobiales order, a rare OTU table with all the less abundant OTUs (mb_rare) and finally the complete OTU table mb_all with all the OTUs that cleared the processing stage.

norm_all = transform_sample_counts(mb_all, function(x) x/sum(x))
df_all = as(sample_data(norm_all), "data.frame")
d_all = phyloseq::distance(norm_all, "bray")
#NO. of OTUs identified in complete microbiome
norm_all

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1314 taxa and 144 samples ]
## sample_data() Sample Data:       [ 144 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 1314 taxa by 7 taxonomic ranks ]

#only rhiz and normalize
mb_rhiz= subset_taxa(mb_all, Order=="o__Rhizobiales")
norm_rhiz = transform_sample_counts(mb_rhiz, function(x) x/sum(x))
df_rhiz = as(sample_data(norm_rhiz), "data.frame")
d_rhiz = phyloseq::distance(norm_rhiz, "bray")
#NO. of OTUs identified in abundant/ rhizobiales microbiome
norm_rhiz

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 123 taxa and 144 samples ]
## sample_data() Sample Data:       [ 144 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 123 taxa by 7 taxonomic ranks ]

#only rare and normalize
#get OTUs
x = names(sort(taxa_sums(mb_rhiz), decreasing = TRUE))
y = names(sort(taxa_sums(mb_all), decreasing = TRUE))
select_rare <- setdiff(y, x)

#subsample and normalize
mb_rare= prune_taxa(select_rare, mb_all)
norm_rare = transform_sample_counts(mb_rare, function(x) x/sum(x))
df_rare = as(sample_data(norm_rare), "data.frame")
d_rare = phyloseq::distance(norm_rare, "bray")
#NO. of OTUs identified in rare microbiome
norm_rare

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1191 taxa and 144 samples ]
## sample_data() Sample Data:       [ 144 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 1191 taxa by 7 taxonomic ranks ]

5) Read distribution across OTU tables

This section shows how reads are distributed across the abundant and rare fraction of OTU table. This section of code is followed by a function to summarise reads into their phyla level abundances for easier visualisation.

#read distribution
readsumsdf_all = data.frame(nreads = sort(sample_sums(mb_all),TRUE), sorted = 1:nsamples(mb_all), type = "Samples")
range(readsumsdf_all$nreads)

## [1]   6546 462142

readsumsdf_rhiz = data.frame(nreads = sort(sample_sums(mb_rhiz),TRUE), sorted = 1:nsamples(mb_all), type = "Samples")
range(readsumsdf_rhiz$nreads)

## [1]   6253 460276

readsumsdf_rare = data.frame(nreads = sort(sample_sums(mb_rare),TRUE), sorted = 1:nsamples(mb_all), type = "Samples")
range(readsumsdf_rare$nreads)

## [1]   163 10867

#otu abundances and frequencies required for generating the basic plots
abund_val <- function(normalized){
  otu.abun = apply(otu_table(normalized),1,mean)
  # Calculate the frequency of each OTU across all samples
  otu.freq = rowSums(otu_table(normalized) != 0)/144
  # Reassign names of phyla so we only color by the top 5 phyla and mark all others as "other"
  phyla = as.vector(data.frame(tax_table(normalized))$Phylum)
  levels(phyla) = c(levels(phyla),"other")
  keephyla = c("p__Bacteroidetes","p__Proteobacteria","p__Actinobacteria", "p__Chloroflexi", "p__TM7")
  phyla[!(phyla %in% keephyla)] = "Other"
  phyla = as.vector(phyla)
  phyla=as.factor(phyla)
  otuabun = cbind.data.frame(abundance=log(otu.abun),frequency=otu.freq,phyla)
  return(otuabun)
}

6) Basic plots for OTU table

Here we plot abundance of OTUs in log10 against occupancy/ occurence in samples.

#get values to plot chart
abun_all <-  abund_val(norm_all)
abun_rhiz <- abund_val(norm_rhiz)
abun_rare <- abund_val(norm_rare)

# Use color brewer to pick a color scheme for the phyla
brew = brewer.pal(6, "Set1")
# Create a scatterplot of OTUs showing their average relative abundance and frequency 
ggplot(abun_all, aes(x=abundance,y=frequency,color=phyla)) + geom_point(size=3) + xlab("Average relative abundance (log10 scale)") + ylab("frequency in all samples - All mb") + scale_colour_brewer(palette="Set2")+labs(title="Distribution of all OTU phyla-Complete microbiome")+ xlim(-20, 5)

ggplot(abun_rare, aes(x=abundance,y=frequency,color=phyla)) + geom_point(size=3) + xlab("Average relative abundance (log10 scale)") + ylab("frequency in all samples - Rare mb") + scale_colour_brewer(palette="Set2")+labs(title="Distribution of all OTU phyla-Rare microbiome")+ xlim(-20, 5)

ggplot(abun_rhiz, aes(x=abundance,y=frequency,color=phyla)) + geom_point(size=3) + xlab("Average relative abundance (log10 scale)") + ylab("frequency in all samples - Rhiz mb") + scale_colour_brewer(palette="Set2")+labs(title="Distribution of all OTU phyla-Rhizobiales microbiome")+ xlim(-20, 5)

#bar plots needed for rare and rhizobiales taxa

7) Alpha diversity

We make use of three species diversity metrics to evaluate alpha diversity. Observed species, Shannon and Chao1. All OTU tables were rarefied to 160 and 6000 reads per samples. Soil and species do crop out as important factors from anova but the strength is largest for the smaller samples and also the results are not consistent. Under some metrics we see a significant effect while in others we dont. This is mostly due to an issue of undersampling. There is no correct way to proceed from here.

adiv_anova <- function(ss_no, otu_table){
  mlist <- factor(c("Observed", "Shannon", "Chao1"))
  set.seed(3336)
  rarefied <- rarefy_even_depth(otu_table, sample.size = ss_no)
  richness <- estimate_richness(rarefied, measures=mlist)
  richness <- richness[-3]
  sdata <- sample_data(rarefied)
  for ( i in 1:3){
    aov_facs <- aov(richness[[i]]~ sdata$Soil*sdata$Species)
    print(summary(aov_facs))
  }
}

adiv_anova(160, mb_rare)## all factors crop out as significant. This disappears at higher rarefactions

##                           Df Sum Sq Mean Sq F value  Pr(>F)   
## sdata$Soil                 5  495.8   99.16   3.534 0.00535 **
## sdata$Species              5  592.5  118.49   4.224 0.00152 **
## sdata$Soil:sdata$Species  25 1660.6   66.42   2.368 0.00122 **
## Residuals                108 3030.0   28.06                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##                           Df Sum Sq Mean Sq F value   Pr(>F)    
## sdata$Soil                 5  11388  2277.6   4.520 0.000884 ***
## sdata$Species              5   5869  1173.8   2.329 0.047328 *  
## sdata$Soil:sdata$Species  25  20937   837.5   1.662 0.039255 *  
## Residuals                108  54423   503.9                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##                           Df Sum Sq Mean Sq F value Pr(>F)  
## sdata$Soil                 5  0.393 0.07851   1.341 0.2525  
## sdata$Species              5  0.734 0.14688   2.509 0.0344 *
## sdata$Soil:sdata$Species  25  2.406 0.09624   1.644 0.0426 *
## Residuals                108  6.323 0.05854                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

adiv_anova(160, mb_rhiz)

##                           Df Sum Sq Mean Sq F value Pr(>F)  
## sdata$Soil                 5   9.62  1.9236   2.481 0.0362 *
## sdata$Species              5   1.28  0.2569   0.331 0.8931  
## sdata$Soil:sdata$Species  25  33.67  1.3469   1.737 0.0278 *
## Residuals                108  83.75  0.7755                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##                           Df Sum Sq Mean Sq F value Pr(>F)  
## sdata$Soil                 5  31.40   6.281   2.297 0.0501 .
## sdata$Species              5   4.59   0.918   0.336 0.8902  
## sdata$Soil:sdata$Species  25 106.15   4.246   1.553 0.0638 .
## Residuals                108 295.31   2.734                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##                           Df  Sum Sq  Mean Sq F value Pr(>F)  
## sdata$Soil                 5 0.01678 0.003355   1.679 0.1458  
## sdata$Species              5 0.00372 0.000744   0.372 0.8666  
## sdata$Soil:sdata$Species  25 0.08266 0.003306   1.655 0.0406 *
## Residuals                108 0.21581 0.001998                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

adiv_anova(6000, mb_rare)

##               Df Sum Sq Mean Sq F value Pr(>F)
## sdata$Soil     3  11041    3680   9.389  0.234
## sdata$Species  1    313     313   0.797  0.536
## Residuals      1    392     392               
##               Df Sum Sq Mean Sq F value Pr(>F)
## sdata$Soil     3  13286    4429  29.834  0.134
## sdata$Species  1    568     568   3.828  0.301
## Residuals      1    148     148               
##               Df  Sum Sq Mean Sq F value Pr(>F)
## sdata$Soil     3 0.13982 0.04661   0.251  0.860
## sdata$Species  1 0.03055 0.03055   0.164  0.755
## Residuals      1 0.18591 0.18591

adiv_anova(6000, mb_rhiz)

##                           Df Sum Sq Mean Sq F value Pr(>F)  
## sdata$Soil                 5  124.1  24.828   2.882 0.0176 *
## sdata$Species              5   34.5   6.894   0.800 0.5519  
## sdata$Soil:sdata$Species  25  202.2   8.088   0.939 0.5532  
## Residuals                108  930.5   8.616                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##                           Df Sum Sq Mean Sq F value Pr(>F)
## sdata$Soil                 5    732   146.5   1.211  0.309
## sdata$Species              5    659   131.7   1.089  0.371
## sdata$Soil:sdata$Species  25   3760   150.4   1.243  0.220
## Residuals                108  13066   121.0               
##                           Df   Sum Sq   Mean Sq F value Pr(>F)  
## sdata$Soil                 5 0.003124 0.0006247   2.191 0.0604 .
## sdata$Species              5 0.004012 0.0008025   2.814 0.0198 *
## sdata$Soil:sdata$Species  25 0.007348 0.0002939   1.031 0.4357  
## Residuals                108 0.030796 0.0002851                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

8) Beta diversity: Ordinations

The scree plot shows the number of dominant factors explaining variance. In both the complete and abundant table we see that scree plot has a single dominant axis. however when we look at the scree plot for only the rare microbiome, we see that there are multiple dominant axis with the first two explaining the greatest variance. This indicates that scree plot and ordinations for both complete and abundant tables are driven by the abundant, dominant OTU. Labelling and coloring of arrows for the CCA graphs can be improved under inkscape.

This section needs work

#Ordinations: PCoA principal 
mb.pcoa.all <- ordinate(norm_all, d_all, method="PCoA")
pcoa.all<-plot_ordination(norm_all, mb.pcoa.all, color="Soil", shape="Species")+geom_point(size=5)+labs(title="PCoA plot for all OTUs")
pcoa.all

scree <- head(mb.pcoa.all$values["Eigenvalues"],20)
barplot(scree$Eigenvalues, ylab="Eigenvalues")

mb.pcoa.rhiz <- ordinate(norm_rhiz, d_rhiz, method="PCoA")
pcoa.rhiz<-plot_ordination(norm_rhiz, mb.pcoa.rhiz, color="Soil", shape="Species")+geom_point(size=5)+labs(title="PCoA plot for Rhizobales OTUs")
pcoa.rhiz

scree <- head(mb.pcoa.rhiz$values["Eigenvalues"],20)
barplot(scree$Eigenvalues, ylab="Eigenvalues")

mb.pcoa.rare <- ordinate(norm_rare, d_rare, method="PCoA")
pcoa.rare<-plot_ordination(norm_rare, mb.pcoa.rare, color="Soil", shape="Species")+geom_point(size=5)+labs(title="PCoA plot for rare OTUs")
pcoa.rare

scree <- head(mb.pcoa.rare$values["Eigenvalues"],20)
barplot(scree$Eigenvalues, ylab="Eigenvalues")

#Ordination: CCA constrained by soil interaction. This shows if variance from "Soil" factor is removed, how to samples orient themselves? Here we see similar results for all 3 OTU tables. samples belonging to "Fuc" cluster in the opposite direction to "Mic" and "Wor". This segregation is more apparent in rare microbiome compared to complete and rhizobiales OTU table. 

mball.cca <- ordinate(norm_all, method= "CCA", formula=norm_all~Soil)
all_cca <- plot_ordination(norm_all, mball.cca, color="Soil", shape="Species")+geom_point(size=5)+labs(title="CCA plot for all OTUs, constrained by Soil interaction") 
all_cca

arrowmat = vegan::scores(mball.cca, display = "bp")
arrowdf <- data.frame(labels = rownames(arrowmat), arrowmat)
# Define the arrow aesthetic mapping
arrow_map = aes(xend = CCA1, yend = CCA2, x = 0, y = 0, shape = NULL, color = NULL, 
                label = labels)
label_map = aes(x = 1.2 * CCA1, y = 1.2 * CCA2, shape = NULL, color = NULL, 
                label = labels)
# Make a new graphic
arrowhead = arrow(length = unit(0.05, "npc"))
p1 = all_cca+ geom_segment(arrow_map, size = 1, data = arrowdf, color = "black", 
                            arrow = arrowhead) + geom_text(label_map, size = 5, data = arrowdf)
p1

#similar plot to above
mbrhiz.cca <- ordinate(norm_rhiz, method= "CCA", formula=norm_rhiz~Soil)
rhiz_cca <- plot_ordination(norm_rhiz, mbrhiz.cca, color="Soil", shape="Species")+geom_point(size=5)+labs(title="CCA plot for Rhizobiales OTUs, constrained by Soil interaction") 
rhiz_cca

arrowmat = vegan::scores(mbrhiz.cca, display = "bp")
arrowdf <- data.frame(labels = rownames(arrowmat), arrowmat)
# Define the arrow aesthetic mapping
arrow_map = aes(xend = CCA1, yend = CCA2, x = 0, y = 0, shape = NULL, color = NULL, 
                label = labels)
label_map = aes(x = 1.2 * CCA1, y = 1.2 * CCA2, shape = NULL, color = NULL, 
                label = labels)
# Make a new graphic
arrowhead = arrow(length = unit(0.05, "npc"))
p1 = rhiz_cca+ geom_segment(arrow_map, size = 1, data = arrowdf, color = "black", 
                            arrow = arrowhead) + geom_text(label_map, size = 5, data = arrowdf)
p1

##Notice the better clarity in segregation. 
mbrare.cca <- ordinate(norm_rare, method= "CCA", formula=norm_rare~Soil)
rare_cca <- plot_ordination(norm_rare, mbrare.cca, color="Soil", shape="Species") +geom_point(size=5)+labs(title="CCA plot for rare OTUs, constrained by Soil interaction")
rare_cca

arrowmat = vegan::scores(mbrare.cca, display = "bp")
arrowdf <- data.frame(labels = rownames(arrowmat), arrowmat)
# Define the arrow aesthetic mapping
arrow_map = aes(xend = CCA1, yend = CCA2, x = 0, y = 0, shape = NULL, color = NULL, 
                label = labels)
label_map = aes(x = 1.2 * CCA1, y = 1.2 * CCA2, shape = NULL, color = NULL, 
                label = labels)
# Make a new graphic
arrowhead = arrow(length = unit(0.05, "npc"))
p1 = rare_cca+ geom_segment(arrow_map, size = 1, data = arrowdf, color = "black", 
                            arrow = arrowhead) + geom_text(label_map, size = 5, data = arrowdf)
p1

9) Adonis: Multivarite anova

We make use of microbiome community distances as a matrix and analyze varance across different factors. Primarily we are interested in how 2 factors: Soil and Species contribute to variance explained within these microbiomes.

#adonis
#beta div -- adonis rare using distances identified above
ads_function <- function(dist_mat,df_mat){
  set.seed(123)
  ads_res <- adonis(dist_mat~Soil*Species,df_mat, permutations=999)
  return(ads_res)
}

ad_all<- ads_function(d_all,  df_all)
print(kable(ad_all$aov.tab,digits=4))

## 
## 
##                  Df   SumsOfSqs   MeanSqs   F.Model       R2   Pr(>F)
## -------------  ----  ----------  --------  --------  -------  -------
## Soil              5      0.0052     1e-03    2.3596   0.0767    0.008
## Species           5      0.0043     9e-04    1.9494   0.0634    0.028
## Soil:Species     25      0.0107     4e-04    0.9721   0.1580    0.529
## Residuals       108      0.0474     4e-04        NA   0.7020       NA
## Total           143      0.0675        NA        NA   1.0000       NA

ad_rhiz<- ads_function(d_rhiz,  df_rhiz)
print(kable(ad_rhiz$aov.tab,digits=4))

## 
## 
##                  Df   SumsOfSqs   MeanSqs   F.Model       R2   Pr(>F)
## -------------  ----  ----------  --------  --------  -------  -------
## Soil              5       1e-04         0    3.9488   0.1167    0.001
## Species           5       1e-04         0    2.4910   0.0736    0.012
## Soil:Species     25       2e-04         0    1.1570   0.1710    0.238
## Residuals       108       6e-04         0        NA   0.6386       NA
## Total           143       9e-04        NA        NA   1.0000       NA

ad_rare<-ads_function(d_rare,  df_rare)
print(kable(ad_rare$aov.tab,digits=4))

## 
## 
##                  Df   SumsOfSqs   MeanSqs   F.Model       R2   Pr(>F)
## -------------  ----  ----------  --------  --------  -------  -------
## Soil              5      2.1690    0.4338    5.1546   0.1392    0.001
## Species           5      1.3063    0.2613    3.1044   0.0838    0.001
## Soil:Species     25      3.0178    0.1207    1.4343   0.1937    0.001
## Residuals       108      9.0892    0.0842        NA   0.5833       NA
## Total           143     15.5823        NA        NA   1.0000       NA

set.seed(123)
ads1<-adonis(d_rare ~ Native, df_rare, permutations = 999) # not imp
ads2<-adonis(d_rare ~ Species , df_rare, permutations = 999) # imp 8
ads3<-adonis(d_rare ~ Soil , df_rare, permutations = 999) #
ads4<-adonis(d_rare ~ nod_size , df_rare, permutations = 999) #
ads13<-adonis(d_rare ~ X18s_PD , df_rare, permutations = 999) #
ads14<-adonis(d_rare ~ trna_PD , df_rare, permutations = 999) #
ads12<-adonis(d_rare ~ elison_PD , df_rare, permutations = 999) #
ads5<-adonis(d_rare ~ Soil+Species , df_rare, permutations = 999)
ads6<-adonis(d_rare ~ Soil:Species , df_rare, permutations = 999)
ads7<-adonis(d_rare ~ Soil*Species , df_rare, permutations = 999)
ads8<-adonis(d_rare ~ Native*nod_size , df_rare, permutations = 999)
ads9<-adonis(d_rare ~ nod_size*Soil*Species , df_rare, permutations = 999)
print(kable(ads9$aov.tab, digits=4))

## 
## 
##                           Df   SumsOfSqs   MeanSqs   F.Model       R2   Pr(>F)
## ----------------------  ----  ----------  --------  --------  -------  -------
## nod_size                   1      0.2274    0.2274    2.7252   0.0146    0.001
## Soil                       5      2.1565    0.4313    5.1692   0.1384    0.001
## Species                    5      1.2469    0.2494    2.9889   0.0800    0.001
## nod_size:Soil              5      0.3596    0.0719    0.8619   0.0231    0.802
## nod_size:Species           5      0.5283    0.1057    1.2664   0.0339    0.062
## Soil:Species              25      2.9364    0.1175    1.4077   0.1884    0.001
## nod_size:Soil:Species     23      1.9529    0.0849    1.0176   0.1253    0.432
## Residuals                 74      6.1743    0.0834        NA   0.3962       NA
## Total                    143     15.5823        NA        NA   1.0000       NA

set.seed(123)
ads1<-adonis(d_rhiz ~ Native, df_rhiz, permutations = 999) # not imp
ads2<-adonis(d_rhiz ~ Species , df_rhiz, permutations = 999) # imp 8
ads3<-adonis(d_rhiz ~ Soil , df_rhiz, permutations = 999) #
ads4<-adonis(d_rhiz ~ nod_size , df_rhiz, permutations = 999) #
ads13<-adonis(d_rhiz ~ X18s_PD , df_rhiz, permutations = 999) #
ads14<-adonis(d_rhiz ~ trna_PD , df_rhiz, permutations = 999) #
ads12<-adonis(d_rhiz ~ elison_PD , df_rhiz, permutations = 999) #
ads5<-adonis(d_rhiz ~ Soil+Species , df_rhiz, permutations = 999)
ads6<-adonis(d_rhiz ~ Soil:Species , df_rhiz, permutations = 999)
ads7<-adonis(d_rhiz ~ Soil*Species , df_rhiz, permutations = 999)
ads8<-adonis(d_rhiz ~ Native*nod_size , df_rhiz, permutations = 999)
ads9<-adonis(d_rhiz ~ nod_size*Soil*Species , df_rhiz, permutations = 999)
print(kable(ads9$aov.tab, digits=4))

## 
## 
##                           Df   SumsOfSqs   MeanSqs   F.Model       R2   Pr(>F)
## ----------------------  ----  ----------  --------  --------  -------  -------
## nod_size                   1       0e+00         0    0.8839   0.0058    0.392
## Soil                       5       1e-04         0    3.4793   0.1139    0.001
## Species                    5       1e-04         0    2.2049   0.0722    0.032
## nod_size:Soil              5       0e+00         0    0.6996   0.0229    0.731
## nod_size:Species           5       0e+00         0    0.6952   0.0228    0.714
## Soil:Species              25       1e-04         0    0.9598   0.1572    0.568
## nod_size:Soil:Species     23       1e-04         0    0.8002   0.1205    0.794
## Residuals                 74       5e-04         0        NA   0.4847       NA
## Total                    143       9e-04        NA        NA   1.0000       NA

set.seed(123)
ads1<-adonis(d_all ~ Native, df_all, permutations = 999) # not imp
ads2<-adonis(d_all ~ Species , df_all, permutations = 999) # imp 8
ads3<-adonis(d_all ~ Soil , df_all, permutations = 999) #
ads4<-adonis(d_all ~ nod_size , df_all, permutations = 999) #
ads13<-adonis(d_all ~ X18s_PD , df_all, permutations = 999) #
ads14<-adonis(d_all ~ trna_PD , df_all, permutations = 999) #
ads12<-adonis(d_all ~ elison_PD , df_all, permutations = 999) #
ads5<-adonis(d_all ~ Soil+Species , df_all, permutations = 999)
ads6<-adonis(d_all ~ Soil:Species , df_all, permutations = 999)
ads7<-adonis(d_all ~ Soil*Species , df_all, permutations = 999)
ads8<-adonis(d_all ~ Native*nod_size , df_all, permutations = 999)
ads9<-adonis(d_all ~ nod_size*Soil*Species , df_all, permutations = 999)
print(kable(ads9$aov.tab, digits=4))

## 
## 
##                           Df   SumsOfSqs   MeanSqs   F.Model       R2   Pr(>F)
## ----------------------  ----  ----------  --------  --------  -------  -------
## nod_size                   1      0.0003     3e-04    0.7187   0.0050    0.493
## Soil                       5      0.0052     1e-03    2.2156   0.0776    0.022
## Species                    5      0.0042     8e-04    1.7841   0.0625    0.051
## nod_size:Soil              5      0.0011     2e-04    0.4456   0.0156    0.948
## nod_size:Species           5      0.0019     4e-04    0.8091   0.0283    0.628
## Soil:Species              25      0.0099     4e-04    0.8422   0.1475    0.776
## nod_size:Soil:Species     23      0.0098     4e-04    0.9004   0.1451    0.644
## Residuals                 74      0.0350     5e-04        NA   0.5184       NA
## Total                    143      0.0675        NA        NA   1.0000       NA

#phylogenetic distance
#Testing to see if phylogenetic distance of the host had any effect on the complete and rare microbiome. We used 2 sources of sequences 18s and trna to get distance matrix of host plants. We also used distances from the phylogentic tree of clover built by Elison 2006.
set.seed(1234)
dist1<-adonis(d_all ~ X18s_PD , df_all, permutations = 999) #
dist2<-adonis(d_all ~ trna_PD , df_all, permutations = 999) #
dist2

## 
## Call:
## adonis(formula = d_all ~ trna_PD, data = df_all, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##            Df SumsOfSqs    MeanSqs F.Model     R2 Pr(>F)
## trna_PD     1  0.000297 0.00029666 0.62728 0.0044  0.565
## Residuals 142  0.067156 0.00047293         0.9956       
## Total     143  0.067453                    1.0000

dist3<-adonis(d_all ~ elison_PD , df_all, permutations = 999) #

set.seed(1234)
dist1<-adonis(d_rare ~ X18s_PD , df_rare, permutations = 999) #
dist1

## 
## Call:
## adonis(formula = d_rare ~ X18s_PD, data = df_rare, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##            Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)  
## X18s_PD     1    0.1611 0.16107  1.4831 0.01034  0.092 .
## Residuals 142   15.4213 0.10860         0.98966         
## Total     143   15.5823                 1.00000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

dist2<-adonis(d_rare ~ trna_PD , df_rare, permutations = 999) #
dist2

## 
## Call:
## adonis(formula = d_rare ~ trna_PD, data = df_rare, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##            Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)  
## trna_PD     1    0.1958 0.19581  1.8072 0.01257  0.032 *
## Residuals 142   15.3865 0.10836         0.98743         
## Total     143   15.5823                 1.00000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

dist3<-adonis(d_rare ~ elison_PD , df_rare, permutations = 999) #
dist3

## 
## Call:
## adonis(formula = d_rare ~ elison_PD, data = df_rare, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##            Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## elison_PD   1    0.1331 0.13308  1.2232 0.00854  0.226
## Residuals 142   15.4492 0.10880         0.99146       
## Total     143   15.5823                 1.00000