library(tidyverse); packageVersion("tidyverse") # 1.3.0
library(phyloseq); packageVersion("phyloseq") # 1.30.0
library(ggplot2); packageVersion("ggplot2") # 3.3.0
library(ampvis2); packageVersion("ampvis2") # 2.5.9
library(plyr); packageVersion("plyr") # 1.8.6
library(dplyr); packageVersion("dplyr") # 0.8.5
library(gridExtra); packageVersion("gridExtra") # 2.3
library(ggpubr); packageVersion("ggpubr") # 0.3.0
library(vegan); packageVersion("vegan") # 2.5.6
if(!require("devtools"))
install.packages("devtools")
# source the phyloseq_to_ampvis2() function from the gist
devtools::source_gist("8d0ca4206a66be7ff6d76fc4ab8e66c6")
biom = ("run9-14_unfiltered_feature_table_w_taxonomy_ASV.biom")
biom = import_biom(biom)
metadata = import_qiime_sample_data("metadata_Biofilms_run9-14_v5.txt")
meta_csv <- read.csv("metadata_Biofilms_run9-14_v5.csv")
Rooted_Tree = ("tree.nwk")
Tree <- read_tree(Rooted_Tree)
colnames(tax_table(biom)) = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species", "ASV")
rank_names(biom)
tax_table(biom) <- tax_table(biom) [,-5] # Removes the "Family" column
rank_names(biom)
myData = merge_phyloseq(biom,metadata,Tree)
# Order columns for later graphs
ordered <- c("0Sucr_0Muc", "0Sucr_2.5Muc", "0.1Sucr_0.1Muc", "0.1Sucr_2.5Muc", "0.1Sucr_8Muc",
"0.5Sucr_2.5Muc", "0.8Sucr_2.5Muc", "Inoculum")
sample_data(myData)$Sucrose_and_mucin <-
factor(sample_data(myData)$Sucrose_and_mucin, levels=ordered)
ordered <- c("0Sucr_2.5Muc_0FBS", "0Sucr_0Muc_10FBS", "0Sucr_2.5Muc_10FBS", "0Sucr_2.5Muc_20FBS",
"0.1Sucr_0.1Muc_0FBS", "0.1Sucr_2.5Muc_0FBS", "0.1Sucr_8.0Muc_0FBS", "0.1Sucr_2.5Muc_10FBS",
"0.1Sucr_0.1Muc_30FBS", "0.1Sucr_2.5Muc_30FBS",
"0.1Sucr_2.5Muc_50FBS",
"0.5Sucr_2.5Muc_0FBS", "0.5Sucr_2.5Muc_10FBS",
"0.8Sucr_2.5Muc_0FBS", "0.8Sucr_2.5Muc_10FBS",
"Inoculum")
sample_data(myData)$Replicates_2 <-
factor(sample_data(myData)$Replicates_2, levels=ordered)
myData
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 869 taxa and 94 samples ]
## sample_data() Sample Data: [ 94 samples by 34 sample variables ]
## tax_table() Taxonomy Table: [ 869 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 869 tips and 864 internal nodes ]
myData <- subset_taxa(myData, Phylum != "Unassigned")
myData
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 764 taxa and 94 samples ]
## sample_data() Sample Data: [ 94 samples by 34 sample variables ]
## tax_table() Taxonomy Table: [ 764 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 764 tips and 759 internal nodes ]
controls <- c("media", "not_applicable") # names of controls in the column "sample_type"
myData <- subset_samples(myData, !(sample_type %in% controls))
myData
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 764 taxa and 63 samples ]
## sample_data() Sample Data: [ 63 samples by 34 sample variables ]
## tax_table() Taxonomy Table: [ 764 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 764 tips and 759 internal nodes ]
Contam_o <- c("o__Rhizobiales")
Contam_g <- c("g__Delftia","g__Yersinia", "g__Listeria", "g__Mogibacterium", "g__Acinetobacter", "g__Jeogalicoccus", "g__Pseudomonas", "g__Pseudoramibacter")
Contam_s <- c("s__platensis","s__subtilis", "s__paradoxus","s__coli","s__aeruginosa", "s__fluorescens")
Contam_a <- c("49ed5dfd41e37e23a4e525c5a21a8fa8","ab2bb49867509a1650d185f93c305875")
Contam_media <- c("s__atypica","s__moorei", "s__salivae","s__concisus")
myData <- subset_taxa(myData, !(Order %in% Contam_o))
myData <- subset_taxa(myData, !(Genus %in% Contam_g))
myData <- subset_taxa(myData, !(Species %in% Contam_s))
myData <- subset_taxa(myData, !(ASV %in% Contam_a))
myData <- subset_taxa(myData, !(ASV %in% Contam_media))
myData
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 675 taxa and 63 samples ]
## sample_data() Sample Data: [ 63 samples by 34 sample variables ]
## tax_table() Taxonomy Table: [ 675 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 675 tips and 670 internal nodes ]
myData <- subset_samples(myData, media_type != "spent")
myData
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 675 taxa and 36 samples ]
## sample_data() Sample Data: [ 36 samples by 34 sample variables ]
## tax_table() Taxonomy Table: [ 675 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 675 tips and 670 internal nodes ]
myData <- prune_taxa(taxa_sums(myData) > 0, myData)
myData
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 301 taxa and 36 samples ]
## sample_data() Sample Data: [ 36 samples by 34 sample variables ]
## tax_table() Taxonomy Table: [ 301 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 301 tips and 299 internal nodes ]
myData_species <- tax_glom(myData, "Species")
myData_species <- prune_taxa(taxa_sums(myData_species) > 0, myData_species)
myData_species
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 153 taxa and 36 samples ]
## sample_data() Sample Data: [ 36 samples by 34 sample variables ]
## tax_table() Taxonomy Table: [ 153 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 153 tips and 152 internal nodes ]
# Change rank names
myData_species_ampvis <- myData_species
colnames(tax_table(myData_species_ampvis)) = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
# rank_names(myData_species_ampvis) # Now 7 ranks ordered in the old way, but remember they are wrong
# Convert to ampvis
myData_species_ampvis <- phyloseq_to_ampvis2(myData_species_ampvis)
# Fix the SampleID column name
colnames(myData_species_ampvis$metadata) # Need to change "X_SampleID" to just "SampleID"
## [1] "X_SampleID" "BarcodeSequence"
## [3] "LinkerPrimerSequence" "ReversePrimer"
## [5] "InputFileName" "description"
## [7] "sample_type" "dna_kit"
## [9] "sucrose_concentration_percent" "DS_Sucrose"
## [11] "AV_Sucrose" "Sucrose_and_mucin"
## [13] "mucin_concentration_g" "DS_Mucin"
## [15] "AV_Mucin" "fbs_concentration_percent"
## [17] "DS_FBS" "AV_FBS"
## [19] "Closest" "Replicates"
## [21] "Replicates_2" "Replicates_3"
## [23] "initial_pH" "final_pH"
## [25] "OD" "media_type"
## [27] "study_name" "Sample_ID"
## [29] "Unique_names" "Type"
## [31] "Run" "DNA_Location"
## [33] "Inoculum" "DNA_conc"
colnames(myData_species_ampvis$metadata)[colnames(myData_species_ampvis$metadata) == 'X_SampleID'] <- 'SampleID'
# Merging in ampvis takes the mean of the counts for all the merged samples
myData_species_ampvis_mergedRep <- amp_mergereplicates(myData_species_ampvis, "Replicates_2")
myData_species_ampvis_mergedRep
## ampvis2 object with 4 elements.
## Summary of OTU table:
## Samples OTUs
## 16 153
## (The read counts have been normalised)
##
## Assigned taxonomy:
## Kingdom Phylum Class Order Family Genus Species
## 153(100%) 153(100%) 153(100%) 153(100%) 153(100%) 153(100%) 0(0%)
##
## Metadata variables: 34
## SampleID, BarcodeSequence, LinkerPrimerSequence, ReversePrimer, InputFileName, description, sample_type, dna_kit, sucrose_concentration_percent, DS_Sucrose, AV_Sucrose, Sucrose_and_mucin, mucin_concentration_g, DS_Mucin, AV_Mucin, fbs_concentration_percent, DS_FBS, AV_FBS, Closest, Replicates, Replicates_2, Replicates_3, initial_pH, final_pH, OD, media_type, study_name, Sample_ID, Unique_names, Type, Run, DNA_Location, Inoculum, DNA_conc
abund_table <- myData_species_ampvis_mergedRep$abund
tax_table <- myData_species_ampvis_mergedRep$tax
for_krona <- merge(abund_table, tax_table, by=0)
# write.csv(for_krona "For_Krona/data_unrarefied.csv")
Figure 1: Krona pie charts
myData_sucr <- subset_samples(myData, mucin_concentration_g == "2.5")
myData_sucr <- subset_samples(myData_sucr, fbs_concentration_percent == "10")
myData_sucr_species <- tax_glom(myData_sucr, "Species") # agglomerate to species level
myData_sucr_species <- prune_taxa(taxa_sums(myData_sucr_species) > 0, myData_sucr_species) # remove empty taxa after subsetting
myData_sucr_species
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 93 taxa and 10 samples ]
## sample_data() Sample Data: [ 10 samples by 34 sample variables ]
## tax_table() Taxonomy Table: [ 93 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 93 tips and 92 internal nodes ]
get_variable(myData_sucr_species, "Unique_names") # There are 10 samples with fixed 10% FBS and 2.5g/L mucin
## [1] "14 C 10F 2.5M 0S -1" "14 C 10F 2.5M 0S -2"
## [3] "14 C 10F 2.5M 0S -3" "14 C 10FBS 2.5M 0.1S -1"
## [5] "14 C 10FBS 2.5M 0.1S -2" "14 Trep C 10F 2.5M 0.5S -1"
## [7] "14 Trep C 10F 2.5M 0.5S -2" "14 Trep C 10F 2.5M 0.8S -1"
## [9] "14 Trep C 10F 2.5M 0.8S -2" "9 C 10F 2.5M 0.1S"
sample_names_sucr <- as.character(unique(sample_data(myData_sucr_species)$Sample_ID)) # Extract sample names for the 10 samples
species_number_sucr <-rep(list(NA),10) # Make an empty list where the number of species will go
names(species_number_sucr) <- sample_names_sucr # Add sample names to each place on the list
# Loop to count number of species in each sample
for(k in 1:10){
one_sample <- prune_samples(sample_data(myData_sucr_species)$Sample_ID %in% sample_names_sucr[k],myData_sucr_species) # Isolate one sample
one_sample_pruned <- prune_taxa(taxa_sums(one_sample) > 0, one_sample) # Remove all species that are 0 (not present in the sample)
species_number_sucr[[k]]<-nrow(otu_table(one_sample_pruned)) # write the number of different species for this sample onto the list
}
# Generate dataframe for boxplot
species_number_sucr_df <- ldply(species_number_sucr, cbind) # convert to a dataframe
colnames(species_number_sucr_df) <- c("X.SampleID", "species_count")
# Merge Dataframes
meta_species_sucr_df <- merge(meta_csv, species_number_sucr_df, by = "X.SampleID", all.x = F) # Adds metadata to the samples
# Generate boxplot with p-values
Species_Sucr_plot_t_test <- ggplot(meta_species_sucr_df, aes(x=as.factor(sucrose_concentration_percent), y=species_count)) +
labs(title="Species count for each % sucrose", x="% Sucrose", y="Species count") +
ylim(20,70) + # Change based on the max and min species values for both sucrose and fbs
geom_boxplot() +
stat_compare_means(label.y=c(20)) +
theme_classic() +
theme(plot.title = element_text(size=10))
# Make DNA concentration numeric
meta_species_sucr_df$DNA_conc <- as.character(meta_species_sucr_df$DNA_conc) # Has to be changed to a character first
meta_species_sucr_df$DNA_conc <- as.numeric(meta_species_sucr_df$DNA_conc)
# Visualize p-values: Specify the comparisons
my_comparisons <- list(c("0", "0.5"), c("0", "0.8"), c("0.1", "0.8")) # only significant comparisions
symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), symbols = c("****", "***", "**", "*", "ns"))
# Generate boxplot, only show signifcant comparisions
DNA_Sucr_plot_t_test <- ggplot(meta_species_sucr_df, aes(x=as.factor(sucrose_concentration_percent), y=DNA_conc)) +
labs(title="DNA conc. (ng/µL) for each % sucrose", x="% Sucrose", y="DNA concentration (ng/µL)") +
ylim(0,250) +
geom_boxplot() +
stat_compare_means(comparisons = my_comparisons, method="t.test",
label.y = c(225, 245, 195), symnum.args = symnum.args) +
stat_compare_means(label.y=c(0)) +
theme_classic() +
theme(plot.title = element_text(size=10))
# Make DNA concentration numeric
meta_species_sucr_df$final_pH <- as.character(meta_species_sucr_df$final_pH) # Has to be changed to a character first
meta_species_sucr_df$final_pH <- as.numeric(meta_species_sucr_df$final_pH)
# Visualize p-values: Specify the comparisons
my_comparisons <- list( c("0", "0.5"), c("0", "0.8"), c("0.1", "0.5")) # only significant comparisions
symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), symbols = c("****", "***", "**", "*", "ns"))
# Generate boxplot, only show signifcant comparisions
ph_Sucr_plot_t_test <- ggplot(meta_species_sucr_df, aes(x=as.factor(sucrose_concentration_percent), y=final_pH)) +
labs(title="Final pH for each % sucrose", x="% Sucrose", y="Final pH") +
ylim(4,8.1) +
stat_compare_means(comparisons = my_comparisons, method="t.test",
label.y = c(7.6, 8, 7.2), symnum.args = symnum.args) +
stat_compare_means(label.y=c(4)) +
geom_boxplot() +
theme_classic() +
theme(plot.title = element_text(size=10))
myData_fbs <- subset_samples(myData, mucin_concentration_g == "2.5")
myData_fbs <- subset_samples(myData_fbs, sucrose_concentration_percent == "0.1")
myData_fbs_species <- tax_glom(myData_fbs, "Species") # agglomerate to species level
myData_fbs_species <- prune_taxa(taxa_sums(myData_fbs_species) > 0, myData_fbs_species) # remove empty taxa after subsetting
myData_fbs_species
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 87 taxa and 10 samples ]
## sample_data() Sample Data: [ 10 samples by 34 sample variables ]
## tax_table() Taxonomy Table: [ 87 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 87 tips and 86 internal nodes ]
get_variable(myData_fbs_species, "Unique_names") # There are 10 samples with fixed 0.1% sucrose and 2.5g/L mucin
## [1] "14 C 10FBS 2.5M 0.1S -1" "14 C 10FBS 2.5M 0.1S -2"
## [3] "9 C 0F 2.5M 0.1Suc" "9 C 0F 2.5M 0.1S"
## [5] "9 C control SHI -1" "9 C 10F 2.5M 0.1S"
## [7] "9 C control SHI -2" "9 C control SHI -3"
## [9] "9 C 30F 2.5M 0.1S" "9 C 50F 2.5M 0.1S"
sample_names_fbs <-as.character(unique(sample_data(myData_fbs_species)$Sample_ID)) # Extract sample names for the 10 samples
species_number_fbs <-rep(list(NA),10) # Make an empty list where the number of species will go
names(species_number_fbs) <- sample_names_fbs # Add sample names to each place on the list
# Loop to count number of species in each sample
for(k in 1:10){
one_sample <- prune_samples(sample_data(myData_fbs_species)$Sample_ID %in% sample_names_fbs[k],myData_fbs_species) # Isolate one sample
one_sample_pruned <- prune_taxa(taxa_sums(one_sample) > 0, one_sample) # Remove all species that are 0 (not present in the sample)
species_number_fbs[[k]]<-nrow(otu_table(one_sample_pruned)) # write the number of different species for this sample onto the list
}
# Generate dataframe for boxplot
species_number_fbs_df <- ldply(species_number_fbs, cbind) # convert to a dataframe
colnames(species_number_fbs_df) <- c("X.SampleID", "species_count")
# Merge Dataframes
meta_species_fbs_df <- merge(meta_csv, species_number_fbs_df, by = "X.SampleID", all.x = F) # Adds metadata to the samples
# Generate boxplot with p-values
Species_fbs_plot_t_test <- ggplot(meta_species_fbs_df, aes(x=as.factor(fbs_concentration_percent), y=species_count)) +
labs(title="Species count for each % FBS", x="% FBS", y="Species count") +
ylim(20,70) + # Change based on the max and min species values for both sucrose and fbs
geom_boxplot() +
stat_compare_means(label.y=c(20)) +
theme_classic() +
theme(plot.title = element_text(size=10))
# Make DNA concentration numeric
meta_species_fbs_df$DNA_conc <- as.character(meta_species_fbs_df$DNA_conc) # Has to be changed to a character first
meta_species_fbs_df$DNA_conc <- as.numeric(meta_species_fbs_df$DNA_conc)
# Visualize p-values: Specify the comparisons
# There are no signficant comparisions
# Generate boxplot, only show signifcant comparisions
DNA_fbs_plot_t_test <- ggplot(meta_species_fbs_df, aes(x=as.factor(fbs_concentration_percent), y=DNA_conc)) +
labs(title="DNA conc. (ng/µL) for each % FBS", x="% FBS", y="DNA concentration (ng/µL)") +
ylim(0,250) +
geom_boxplot() +
stat_compare_means(label.y=c(0)) +
theme_classic() +
theme(plot.title = element_text(size=10))
# Make DNA concentration numeric
meta_species_fbs_df$final_pH <- as.character(meta_species_fbs_df$final_pH) # Has to be changed to a character first
meta_species_fbs_df$final_pH <- as.numeric(meta_species_fbs_df$final_pH)
# Visualize p-values: Specify the comparisons
# There are no signficant comparisions
# Generate boxplot, only show signifcant comparisions
ph_fbs_plot_t_test <- ggplot(meta_species_fbs_df, aes(x=as.factor(fbs_concentration_percent), y=final_pH)) +
# geom_dotplot(binaxis='y', stackdir='center', dotsize=0.5, position=position_dodge(1)) +
labs(title="Final pH for each % FBS", x="% FBS", y="Final pH") +
ylim(4,8.1) +
geom_boxplot() +
stat_compare_means(label.y=c(4)) +
theme_classic() +
theme(plot.title = element_text(size=10))
myData_muc <- subset_samples(myData, sucrose_concentration_percent == "0.1")
myData_muc <- subset_samples(myData_muc, fbs_concentration_percent == "0")
myData_muc_species <- tax_glom(myData_muc, "Species") # agglomerate to species level
myData_muc_species <- prune_taxa(taxa_sums(myData_muc_species) > 0, myData_muc_species) # remove empty taxa after subsetting
myData_muc_species
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 76 taxa and 7 samples ]
## sample_data() Sample Data: [ 7 samples by 34 sample variables ]
## tax_table() Taxonomy Table: [ 76 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 76 tips and 75 internal nodes ]
get_variable(myData_muc_species, "Unique_names") # There are 7 samples with fixed 0.1% sucrose and 0% FBS
## [1] "9 C 0F 2.5M 0.1Suc" "9 C 0F 2.5M 0.1S" "9 C control SHI -1"
## [4] "9 C control SHI -2" "9 C control SHI -3" "9 C 0F 8.0M 0.1S"
## [7] "9 C 0F 0.1M 0.1S"
sample_names_muc <-as.character(unique(sample_data(myData_muc_species)$Sample_ID)) # Extract sample names for the 7 samples
species_number_muc <-rep(list(NA),7) # Make an empty list where the number of species will go
names(species_number_muc) <- sample_names_muc # Add sample names to each place on the list
# Loop to count number of species in each sample
for(k in 1:7){
one_sample <- prune_samples(sample_data(myData_muc_species)$Sample_ID %in% sample_names_muc[k],myData_muc_species) # Isolate one sample
one_sample_pruned <- prune_taxa(taxa_sums(one_sample) > 0, one_sample) # Remove all species that are 0 (not present in the sample)
species_number_muc[[k]]<-nrow(otu_table(one_sample_pruned)) # write the number of different species for this sample onto the list
}
# Generate dataframe for boxplot
species_number_muc_df <- ldply(species_number_muc, cbind) # convert to a dataframe
colnames(species_number_muc_df) <- c("X.SampleID", "species_count")
# Merge Dataframes
meta_species_muc_df <- merge(meta_csv, species_number_muc_df, by = "X.SampleID", all.x = F) # Adds metadata to the samples
# Generate boxplot with p-values
Species_muc_plot_t_test <- ggplot(meta_species_muc_df, aes(x=as.factor(mucin_concentration_g), y=species_count)) +
# geom_dotplot(binaxis='y', stackdir='center', dotsize=0.5, position=position_dodge(1)) +
labs(title="Species count for each mucin conc.", x="g/L mucin", y="Species count") +
ylim(20,70) + # Change based on the max and min species values for both sucrose and fbs
geom_boxplot() +
stat_compare_means(label.y=c(20)) +
theme_classic() +
theme(plot.title = element_text(size=10))
# Make DNA concentration numeric
meta_species_muc_df$DNA_conc <- as.character(meta_species_muc_df$DNA_conc) # Has to be changed to a character first
meta_species_muc_df$DNA_conc <- as.numeric(meta_species_muc_df$DNA_conc)
# Visualize p-values: Specify the comparisons
# There are no signficant comparisions
# Generate boxplot, only show signifcant comparisions
DNA_muc_plot_t_test <- ggplot(meta_species_muc_df, aes(x=as.factor(mucin_concentration_g), y=DNA_conc)) +
labs(title="DNA conc. (ng/µL) for each mucin conc.", x="g/L mucin", y="DNA concentration (ng/µL)") +
ylim(0,250) +
geom_boxplot() +
stat_compare_means(label.y=c(0)) +
theme_classic() +
theme(plot.title = element_text(size=10))
# Make DNA concentration numeric
meta_species_muc_df$final_pH <- as.character(meta_species_muc_df$final_pH) # Has to be changed to a character first
meta_species_muc_df$final_pH <- as.numeric(meta_species_muc_df$final_pH)
# Visualize p-values: Specify the comparisons
# There are no signficant comparisions
# Generate boxplot, only show signifcant comparisions
ph_muc_plot_t_test <- ggplot(meta_species_muc_df, aes(x=as.factor(mucin_concentration_g), y=final_pH)) +
# geom_dotplot(binaxis='y', stackdir='center', dotsize=0.5, position=position_dodge(1)) +
labs(title="Final pH for each mucin conc.", x="g/L mucin", y="Final pH") +
ylim(4,8.1) +
geom_boxplot() +
stat_compare_means(label.y=c(4)) +
theme_classic() +
theme(plot.title = element_text(size=10))
grid.arrange(DNA_Sucr_plot_t_test, DNA_fbs_plot_t_test, DNA_muc_plot_t_test,
Species_Sucr_plot_t_test, Species_fbs_plot_t_test, Species_muc_plot_t_test,
ph_Sucr_plot_t_test, ph_fbs_plot_t_test, ph_muc_plot_t_test,
nrow=3, ncol=3)
# Glom to Species
myData_species <- tax_glom(myData, "Species")
# Remove Empty taxa
myData_species <- prune_taxa(taxa_sums(myData_species) > 0, myData_species)
# Make a dataframe of the list of genus and species names connected to the OTU
tax <- as.data.frame(myData_species@tax_table@.Data) # Isolate the tax table
tax$Genus_species <- paste(tax$Genus,tax$Species,sep=" ") # add a new column of Genus and species names
tax$Genus_species <- sub("g__", "", tax$Genus_species) # remove g__
tax$Genus_species <- sub("s__", "", tax$Genus_species) # remove s__
tax$Genus_species <- gsub("_", " ", tax$Genus_species) # replace all occurences of "_"
MAP_FILE_species <- cbind(OTU_ID = rownames(tax), Genus_species = tax$Genus_species) # Generate map file with correct columns and column names
# View(MAP_FILE_species)
# Filter data to contain only conditions with 10% FBS and 2.5g/L mucin
myData_sucr <- subset_samples(myData, mucin_concentration_g == "2.5")
myData_sucr <- subset_samples(myData_sucr, fbs_concentration_percent == "10")
# Agglomerate to species level
myData_sucr_species <- tax_glom(myData_sucr, "Species")
# Remove empty taxa
myData_sucr_species <- prune_taxa(taxa_sums(myData_sucr_species) > 0, myData_sucr_species)
myData_sucr_species
levels(get_variable(myData_sucr_species, "Replicates_2"))
# Create empty lists
replicates_sucr_species<-rep(list(NA),4) # 4 is the number of sucrose conditions being compared
poplist<-rep(list(NA),4)
pops<-as.character(unique(sample_data(myData_sucr_species)$Replicates_2))
# Populate List with the Names of Groups we Want to Compare
poplist[[1]]<-pops[1]
poplist[[2]]<-pops[2]
poplist[[3]]<-pops[3]
poplist[[4]]<-pops[4]
names(replicates_sucr_species)<- pops
#Loop over each population, subset the phyloseq object to just that population and work out which ASVs are in that population, store those names in the list
# If the ASV is in just one of the samples of the condition, then it is included in the list!!!!
for(k in 1:4){ # Change number to number of conditions
phy.sub<-prune_samples(sample_data(myData_sucr_species)$Replicates_2 %in% poplist[[k]],myData_sucr_species)
phy.sub.keep<-apply(otu_table(phy.sub),1,function(x){sum(x)>0})
phy.sub.sub<-prune_taxa(phy.sub.keep,phy.sub)
replicates_sucr_species[[k]]<-rownames(otu_table(phy.sub.sub))
}
# Can check Venn Diagram
# venn(replicates_sucr_species)
# Dataframes we need
replicates_sucr_species # Lists of the species in at least one replicate!
View(MAP_FILE_species) # Key comparing ASV code to species
# Convert to a data frame
replicates_sucr_species_df <- ldply(replicates_sucr_species, cbind)
replicates_sucr_species_df
colnames(replicates_sucr_species_df) <- c("Condition", "OTU_ID")
# Merge the two dataframes based on OTU_ID
# https://stackoverflow.com/questions/25520753/replace-values-from-a-column-of-one-dataframe-by-values-from-another-dataframe
replicates_sucr_species_df_merged <- merge(replicates_sucr_species_df, MAP_FILE_species, by = "OTU_ID", all.x = TRUE)
# Order based on Condition
replicates_sucr_species_df_merged <- replicates_sucr_species_df_merged[order(replicates_sucr_species_df_merged$Condition),]
# Export CSV
# write.csv(replicates_sucr_species_df_merged, "Gradients_Graphs/Venn_sucrose_change_replicates_SPECIES_all.csv")
The CSV file can be used to generate a Venn Diagram with Interactivenn (http://www.interactivenn.net/)
# Filter data to contain only conditions with 0.1% sucrose and 2.5g/L mucin
myData_fbs <- subset_samples(myData, mucin_concentration_g == "2.5")
myData_fbs <- subset_samples(myData_fbs, sucrose_concentration_percent == "0.1")
# Agglomerate to species level
myData_fbs_species <- tax_glom(myData_fbs, "Species")
# Remove empty taxa
myData_fbs_species <- prune_taxa(taxa_sums(myData_fbs_species) > 0, myData_fbs_species)
myData_fbs_species
levels(get_variable(myData_fbs_species, "Replicates_2")) # 4 replicates
# Create empty lists
replicates_fbs_species<-rep(list(NA),4) # 4 is the number of FBS conditions being compared
poplist<-rep(list(NA),4)
pops<-as.character(unique(sample_data(myData_fbs_species)$Replicates_2))
# Populate List with the Names of Groups we Want to Compare
poplist[[1]]<-pops[1]
poplist[[2]]<-pops[2]
poplist[[3]]<-pops[3]
poplist[[4]]<-pops[4]
names(replicates_fbs_species)<- pops
#Loop over each population, subset the phyloseq object to just that population and work out which ASVs are in that population, store those names in the list
# If the ASV is in just one of the samples of the condition, then it is included in the list!!!!
for(k in 1:4){ # Change number to number of conditions
phy.sub<-prune_samples(sample_data(myData_fbs_species)$Replicates_2 %in% poplist[[k]],myData_fbs_species) # Change the condition column
phy.sub.keep<-apply(otu_table(phy.sub),1,function(x){sum(x)>0})
phy.sub.sub<-prune_taxa(phy.sub.keep,phy.sub)
replicates_fbs_species[[k]]<-rownames(otu_table(phy.sub.sub))
}
# Can check Venn Diagram
# venn(replicates_fbs_species)
# Dataframes we need
replicates_fbs_species # Lists of the species in at least one replicate!
View(MAP_FILE_species) # Key comparing ASV code to species
# Convert to a data frame
replicates_fbs_species_df <- ldply(replicates_fbs_species, cbind)
replicates_fbs_species_df
colnames(replicates_fbs_species_df) <- c("Condition", "OTU_ID")
# Merge the two dataframes based on OTU_ID
# https://stackoverflow.com/questions/25520753/replace-values-from-a-column-of-one-dataframe-by-values-from-another-dataframe
replicates_fbs_species_df_merged <- merge(replicates_fbs_species_df, MAP_FILE_species, by = "OTU_ID", all.x = TRUE)
# Order based on Condition
replicates_fbs_species_df_merged <- replicates_fbs_species_df_merged[order(replicates_fbs_species_df_merged$Condition),]
# Export CSV
# write.csv(replicates_fbs_species_df_merged, "Gradients_Graphs/Venn_fbs_change_replicates_SPECIES_all.csv")
The CSV file can be used to generate a Venn Diagram with Interactivenn (http://www.interactivenn.net/)
# Filter data to contain only conditions with 0.1% sucrose and 2.5g/L mucin
myData_muc <- subset_samples(myData, sucrose_concentration_percent == "0.1")
myData_muc <- subset_samples(myData_muc, fbs_concentration_percent == "0")
# Agglomerate to species level
myData_muc_species <- tax_glom(myData_muc, "Species")
# Remove empty taxa
myData_muc_species <- prune_taxa(taxa_sums(myData_muc_species) > 0, myData_muc_species)
myData_muc_species
levels(get_variable(myData_fbs_species, "Replicates_2")) # 3 replicates
# Create empty lists
replicates_muc_species<-rep(list(NA),3) # 3 is the number of mucin conditions we are comparing
poplist<-rep(list(NA),3)
pops<-as.character(unique(sample_data(myData_muc_species)$Replicates_2)) # Change to be the correct column
# Populate List with the Names of Groups we Want to Compare
poplist[[1]]<-pops[1]
poplist[[2]]<-pops[2]
poplist[[3]]<-pops[3]
names(replicates_muc_species)<- pops
#Loop over each population, subset the phyloseq object to just that population and work out which ASVs are in that population, store those names in the list
# If the ASV is in just one of the samples of the condition, then it is included in the list!!!!
for(k in 1:3){ # Change number to number of conditions
phy.sub<-prune_samples(sample_data(myData_muc_species)$Replicates_2 %in% poplist[[k]],myData_muc_species) # Change the condition column
phy.sub.keep<-apply(otu_table(phy.sub),1,function(x){sum(x)>0})
phy.sub.sub<-prune_taxa(phy.sub.keep,phy.sub)
replicates_muc_species[[k]]<-rownames(otu_table(phy.sub.sub))
}
# Can check Venn Diagram
# venn(replicates_muc_species)
# Dataframes we need
replicates_muc_species # Lists of the species in at least one replicate!
View(MAP_FILE_species) # Key comparing ASV code to species
# Convert to a data frame
replicates_muc_species_df <- ldply(replicates_muc_species, cbind)
replicates_muc_species_df
colnames(replicates_muc_species_df) <- c("Condition", "OTU_ID")
# Merge the two dataframes based on OTU_ID
# https://stackoverflow.com/questions/25520753/replace-values-from-a-column-of-one-dataframe-by-values-from-another-dataframe
replicates_muc_species_df_merged <- merge(replicates_muc_species_df, MAP_FILE_species, by = "OTU_ID", all.x = TRUE)
# Order based on Condition
replicates_muc_species_df_merged <- replicates_muc_species_df_merged[order(replicates_muc_species_df_merged$Condition),]
# Export CSV
# write.csv(replicates_muc_species_df_merged, "Gradients_Graphs/Venn_mucin_change_replicates_SPECIES_all.csv")
The CSV file can be used to generate a Venn Diagram with Interactivenn (http://www.interactivenn.net/)
# Glom to phylum
myData_phylum <- tax_glom(myData, "Phylum")
myData_phylum # 9 phyla
# Change rank names
myData_phylum_ampvis <- myData_phylum
colnames(tax_table(myData_phylum_ampvis)) = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
# rank_names(myData_phylum_ampvis) # Now 7 ranks ordered in the old way, but remember they are wrong
# Convert to ampvis
myData_phylum_ampvis <- phyloseq_to_ampvis2(myData_phylum_ampvis)
# Fix the SampleID column name
colnames(myData_phylum_ampvis$metadata) # Need to change "X_SampleID" to just "SampleID"
colnames(myData_phylum_ampvis$metadata)[colnames(myData_phylum_ampvis$metadata) == 'X_SampleID'] <- 'SampleID'
# Merging in ampvis takes the mean of the counts for all the merged samples
myData_phylum_ampvis_mergedRep <- amp_mergereplicates(myData_phylum_ampvis, "Replicates_2")
myData_phylum_ampvis_mergedRep # 16 samples, 9 Phyla (OTUs here)
# Convert to percent abundance
myData_phylum_ampvis_mergedRep_ra <- normaliseTo100(myData_phylum_ampvis_mergedRep)
#Convert to a dataframe
myData_phylum_ampvis_mergedRep_ra_df <- amp_export_long(myData_phylum_ampvis_mergedRep_ra,
tax_levels = "Phylum")
Barplot <- ggplot(myData_phylum_ampvis_mergedRep_ra_df, aes(x=Sucrose_and_mucin, y = count, fill = Phylum)) +
geom_bar(stat="identity", position="stack") +
theme_gray() + # Makes it look more like the ampvis heatmap
facet_grid(~fbs_concentration_percent, scales="free", space = "free") +
theme(axis.text.x = element_text(angle = 45, size=8, vjust = 1, hjust=1),
axis.text.y = element_text(size=8), axis.title.y = element_text(size=8),
legend.position="left", legend.text=element_text(size=10),
legend.title = element_text(size=10), legend.key.size = unit(0.2, "cm"),
plot.subtitle = element_text(hjust=0.5, size=10),
strip.text.x = element_text(size=5), # changes size of FBS labels
plot.margin=unit(c(0.5,0,0.5,1.5),"cm"),
panel.spacing = unit(0.6, "lines")) + # increases the distance between FBS groups
labs(title="Figure 3", x="Medium condition", y=("Average % Abundance"),
caption=NULL, subtitle="% FBS") +
scale_y_continuous(expand=c(0,0)) + scale_x_discrete(expand=c(0,0)) # Removes the extra space around the plot
Barplot
# Remove unassigned
myData <- subset_taxa(myData, Phylum != "Unassigned")
myData
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 764 taxa and 94 samples ]
## sample_data() Sample Data: [ 94 samples by 34 sample variables ]
## tax_table() Taxonomy Table: [ 764 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 764 tips and 759 internal nodes ]
# Remove controls
controls <- c("media", "not_applicable") # names of controls in the column "sample_type"
myData <- subset_samples(myData, !(sample_type %in% controls))
myData
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 764 taxa and 63 samples ]
## sample_data() Sample Data: [ 63 samples by 34 sample variables ]
## tax_table() Taxonomy Table: [ 764 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 764 tips and 759 internal nodes ]
# RAREFY
set.seed(142)
myData_rarefied <- rarefy_even_depth(myData, sample.size=min(sample_sums(myData)), rngseed=F, replace=T, trimOTUs=T, verbose=T)
# 419OTUs were removed because they are no longer
# present in any sample after random subsampling
# Remove contaminants
Contam_o <- c("o__Rhizobiales")
Contam_g <- c("g__Delftia","g__Yersinia", "g__Listeria", "g__Mogibacterium", "g__Acinetobacter", "g__Jeogalicoccus", "g__Pseudomonas", "g__Pseudoramibacter")
Contam_s <- c("s__platensis","s__subtilis", "s__paradoxus","s__coli","s__aeruginosa", "s__fluorescens")
Contam_a <- c("49ed5dfd41e37e23a4e525c5a21a8fa8","ab2bb49867509a1650d185f93c305875")
myData_rarefied <- subset_taxa(myData_rarefied, !(Order %in% Contam_o))
myData_rarefied <- subset_taxa(myData_rarefied, !(Genus %in% Contam_g))
myData_rarefied <- subset_taxa(myData_rarefied, !(Species %in% Contam_s))
myData_rarefied <- subset_taxa(myData_rarefied, !(ASV %in% Contam_a))
myData_rarefied
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 329 taxa and 63 samples ]
## sample_data() Sample Data: [ 63 samples by 34 sample variables ]
## tax_table() Taxonomy Table: [ 329 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 329 tips and 327 internal nodes ]
myData_unrarefied <- subset_taxa(myData, !(Order %in% Contam_o))
myData_unrarefied <- subset_taxa(myData_unrarefied, !(Genus %in% Contam_g))
myData_unrarefied <- subset_taxa(myData_unrarefied, !(Species %in% Contam_s))
myData_unrarefied <- subset_taxa(myData_unrarefied, !(ASV %in% Contam_a))
myData_unrarefied
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 675 taxa and 63 samples ]
## sample_data() Sample Data: [ 63 samples by 34 sample variables ]
## tax_table() Taxonomy Table: [ 675 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 675 tips and 670 internal nodes ]
# Remove spent SHI
myData_rarefied <- subset_samples(myData_rarefied, media_type != "spent")
myData_rarefied
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 329 taxa and 36 samples ]
## sample_data() Sample Data: [ 36 samples by 34 sample variables ]
## tax_table() Taxonomy Table: [ 329 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 329 tips and 327 internal nodes ]
myData_unrarefied <- subset_samples(myData_unrarefied, media_type != "spent")
myData_unrarefied
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 675 taxa and 36 samples ]
## sample_data() Sample Data: [ 36 samples by 34 sample variables ]
## tax_table() Taxonomy Table: [ 675 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 675 tips and 670 internal nodes ]
# Remove Empty Taxa
myData_rarefied <- prune_taxa(taxa_sums(myData_rarefied) > 0, myData_rarefied)
myData_rarefied
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 272 taxa and 36 samples ]
## sample_data() Sample Data: [ 36 samples by 34 sample variables ]
## tax_table() Taxonomy Table: [ 272 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 272 tips and 270 internal nodes ]
myData_unrarefied <- prune_taxa(taxa_sums(myData_unrarefied) > 0, myData_unrarefied)
myData_unrarefied
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 301 taxa and 36 samples ]
## sample_data() Sample Data: [ 36 samples by 34 sample variables ]
## tax_table() Taxonomy Table: [ 301 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 301 tips and 299 internal nodes ]
using rarefied data
p_observed <- plot_richness(myData_rarefied, x="Sucrose_and_mucin", color="mucin_concentration_g",
measures=c("Observed")) +
theme_bw() +
labs(title="Observed ASVs", color="g/L mucin", subtitle="% FBS", x="% sucrose", y="Observed ASVs") +
geom_boxplot(aes(color = mucin_concentration_g), alpha = 0.1) +
facet_grid(~fbs_concentration_percent, scales="free", space = "free") +
theme(axis.text.x = element_text(angle = 45, size=9, vjust = 1, hjust=1),
plot.subtitle = element_text(hjust=0.5, size=10),
plot.margin=unit(c(0.1,0.1,0.1,2),"cm"),
panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
# panel.border = element_blank(), axis.line = element_line(colour = "black")) +
scale_x_discrete(labels=c("0Sucr_2.5Muc"="0", "0.1Sucr_0.1Muc"="0.1", "0.1Sucr_2.5Muc"="0.1", "0.1Sucr_8Muc"="0.1",
"0.5Sucr_2.5Muc"="0.5", "0Sucr_0Muc"="0", "0.8Sucr_2.5Muc"="0.8", "Inoculum"="Inoculum"))
p_observed
using rarefied data
p_shannon <- plot_richness(myData_rarefied, x="Sucrose_and_mucin", color="mucin_concentration_g",
measures=c("Shannon")) +
theme_bw() +
labs(title="Shannon Index", color="g/L mucin", subtitle="% FBS", x="% sucrose") +
geom_boxplot(aes(color = mucin_concentration_g), alpha = 0.1) +
facet_grid(~fbs_concentration_percent, scales="free", space = "free") +
theme(axis.text.x = element_text(angle = 45, size=9, vjust = 1, hjust=1),
plot.subtitle = element_text(hjust=0.5, size=10),
plot.margin=unit(c(0.1,0.1,0.1,2),"cm"),
panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
scale_x_discrete(labels=c("0Sucr_2.5Muc"="0", "0.1Sucr_0.1Muc"="0.1", "0.1Sucr_2.5Muc"="0.1", "0.1Sucr_8Muc"="0.1",
"0.5Sucr_2.5Muc"="0.5", "0Sucr_0Muc"="0", "0.8Sucr_2.5Muc"="0.8", "Inoculum"="Inoculum"))
p_shannon
using rarefied data
p_simpson <- plot_richness(myData_rarefied, x="Sucrose_and_mucin", color="mucin_concentration_g",
measures=c("Simpson")) +
theme_bw() +
labs(title="Simpson Index", color="g/L mucin", subtitle="% FBS", x="% sucrose") +
geom_boxplot(aes(color = mucin_concentration_g), alpha = 0.1) +
facet_grid(~fbs_concentration_percent, scales="free", space = "free") +
theme(axis.text.x = element_text(angle = 45, size=9, vjust = 1, hjust=1),
plot.subtitle = element_text(hjust=0.5, size=10),
plot.margin=unit(c(0.1,0.1,0.1,2),"cm"),
panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
scale_x_discrete(labels=c("0Sucr_2.5Muc"="0", "0.1Sucr_0.1Muc"="0.1", "0.1Sucr_2.5Muc"="0.1", "0.1Sucr_8Muc"="0.1",
"0.5Sucr_2.5Muc"="0.5", "0Sucr_0Muc"="0", "0.8Sucr_2.5Muc"="0.8", "Inoculum"="Inoculum"))
p_simpson
Using unrarefied data
myData_ord_unweighted <- ordinate(myData_unrarefied, method="PCoA", distance="unifrac", weighted=F)
myData_ord_unweighted_plot <- plot_ordination(myData_unrarefied, myData_ord_unweighted, type="samples", color="Replicates_2",
title="Unweighted Unifrac PCoA", axes=1:2) +
theme_classic()
myData_ord_unweighted_plot
Using unrarefied data
myData_ord_weighted <- ordinate(myData_unrarefied, method="PCoA", distance="unifrac", weighted=T)
myData_ord_weighted_plot <- plot_ordination(myData_unrarefied, myData_ord_weighted, type="samples", color="Replicates_2",
title="Weighted Unifrac PCoA", axes=1:2) +
theme_classic()
myData_ord_weighted_plot
# Glom to species
myData_species <- tax_glom(myData, "Species") #agglomerate to the Species level
myData_species # (153 species)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 153 taxa and 36 samples ]
## sample_data() Sample Data: [ 36 samples by 34 sample variables ]
## tax_table() Taxonomy Table: [ 153 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 153 tips and 152 internal nodes ]
# Prune empty taxa
myData_species <- prune_samples(sample_sums(myData_species)>0, myData_species)
# Change rank names
myData_species_ampvis <- myData_species
colnames(tax_table(myData_species_ampvis)) = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
# Convert to ampvis
myData_species_ampvis <- phyloseq_to_ampvis2(myData_species_ampvis)
# Normalise OTUs to percent abundance then subset
# https://madsalbertsen.github.io/ampvis2/reference/amp_heatmap.html#preserving-relative-abundances-in-a-subset-of-larger-data
myData_species_ampvis_Trep <- amp_subset_taxa(myData_species_ampvis, tax_vector="g__Treponema", normalise=T)
# 134 OTUs have been filtered
# Before: 153 OTUs
# After: 19 OTUs
Heatmap_Trep_pa <- amp_heatmap(myData_species_ampvis_Trep, group_by="Sucrose_and_mucin",facet_by="fbs_concentration_percent",
normalise=F, tax_aggregate="Genus", tax_add="Family",
tax_show=20, tax_empty="remove",
color_vector=c("white","Dark Red"), plot_colorscale="log10", # added a color vector
max_abundance = 0.0001, min_abundance = .0000001, # these changed
plot_values = F, plot_values_size = 3) + # Change plot values to falst
theme(axis.text.x = element_text(angle = 45, size=9, vjust = 1),
axis.text.y = element_text(size=9),
legend.position="none",
strip.text.x = element_text(size=5),
plot.subtitle = element_text(hjust=0.5, size=10),
plot.caption = element_text(hjust=0.5, size=10),
plot.title = element_text(hjust=0, size=12)) +
labs(fill = "Presence/absence", title="Presence/absence of Treponema spp", subtitle="% FBS", caption="Medium condition")
Heatmap_Trep_pa
# Normalise OTUs to percent abundance then subset
# https://madsalbertsen.github.io/ampvis2/reference/amp_heatmap.html#preserving-relative-abundances-in-a-subset-of-larger-data
myData_species_ampvis_TM7 <- amp_subset_taxa(myData_species_ampvis, tax_vector="p__Saccharibacteria_(TM7)", normalise=T)
# 147 OTUs have been filtered
# Before: 153 OTUs
# After: 6 OTUs
# Presence absence heatmap
Heatmap_tm7_pa <- amp_heatmap(myData_species_ampvis_TM7, group_by="Sucrose_and_mucin",facet_by="fbs_concentration_percent",
normalise=F, tax_aggregate="Genus", tax_add="Family",
tax_show=20, tax_empty="remove",
color_vector=c("white","Dark Red"), plot_colorscale="log10", # added a color vector
max_abundance = 0.0001, min_abundance = .0000001, # these changed
plot_values = F, plot_values_size = 3) + # Change plot values to falst
theme(axis.text.x = element_text(angle = 45, size=9, vjust = 1),
axis.text.y = element_text(size=9),
legend.position="none",
strip.text.x = element_text(size=5),
plot.subtitle = element_text(hjust=0.5, size=10),
plot.caption = element_text(hjust=0.5, size=10),
plot.title = element_text(hjust=0, size=12)) +
labs(fill = "Presence/absence", title="Presence/absence of TM7 spp", subtitle="% FBS", caption="Medium condition")
Heatmap_tm7_pa
# Normalise OTUs to percent abundance then subset
# https://madsalbertsen.github.io/ampvis2/reference/amp_heatmap.html#preserving-relative-abundances-in-a-subset-of-larger-data
myData_species_ampvis_Actino <- amp_subset_taxa(myData_species_ampvis, tax_vector="g__Actinomyces", normalise=T)
# 141 OTUs have been filtered
# Before: 153 OTUs
# After: 12 OTUs
# Warning message:
# One or more samples have 0 total reads
# Presence/absence heatmap
Heatmap_Actino_pa <- amp_heatmap(myData_species_ampvis_Actino, group_by="Sucrose_and_mucin",facet_by="fbs_concentration_percent",
normalise=F, tax_aggregate="Genus", tax_add="Family",
tax_show=20, tax_empty="remove",
color_vector=c("white","Dark Red"), plot_colorscale="log10", # added a color vector
max_abundance = 0.0001, min_abundance = .0000001, # these changed
plot_values = F, plot_values_size = 3) + # Change plot values to falst
theme(axis.text.x = element_text(angle = 45, size=9, vjust = 1),
axis.text.y = element_text(size=9),
legend.position="none",
strip.text.x = element_text(size=5),
plot.subtitle = element_text(hjust=0.5, size=10),
plot.caption = element_text(hjust=0.5, size=10),
plot.title = element_text(hjust=0, size=12)) +
labs(fill = "Presence/absence", title="Presence/absence of Actinomyces spp.", subtitle="% FBS", caption="Medium condition")
Heatmap_Actino_pa
#Transform Count Data to % Abundance
myData_ra <- transform_sample_counts(myData, function(x) (x/sum(x))*100)
#Filter
myData_ra <- prune_taxa(taxa_sums(myData_ra) >0, myData_ra)
#Agglomerate Data at the Phylum Level
myData_ra_phylum = tax_glom(myData_ra, "Phylum", NArm = TRUE)
myData_ra_phylum
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 9 taxa and 36 samples ]
## sample_data() Sample Data: [ 36 samples by 34 sample variables ]
## tax_table() Taxonomy Table: [ 9 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 9 tips and 8 internal nodes ]
# Remove inoculum
myData_ra_phylum_2 <- subset_samples(myData_ra_phylum, Type == "Biofilm")
myData_ra_phylum_2
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 9 taxa and 34 samples ]
## sample_data() Sample Data: [ 34 samples by 34 sample variables ]
## tax_table() Taxonomy Table: [ 9 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 9 tips and 8 internal nodes ]
#Filter
myData_ra_phylum_2 <- prune_taxa(taxa_sums(myData_ra_phylum_2) >0, myData_ra_phylum_2)
#Convert to Dataframe
myData_ra_phylum_2_df <- psmelt(myData_ra_phylum_2)
trend_firmicutes <- ggplot(subset(myData_ra_phylum_2_df, Phylum %in% c("p__Firmicutes")),
aes(x=as.factor(sucrose_concentration_percent), y=Abundance)) +
geom_point(size = 1, alpha = 0.5) + # adds the specific points to the graph and makes transparent
theme_grey() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),
strip.text.x = element_text(size=6), # changes size of facet labels
axis.text.x = element_text(angle = 0, size=9), axis.title.x = element_text(size=9),
axis.text.y = element_text(size=9), axis.title.y = element_text(size=9)) +
scale_y_continuous(trans = 'log10') + # Expand the y-axis
geom_smooth(method=lm, aes(group=1), se=T, color="dark grey", formula=y~x) + # https://stackoverflow.com/questions/10911057/adding-a-simple-lm-trend-line-to-a-ggplot-boxplot
labs(title="Firmicutes % abundance with sucrose conc.", x="% sucrose", y=("% Abundance (log10 transformed)"), subtitle="All medium contitions",
caption=NULL)
trend_firmicutes
# https://sejohnston.com/2012/08/09/a-quick-and-easy-function-to-plot-lm-results-in-r/
regression_trend_firmicutes <- lm(Abundance ~ as.factor(sucrose_concentration_percent), data = subset(myData_ra_phylum_2_df, Phylum %in% c("p__Firmicutes")))
summary(regression_trend_firmicutes) # Give the R2 value
##
## Call:
## lm(formula = Abundance ~ as.factor(sucrose_concentration_percent),
## data = subset(myData_ra_phylum_2_df, Phylum %in% c("p__Firmicutes")))
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.5054 -3.9289 -0.1508 3.7473 24.1833
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 38.277 2.858 13.392
## as.factor(sucrose_concentration_percent)0.1 15.964 3.690 4.326
## as.factor(sucrose_concentration_percent)0.5 52.237 4.951 10.552
## as.factor(sucrose_concentration_percent)0.8 51.049 5.347 9.547
## Pr(>|t|)
## (Intercept) 0.0000000000000343 ***
## as.factor(sucrose_concentration_percent)0.1 0.000155 ***
## as.factor(sucrose_concentration_percent)0.5 0.0000000000129133 ***
## as.factor(sucrose_concentration_percent)0.8 0.0000000001332891 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.039 on 30 degrees of freedom
## Multiple R-squared: 0.8442, Adjusted R-squared: 0.8286
## F-statistic: 54.18 on 3 and 30 DF, p-value: 0.000000000003206
trend_Bacteroidetes <- ggplot(subset(myData_ra_phylum_2_df, Phylum %in% c("p__Bacteroidetes")),
aes(as.factor(fbs_concentration_percent), Abundance)) +
geom_point(size = 1, alpha = 0.5) + # adds the specific points to the graph and makes transparent
theme_grey() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),
strip.text.x = element_text(size=6), # changes size of facet labels
axis.text.x = element_text(angle = 0, size=9), axis.title.x = element_text(size=9),
axis.text.y = element_text(size=9), axis.title.y = element_text(size=9)) +
scale_y_continuous(trans = 'log10') + # Expand the y-axis
geom_smooth(method=lm, aes(group=1), se=T, color="dark grey") + # https://stackoverflow.com/questions/10911057/adding-a-simple-lm-trend-line-to-a-ggplot-boxplot
labs(title="Bacteroidetes % abundance with sucrose conc.", x="% FBS", y=("% Abundance (Log10 transformed)"), subtitle="All medium contitions",
caption=NULL)
trend_Bacteroidetes
# https://sejohnston.com/2012/08/09/a-quick-and-easy-function-to-plot-lm-results-in-r/
regression_trend_Bacteroidetes <- lm(Abundance ~ as.factor(fbs_concentration_percent), data = subset(myData_ra_phylum_2_df, Phylum %in% c("p__Bacteroidetes")))
summary(regression_trend_Bacteroidetes) # Give the R2 value
##
## Call:
## lm(formula = Abundance ~ as.factor(fbs_concentration_percent),
## data = subset(myData_ra_phylum_2_df, Phylum %in% c("p__Bacteroidetes")))
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.638 -4.878 -1.339 5.555 15.485
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.635 1.945 4.439 0.00012
## as.factor(fbs_concentration_percent)10 2.024 2.751 0.736 0.46771
## as.factor(fbs_concentration_percent)20 12.401 4.492 2.761 0.00989
## as.factor(fbs_concentration_percent)30 13.458 4.010 3.356 0.00222
## as.factor(fbs_concentration_percent)50 56.975 7.278 7.829 0.0000000124
##
## (Intercept) ***
## as.factor(fbs_concentration_percent)10
## as.factor(fbs_concentration_percent)20 **
## as.factor(fbs_concentration_percent)30 **
## as.factor(fbs_concentration_percent)50 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.013 on 29 degrees of freedom
## Multiple R-squared: 0.7158, Adjusted R-squared: 0.6765
## F-statistic: 18.26 on 4 and 29 DF, p-value: 0.0000001363
# Convert to species level
myData_species <- tax_glom(myData, taxrank = "Species")
# Convert to percent abundance
myData_species_pa <- transform_sample_counts(myData_species, function(x) (x/sum(x)*100))
# Export OTU and TAX table to a CSV each (can this step be bypassed?)
write.csv(tax_table(myData_species_pa), file="G3_host_MLRM_copy/percent_abundance_TAX_table_for_G3_host_ASV-level2.csv")
write.csv(otu_table(myData_species_pa), file="G3_host_MLRM_copy/percent_abundance_OTU_table_for_G3_host_ASV-level2.csv")
# import the CSVs (did not alter them at all)
TAX_species_for_G3_host <- read.csv(file="G3_host_MLRM_copy/percent_abundance_TAX_table_for_G3_host_ASV-level2.csv")
# View(TAX_species_for_G3_host)
OTU_species_for_G3_host <- read.csv(file="G3_host_MLRM_copy/percent_abundance_OTU_table_for_G3_host_ASV-level2.csv")
# View(TAX_species_for_G3_host)
# Merge the two dataframes based on X (which is the OTU_ID)
# https://stackoverflow.com/questions/25520753/replace-values-from-a-column-of-one-dataframe-by-values-from-another-dataframe
for_G3_host_species_merged_df <- merge(TAX_species_for_G3_host, OTU_species_for_G3_host, by = "X", all.x = TRUE)
# View(for_G3_host_species_merged_df)
# Do some post processing on the data frame
for_G3_host_species_merged_df$ASV <- NULL # remove the empty ASV column
for_G3_host_species_merged_df$X14.033.sub.plaque.Q <- NULL # remove the source plaque column
for_G3_host_species_merged_df$X9.033.sub.plaque.Q <- NULL
colnames(for_G3_host_species_merged_df)
# View(for_G3_host_species_merged_df)
for_G3_host_species_merged_df$Genus_species <- paste(for_G3_host_species_merged_df$Genus,for_G3_host_species_merged_df$Species,sep="_") # create a new column of genus species names
for_G3_host_species_merged_df$Genus_species <- sub("g__", "", for_G3_host_species_merged_df$Genus_species) # remove g__
for_G3_host_species_merged_df$Genus_species <- sub("s__", "", for_G3_host_species_merged_df$Genus_species) # remove s__
for_G3_host_species_merged_df[c("X", "Kingdom", "Phylum", "Class", "Order", "Genus", "Species")] <- list(NULL) # Remove the extra columns
rownames(for_G3_host_species_merged_df) <- for_G3_host_species_merged_df$Genus_species # make genus species the row names
for_G3_host_species_merged_df$Genus_species <- NULL # remove the genus species column
which(rownames(for_G3_host_species_merged_df) == "Saccharibacteria_(TM7)_[G-3]_bacterium_HMT_351")
# This is row number 22
# Make two dataframes, one with samples containing G3 and one samples without any G3 ####
df <- for_G3_host_species_merged_df
df[22,] > 0
# https://stackoverflow.com/questions/40800731/subset-columns-based-on-row-value
# https://win-vector.com/2018/02/27/r-tip-use-drop-false-with-data-frames/
wG3 <- df[,df[22,]>0,drop=FALSE]
# View(wG3)
ncol(wG3) # 19
woutG3 <- df[,df[22,]==0,drop=FALSE]
# View(woutG3)
ncol(woutG3) # 15
wG3 <- t(wG3) # transpose
rownames(wG3) <- c() # remove the row names
# View(wG3) # perfect!
woutG3 <- t(woutG3) # transpose
rownames(woutG3) <- c() # remove the row names
# View(woutG3) # perfect!
We want to know if the mean for wG3 is greater than the mean for wOut G3, so want a one sided Wilcoxon test (non-parametric) - Ho: wG3 - woutG3 = 0 - Ha: wG3 - woutG3 > 0
# Loop #
# (from https://stackoverflow.com/questions/44400278/t-test-for-each-row-in-r)
# Create an empty list that has the same number of values as number of columns (this will be the number of p-values we want at the end)
pvals <- rep(NA, ncol(wG3))
# pvals # empty list where every value is NA
# Make a loop
for(i in 1:ncol(wG3)) pvals[i] <-
wilcox.test(wG3[,i], woutG3[,i], alternative="greater", mu=0)$p.value
# [,i] points to the columns
# This is comparing each column from each dataframe, running a t-test and giving just the p-value,
# then writing that to each spot in the list
pvals
typeof(pvals) # its a double which is like an integer
class(pvals) # it is numeric
# Create a list of the species names
colnames(wG3)
# This pulls up the column names of wG3
names <- colnames(wG3)
names
# Combine the pvals and the names into one data frame
species_level_p_values <- data.frame(names, pvals)
species_level_p_values
# Give the data frame nicer column headers
cols <- c("Species", "p_value")
colnames(species_level_p_values) <- cols
species_level_p_values
# Add a column showing whether it is significant or not
# https://stackoverflow.com/questions/37321020/adding-new-column-with-conditional-values-using-ifelse
species_level_p_values$Significant <- ifelse(species_level_p_values$p_value < 0.001, '**', ifelse(species_level_p_values$p_value < 0.05, "*", "ns"))
View(species_level_p_values)
wG3_average_percent_abundance <- colMeans(wG3)
wG3_pa_df <- as.data.frame(wG3_average_percent_abundance)
row.names(wG3_pa_df)
wG3_pa_df <- data.frame(Species = rownames(wG3_pa_df), wG3_pa_df)
rownames(wG3_pa_df) <- c() # remove the row names
View(wG3_pa_df)
woutG3_average_percent_abundance <- colMeans(woutG3)
woutG3_pa_df <- as.data.frame(woutG3_average_percent_abundance)
row.names(woutG3_pa_df)
woutG3_pa_df <- data.frame(Species = rownames(woutG3_pa_df), woutG3_pa_df)
rownames(woutG3_pa_df) <- c() # remove the row names
# View(woutG3_pa_df)
# Combine percent abundances with p-values
species_level_p_values_pa <- merge(wG3_pa_df, woutG3_pa_df, by = "Species", all.x = TRUE)
species_level_p_values_pa <- merge(species_level_p_values_pa, species_level_p_values, by = "Species", all.x = TRUE)
write.csv(species_level_p_values_pa, "G3_host_MLRM_copy/species_level_p_values_pa2.csv")