Here we are refining bar chart summaries of the taxonomy tables. This is an unusual situation in that the dominant taxon makes up over 99% of the total data set. Consequently we will use an “exploding bar chart”. Here we will generate two bar charts for each data set:
Initially this will be conducted at Order level of taxonomy.
Two sets will be generated based on:
Primers used were the “updated” Earth Microbiome Project primers (described here: http://press.igsb.anl.gov/earthmicrobiome/protocols-and-standards/16s/):
Primers were removed after amplification by Dan Williams, operating the MiSeq sequencer.
For analysis, reads were:
# Load the amplicon-library OTU table from CSV.
OTU.amplib.table <- read.csv("rich.OTU.table.BTEX.csv", header = TRUE)

# Split each semicolon-delimited taxonomy string into one column per rank.
# NOTE(review): assumes every taxonomy string splits into the same number of
# fields (seven); rbind() would recycle shorter rows -- confirm against the data.
taxonomy <- data.frame(
  do.call("rbind", strsplit(as.character(OTU.amplib.table$taxonomy), ";", fixed = TRUE))
)
names(taxonomy) <- c("kingdom", "phylum", "class", "order", "family", "genus", "species")

# Replace the first column of the raw table with the per-rank columns.
OTU.amplib.table <- cbind(taxonomy, OTU.amplib.table[-1])

# Strip the "D_<digit>__" rank prefixes. gsub() is applied to every column, so
# all columns come back as text (or factors, depending on the R version).
OTU.amplib.table <- data.frame(
  lapply(OTU.amplib.table, function(column) gsub("D_\\d__", "", column))
)

# Restore the relative abundance column to numeric after the text round-trip.
OTU.amplib.table[["relative.abund."]] <-
  as.numeric(as.character(OTU.amplib.table[["relative.abund."]]))

kable(head(OTU.amplib.table))
| kingdom | phylum | class | order | family | genus | species | seq.count | relative.abund. | taxonomy.confidence | sequence |
|---|---|---|---|---|---|---|---|---|---|---|
| Bacteria | Proteobacteria | Gammaproteobacteria | Nitrococcales | Nitrococcaceae | Arhodomonas | uncultured bacterium | 453177 | 0.9931036 | 0.9567242262 | TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGCGGTCGGATAAGTCGGGTGTGAAAGCCCCGGGCTCAACCTGGGAATTGCATTCGATACTGTCTGGCTAGAGTCTGGCAGAGGGAGGTGGAATTTCCGGTGTAGCGGTGAAATGCGTAGATATCGGAAGGAACACCAGTGGCGAAGGCGACCTCCTGGGCCAAGACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGG |
| Bacteria | Firmicutes | Clostridia | Clostridiales | Clostridiaceae 1 | Clostridium sensu stricto 13 | uncultured bacterium | 1276 | 0.0027963 | 0.8319686708 | TACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGCGTAAAGGATGCGTAGGCGGACATTTAAGTCAGATGTGAAATACCCGAGCTTAACTTGGGTGCTGCATTTGAAACTGGGTGTCTAGAGTGCAGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGAGATTAGGAAGAACACCAGTGGCGAAGGCGACTTACTGGACTGTAACTGACGCTGAGGCATGAAAGCGTGGGGAGCAAACAGG |
| Bacteria | Firmicutes | Bacilli | | | | | 591 | 0.0012951 | 0.9850161649 | TACGTAGGGGGCAAGCGTTGTCCGGAATGATTGGGCGTAAAGGGCGCGCAGGCGGTTTCGTGTGTCTGATGTGAAAGGCTCCGGCTCAACCGGAGAAGGTCATCGGAAACTGCGGGACTTGAGTCCAGGAGAGGGGAGTGGAATTCCACGTGTAGCGGTGAAATGCGTAGAGATGTGGAGGAACACCAGTGGCGAAGGCGGCTCTCTGGCCTGAGACTGACGCTGAGGCGCGAAAGCGTGGGGAGCGAACAGG |
| Bacteria | Firmicutes | Clostridia | Clostridiales | Peptococcaceae | Candidatus Dichloromethanomonas | uncultured bacterium | 201 | 0.0004405 | 0.9184785956 | TACGTAGGGGGCAAGCGTTGTCCGGAATCATTGGGCGTAAAGGGCGCGTAGGCGGGCATATAAGTCTGATGTGAAAGTGCGGTGCTTAACACCGTAAAGCATTGGAAACTGTATGTCTTGAGGACAGGAGAGGAAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGACTGTAACTGACGCTGAGGCGCGAAAGCGTGGGGAGCGAACAGG |
| Bacteria | Firmicutes | Bacilli | Bacillales | Bacillaceae | Bacillus | uncultured compost bacterium | 191 | 0.0004186 | 0.7080361989 | TACGTAGGGGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGGGCGCGCAGGCGGCTTCTTAAGTCTGATGTGAAAGCCCGTGGCTCAACCACGGAGGGGCATTGGAAACTGGGGAGCTTGAGTACAGGAGAGGAGAGCGGAATTCCACGTGTAGCGGTGAAATGCGTAGAGATGTGGAGGAACACCAGTGGCGAAAGCGGCTCTCTGGCCTGTAACTGACGCTGAGGCGCGAAAGCGTGGGGAGCGAACAGG |
| Bacteria | Proteobacteria | Deltaproteobacteria | Desulfovibrionales | Desulfovibrionaceae | Desulfovibrio | Desulfovibrio desulfuricans | 101 | 0.0002213 | 0.8360530132 | TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGTAGGCTGTAGTGTAAGTCAGGGGTGAAATCCCACGGCTCAACCGTGGAACTGCCTTTGATACTGCACAACTTGAATCCGGGAGAGGGTGGCGGAATTCCAGGTGTAGGAGTGAAATCCGTAGATATCTGGAGGAACATCAGTGGCGAAGGCGGCCACCTGGACCGGTATTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGG |
Plotting based on Class
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Total relative abundance per taxonomic class, largest first.
OTU.amplib.class <- dplyr::group_by(OTU.amplib.table, class)
OTU.amplib.class.summary <- dplyr::summarise(OTU.amplib.class, class.total = sum(relative.abund.))
OTU.amplib.class.summary <- OTU.amplib.class.summary[order(-OTU.amplib.class.summary$class.total),]

# Convert the class column to character BEFORE relabelling empty entries.
# Assigning "NA" into a factor that has no such level produces an
# "invalid factor level, NA generated" warning and stores a missing value
# instead of the label.
OTU.amplib.class.summary$class <- as.character(OTU.amplib.class.summary$class)
# %in% never yields NA, so the subscript stays valid even if a class is missing.
OTU.amplib.class.summary$class[OTU.amplib.class.summary$class %in% ""] <- "NA"

kable(OTU.amplib.class.summary)
| class | class.total |
|---|---|
| Gammaproteobacteria | 0.9937457 |
| Clostridia | 0.0033463 |
| Bacilli | 0.0018781 |
| Deltaproteobacteria | 0.0002805 |
| Bacteroidia | 0.0001578 |
| Cloacimonadia | 0.0001205 |
| Spirochaetia | 0.0000986 |
| Thermotogae | 0.0000986 |
| Rhodothermia | 0.0000416 |
| Anaerolineae | 0.0000394 |
| LD1-PA32 | 0.0000373 |
| Methanobacteria | 0.0000351 |
| Synergistia | 0.0000329 |
| Methanomicrobia | 0.0000219 |
| NA | 0.0000153 |
| Hydrogenedentia | 0.0000153 |
| Caldatribacteriia | 0.0000131 |
| Thermoleophilia | 0.0000088 |
| Actinobacteria | 0.0000066 |
| Verrucomicrobiae | 0.0000066 |
library(ggplot2)
library(ggthemes)
# Two-slice table for the "major" bar: start from two copies of the first
# summary row, then turn the first copy into the pooled remainder ("other").
OTU.amplib.class.major <- rbind(
  data.frame(OTU.amplib.class.summary[1, ]),
  data.frame(OTU.amplib.class.summary[1, ])
)
OTU.amplib.class.major$class <- as.character(OTU.amplib.class.major$class)
OTU.amplib.class.major$class.total <- as.numeric(as.character(OTU.amplib.class.major$class.total))

# "other" is everything that is not the class held in the first summary row.
OTU.amplib.class.major$class[1] <- "other"
OTU.amplib.class.major$class.total[1] <- 1 - OTU.amplib.class.major$class.total[1]

# Ordered factor so the stacking order follows the row order.
OTU.amplib.class.major$class <- ordered(
  OTU.amplib.class.major$class,
  levels = OTU.amplib.class.major$class
)

# Fill palette; the first entry (grey) goes to the first factor level.
colors <- c(
  "#a0a0a0", "#e59090", "#5F7FC7", "orange", "#DA5724", "#508578",
  "#CD9BCD", "#AD6F3B", "#673770", "#D14285", "#652926", "#C84248",
  "#8569D5", "#5E738F", "#D1A33D", "#8A7C64", "#599861"
)

# Single stacked bar scaled to 100%; build once, display, then write to disk.
amplib.major <- ggplot(OTU.amplib.class.major, aes(x = "x", y = class.total, fill = class)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_manual(values = colors) +
  theme_void()
amplib.major
ggsave("amplib.major.png", amplib.major, height = 10, width = 4)
Plotting minor community members, grouping everything with representation of less than 0.0001 as “other”
library(ggplot2)
library(rlist)
# Satellite (minor) community: every class except the one in the first
# summary row.
OTU.amplib.class.summary.sat <- OTU.amplib.class.summary[-1, ]
OTU.amplib.class.summary.sat$class <- as.character(OTU.amplib.class.summary.sat$class)

# Classes with a relative abundance below this cut-off are pooled as "other".
minor.threshold <- 0.0001
is.minor <- OTU.amplib.class.summary.sat$class.total < minor.threshold

# Previously the first six rows were kept and row 6 was then overwritten with
# the sum of rows 7 onward, so the sixth class's own abundance was dropped
# from the plot. Pooling by threshold keeps every class accounted for.
OTU.amplib.class.summary.sat.bot <- OTU.amplib.class.summary.sat[is.minor, ]
OTU.amplib.class.summary.sat.top <- rbind(
  data.frame(OTU.amplib.class.summary.sat[!is.minor, ], stringsAsFactors = FALSE),
  data.frame(
    class = "other",
    class.total = sum(OTU.amplib.class.summary.sat.bot$class.total),
    stringsAsFactors = FALSE
  )
)

#OTU.amplib.class.summary.sat <- OTU.amplib.class.summary.sat[order(OTU.amplib.class.summary.sat$class.total),]

# Reverse the row order for the factor levels so that "other" is the first
# level and therefore receives the first (grey) palette entry.
OTU.amplib.class.summary.sat.top$class <- factor(
  OTU.amplib.class.summary.sat.top$class,
  levels = rev(OTU.amplib.class.summary.sat.top$class),
  ordered = TRUE
)

colors2 <- c(
  "#a0a0a0", "#e59090", "#5F7FC7", "orange", "#DA5724", "#508578", "#CD9BCD",
  "#AD6F3B", "#673770", "#D14285", "#652926", "#C84248",
  "#8569D5", "#5E738F", "#D1A33D", "#8A7C64", "#599861", "#4286f4"
)

# Single stacked bar scaled to 100% of the satellite community.
amplib.minor <- ggplot(OTU.amplib.class.summary.sat.top, aes(fill = class, x = "x", y = class.total)) +
  scale_fill_manual(values = colors2) +
  geom_bar(stat = "identity", position = "fill") +
  theme_void()
amplib.minor
ggsave("amplib.minor.png", amplib.minor, height = 10, width = 4)
# Load the tab-separated MG-RAST SILVA SSU table.
SILVASSU30 <- read.delim("MG-RAST/final.SilvaSSU.e30.tsv", header = TRUE)

# Discard the eighth column.
SILVASSU30 <- SILVASSU30[, -8]

# Rank rows by the BTEX1 unmapped-read column, largest first.
SILVASSU30 <- SILVASSU30[order(-SILVASSU30$BTEX1.unmapped.FR.fastq), ]

# Rescale both abundance columns so that each sums to 1.
SILVASSU30$BTEX1.unmapped.FR.fastq <- prop.table(SILVASSU30$BTEX1.unmapped.FR.fastq)
SILVASSU30$Kuwait.1810.mgenome.fastq <- prop.table(SILVASSU30$Kuwait.1810.mgenome.fastq)

# Short column names used by the rest of the analysis.
names(SILVASSU30) <- c(
  "domain", "phylum", "class", "order", "family", "genus",
  "unmapped", "full.mgenome"
)
kable(SILVASSU30)
| | domain | phylum | class | order | family | genus | unmapped | full.mgenome |
|---|---|---|---|---|---|---|---|---|
| 3 | Bacteria | Proteobacteria | Gammaproteobacteria | Chromatiales | Ectothiorhodospiraceae | Arhodomonas | 0.9170452 | 0.9009120 |
| 23 | unclassified sequences | unclassified (derived from unclassified sequences) | unclassified (derived from unclassified sequences) | unclassified (derived from unclassified sequences) | unclassified (derived from unclassified sequences) | unclassified (derived from unclassified sequences) | 0.0597080 | 0.0589529 |
| 21 | Bacteria | Proteobacteria | Gammaproteobacteria | unclassified (derived from Gammaproteobacteria) | unclassified (derived from Gammaproteobacteria) | unclassified (derived from Gammaproteobacteria) | 0.0117813 | 0.0179613 |
| 20 | Bacteria | unclassified (derived from Bacteria) | unclassified (derived from Bacteria) | unclassified (derived from Bacteria) | unclassified (derived from Bacteria) | unclassified (derived from Bacteria) | 0.0099109 | 0.0194889 |
| 4 | Bacteria | Firmicutes | Bacilli | Bacillales | Bacillaceae | Bacillus | 0.0006802 | 0.0004398 |
| 11 | Bacteria | Firmicutes | Bacilli | Bacillales | Bacillaceae | Geobacillus | 0.0002915 | 0.0003009 |
| 12 | Bacteria | Firmicutes | Bacilli | Bacillales | Bacillaceae | Marinococcus | 0.0001700 | 0.0002315 |
| 2 | Bacteria | Firmicutes | Bacilli | Bacillales | Alicyclobacillaceae | Alicyclobacillus | 0.0001457 | 0.0001157 |
| 7 | Bacteria | Firmicutes | Clostridia | Clostridiales | Peptococcaceae | Desulfotomaculum | 0.0000972 | 0.0000000 |
| 9 | Bacteria | Firmicutes | Bacilli | Lactobacillales | Enterococcaceae | Enterococcus | 0.0000729 | 0.0000000 |
| 6 | Bacteria | Firmicutes | Clostridia | Clostridiales | Peptococcaceae | Desulfosporosinus | 0.0000486 | 0.0000000 |
| 19 | Bacteria | Firmicutes | Bacilli | Bacillales | Bacillaceae | Virgibacillus | 0.0000486 | 0.0000694 |
| 1 | Bacteria | Firmicutes | Clostridia | Clostridiales | Eubacteriaceae | Acetobacterium | 0.0000000 | 0.0000694 |
| 5 | Eukaryota | Nematoda | Chromadorea | Rhabditida | Rhabditidae | Caenorhabditis | 0.0000000 | 0.0000463 |
| 8 | Bacteria | Proteobacteria | Gammaproteobacteria | Chromatiales | Ectothiorhodospiraceae | Ectothiorhodospira | 0.0000000 | 0.0000926 |
| 10 | Bacteria | Firmicutes | Clostridia | Clostridiales | Eubacteriaceae | Eubacterium | 0.0000000 | 0.0000694 |
| 13 | Bacteria | Proteobacteria | Gammaproteobacteria | unclassified (derived from Gammaproteobacteria) | unclassified (derived from Gammaproteobacteria) | Methylonatrum | 0.0000000 | 0.0000463 |
| 14 | Bacteria | Proteobacteria | Gammaproteobacteria | Chromatiales | Ectothiorhodospiraceae | Nitrococcus | 0.0000000 | 0.0006018 |
| 15 | Bacteria | Firmicutes | Bacilli | Bacillales | Paenibacillaceae | Paenibacillus | 0.0000000 | 0.0001157 |
| 16 | Bacteria | Proteobacteria | Gammaproteobacteria | Pseudomonadales | Moraxellaceae | Psychrobacter | 0.0000000 | 0.0001157 |
| 17 | Bacteria | Firmicutes | Bacilli | Bacillales | Sporolactobacillaceae | Sporolactobacillus | 0.0000000 | 0.0000694 |
| 18 | Bacteria | Proteobacteria | Gammaproteobacteria | Chromatiales | Ectothiorhodospiraceae | Thioalkalivibrio | 0.0000000 | 0.0002315 |
| 22 | Bacteria | Proteobacteria | unclassified (derived from Proteobacteria) | unclassified (derived from Proteobacteria) | unclassified (derived from Proteobacteria) | unclassified (derived from Proteobacteria) | 0.0000000 | 0.0000694 |
library(dplyr)
# Total "unmapped" relative abundance per taxonomic class, largest first.
OTU.mgrast.class <- dplyr::group_by(SILVASSU30, class)
OTU.mgrast.class.summary <- dplyr::summarise(OTU.mgrast.class, class.total = sum(unmapped))
OTU.mgrast.class.summary <- OTU.mgrast.class.summary[order(-OTU.mgrast.class.summary$class.total),]

# Convert to character BEFORE relabelling empty entries: assigning "NA" into a
# factor without that level raises "invalid factor level, NA generated".
OTU.mgrast.class.summary$class <- as.character(OTU.mgrast.class.summary$class)
OTU.mgrast.class.summary$class[OTU.mgrast.class.summary$class %in% ""] <- "NA"

# Remove unclassified fragments by label rather than by fixed row positions,
# so the filter keeps working if the input or the sort order changes.
is.unclassified <- grepl("^unclassified", OTU.mgrast.class.summary$class)
OTU.mgrast.class.summary <- OTU.mgrast.class.summary[!is.unclassified, ]

# Renormalise so the remaining classified classes sum to 1.
OTU.mgrast.class.summary$class.total <- OTU.mgrast.class.summary$class.total/sum(OTU.mgrast.class.summary$class.total)
kable(OTU.mgrast.class.summary)
| class | class.total |
|---|---|
| Gammaproteobacteria | 0.9983290 |
| Bacilli | 0.0015143 |
| Clostridia | 0.0001567 |
| Chromadorea | 0.0000000 |
Plotting Arhodomonas and unclassified seqs
library(ggplot2)
library(ggthemes)
# Two-slice table for the "major" MG-RAST bar: start from two copies of the
# first summary row, then turn the second copy into the pooled remainder.
OTU.mgrast.class.major <- rbind(
  data.frame(OTU.mgrast.class.summary[1, ]),
  data.frame(OTU.mgrast.class.summary[1, ])
)
OTU.mgrast.class.major$class.total <- as.numeric(as.character(OTU.mgrast.class.major$class.total))
OTU.mgrast.class.major$class <- as.character(OTU.mgrast.class.major$class)
OTU.mgrast.class.major[2, 1] <- "other"
OTU.mgrast.class.major[2, 2] <- 1 - OTU.mgrast.class.major[1, 2]

# Sort ascending by abundance, then freeze that order as the factor levels.
OTU.mgrast.class.major <- OTU.mgrast.class.major[order(OTU.mgrast.class.major$class.total), ]
OTU.mgrast.class.major$class <- factor(
  OTU.mgrast.class.major$class,
  levels = OTU.mgrast.class.major$class,
  ordered = TRUE
)

colors <- c(
  "#a0a0a0", "#e59090", "#5F7FC7", "orange", "#DA5724", "#508578", "#CD9BCD",
  "#AD6F3B", "#673770", "#D14285", "#652926", "#C84248",
  "#8569D5", "#5E738F", "#D1A33D", "#8A7C64", "#599861"
)

# Single stacked bar scaled to 100%; build once, display, then write to disk.
mgrast.major <- ggplot(OTU.mgrast.class.major, aes(fill = class, x = "x", y = class.total)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_manual(values = colors) +
  theme_void()
mgrast.major
# Save the MG-RAST plot itself; the amplicon plot (amplib.major) was
# previously written to this file by mistake.
ggsave("mgrast.major.png", mgrast.major, height = 10, width = 4)
Plotting the satellite community
library(ggplot2)
library(rlist)
# Satellite community for the MG-RAST data: the class summary without its
# first and fourth rows.
# NOTE(review): in the rendered table those rows are Gammaproteobacteria and
# the zero-abundance Chromadorea; the positions are hard-coded -- confirm they
# still match if the input changes.
OTU.mgrast.class.summary.sat <- OTU.mgrast.class.summary[-c(1, 4), ]
OTU.mgrast.class.summary.sat[["class"]] <- as.character(OTU.mgrast.class.summary.sat[["class"]])

# Pooling into "other" is disabled for this data set; kept for reference:
#OTU.mgrast.class.summary.sat.top <- OTU.mgrast.class.summary.sat[c(1:6),]
#OTU.mgrast.class.summary.sat.bot <- OTU.mgrast.class.summary.sat[-c(1:6),]
#OTU.mgrast.class.summary.sat.top[6,1] <- "other"
#OTU.mgrast.class.summary.sat.top[6,2] <- sum(OTU.mgrast.class.summary.sat.bot$class.total)
#OTU.mgrast.class.summary.sat <- OTU.mgrast.class.summary.sat[order(OTU.mgrast.class.summary.sat$class.total),]

# Ordered factor whose levels are the classes in reverse row order.
OTU.mgrast.class.summary.sat$class <- ordered(
  OTU.mgrast.class.summary.sat$class,
  levels = list.reverse(OTU.mgrast.class.summary.sat$class)
)

# Fill palette for the satellite bar.
colors2 <- c(
  "#DA5724", "#508578", "#CD9BCD", "#AD6F3B", "#673770", "#D14285",
  "#652926", "#C84248", "#8569D5", "#5E738F", "#D1A33D", "#8A7C64",
  "#599861", "#4286f4"
)

# Single stacked bar scaled to 100% of the satellite community.
mgrast.minor <- ggplot(OTU.mgrast.class.summary.sat, aes(x = "x", y = class.total, fill = class)) +
  scale_fill_manual(values = colors2) +
  geom_bar(stat = "identity", position = "fill") +
  theme_void()
mgrast.minor
ggsave("mgrast.minor.png", mgrast.minor, height = 10, width = 4)
These figures are assembled into exploding-bar-charts in PowerPoint. Figures shown at the beginning of the document.