title: “Microbiome Differential Abundance Testing Tutorial”
Dr. upasna srivastava output: html_notebook
#for this study we can use The 16S sequence data have been submitted to the NCBI Sequence Read Archive (http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi) under accession number SRP000383.
#Import data with phyloseq, convert to DESeq2
library("phyloseq");
packageVersion("phyloseq")
#Defined file path, and import the published OTU count data into R.
filepath = system.file("extdata", "study_1457_split_library_seqs_and_mapping.zip", package="phyloseq")
colon = microbio_me_qiime(filepath)
#in case if you qiime id then you could directly download the data from the microbio.me/qiime server using this code
colon = microbio_me_qiime(1457)
colon
library("DESeq2");
packageVersion("DESeq2")
colon <- subset_samples(colon, DIAGNOSIS != "None")
colon <- prune_samples(sample_sums(colon) > 500, colon)
colon
diagdds = phyloseq_to_deseq2(colon, ~ DIAGNOSIS)
# calculate geometric means prior to estimate size factors
gm_mean = function(x, na.rm=TRUE){
exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x))
}
geoMeans = apply(counts(diagdds), 1, gm_mean)
diagdds = estimateSizeFactors(diagdds, geoMeans = geoMeans)
diagdds = DESeq(diagdds, fitType="local")
res = results(diagdds)
res = res[order(res$padj, na.last=NA), ]
alpha = 0.01
sigtab = res[(res$padj < alpha), ]
sigtab = cbind(as(sigtab, "data.frame"), as(tax_table(colon)[rownames(sigtab), ], "matrix"))
head(sigtab)
#OTUs that were significantly enriched
posigtab = sigtab[sigtab[, "log2FoldChange"] > 0, ]
posigtab = posigtab[, c("baseMean", "log2FoldChange", "lfcSE", "padj", "Phylum", "Class", "Family", "Genus")]
## ggplot2 commands to show bar plot showing the log2-fold-change, showing Genus and Phylum.
library("ggplot2")
theme_set(theme_bw())
sigtabgen = subset(sigtab, !is.na(Genus))
# Phylum order
x = tapply(sigtabgen$log2FoldChange, sigtabgen$Phylum, function(x) max(x))
x = sort(x, TRUE)
sigtabgen$Phylum = factor(as.character(sigtabgen$Phylum), levels=names(x))
# Genus order
x = tapply(sigtabgen$log2FoldChange, sigtabgen$Genus, function(x) max(x))
x = sort(x, TRUE)
sigtabgen$Genus = factor(as.character(sigtabgen$Genus), levels=names(x))
ggplot(sigtabgen, aes(y=Genus, x=log2FoldChange, color=Phylum)) +
geom_vline(xintercept = 0.0, color = "gray", size = 0.5) +
geom_point(size=6) +
theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
dev.off()
LS0tCm91dHB1dDoKICBodG1sX25vdGVib29rOiBkZWZhdWx0CiAgaHRtbF9kb2N1bWVudDogZGVmYXVsdAotLS0KLS0tCnRpdGxlOiAiTWljcm9iaW9tZSBEaWZmZXJlbnRpYWwgQWJ1bmRhbmNlIFRlc3RpbmcgVHV0b3JpYWwiCkRyLiB1cGFzbmEgc3JpdmFzdGF2YQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKCmBgYHtyfQojZm9yIHRoaXMgc3R1ZHkgd2UgY2FuIHVzZSBUaGUgMTZTIHNlcXVlbmNlIGRhdGEgaGF2ZSBiZWVuIHN1Ym1pdHRlZCB0byB0aGUgTkNCSSBTZXF1ZW5jZSBSZWFkIEFyY2hpdmUgKGh0dHA6Ly93d3cubmNiaS5ubG0ubmloLmdvdi9UcmFjZXMvc3JhL3NyYS5jZ2kpIHVuZGVyIGFjY2Vzc2lvbiBudW1iZXIgU1JQMDAwMzgzLgpgYGAKCmBgYHtyfQojSW1wb3J0IGRhdGEgd2l0aCBwaHlsb3NlcSwgY29udmVydCB0byBERVNlcTIKbGlicmFyeSgicGh5bG9zZXEiKTsgCnBhY2thZ2VWZXJzaW9uKCJwaHlsb3NlcSIpCiNEZWZpbmVkIGZpbGUgcGF0aCwgYW5kIGltcG9ydCB0aGUgcHVibGlzaGVkIE9UVSBjb3VudCBkYXRhIGludG8gUi4KZmlsZXBhdGggPSBzeXN0ZW0uZmlsZSgiZXh0ZGF0YSIsICJzdHVkeV8xNDU3X3NwbGl0X2xpYnJhcnlfc2Vxc19hbmRfbWFwcGluZy56aXAiLCBwYWNrYWdlPSJwaHlsb3NlcSIpCmNvbG9uID0gbWljcm9iaW9fbWVfcWlpbWUoZmlsZXBhdGgpCiNpbiBjYXNlIGlmIHlvdSBxaWltZSBpZCB0aGVuIHlvdSBjb3VsZCBkaXJlY3RseSBkb3dubG9hZCB0aGUgZGF0YSBmcm9tIHRoZSBtaWNyb2Jpby5tZS9xaWltZSBzZXJ2ZXIgdXNpbmcgdGhpcyBjb2RlCmNvbG9uID0gbWljcm9iaW9fbWVfcWlpbWUoMTQ1NykKY29sb24KbGlicmFyeSgiREVTZXEyIik7IApwYWNrYWdlVmVyc2lvbigiREVTZXEyIikKY29sb24gPC0gc3Vic2V0X3NhbXBsZXMoY29sb24sIERJQUdOT1NJUyAhPSAiTm9uZSIpCmNvbG9uIDwtIHBydW5lX3NhbXBsZXMoc2FtcGxlX3N1bXMoY29sb24pID4gNTAwLCBjb2xvbikKY29sb24KZGlhZ2RkcyA9IHBoeWxvc2VxX3RvX2Rlc2VxMihjb2xvbiwgfiBESUFHTk9TSVMpCiMgY2FsY3VsYXRlIGdlb21ldHJpYyBtZWFucyBwcmlvciB0byBlc3RpbWF0ZSBzaXplIGZhY3RvcnMKZ21fbWVhbiA9IGZ1bmN0aW9uKHgsIG5hLnJtPVRSVUUpewogIGV4cChzdW0obG9nKHhbeCA+IDBdKSwgbmEucm09bmEucm0pIC8gbGVuZ3RoKHgpKQp9Cmdlb01lYW5zID0gYXBwbHkoY291bnRzKGRpYWdkZHMpLCAxLCBnbV9tZWFuKQpkaWFnZGRzID0gZXN0aW1hdGVTaXplRmFjdG9ycyhkaWFnZGRzLCBnZW9NZWFucyA9IGdlb01lYW5zKQpkaWFnZGRzID0gREVTZXEoZGlhZ2RkcywgZml0VHlwZT0ibG9jYWwiKQpyZXMgPSByZXN1bHRzKGRpYWdkZHMpCnJlcyA9IHJlc1tvcmRlcihyZXMkcGFkaiwgbmEubGFzdD1OQSksIF0KYWxwaGEgPSAwLjAxCnNpZ3RhYiA9IHJlc1socmVzJHBhZGogPCBhbHBoYSksIF0Kc2lndGFiID0gY2JpbmQoYXMoc2lndGFiLCAiZGF0YS5mcmFtZSIpLCBhcyh0YXhfdGFibGUoY29sb24pW3Jvd25hbWVzKHNpZ3RhYiksIF0sICJtYXRyaXgiKSkKaGVhZChzaWd0YWIpCiNPVFVzIHRoYXQgd2VyZSBzaWduaWZpY2FudGx5IGVucmljaGVkCnBvc2lndGFiID0gc2lndGFiW3NpZ3RhYlssICJsb2cyRm9sZENoYW5nZSJdID4gMCwgXQpwb3NpZ3RhYiA9IHBvc2lndGFiWywgYygiYmFzZU1lYW4iLCAibG9nMkZvbGRDaGFuZ2UiLCAibGZjU0UiLCAicGFkaiIsICJQaHlsdW0iLCAiQ2xhc3MiLCAiRmFtaWx5IiwgIkdlbnVzIildCiMjIGdncGxvdDIgY29tbWFuZHMgdG8gc2hvdyBiYXIgcGxvdCBzaG93aW5nIHRoZSBsb2cyLWZvbGQtY2hhbmdlLCBzaG93aW5nIEdlbnVzIGFuZCBQaHlsdW0uIApsaWJyYXJ5KCJnZ3Bsb3QyIikKdGhlbWVfc2V0KHRoZW1lX2J3KCkpCnNpZ3RhYmdlbiA9IHN1YnNldChzaWd0YWIsICFpcy5uYShHZW51cykpCiMgUGh5bHVtIG9yZGVyCnggPSB0YXBwbHkoc2lndGFiZ2VuJGxvZzJGb2xkQ2hhbmdlLCBzaWd0YWJnZW4kUGh5bHVtLCBmdW5jdGlvbih4KSBtYXgoeCkpCnggPSBzb3J0KHgsIFRSVUUpCnNpZ3RhYmdlbiRQaHlsdW0gPSBmYWN0b3IoYXMuY2hhcmFjdGVyKHNpZ3RhYmdlbiRQaHlsdW0pLCBsZXZlbHM9bmFtZXMoeCkpCiMgR2VudXMgb3JkZXIKeCA9IHRhcHBseShzaWd0YWJnZW4kbG9nMkZvbGRDaGFuZ2UsIHNpZ3RhYmdlbiRHZW51cywgZnVuY3Rpb24oeCkgbWF4KHgpKQp4ID0gc29ydCh4LCBUUlVFKQpzaWd0YWJnZW4kR2VudXMgPSBmYWN0b3IoYXMuY2hhcmFjdGVyKHNpZ3RhYmdlbiRHZW51cyksIGxldmVscz1uYW1lcyh4KSkKZ2dwbG90KHNpZ3RhYmdlbiwgYWVzKHk9R2VudXMsIHg9bG9nMkZvbGRDaGFuZ2UsIGNvbG9yPVBoeWx1bSkpICsgCiAgZ2VvbV92bGluZSh4aW50ZXJjZXB0ID0gMC4wLCBjb2xvciA9ICJncmF5Iiwgc2l6ZSA9IDAuNSkgKwogIGdlb21fcG9pbnQoc2l6ZT02KSArIAogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gLTkwLCBoanVzdCA9IDAsIHZqdXN0PTAuNSkpCiAgZGV2Lm9mZigpCmBgYAoK