library(tidyverse)
library(microbiome)
library(reticulate)
library(qiime2R)
library(vegan)
library(ape)
library(ggpubr)
library(plyr)
# library(rotemfuns)
# rotemfuns = list.files("/pita/users/rotem/rotemfuns/R", full.names = T)
source("/pita/users/rotem/bin/read_taxa_barplot.R")
# sapply(rotemfuns, source)
# setwd("/pita/users/rotem/MXX/")
short_bacteria_name <- function(Taxon) {
split_taxonomy = strsplit(as.character(Taxon), split = "[kpcofgs]__")[[1]]
short_taxonomy = split_taxonomy[nchar(split_taxonomy) > 4]
# remove bacteria prefix
paste(short_taxonomy[2:length(short_taxonomy)], collapse = " ")
}
mappath = "/pita/pub/data/16S_DBs/maps/DB1-13_premap_v2.csv"
biompath = "/pita/users/rotem/MXX/mtxmgx.qza"
taxpath = "/pita//users/rotem/MXX/taxonomy.qza"
map = read.csv(mappath)
map = sample_data(map)
sample_names(map) = map$X.SampleID
biom = read_qza(biompath)$data
biom = biom %>% data.frame()
min_reads_cutoff = 2000
biom = biom[apply(biom, 2, sum) > min_reads_cutoff]
rare_biom = vegan::rrarefy(biom %>% t(), 10000) %>% t()
otu = phyloseq::otu_table(rare_biom, taxa_are_rows = T)
otu = phyloseq::prune_taxa(taxa_sums(otu) > 10, otu)
relative_otu = otu_table(funrar::make_relative(otu@.Data), taxa_are_rows = T)
relative_otu = filter_taxa(relative_otu, function(x) mean(x) > 1e-5, TRUE)
pretax = read_qza(taxpath)$data
tax = pretax %>% phyloseq::tax_table()
phyloseq::taxa_names(tax) = pretax$Feature.ID
tree = ape::rtree(ntaxa(tax), rooted = T, tip.label = phyloseq::taxa_names(tax))
abs_exp = phyloseq(otu, map, tax, tree)
exp = phyloseq(relative_otu, map, tax, tree)
# apply(abs_exp@otu_table@.Data,1 ,sum) %>% sort() %>% plot()
# ttt = two_or_more_ids %>% prune_samples(abs_exp) %>% estimate_richness(split = T) %>% select(Observed)
exp
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 967 taxa and 139 samples ]
## sample_data() Sample Data: [ 139 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 967 taxa by 3 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 967 tips and 966 internal nodes ]
exp@sam_data %>% data.frame %>% filter(Barcode == "P-298") %>% select(reads_number)
## reads_number
## DB13.298 43077
read_qza("/pita/users/rotem/MXX/isolates.qza")$data %>% colnames() %>% intersect("DB13.298")
## [1] "DB13.298"
#!/pita/users/rotem/miniconda3/envs/qiime2-2020.2/bin/ bash
/pita/users/rotem/miniconda3/envs/qiime2-2020.2/bin/qiime feature-table filter-samples \
--i-table DB1-13.qza \
--m-metadata-file ./isolates.txt \
--o-filtered-table ./isolates.qza
/pita/users/rotem/miniconda3/envs/qiime2-2020.2/bin/qiime taxa barplot \
--i-table isolates.qza \
--i-taxonomy taxonomy.qza \
--o-visualization isolates.qzv \
--m-metadata-file /pita/pub/data/16S_DBs/maps/DB1-13_premap_v2.tsv\
--verbose
## Saved FeatureTable[Frequency] to: ./isolates.qza
## Saved Visualization to: isolates.qzv
tbp <- read_taxa_barplot("/pita/users/rotem/MXX/isolates.qzv", level = 7)$data
tmp = tbp %>%
filter(!reads_number < 2000) %>%
pivot_longer(cols = contains("__"), names_to = "taxon", values_to ="n")
tmp$shortTaxon = lapply(tmp$taxon , short_bacteria_name) %>% unlist()
pl =
tmp %>%
group_by(index) %>%
# filter(n == max(n)) %>%
select(index, sample_ID, reads_number, taxon, shortTaxon, n) %>%
ggplot(aes( x = index
# , x = reorder(index, n/reads_number)
, y = n
, label = sample_ID
, fill = shortTaxon
, accuracy = taxon)) +
xlab("Barcode") +
geom_col(position="fill") + theme_pubr() +
theme(legend.position = "none", axis.text.x = element_text(angle = -45))
plotly::ggplotly(pl, width = 1000)
The graph show the percentage of the highest bacteria in the samples. hover with mouse for show expected taxon and תכלס
isolatessamples = exp %>% sample_data() %>% data.frame() %>%
filter(Cohort == "Isolates" | Cohort == "ISOLATES") %>% rownames()
exp %>% sample_data() %>% data.frame() %>%
filter(X.SampleID %in% isolatessamples) %>% rownames() %>%
prune_samples(exp) %>%
otu_table() %>% data.frame() %>% t() %>% data.frame() %>% rownames_to_column("index") %>%
pivot_longer(cols = contains("TA"), names_to = "bac", values_to = "n") %>%
group_by(index) %>% filter(n == max(n)) %>% kableExtra::kable() %>% kableExtra::kable_classic()
| index | bac | n |
|---|---|---|
| DB12.285 | TACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACGAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTG | 0.1794872 |
| DB12.276 | TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTCTGTCAAGTCGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCCGAAACTGGCAGGCTAGAGTCTTGTAGAGGGGGGTAGAATTCCAGG | 0.4384457 |
| DB12.271 | TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGACTGGTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGTCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGG | 0.1581476 |
| DB12.275 | TACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACTGACTAGAGTATGGTAGAGGGTGGTGGAATTTCCTG | 0.3414634 |
| DB12.277 | TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTCTGTCAAGTCGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCCGAAACTGGCAGGCTAGAGTCTTGTAGAGGGGGGTAGAATTCCAGG | 0.4383141 |
| DB12.289 | TACATAGGTCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGCGTTCGTAGGCTGTTTGTTAAGTCTGGAGTTAAATCCCGGGGCTCAACCCCGGCTCGCTTTGGATACTAGCAAACTAGAGTTAGATAGAGGTAAGCGGAATTCCATGT | 0.3437100 |
| DB13.291 | TACGTAGGGTGCGAGCGTTGTCCGGATTTATTGGGCGTAAAGGGCTCGTAGGTGGTTGATCGCGTCGGAAGTGTAATCTTGGGGCTTAACCCTGAGCGTGCTTTCGATACGGGTTGACTTGAGGAAGGTAGGGGAGAATGGAATTCCTGG | 1.0000000 |
| DB13.296 | TACATAGGTCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGCGTTCGTAGGCTGTTTGTTAAGTCTGGAGTTAAATCCCGGGGCTCAACCCCGGCTCGCTTTGGATACTAGCAAACTAGAGTTAGATAGAGGTAAGCGGAATTCCATGT | 0.3350900 |
| DB13.295 | TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGACTGGTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGTCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGG | 0.1582587 |
| DB13.298 | TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGACAGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGCTGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGG | 0.4563027 |
| DB13.297 | TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGCGCTTAACGTGGGAACTGCATTTGAAACTGGCAAGCTAGAGTCTTGTAGAGGGGGGTAGAATTCCAGG | 0.3439153 |
# exp %>% sample_data() %>% data.frame() %>% filter(X.SampleID == "DB13.297") %>% rownames() %>%
# prune_samples(exp) %>% otu_table() %>% data.frame() %>% top_n(1, DB13.297) %>% row.names()
Without Saliva samples.
exp %<>% sample_data() %>% data.frame() %>% filter(!grepl("Saliva", Cohort)) %>% rownames() %>% prune_samples(exp)
exp %<>% sample_data() %>% data.frame() %>% filter(!pn_ID == "") %>% rownames() %>% prune_samples(exp)
ord <- ordinate(exp, method = "PCoA", distance = "unifrac")
pl =
ord$vectors %>% data.frame() %>% select(Axis.1, Axis.2) %>% rownames_to_column("nnnn") %>%
left_join(exp %>% sample_data() %>% data.frame()%>% rownames_to_column("nnnn")) %>% column_to_rownames("nnnn") %>% ggplot(aes(
x = Axis.1, y = Axis.2, color = pn_ID, shape = Source, text = sample_ID, Description = X.SampleID)) +
geom_point() +
guides(shape = F) +
theme_pubr()
## Joining, by = "nnnn"
plotly::ggplotly(pl, width = 1000)
# pl = plot_ordination(exp, ord, color = "pn_ID", shape = "Source", text = "sample_ID") + theme_rh
# plotly::ggplotly(pl, width = 1000)
install.packages("learnr")
## Installing package into '/usr/local/lib/R/site-library'
## (as 'lib' is unspecified)
alldata = ord$vectors %>% data.frame() %>% select(Axis.1, Axis.2) %>% rownames_to_column("nnnn") %>% left_join(exp %>% sample_data() %>% data.frame()%>% rownames_to_column("nnnn")) %>% column_to_rownames("nnnn")
## Joining, by = "nnnn"
pl =
alldata %>% ggplot(aes(x = Axis.1, y = Axis.2, shape = Source, text = sample_ID)) +
geom_point(color = "grey") +
geom_point(data = alldata %>% filter(pn_ID %in% c("A363", "G-029", "G-027", "A001", "A002"))
, aes(x = Axis.1, y = Axis.2, shape = Source, label = Barcode, color = pn_ID)) +
guides(shape = F) +
theme_pubr()
## Warning: Ignoring unknown aesthetics: label
plotly::ggplotly(pl, width = 1000)
For all samples from patients.
bigmap = "../../../../pita/pub/data/16S_DBs/maps/DB1-13_premap_v2.csv"
indices = qiime2R::read_q2biom("feature-table.biom")
totake = indices[1,] %>% names()
smallmap = read.csv(bigmap) %>% filter(X.SampleID %in% totake)
names(smallmap)[1] <- "#SampleID"
smallmap %>% write.table("filteredmap.tsv", sep = "\t", row.names = F)
# calour.ipynb
import calour as ca
import pandas as pd
import calour_utils as cu
## failed to load logging config file
import matplotlib
import numpy as np
import os
import PyQt5
matplotlib.use("Agg", force = True)
exp = ca.read_amplicon("feature-table.biom", "filteredmap.tsv", min_reads=2000, normalize=10000)
## 2020-10-19 15:28:49 WARNING Do you forget to normalize your data? It is required before running this function
expc = exp.cluster_features(10)
DBnum = expc.sample_metadata["Database"].str.extract(r'(\d+)', expand = False)
BCnum = expc.sample_metadata["Barcode"].str.extract(r'(\d+)', expand = False)
runorder = DBnum + BCnum
expc.sample_metadata["runorder"] = pd.to_numeric(runorder)
expc.plot(gui = "cli", barx_fields = ["pn_ID", "Database"])
## <calour.heatmap.plotgui_cli.PlotGUI_CLI object at 0x7f60d2dd3d30>
##
## /home/rstudio/.local/share/r-miniconda/envs/r-reticulate/lib/python3.6/site-packages/calour/heatmap/heatmap.py:309: MatplotlibDeprecationWarning: You are modifying the state of a globally registered colormap. In future versions, you will not be able to modify a registered colormap in-place. To remove this warning, you can make a copy of the colormap first. cmap = copy.copy(mpl.cm.get_cmap("viridis"))
## cmap.set_bad(bad_color)
for patient in filter(lambda v: v==v, expc.sample_metadata["pn_ID"].unique()):
print(patient)
plot_patient = expc.filter_by_metadata("pn_ID", [patient])
pl = plot_patient.plot(gui = "cli", barx_fields = ["Database", "Source", "pn_ID"])
pl.resize_figure(12,8)
pl.figure.tight_layout()
pl.save_figure("heatmaps/" +patient + '.png')
## A301
## A308
## A304
## A309
## G-035
## G-027
## A363
## G-029
## C-046