library(tidyverse)
library(microbiome)
# library(rotemfuns)
library(qiime2R)
library(ape)
library(phyloseq)
library(ggpubr)
library(reticulate)
mappath = "/pita/pub/data/16S_DBs/maps/DB12_covid19_26July_map.csv"
mappath = "/pita/pub/data/16S_DBs/maps/DB12_covid19_11Oct_map.txt"
biompath = "/pita/pub/data/16S_DBs/processed/16S_DB12/feature-table.biom"

taxpath = "/pita/pub/data/16S_DBs/processed/16S_DB12/taxonomy.qza"
map = read.csv(mappath, sep ="\t")
map = sample_data(map)
sample_names(map) = map$SampleID 

biom = read_q2biom(biompath)
# biom = biom %>% column_to_rownames(names(biom)[1])
min_reads_cutoff = 2000
biom = biom[apply(biom, 2, sum) > min_reads_cutoff,]
otu = phyloseq::otu_table(biom, taxa_are_rows = T)
otu = phyloseq::prune_taxa(taxa_sums(otu) > 10, otu)
relative_otu = otu_table(funrar::make_relative(otu@.Data), taxa_are_rows = T)
relative_otu = filter_taxa(relative_otu,  function(x) mean(x) > 1e-5, TRUE)


pretax = read_qza(taxpath)$data
tax = pretax %>% phyloseq::tax_table()
phyloseq::taxa_names(tax) = pretax$Feature.ID

tree = ape::rtree(ntaxa(tax), rooted = T, tip.label = phyloseq::taxa_names(tax))


abs_exp = phyloseq(otu, map, tax, tree)
exp = phyloseq(relative_otu, map, tax, tree)

# apply(abs_exp@otu_table@.Data,1 ,sum) %>% sort() %>% plot()
# ttt = two_or_more_ids %>% prune_samples(abs_exp) %>% estimate_richness(split = T) %>% select(Observed)

Cohort Characterization

# exp %>% sample_data() %>% data.frame() %>% filter(is.na(gender))
 pl =
exp %>% sample_data() %>% data.frame() %>% filter(days_from_first_sample >= 0) %>% 
  mutate(age %>% round()) %>% 
  ggplot(aes(x = days_from_first_sample, y = reorder(pn_ID, days_from_first_sample), color = cov_res)) + 
  geom_line(color = "gray") + geom_point(size = 3) +
  # scale_color_manual(values = c("green", "gold4", "blue", "red")) +
  scale_color_manual(values = c("blue", "red")) +
  scale_x_continuous(expand = c(.07,.01)) +
  # geom_text(x = -2, aes(label = round(age), color = gender)) +
  theme_pubr() +theme(axis.text.y = element_text(), axis.line.y = element_line(), axis.ticks.y = element_line(), legend.title = element_blank(), text = element_text(size = 16))

# labs = pl$data %>% filter(days_from_first_sample >= 0) %>% 
#   group_by(pn_ID) %>% summarise(age = first(age)) %>% pull(age) %>% sort() %>% round()
# pl + scale_y_discrete(labels = labs, name = "Age")

plotly::ggplotly(pl + scale_y_discrete(name = "Pateint"), height = 700)

Taxa barplot

/pita/users/rotem/miniconda3/envs/qiime2-2020.2/bin/qiime feature-table filter-samples \
  --i-table  /pita/pub/data/16S_DBs/processed/16S_DB12/table.qza \
  --m-metadata-file /pita/pub/data/16S_DBs/maps/DB12_covid19_11Oct_map.txt \
  --o-filtered-table ./covid.qza

/pita/users/rotem/miniconda3/envs/qiime2-2020.2/bin/qiime taxa barplot \
  --i-table covid.qza \
  --i-taxonomy /pita/pub/data/16S_DBs/processed/16S_DB12/taxonomy.qza \
  --o-visualization covid_tbp.qzv \
  --m-metadata-file /pita/pub/data/16S_DBs/maps/DB12_covid19_11Oct_map.txt 
## Saved FeatureTable[Frequency] to: ./covid.qza
## Saved Visualization to: covid_tbp.qzv
source("/pita/users/rotem/bin/read_taxa_barplot.R")
tbp <- read_taxa_barplot("/pita/users/rotem/corona/covid_tbp.qzv", level = 2)$data

tmp = tbp %>% 
  filter(!read_count < 2000) %>%
  pivot_longer(cols = contains("__"), names_to = "taxon", values_to ="n") 

# tmp$shortTaxon = lapply(tmp$taxon , short_bacteria_name) %>% unlist()
pl = 
tmp %>% 
  filter(!is.na(cov_res)) %>% 
  group_by(cov_res, taxon) %>% 
  summarise(n = sum(n)) %>% 
      ggplot(aes(  x = cov_res
                 , fill = taxon
                 , y = n)) + 
  geom_col(position = "fill") + 
  theme_q2r()+theme(text = element_text(size = 16))

plotly::ggplotly(pl + theme(legend.position = "none"))
# Filter age below 20
# sample_data(exp) %<>% data.frame() %>% 
#   filter(age > 20) %>%  rownames() %>% prune_samples(exp)
ord = phyloseq::ordinate(exp, method = "PCoA", distance = "unifrac", weighted = F)

PCoA

pl = plot_ordination(exp, ord, color = 'cov_res') + geom_point(size = 3) + 
  theme_q2r() + theme(text = element_text(size = 16))
pl = pl + scale_color_manual(values = c("blue", "red"))
plotly::ggplotly(pl, width = 700)
alldata = ord$vectors %>% data.frame() %>% 
  rownames_to_column("SampleID") %>% left_join(exp %>% sample_data() %>% data.frame())  

pl1 = 
  ggplot(alldata, aes(x = Axis.1, y = Axis.2, color = cov_res)) +
  geom_point(size = 3) + 
  geom_point(data = alldata %>% filter(pn_ID == "COV_164")
             , aes(x = Axis.1, y = Axis.2, color = cov_res), size = 7) +
        geom_text(data = alldata %>% filter(pn_ID %in% c("COV_164"))
                              , aes(x = Axis.1, y = Axis.2 + .03, color = cov_res, label = pn_ID), label.size = NA) +
  scale_color_manual(values = c("blue", "red")) +
  theme_q2r() + theme(text = element_text(size = 16))
pl2 = 
ggplot(alldata, aes(x = Axis.1, y = Axis.2, color = cov_res)) +
  geom_point(size = 3) + 
  geom_point(data = alldata %>% filter(pn_ID %in% c("COV_150", "COV_160", "COV_146"))
             , aes(x = Axis.1, y = Axis.2, color = cov_res, shape = pn_ID), size = 7) +
  geom_text(data = alldata %>% filter(pn_ID %in% c("COV_150", "COV_160", "COV_146"))
                            , aes(x = Axis.1, y = Axis.2 +.03, color = cov_res, label = pn_ID), label.size = NA) +
  scale_color_manual(values = c("blue", "red")) +
  theme_q2r()+ theme(legend.position = "none") + theme(text = element_text(size = 16))

plotly::ggplotly(pl1, width = 700)
plotly::ggplotly(pl2, width = 700)

Calour

import calour as ca
import calour_utils as cu
import matplotlib
matplotlib.use("Agg", force = True)

exp = ca.read_amplicon(r.biompath, r.mappath, min_reads=2000, normalize=10000)
exp = exp.filter_samples("cov_res", ["Positive", "Negative"])
exp.sample_metadata["pn_ID"] = exp.sample_metadata["pn_ID"].str[-3:]
expc = exp.cluster_features(10)
expc = expc.sort_samples("pn_ID")
expc = expc.sort_samples("cov_res")
pl = expc.plot(barx_fields = ["cov_res", "pn_ID"], barx_label_kwargs ={'rotation':90, 'color':"white", 'size':10})
## /home/rstudio/.local/share/r-miniconda/envs/r-reticulate/lib/python3.6/site-packages/calour/heatmap/heatmap.py:309: MatplotlibDeprecationWarning: You are modifying the state of a globally registered colormap. In future versions, you will not be able to modify a registered colormap in-place. To remove this warning, you can make a copy of the colormap first. cmap = copy.copy(mpl.cm.get_cmap("viridis"))
##   cmap.set_bad(bad_color)

pos = expc.filter_samples("cov_res", ["Positive"])

pl.save_figure("/pita/users/rotem/corona/all_hm.svg")
expc.sample_metadata['firstsample'] = expc.sample_metadata['days_from_first_sample'] == 0
first = expc.filter_samples("firstsample", [True])
first = first.sort_by_metadata("cov_res")
first.plot(barx_fields = ["cov_res"], gui = "cli")
## <calour.heatmap.plotgui_cli.PlotGUI_CLI object at 0x7f117bdf77f0>
## 
## /home/rstudio/.local/share/r-miniconda/envs/r-reticulate/lib/python3.6/site-packages/calour/heatmap/heatmap.py:309: MatplotlibDeprecationWarning: You are modifying the state of a globally registered colormap. In future versions, you will not be able to modify a registered colormap in-place. To remove this warning, you can make a copy of the colormap first. cmap = copy.copy(mpl.cm.get_cmap("viridis"))
##   cmap.set_bad(bad_color)

DE = first.diff_abundance("cov_res", ["Positive"],alpha = .25, random_seed = 5781)
## 2020-10-29 13:04:31 WARNING no significant features found
DE = DE.sort_samples("cov_res")
# DE.plot(barx_fields = ["cov_res", "pn_ID"], gui = "cli")
ca.__file__
## '/home/rstudio/.local/share/r-miniconda/envs/r-reticulate/lib/python3.6/site-packages/calour/__init__.py'

After First sample

# , barx_label_kwargs ={'rotation':90}
lasts = expc.filter_samples("firstsample", [False])
lasts = lasts.sort_by_metadata("cov_res")
lasts.plot(title = "Second check and above", barx_fields = ["cov_res"], gui = "cli")
## <calour.heatmap.plotgui_cli.PlotGUI_CLI object at 0x7f115e124fd0>

lasts.diff_abundance("cov_res", ["Positive"],alpha = .25, random_seed = 5781)
## 2020-10-29 13:04:32 WARNING no significant features found
## AmpliconExperiment with 22 samples, 0 features
lasts = alldata %>% filter(days_from_first_sample > 0)
# lasts.exp = lasts %>% pull(SampleID) %>% prune_samples(exp@sam_data$cov_res)
pl1 = 
  ggplot(lasts, aes(x = Axis.1, y = Axis.2, color = cov_res, size = days_from_first_sample)) +
  geom_point() + 
  scale_color_manual(values = c("blue", "red")) +
  theme_q2r() + theme(text = element_text(size = 16))

plotly::ggplotly(pl1)

Alpha diversity

microbiome::alpha(abs_exp) %>% rownames_to_column("SampleID") %>% 
  left_join(alldata) %>% 
  # pivot_longer(2, values_to = "statistic", names_to = "alpha.measurment") %>% 
  filter(days_from_first_sample > 0) %>%
  # filter(!(cov_res == "Positive" & days_from_first_sample == 0)) %>%
  ggplot(aes(x = cov_res, fill = cov_res, y = observed)) +
  # facet_wrap(~alpha.measurment, scales = "free_y") + 
  scale_fill_manual(values = c("blue", "red")) +
  geom_boxplot() + 
  theme_q2r()