<!DOCTYPE html>
<!DOCTYPE html>
library(tidyverse)
# library(xlsx)
# library(rotemfuns)
# library(vegan)
library(funrar)
library(subSeq)
library(qiime2R)
library(ggpubr)
# library(magrittr)
library(forcats)
library(rmarkdown)
# library(kableExtra)
library(gghighlight)
library(microbiomeutilities)
source("/pita/users/rotem/scripts/qiime2_funcs.R")
# Assumes that the merged table already exists in the processed directory.
librarynumber = "14"
mappath = paste0("/pita/pub/data/16S_DBs/maps/DB1-", librarynumber, "_premap_v1.csv")
dbmap = read.csv(mappath)
dbmap$reads_number = dbmap$reads_number %>% as.character() %>% as.numeric()
map = sample_data(dbmap)
sample_names(map) = dbmap %>% pull(1)
biompath = paste0("/pita/pub/data/16S_DBs/processed/16S_DB1-",librarynumber, "_merged/table.qza")
taxpath = paste0("/pita/pub/data/16S_DBs/processed/16S_DB1-",librarynumber
,"_merged/taxonomy.qza")
biom = read_qza(biompath)$data
biom = biom %>% data.frame()
# setdiff(dbmap$X.SampleID, colnames(biom))
# biom = biom %>% select(sample_names(map))
min_reads_cutoff = 2000
biom = biom[apply(biom, 2, sum) > min_reads_cutoff]
rare_biom = vegan::rrarefy(biom %>% t(), 10000) %>% t()
otu = phyloseq::otu_table(rare_biom, taxa_are_rows = T)
otu = phyloseq::prune_taxa(taxa_sums(otu) > 10, otu)
relative_otu = otu_table(funrar::make_relative(otu@.Data), taxa_are_rows = T)
relative_otu = filter_taxa(relative_otu, function(x) mean(x) > 1e-5, TRUE)
pretax = read_qza(taxpath)$data
tax = pretax %>% phyloseq::tax_table()
phyloseq::taxa_names(tax) = pretax$Feature.ID
tree = ape::rtree(ntaxa(tax), rooted = T, tip.label = phyloseq::taxa_names(tax))
abs_exp = phyloseq(otu, map, tax, tree)
exp = phyloseq(relative_otu, map, tax, tree)
dbmap %>%
filter(Database == paste0("DB", librarynumber)) %>%
filter(reads_number < min_reads_cutoff) %>% select(1, sample_ID, Cohort, reads_number) %>%
paged_table()
repliactes <- c("024-M3","042-S","034-S","G-055-1","G-056-1")
repliactes_ID <- sample_data(exp) %>% data.frame() %>%
filter(sample_ID %in% repliactes) %>% rownames()
set.seed(.5781)
number_of_subsamples = 1000
randomsample <- sample_data(exp) %>% data.frame() %>% rownames() %>%
sample(number_of_subsamples)
samples <- union(repliactes_ID, randomsample)
subexp <- samples %>% prune_samples(exp)
ord <- ordinate(subexp, method = "PCoA", distance = "unifrac")
pl = plot_ordination(subexp, ord, color = "sample_ID") + theme_q2r()
pl +
gghighlight(sample_ID %in% repliactes, use_direct_label = FALSE) +
ggtitle(paste("PCoA for random sampling of", number_of_subsamples, "samples from all DB's including constant controls"))