library(tidyverse)
library(xlsx)
# library(rotemfuns)
library(vegan)
library(funrar)
library(subSeq)
library(qiime2R)
library(ggpubr)
library(magrittr)
library(forcats)
library(rmarkdown)
library(kableExtra)
library(gghighlight)
library(microbiomeutilities)
source("/pita/users/rotem/scripts/qiime2_funcs.R")
if(getwd() == "/pita/users/rotem/eyes/") { setwd("rattus") }
if(getwd() == "/pita/users/rotem/eyes/human") { setwd("../rattus") }
rawmetadata <- read.xlsx("Microbiome- animal information 29.9.20.xlsx", 1, na.strings = "-")
rawmetadata[rawmetadata == "-"] <- NA
metadata <-
rawmetadata %>% pivot_longer(cols = 17:22
, names_to = "Sample.type", values_to = "ID", values_drop_na = T)
metadata$ID <- gsub("-","_", metadata$ID)
metadata$Age..weeks. <- gsub("W", "", metadata$Age..weeks.) %>% as.numeric()
metadata$mother.age..weeks. %<>% as.numeric()
numeric_columns <- c(5,9,11,12,16,19:44)
metadata %<>% mutate_at(numeric_columns, as.numeric)
dbmap = read.csv("/pita/pub/data/16S_DBs/maps/DB1-14_premap_v2.csv")
dbmap$reads_number = dbmap$reads_number %>% as.character() %>% as.numeric()
done =
dbmap %>%
filter(Cohort == "eyes") %>%
# select(-Family) %>%
filter(Organism == "Rat") %>%
arrange(desc(reads_number)) %>%
distinct(sample_ID, .keep_all = T)
# done = filter(done, reads_number > 1000)
done$sample_ID = gsub("-", "_", done$sample_ID %>% as.character())
# done$sample_ID = str_replace_all(done$sample_ID, "_(?=[FSM])", "")
done$sample_ID %>% unique()
done %>% filter(done$sample_ID %>% duplicated()) %>% arrange(sample_ID) %>%
select(1, sample_ID, reads_number)
# done = distinct(done, sample_ID, .keep_all = T)
names(done)[1] = "sampleid"
# write.table(done, "16s_map.tsv", row.names = F, sep = "\t", quote = F)
cutoff = 2000
# hadasmap %>% filter(Family == "FR15") %>% select(ID, sample_ID, pnid)
From total 1270 rat samples. We sequenced 307 samples.
There are 11 samples that the label isn’t find in the metadata table. There are 264 (88%) That have more than 2000 reads.
# # samples without metadata
# data %>% filter(SPF %>% is.na()) %>% pull(sample_ID)
# # sequenced but labeled wrong
# data %>% filter(reads_number %>% is.na())%>% pull(sample_ID)
E220_R_3, E218_R_3, E222_R_3, E419_L_B_4, E223_R_3, E224_R_3, E508_L_4W, E344_R_3, E408_L_B_4, E409_L_B_4, E407_L_B_4
# data %>% filter(Species == "LE") %>% paged_table()
map = sample_data(data)
sample_names(map) <- data$sampleid
biompath = "/pita/pub/data/16S_DBs/processed/16S_DB1-14_merged/table.qza"
taxpath = "/pita/pub/data/16S_DBs/processed/16S_DB1-14_merged/taxonomy.qza"
biom = read_qza(biompath)$data
biom = biom %>% data.frame()
biom = biom %>% select(sample_names(map))
min_reads_cutoff = cutoff
biom = biom[apply(biom, 2, sum) > min_reads_cutoff]
rare_biom = vegan::rrarefy(biom %>% t(), 10000) %>% t()
otu = phyloseq::otu_table(rare_biom, taxa_are_rows = T)
otu = phyloseq::prune_taxa(taxa_sums(otu) > 10, otu)
relative_otu = otu_table(funrar::make_relative(otu@.Data), taxa_are_rows = T)
relative_otu = filter_taxa(relative_otu, function(x) mean(x) > 1e-5, TRUE)
pretax = read_qza(taxpath)$data
tax = pretax %>% phyloseq::tax_table()
phyloseq::taxa_names(tax) = pretax$Feature.ID
tree = ape::rtree(ntaxa(tax), rooted = T, tip.label = phyloseq::taxa_names(tax))
abs_exp = phyloseq(otu, map, tax, tree)
exp = phyloseq(relative_otu, map, tax, tree)