1 - Purpose
This script is intended to assess BioSkryb’s single cell RNA-sequencing datasets. The following will be assessed: [1] - Building the seurat object; [2] - Running quality control at the cellular and sample level both. This include 2.1 - estimating dead cells; 2.2 - examining distibution of gene/UMI counts, as well as %MT genes; 2.3 - testing modality of gene/UMI counts and %MT genes; 2.4 - identifying samples w/outlying gene/UMI counts and %MT genes; 2.5 - identifying doublets present; 2.6 - identifying cells with a high abundance of ambient RNA (contamination); 2.7 - and flagging cells w/outlying gene/UMI counts and %MT genes, as well having been detected as doublets or having a high abundance of contaminating RNA.; [3] - Normalizing and scaling counts; [4] - Running PCA on scaled counts; [5] - Cluster identifying and UMAP calculation; [6] - Identifying confounding variables.
2 - Load libraries, functions, and themes
2.1 - Libraries
All libraries are required for this analysis.
begin <- Sys.time()
library(ggplot2)
library(data.table)
library(plyr)
library(FactoMineR)
library(DESeq2)
library(dplyr)
library(BiocParallel)
library(kableExtra)
library(unikn)
library(nord)
library(ggpointdensity)
library(Seurat)
library(microViz)
library(scds)
library(celda)
library(ggridges)
library(paletteer)
2.2 - Functions
These function are required to assess confounding variables in the metadata.
anova_PCA <- function(obj_name, pca.df, metadata, merge_ID, cat_var, numPCs) {
names(cat_var) <- cat_var
ind.pca.df <- as.data.frame(pca.df[["ind"]][["coord"]][,1:numPCs])
ind.pca.df[[merge_ID]] <- rownames(ind.pca.df)
ind.pca.wcat.df <- merge(ind.pca.df, metadata, by=merge_ID, all.x=TRUE, all.y=FALSE)
endCol <- length(names(ind.pca.df))
ind.pca.an <- ind.pca.wcat.df[,2:endCol]
anovaPvalueList <- lapply(cat_var, function(f) apply(ind.pca.an, 2, function(x) anova(aov(x ~ as.factor(ind.pca.wcat.df[[f]])))$`Pr(>F)`[1]))
res.pval <- plyr::ldply(anovaPvalueList, .id="Factor")
setnames(res.pval, names(res.pval), c("Factor", paste0("Dim", seq(1:numPCs), "_pvalue")))
endCol2 <- numPCs+1
if (length(cat_var) == 1) {
res.padj <- data.frame(t(apply(res.pval[,2:endCol2], 2, function(x) p.adjust(x, method="BH"))))
} else {
res.padj <- data.frame(apply(res.pval[,2:endCol2], 2, function(x) p.adjust(x, method="BH")))
}
names(res.padj) <- gsub("_pvalue", "_adj_pvalue", names(res.padj))
res.padj$Factor <- res.pval$Factor
res_all.df <- list(res.pval, res.padj) %>% purrr::reduce(full_join, by="Factor")
res_all_ord.df <- res_all.df[,order(colnames(res_all.df))]
return(res_all_ord.df)
}
glm_PCA <- function(obj_name, pca.list, metadata, merge_ID, num_var, numPCs=10) {
names(num_var) <- num_var
ind.pca.df <- as.data.frame(pca.list[["ind"]][["coord"]][,1:numPCs])
ind.pca.df[[merge_ID]] <- rownames(ind.pca.df)
ind.pca.wnum.df <- merge(ind.pca.df, metadata, by=merge_ID, all.x=TRUE, all.y=FALSE)
endCol <- length(names(ind.pca.df))
ind.pca.an <- ind.pca.wnum.df[,2:endCol]
glmPvalueList <-lapply(num_var, function(f) apply(ind.pca.an, 2, function(x) summary(glm(x ~ as.numeric(ind.pca.wnum.df[[f]])))$coefficients[2,4]))
res.df <- plyr::ldply(glmPvalueList, .id="Factor")
setnames(res.df, names(res.df), c("Factor", paste0("Dim", seq(1:numPCs), "_pvalue")))
endCol2 <- numPCs+1
if (length(num_var) == 1) {
res.padj <- data.frame(t(apply(res.df[,2:endCol2], 2, function(x) p.adjust(x, method="BH"))))
} else {
res.padj <- data.frame(apply(res.df[,2:endCol2], 2, function(x) p.adjust(x, method="BH")))
}
names(res.padj) <- gsub("_pvalue", "_adj_pvalue", names(res.padj))
res.padj$Factor <- res.df$Factor
setcolorder(res.padj, "Factor")
res_all.df <- list(res.df, res.padj) %>% purrr::reduce(full_join, by="Factor")
res_all_ord.df <- res_all.df[,order(colnames(res_all.df))]
return(res_all_ord.df)
}
2.3 - Themes
These themes are generic to ensure all Bioskryb’s figures have the same theme.
theme_bioskryb <- function(){
theme_bw(base_size=12)+
theme(axis.text=element_text(color="black"),
panel.background=element_rect(color="white"),
strip.text = element_text(size=12),
strip.background = element_rect(fill="white"))
}
theme_bioskryb_90 <- function() {
theme_bw(base_size=12)+
theme(axis.text.x=element_text(angle=90,hjust = 1,vjust = 0.5),
axis.text=element_text(color="black"),
panel.background=element_rect(color="black"),
strip.text = element_text(size=14),
strip.background = element_rect(fill="white"))
}
3 - Load data
3.1 - Expression and metadata
Here we load the data and create the seurat object. Note, the gene symbols have several (>20k) that are blank because the ensembl ID has no associated gene symbols. However, gene symbols are required to identify mitochondrial gene contamination and inferring cell cycle. So the blanks get renamed as “Unknown” and made unique.
meta<-as.data.frame(fread(file="df_metadata_quantbio.tsv"))
genes<-as.data.frame(fread(file="df_metadata_gene_id_to_gene_symbol.tsv"))
exp<-as.data.frame(fread(file="tab_quantbio_nreads_all.tsv"))
exp100k<-as.data.frame(fread(file="tab_quantbio_nreads_100kpairs.tsv"))
### Clean the data up a little
rownames(meta)<-meta$SampleId
rownames(exp)<-exp$V1
exp$V1<-NULL
rownames(exp100k)<-exp100k$V1
exp100k$V1<-NULL
### Examine duplicated genes. It seems like there are 19K genes with duplicated
### gene symbols; however, 18,500 of those are duplicated because there is
### no gene symbol present.
dups<-genes[which(duplicated(genes$gene_symbol)),]
blanks<-genes[which(genes$gene_symbol==""),]
print(paste(length(which(genes$gene_symbol=="")),"of",
length(genes$gene_id),
"genes have no associated gene symbol."))
## [1] "20055 of 60664 genes have no associated gene symbol."
### Subset the genes list by those found in exp/exp100k
#identical(rownames(exp), rownames(exp100k))
genes<-genes[genes$gene_id %in% rownames(exp), ]
#identical(genes$gene_id, rownames(exp))
### Add gene symbols to the counts.
genes$gene_symbol[which(genes$gene_symbol=="")]<-"no_symbol"
exp$gene_symbol<-genes$gene_symbol
exp$gene_symbol[which(exp$gene_symbol=="no_symbol")]<-make.unique(exp$gene_symbol[which(exp$gene_symbol=="no_symbol")])
exp<-aggregate(. ~ gene_symbol, data=exp, FUN=sum)
rownames(exp)<-exp$gene_symbol
exp$gene_symbol<-NULL
### Create seurats
seurat<-CreateSeuratObject(counts=exp, project = "BioSkryb", assay = "RNA",
meta.data = meta)
seurat100k<-CreateSeuratObject(counts=exp100k, project = "BioSkryb", assay = "RNA",
meta.data = meta)
### Set factor order
order<-unique(seurat@meta.data$CellLine)
seurat@meta.data$CellLine<-factor(seurat@meta.data$CellLine, levels=order)
3.1.1 - Setup the colors to be used
4 - Run quality control (QC) steps
With traditional scRNA-seq (plate or droplet-base), sample-level QC should be performed prior to conducting cell-level QC, so here we start with sample-level QC in section 4.1 then move on to cell-level QC in section 4.2.
4.1 - Sample level QC
4.1.1 - Estimate the abundance of dead cells
The % of mitochondrial genes in a cells usually suggests a high number of dead cells in the samples.
seurat <- PercentageFeatureSet(seurat, pattern = "^MT-|^mt-", col.name = "percent.mt")
summary(seurat@meta.data$percent.mt )
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.5306 2.8801 3.9072 4.8609 5.4257 65.1223
4.1.2 - Examine distributions
Look for samples with skewed, abnormal or bimodal distributions in key variables of gene count (nFeature_RNA), UMI count (nCount_RNA), and % mitochondria genes (mt.percent)
### Gene count
p1 <- RidgePlot(seurat, group.by="SampleID",
features="nFeature_RNA",
same.y.lims=TRUE, sort=FALSE, log=TRUE) +
scale_fill_manual(values=sample.cols) +
scale_x_continuous(labels = scales::scientific)
### UMI count
p2 <- RidgePlot(seurat, group.by="SampleID",
features="nCount_RNA",
same.y.lims=TRUE, sort=FALSE, log=TRUE) +
scale_fill_manual(values=sample.cols) +
scale_x_continuous(labels = scales::scientific) +
theme(axis.text.x=element_text(size=9))
### Percent mitochondrial genes
p3 <- RidgePlot(seurat, group.by="SampleID",
features="percent.mt", y.max=50, sort=FALSE) +
scale_fill_manual(values=sample.cols) +
xlim(0, max(seurat@meta.data$percent.mt )+5)
### Plot them all together
cowplot::plot_grid(p1 + theme(legend.position="none",
axis.title.y=element_blank(),
axis.text.x=element_text(angle=90,hjust = 1,vjust = 0.5)),
p2 + theme(legend.position="none",
axis.title.y=element_blank(),
axis.text.x=element_text(angle=90,hjust = 1,vjust = 0.5)),
p3 + theme(legend.position="none",
axis.title.y=element_blank(),
axis.text.x=element_text(angle=90,hjust = 1,vjust = 0.5)),
align = 'vh',
labels = c("A", "B", "C"),
hjust = -1,
nrow = 1)
4.1.3 - Test modality of features
features <- c("nFeature_RNA", "nCount_RNA", "percent.mt")
names(features) <- features
modetest.list <- BiocParallel::bplapply(features,
function(f) multimode::modetest(seurat@meta.data[[f]]),
BPPARAM=SerialParam())
pvalues_logical.list <- lapply(modetest.list, function(mt) mt[["p.value"]] <= 0.05)
lapply(modetest.list, function(t) t)
## $nFeature_RNA
##
## Ameijeiras-Alonso et al. (2019) excess mass test
##
## data: seurat@meta.data[[f]]
## Excess mass = 0.13776, p-value < 2.2e-16
## alternative hypothesis: true number of modes is greater than 1
##
##
## $nCount_RNA
##
## Ameijeiras-Alonso et al. (2019) excess mass test
##
## data: seurat@meta.data[[f]]
## Excess mass = 0.044221, p-value = 0.034
## alternative hypothesis: true number of modes is greater than 1
##
##
## $percent.mt
##
## Ameijeiras-Alonso et al. (2019) excess mass test
##
## data: seurat@meta.data[[f]]
## Excess mass = 0.030617, p-value = 0.75
## alternative hypothesis: true number of modes is greater than 1
features.test <- features[unlist(pvalues_logical.list)]
modes.list <- BiocParallel::bplapply(features.test,
function(f) multimode::locmodes(seurat@meta.data[[f]],
mod0=2,
display=FALSE),
BPPARAM=SerialParam())
antimode.list <- lapply(modes.list,
function(m) round(m$locations[[2]], digits=2))
4.1.3.1 - Inspect the modality
If the antimode falls in the valley, it is potentially a good cutoff.
### Gene count
p1 <- ggplot(data=seurat@meta.data, aes(x=nFeature_RNA))
p1 <- p1 + geom_density(fill="slateblue4") + labs(x="Number of genes", y="Density")
if (isTRUE(pvalues_logical.list[["nFeature_RNA"]])) {
p1 <- p1 + geom_vline(xintercept=antimode.list[["nFeature_RNA"]])
p1 <- p1 + labs(caption=paste("antimode =", antimode.list[["nFeature_RNA"]]))
} else {
p1 <- p1 + labs(caption="No antimode to display")
}
p1 <- p1 + theme(plot.caption=element_text(face="italic"))
#s1 <- p1 + facet_wrap(~FlowCell)
### UMI
p2 <- ggplot(data=seurat@meta.data, aes(x=nCount_RNA))
p2 <- p2 + geom_density(fill="magenta4") + labs(x="Number of UMIs") + theme(axis.title.y=element_blank())
if (isTRUE(pvalues_logical.list[["nCount_RNA"]])) {
p2 <- p2 + geom_vline(xintercept=antimode.list[["nCount_RNA"]])
p2 <- p2 + labs(caption=paste("antimode =", antimode.list[["nCount_RNA"]]))
} else {
p2 <- p2 + labs(caption="No antimode to display")
}
p2 <- p2 + theme(plot.caption=element_text(face="italic"))
#s2 <- p2 + facet_wrap(~FlowCell)
### %MT
mt.ul <- max(seurat@meta.data$percent.mt )+2
p3 <- ggplot(data=seurat@meta.data, aes(x=percent.mt))
p3 <- p3 + geom_density(fill="olivedrab3") + labs(x="% mitochondrial genes") + theme(axis.title.y=element_blank())
if (isTRUE(pvalues_logical.list[["percent.mt"]])) {
p3 <- p3 + geom_vline(xintercept=antimode.list[["percent.mt"]])
p3 <- p3 + labs(caption=paste("antimode =", antimode.list[["percent.mt"]]))
} else {
p3 <- p3 + labs(caption="No antimode to display")
}
p3 <- p3 + theme(plot.caption=element_text(face="italic"))
p3 <- p3 +xlim(0, mt.ul)
#s3 <- p3 + facet_wrap(~FlowCell)
cowplot::plot_grid(p1,
p2,
p3,
align = 'vh',
labels = c("A", "B", "C"),
hjust = -1,
nrow = 1)
# cowplot::plot_grid(s1,
# s2,
# s3,
# align = 'vh',
# labels = c("A", "B", "C"),
# hjust = -1,
# nrow = 1)
4.1.4 - Mean and MAD of features
Calculate median, and median absolute deviation (mad) for gene counts, UMI counts, and %MT for entire dataset
median.list <- lapply(features,
function(f) median(seurat@meta.data[[f]],
na.rm=TRUE))
mad.list <- lapply(features,
function(f) mad(seurat@meta.data[[f]],
na.rm=TRUE))
### Print for the report
lapply(features, function(f) median(seurat@meta.data[[f]],
na.rm=TRUE))
## $nFeature_RNA
## [1] 6978
##
## $nCount_RNA
## [1] 270603.5
##
## $percent.mt
## [1] 3.907208
lapply(features,
function(f) mad(seurat@meta.data[[f]],
na.rm=TRUE))
## $nFeature_RNA
## [1] 3149.784
##
## $nCount_RNA
## [1] 180342.7
##
## $percent.mt
## [1] 1.726394
4.1.5 - Identify sample-level outliers
Calculate median, and median absolute deviation (mad) at the sample level Remove samples that meet the 3 criteria: Gene count median above 75% quantile of entire dataset, UMI count median above 75% quantile of entire dataset, %MT below 25% median of entire dataset.
sample.median.df <- seurat@meta.data %>%
group_by(SampleID) %>%
summarize(nFeature_RNA_median=median(nFeature_RNA, na.rm=TRUE),
nCount_RNA_median=median(nCount_RNA, na.rm=TRUE),
percent.mt_median=median(percent.mt , na.rm=TRUE))
### Flag a sample if it meets the following parameters:
sample.median.df$nFeature_RNA_outlr <- ifelse(sample.median.df$nFeature_RNA_median <= median.list[["nFeature_RNA"]] - 3*mad.list[["nFeature_RNA"]] || sample.median.df$nFeature_RNA_median >= median.list[["nFeature_RNA"]] + 3*mad.list[["nFeature_RNA"]],
"FLAG",
"OK")
sample.median.df$nCount_RNA_outlr <- ifelse(sample.median.df$nCount_RNA_median <= median.list[["nCount_RNA"]] - 3*mad.list[["nCount_RNA"]] || sample.median.df$nFeature_RNA_median >= median.list[["nCount_RNA"]] + 3*mad.list[["nCount_RNA"]],
"FLAG",
"OK")
sample.median.df$percent.mt_outlr <- ifelse(sample.median.df$percent.mt_median >= median.list[["percent.mt"]] + 3*mad.list[["percent.mt"]],
"FLAG",
"OK")
### Write to a table
kable(sample.median.df, booktabs = TRUE,table.attr = "style = \"color: black;\"") %>% kable_styling(font_size = 12)
| SampleID | nFeature_RNA_median | nCount_RNA_median | percent.mt_median | nFeature_RNA_outlr | nCount_RNA_outlr | percent.mt_outlr |
|---|---|---|---|---|---|---|
| tme_epcamh | 3480.0 | 156315.0 | 5.359060 | OK | OK | OK |
| tme_epcaml | 1835.5 | 82116.5 | 4.765005 | OK | OK | OK |
| molm13 | 7164.0 | 345293.5 | 4.014256 | OK | OK | OK |
| tme | 8395.0 | 207120.0 | 2.929828 | OK | OK | OK |
| Sum159 | 7550.0 | 278201.0 | 3.494873 | OK | OK | OK |
4.2 - Cell level QC
4.2.1 - Doublet detection
Doublets are identified from each sample individually. Currently, doublet detection is performed via the R package scds. The hybrid score utilized here incorporates two doublet scores: - bcds - artificial doublets are created and a doublet classifier is trained. Cells are assigned a probability of falling into the artificial doublet class. - cxds - expected co-expression of gene-pairs based on frequency is utilized to assign a rank-order based score to all gene-pairs. Scores for co-occuring gene-pairs for each cell are summed.
The hybrid score is derived by summing the normalized bcds and cxdx scores.
# Seurat was the only package that provided workable code for conversion between sce and seurat objects
seurat_dbl <- seurat
seurat_dbl@meta.data$rmv <- ifelse(seurat_dbl@meta.data$nFeature_RNA < 200, "remove", "Keep")
seurat_dbl@meta.data$rmv <- ifelse(seurat_dbl@meta.data$percent.mt >= 20, "remove", seurat_dbl@meta.data$rmv)
#seurat_dbl <- subset(seurat_dbl, subset=`rmv`=="Keep")
samples <- seurat_dbl@meta.data$SampleID
seurat.list <- SplitObject(seurat_dbl, split.by="SampleID")
sce.list <- lapply(seurat.list,
function(s) Seurat::as.SingleCellExperiment(s))
sce.list <- lapply(sce.list,
function(sce) scds::cxds_bcds_hybrid(sce))
## [09:49:48] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [09:49:48] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [09:49:51] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [09:49:52] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [09:49:53] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
seurat.list <- lapply(sce.list,
function(sce) Seurat::as.Seurat(sce,
counts = "counts"))
num_seurat_obj <- length(seurat.list)
seurat <- merge(x=seurat.list[[1]], y=seurat.list[2:num_seurat_obj])
hcutoff<-1.5
seurat@meta.data$hybrid_call <- ifelse(seurat@meta.data$hybrid_score >= hcutoff, "eDoublet", "eSinglet")
4.2.1.1 - Plot density of doublet scores
### Plot density of the hybrid scores
p <- ggplot(data=seurat@meta.data, aes(x=hybrid_score))
p <- p + geom_density(fill=dbl.cols[["eDoublet"]], color="#AA005FFF")
p <- p + theme_bioskryb() + labs(x="SCDS doublet score", y="Density") + geom_vline(xintercept=hcutoff)
p
4.2.1.2 - Plot density of doublet scores (sample-level)
dat <- as.data.frame(seurat@meta.data)
dbl_data.df <- data.frame(SampleID=dat$SampleID,
hybrid_score=dat$hybrid_score,
hybrid_call=dat$hybrid_call)
median.df <- dbl_data.df %>%
group_by(SampleID) %>%
summarize(median_h=median(hybrid_score, na.rm=TRUE))
median.dt <- setDT(median.df)
# Get x-axis orders
# setorder(median.dt, median_h)
# h.ord <- median.dt$SampleID
dbl_data.df$SampleID<-factor(dbl_data.df$SampleID, levels=order)
p1 <- ggplot(data=dbl_data.df, aes(x=hybrid_score, y=SampleID, fill=SampleID))
p1 <- p1 + ggridges::geom_density_ridges()
p1 <- p1 + theme_bioskryb() + labs(x="SCDS doublet score")
p1 <- p1 + theme(axis.title.y=element_blank(),
legend.position="none")
p1 <- p1 + scale_fill_manual(values=sample.cols)
p1
4.2.1.3 - Plot boxplot doublet scores
A look at the distribution of doublet scores
p1 <- ggplot(data=dbl_data.df,
aes(y=hybrid_score,
x=SampleID,
fill=SampleID))
p1 <- p1 + geom_boxplot() + labs(y="scds hybrid score")
p1 <- p1 + theme_bioskryb() #+ scale_x_discrete(limits=h.ord)
p1 <- p1 + theme(axis.text.x=element_text(angle=90),
axis.title.x=element_blank(),
legend.position="none")
p1 <- p1 + scale_fill_manual(values=sample.cols) + ylab("Doublet score")
p1
4.2.1.4 - Call doublets
dbl_data.df$hybrid_call <- factor(dbl_data.df$hybrid_call, levels=c("eSinglet", "eDoublet"))
p <- ggplot(data=dbl_data.df, aes(x=SampleID, fill=hybrid_call))
p <- p + geom_bar(color="black")
p <- p + scale_fill_manual(values=dbl.cols)
p <- p + theme_bioskryb()
p <- p + theme(axis.title.x = element_blank())
p
4.2.2 - Decontamination
This function is recently incorporated and not completely recommended unless you know what you are doing. The DecontX package is utilized for contaminant detection. A threshold of 0.5 is recommended. DecontX can be run with all samples together (as is done here) or with the raw (unfiltered data) as background.
sce <- as.SingleCellExperiment(seurat)
sce <- celda::decontX(sce)
seurat <- CreateSeuratObject(counts=counts(sce),
meta.data=as.data.frame(colData(sce)))
seurat@meta.data$decontX_contamination <- sce$decontX_contamination
contam_cutoff<-0.5
seurat@meta.data$contam_status <- ifelse(seurat@meta.data$decontX_contamination >= contam_cutoff, # Thresholding is experimental, proceed at your own risk.
"Ambient",
"Native")
4.2.2.1 - Plot density of contamination score
p <- ggplot(data=seurat@meta.data, aes(x=decontX_contamination))
p <- p + geom_density(fill=dcx.cols[["Ambient"]], color="#62C707FF")
p <- p + theme_bioskryb() + labs(x="Percent contamination", y="Density") + geom_vline(xintercept = contam_cutoff)
p
4.2.2.2 - Plot density of contaminations core (sample-level)
dat <- as.data.frame(seurat@meta.data)
median.df <- dat %>%
group_by(SampleID) %>%
summarize(median_X=median(decontX_contamination, na.rm=TRUE))
median.dt <- setDT(median.df)
# Get x-axis orders
#setorder(median.dt, median_X)
#X.ord <- median.dt$SampleID
dat$SampleID<-factor(dat$SampleID, levels=order)
p1 <- ggplot(data=dat, aes(x=decontX_contamination, y=SampleID, fill=SampleID))
p1 <- p1 + ggridges::geom_density_ridges()
p1 <- p1 + labs(x="Percent contamination")
p1 <- p1 + theme_bioskryb()
p1 <- p1 + theme(axis.title.y=element_blank(),
legend.position="none")
p1 <- p1 + scale_fill_manual(values=sample.cols)
p1
4.2.2.3 - Plot boxplot of contamination
A look at the distribution of decontamination scores. The score ranges from 0 to 1 and refers to the percentage of cells that are contaminated.
dat <- as.data.frame(seurat@meta.data)
median.df <- dat %>%
group_by(SampleID) %>%
summarize(median_X=median(decontX_contamination, na.rm=TRUE))
median.dt <- setDT(median.df)
# Get x-axis orders
# setorder(median.dt, median_X)
# X.ord <- median.dt$SampleID
dat$SampleID<-factor(dat$SampleID, levels=order)
p1 <- ggplot(data=dat, aes(y=decontX_contamination, x=SampleID, fill=SampleID))
p1 <- p1 + geom_boxplot()
p1 <- p1 + labs(y="Percent contamination")
p1 <- p1 + theme_bioskryb() #+ scale_x_discrete(limits=X.ord)
p1 <- p1 + theme(axis.text.x=element_text(angle=90),
axis.title.x=element_blank(),
legend.position="none")
p1 <- p1 + scale_fill_manual(values=sample.cols)
p1
4.2.2.4 Call ambient RNA contamination
p <- ggplot(data=dat, aes(x=SampleID, fill=contam_status))
p <- p + labs(y="Contaminated cell count")
p <- p + geom_bar(color="black") #+ scale_x_discrete(limits=X.ord)
p <- p + scale_fill_manual(values=dcx.cols)
p <- p + theme_bioskryb()
p <- p + theme(axis.title.x = element_blank()) + guides(fill=guide_legend(title="Contamination status"))
p
4.2.3 - Establish nCount_RNA, nFeature_RNA, and mt.percent acceptable levels
We calculate the 3*MAD of nCount_RNA, nFeature_RNA, and mt.percent to assess whether a cell is an outlier.Lower limites of nFeature and nCount are hard cut-offs of 500 and 1,000.
### Lower limit of acceptable gene counts before a cell is removed
nFeature_RNA.ll <- 500
### Lower limit of acceptable UMI counts before a cell is removed
nCount_RNA.ll <- 1000
### Upper limit of acceptable gene counts before a cell is removed
nFeature_RNA.ul <- round(median.list[["nFeature_RNA"]]+3*mad.list[["nFeature_RNA"]],
digits=0)
### Upper limit of acceptable UMI counts before a cell is removed
percent.mt.ul <- round(median.list[["percent.mt"]]+3*mad.list[["percent.mt"]],
digits=0)
### Upper limit of acceptable % mitochondria genes before a cell is removed
nCount_RNA.ul <- round(median.list[["nCount_RNA"]]+3*mad.list[["nCount_RNA"]],
digits=0)
4.2.4 - Flag cells that should be removed
Now that doublet scores, contamination scores, and nFeature_RNA/nCount_RNA/percen
4.2.4.1 - Add “flag” to metadata
Create metadata variable and plot filtered vs non-filtered cells.
If doublet detection was performed (and data will be downsampled), the cells are already downsampled to 80k and filtered to exclude percent mitochondrial genes above 20% and number of genes below 200.
### nFeature_RNA
seurat@meta.data$Filter <- ifelse(seurat@meta.data$nFeature_RNA < nFeature_RNA.ll |
seurat@meta.data$nFeature_RNA > nFeature_RNA.ul,
"nFeature",
"")
### Percent mt
seurat@meta.data$Filter <- ifelse(seurat@meta.data$percent.mt > percent.mt.ul,
paste(seurat@meta.data$Filter,"percent.mt"),
seurat@meta.data$Filter)
### nCount_RNA
seurat@meta.data$Filter <- ifelse(seurat@meta.data$nCount_RNA < nCount_RNA.ll |
seurat@meta.data$nCount_RNA > nCount_RNA.ul,
paste(seurat@meta.data$Filter,"nCount"),
seurat@meta.data$Filter)
### Doublet status
seurat@meta.data$Filter <- ifelse(seurat@meta.data$hybrid_call=="eDoublet",
paste(seurat@meta.data$Filter,"doublet"),
seurat@meta.data$Filter)
### Contamination status
seurat@meta.data$Filter <- ifelse(seurat@meta.data$contam_status=="Ambient",
paste(seurat@meta.data$Filter,"contamination"),
seurat@meta.data$Filter)
### Clean it up a little
seurat@meta.data$Filter[which(seurat@meta.data$Filter=="")]<-"Keep"
seurat@meta.data$Filter[which(seurat@meta.data$Filter=="NA contamination")]<-"contamination"
### Doublet and contamination algorithms did not calculate anything for the
### two TME cells, so its listed as NA; however, they do not meet the
### nCount/nFeature/percent.mt cut-offs, so I'll keep them.
seurat@meta.data$Filter[which(is.na (seurat@meta.data$Filter))]<-"Keep"
### Summarize
summary(as.factor(seurat@meta.data$Filter))
## doublet nCount nCount doublet percent.mt
## 9 27 1 12
## percent.mt nCount Keep nFeature nFeature nCount
## 1 140 47 2
## nFeature percent.mt
## 1
4.2.4.2 - Visualize the flagged cells and their cutoffs
keep_remove.cols <- paletteer_d("pals::alphabet",n=length(unique(seurat@meta.data$Filter)))
p1 <- ggplot(data=seurat@meta.data, aes(x=nCount_RNA, y=nFeature_RNA, color=Filter))
p1 <- p1 + geom_point() + geom_hline(yintercept=nFeature_RNA.ll) + geom_hline(yintercept=nFeature_RNA.ul) + geom_vline(xintercept=nCount_RNA.ul)
p1 <- p1 + theme_bioskryb_90()
p1 <- p1 + scale_color_manual(values=keep_remove.cols)
p2 <- ggplot(data=seurat@meta.data, aes(x=percent.mt, y=nFeature_RNA, color=Filter))
p2 <- p2 + geom_point() + geom_hline(yintercept=nFeature_RNA.ll) + geom_hline(yintercept=nFeature_RNA.ul) + geom_vline(xintercept = percent.mt.ul)
p2 <- p2 + theme_bioskryb_90()
p2 <- p2 + scale_color_manual(values=keep_remove.cols)
p3 <- ggplot(data=seurat@meta.data, aes(x=percent.mt, y=nCount_RNA, color=Filter))
p3 <- p3 + geom_point() + geom_hline(yintercept=nCount_RNA.ul) + geom_hline(yintercept=nCount_RNA.ll) + geom_vline(xintercept = percent.mt.ul)
p3 <- p3 + theme_bioskryb_90()
p3 <- p3 + scale_color_manual(values=keep_remove.cols)
legend <- cowplot::get_legend(p1 + theme(legend.box.margin = margin(0, 0, 0, 12)))
prow <- cowplot::plot_grid(p1 + theme(legend.position="none",
),
p2 + theme(legend.position="none"),
p3 + theme(legend.position="none"),
align = 'vh',
labels = c("A", "B", "C"),
hjust = -1,
nrow = 1)
cowplot::plot_grid(prow, legend, rel_widths = c(4, .6))
4.2.4.3 - Remove flagged cells (optional)
This step is skipped until further direction is given by Bioskryb
#seurat <- subset(seurat, subset=`Filter`=="Keep")
4.2.3.4 - Check filtering w/scatterplots
This is commented out for now, until BioSkryb instructs for us to remove them.
4.8.3.2 - Density plots
This is commented out for now, until BioSkryb instructs for us to remove them.
4.8.3.3 - Boxplot after filtering
This is commented out for now, until BioSkryb instructs for us to remove them.
5 - Normalize and scale data
By default, Seurat implements a global-scaling normalization method “LogNormalize”. The definition is as follows: Feature (gene) counts for each cell are divided by the total counts (UMI) for that cell and multiplied by the scale.factor. This is then natural-log transformed using log1p. Only standard transformation is recommended at this time, due to statistical suitability and computational efficiency. SCTransformation is possible but has a very long run time.
num_cells <- ncol(seurat)
num_genes <- nrow(seurat)
5.1 - Density plot before/after normalization/scaling
For the next 2 plots (density and box), we’ll only plot 12 samples to get an idea of how normalization impacts the data. Samples 149:151, as well as 191:193 are samples from flow cells Al1 and Al2, respectively, which have high gene counts (nFeature_RNA) overall.
The seurat dataset used for this analysis has:
- 240 cells
- 56334 genes
### Examine distribution of genes in the first 10 samples before normalizing
beforenorm10<-as.data.frame(seurat@assays$RNA@counts[,c(1:6,100:102,142:144)])
mbeforenorm10<-melt(beforenorm10)
p<-ggplot(data=mbeforenorm10,
aes(x=value,
y=variable,
fill=variable))+
geom_density_ridges(quantile_lines=TRUE, quantiles=4) +
theme_bioskryb() +
ylab("Samples") +
xlab("") +
ggtitle("Before normalization") +
scale_fill_discrete(name = "Samples")
p2 <- p + xlim(c(1,150)) + ggtitle("Before normalization (shortened x-axis)")
#std_prefix <- "RNA_snn_res."
### [1] Normalize
seurat <- NormalizeData(seurat,
normalization.method = "LogNormalize",
scale.factor = 10000)
### Examine distribution of genes in the first 10 samples after normalizing
afternorm10<-as.data.frame(seurat@assays$RNA@data[,c(1:6,100:102,142:144)])
mafternorm10<-melt(afternorm10)
p3<-ggplot(data=mafternorm10,
aes(x=value,
y=variable,
fill=variable))+
geom_density_ridges(quantile_lines=TRUE, quantiles=4) +
theme_bioskryb() +
ylab("Samples") +
xlab("Expression") +
ggtitle("After normalization") +
scale_fill_discrete(name = "Samples")
p4<-p3+xlim(c(1,15)) + ggtitle("After normalization (shortened x-axis)")
### [2] Find variable features
seurat <- FindVariableFeatures(object=seurat,
selection.method = "vst",
nfeatures = 2000)
### [2.1] Scale the data
seurat <- ScaleData(object = seurat,
vars.to.regress=NULL,
verbose = TRUE)
### Plot
prow <- cowplot::plot_grid(p + theme(legend.position="none"),
p2 + theme(legend.position="none"),
p3 + theme(legend.position="none"),
p4 + theme(legend.position="none"),
align = 'vh',
labels = c("A", "B", "C","D"),
hjust = -1,
nrow = 2)
cowplot::plot_grid(prow, rel_widths = c(4, .6))
5.2 - Boxplot plot before/after normalization/scaling
p1<-ggplot(data=mbeforenorm10,
aes(x=variable,
y=value,
fill=variable))+
geom_boxplot() +
theme_bioskryb_90() +
ylab("Gene expression") +
xlab("") +
ggtitle("Before normalization") +
scale_fill_discrete(name = "")
p2 <- p1 + ylim(c(1,100)) + ggtitle("Before normalization (shortened y-axis)")
p3<-ggplot(data=mafternorm10,
aes(x=variable,
y=value,
fill=variable))+
geom_boxplot() +
theme_bioskryb_90() +
ylab("Gene expression") +
xlab("Cells (only showing 12)") +
ggtitle("After normalization") +
scale_fill_discrete(name = "")
p4 <- p3 + ylim(c(1,5)) + ggtitle("After normalization (shortened y-axis)")
### Put together cowplot
prow <- cowplot::plot_grid(p + theme(legend.position="none"),
p2 + theme(legend.position="none"),
p3 + theme(legend.position="none"),
p4 + theme(legend.position="none"),
align = 'vh',
labels = c("A", "B", "C","D"),
hjust = -1,
nrow = 2)
cowplot::plot_grid(prow, rel_widths = c(4, .6))
6 - Cell Cycle score
Cell cycle is often a confounding variable, so we will annotate the metadata with the suggested phase of each cell.
### [1] Create G2M marker gene list (i.e. genes associated with the G2M phase of the
### cell cycle using Seurat's built-in cc.genes (cell cycle) genes list
g2m.genes <- cc.genes$g2m.genes
s.genes <- cc.genes$s.genes
g2m <- rownames(seurat)[rownames(seurat) %in% g2m.genes]
### [2] Calculate G2M marker module score.
seurat <- CellCycleScoring(seurat,
s.features = s.genes, ### Genes associated with s-phase
g2m.features = g2m.genes, ### Genes associated with G2M phase
set.ident = TRUE)
6.1 - Proportion of cells per sample in each phase
tab<-prop.table(table(seurat@meta.data$Phase, seurat@meta.data$SampleID),2)
nr <- length(levels(as.factor(seurat@meta.data$Phase)))
nc <- length(levels(as.factor(seurat@meta.data$SampleID)))
### Plot
ComplexHeatmap::Heatmap(tab,
cluster_columns = TRUE,
show_column_dend = TRUE,
row_names_gp = grid::gpar(fontsize = 12),
column_names_gp = grid::gpar(fontsize=12),
cluster_rows = TRUE,
show_row_dend = FALSE,
col = heatmap.colfun,
height = unit(5, "mm")*nr,
width = unit(5, "mm")*nc,
name="Proportion")
6.2 - Barplot cell cycle phase
cell_cycle.cols <- c("#01617E","#8B9934","#E04883")
names(cell_cycle.cols) <- levels(as.factor(seurat@meta.data$Phase))
p <- ggplot(data=seurat@meta.data, aes(x=SampleID, fill=Phase))
p <- p + geom_bar(color="black")
p <- p + theme_bioskryb_90()
p <- p + scale_fill_manual("", values=cell_cycle.cols)
p <- p + labs(y="Count")
p <- p + theme(axis.title.x=element_blank(),
axis.text=element_text(size=9),
legend.text=element_text(size=9),
legend.title=element_text(size=9),
axis.title.y=element_text(size=10),
strip.text=element_text(size=10),
legend.position="bottom")
p
6.3 - Cell cycle regression
If cell cycle was previously identified as having an effect on the clustering and the differences are not biological, the difference between G2M and S phase scores will be calculated and used as the variable to regress. For most cases, it is not recommended to regress out cell cycle. Other variables will also be regressed out at this time.
seurat@meta.data$cc_dif <- seurat@meta.data$S.Score - seurat@meta.data$G2M.Score
#var_regress <- gsub("cc", "cc_dif", params$var_regress)
#seurat <- ScaleData(object = seurat,
# vars.to.regress=var_regress,
# verbose = TRUE)
7 - PCA
7.1 - Plot Elbow of PCs
seurat <- RunPCA(seurat)
#The elbow plot gives us an idea of how many PCs will be significant. When the plot plateaus, that is the point of diminishing returns.
dat <- data.frame(`Standard_deviation`=seurat@reductions[["pca"]]@stdev,
PC=c(1:length(seurat@reductions[["pca"]]@stdev)))
p <- ggplot(data=dat, aes(x=PC, y=Standard_deviation))
p <- p + geom_bar(stat="identity", fill="#6AAAB7", color=NA)
p <- p + theme_bioskryb()
p <- p + labs(y="Standard deviation")
p
7.2 - Dimension loadings
The dimension loadings give us an idea of the top 10 genes contributing to each PC.
plot.list <- VizDimLoadings(seurat, dims=1:9, ncol=3, nfeatures=10,
combine=FALSE, col=NA)
for (i in 1:length(plot.list)) {
pc <- paste("PC", i, sep="_")
#scale_lim <- max(a)
p <- plot.list[[i]]
xscale_lim <- max(abs(p$data[[pc]]))
p <- p + geom_bar(stat="identity", aes(fill=.data[[pc]]))
p<- p + scale_fill_gradient2(low=unikn_blue_red.cols[1],
mid="white",
high=unikn_blue_red.cols[3],
limits=c(-xscale_lim, xscale_lim))
p <- p + theme_bioskryb()
p <- p + theme(axis.title = element_blank(),
axis.text.x=element_text(angle=90))
plot.list[[i]] <- p
}
cowplot::plot_grid(plotlist=plot.list,
align="vh",
labels="AUTO",
label_size=12,
ncol=3)
7.3 - Interogate PCs
colnames(seurat@meta.data)
## [1] "orig.ident" "nCount_RNA" "nFeature_RNA"
## [4] "SampleId" "FlowCell" "CellLine"
## [7] "Condition" "SCBulk" "SampleID"
## [10] "percent.mt" "rmv" "ident"
## [13] "cxds_score" "bcds_score" "hybrid_score"
## [16] "hybrid_call" "decontX_contamination" "decontX_clusters"
## [19] "contam_status" "Filter" "S.Score"
## [22] "G2M.Score" "Phase" "old.ident"
## [25] "cc_dif"
seurat@meta.data$FlowCell[which(seurat@meta.data$FlowCell=="")]<-"Unknown"
categorical_var <- c("FlowCell","CellLine","Condition","SCBulk","Phase")
numeric_var <- c("nCount_RNA","nFeature_RNA","percent.mt","hybrid_score","decontX_contamination")
### Calculate PCs
mat<-as.data.frame(seurat@assays$RNA@data)
pca.list<-list(All=PCA(t(mat), ncp=10, graph=FALSE, scale=FALSE))
### Run ANOVA of categorical variables
anova.results <- anova_PCA(obj_name="All",
pca.df=pca.list[["All"]],
metadata=seurat@meta.data,
merge_ID="SampleId",
cat_var=categorical_var,
numPCs=10)
rownames(anova.results)<-anova.results$Factor
### Run GLM for continuous variables
glm.results <- glm_PCA(obj_name="All",
pca.list=pca.list[["All"]],
metadata=seurat@meta.data,
merge_ID="SampleId",
num_var=numeric_var,
numPCs=10)
rownames(glm.results)<-glm.results$Factor
### Melt results for plotting
results <- rbind(anova.results, glm.results)
df.melt <- melt(results)
df.melt <- cbind(df.melt, PC=as.numeric(gsub("Dim|_adj_pvalue", "", df.melt$variable)))
df.melt$ajp <- ifelse(df.melt$value <= 0.001, "<=0.001", df.melt$value)
df.melt$ajp <- ifelse(df.melt$ajp <= 0.01 & df.melt$ajp > 0.001, "<=0.01", df.melt$ajp)
df.melt$ajp <- ifelse(df.melt$ajp <= 0.05 & df.melt$ajp > 0.01, "<=0.05", df.melt$ajp)
df.melt$ajp <- ifelse(df.melt$ajp > 0.05, "NS", df.melt$ajp)
### Plot
ggplot(df.melt, aes(x=PC, y=Factor)) +
geom_tile(aes(fill=ajp),color="white") +
scale_fill_manual(values=c("<=0.001"="#4c1d4bff",
"<=0.01"="#bd1655ff",
"<=0.05"="#f47f58ff",
"NS"="#FAEBddff"),
name="FDR adjusted p-value") +
labs(title="ANOVA FDR-adjusted pvalues - Categorical Factors vs. PCs",
subtitle="Test") +
theme_bioskryb()
8 - Clustering & UMAP
### Find neighbors
seurat <- FindNeighbors(seurat, dims = 1:30)
### Find clusters
seurat <- FindClusters(seurat, algorithm=1,
resolution=1.0,
verbose=FALSE)
#names(seurat@meta.data) <- gsub(std_prefix, "Louvain2_", names(seurat@meta.data))
### Run UMAP
seurat <- RunUMAP(seurat, dims = 1:30, reduction.key="UMAP_")
### Plot PCA
groups<-c(categorical_var, "seurat_clusters","hybrid_call")
for (i in 1: length(groups)) {
print(
DimPlot(seurat, reduction = "pca",
group.by = groups[i],
# cols = ct.colors2,
# shuffle = TRUE,
repel=TRUE,
label=TRUE, #label.size = 1.5,label.box = TRUE,
pt.size = 2) +
ggtitle(paste("PCA of cells colored by",groups[i] )) +
theme_bioskryb()+
ylab("PC 2")+
xlab("PC 1")+
theme(axis.title.y = element_text(),
axis.title.x = element_text())
)
}
### Highlight those with high nFeature RNA
print("Highlight those with high nFeature RNA")
## [1] "Highlight those with high nFeature RNA"
tmp<-cbind(seurat@meta.data, seurat@reductions$pca@cell.embeddings[,c("PC_1","PC_2")])
tmp$high_feats<-"Low gene count"
tmp$high_feats[which(tmp$nFeature_RNA>nFeature_RNA.ul)]<-"High gene count"
ggplot(tmp,
aes(PC_1,PC_2,
colour=high_feats))+
geom_point(size=3)+
scale_color_manual("High nFeature_RNA",values=c("Low gene count"="white",
"High gene count"="black"))+
geom_point(aes(fill=FlowCell),
shape=21,
size=2.5) +
theme_bioskryb()+
xlab("PC 1")+
ylab("PC 2")+
ggtitle("PCA of cells colored by FlowCell")
### Plot UMAP
# for (i in 1: length(groups)) {
# print(
# DimPlot(seurat, reduction = "umap",
# group.by = groups[i],
# # cols = ct.colors2,
# # shuffle = TRUE,
# repel=TRUE,
# label=TRUE, #label.size = 1.5,label.box = TRUE,
# pt.size = 2) +
# ggtitle(paste("UMAP of cells colored by",groups[i] )) +
# theme_bioskryb()+
# ylab("UMAP 2")+
# xlab("UMAP 1")+
# theme(axis.title.y = element_text(),
# axis.title.x = element_text())
# )
# }
9 - Investigate nFeatures_RNA
p.list <- list()
tmp<-cbind(seurat@meta.data, seurat@reductions$pca@cell.embeddings[,c("PC_1","PC_2")])
mtmp<-melt(tmp[,c("SampleId","nFeature_RNA","nCount_RNA","percent.mt ")],
id.vars=c("SampleId"))
labs<-c("gene count (nFeatures_RNA)", "UMI count (nCount_RNA)")
for(i in 1:length(features[1:2])){
feat <- features[[i]]
p <- ggplot(data=tmp,
aes(x=PC_1, y=PC_2,
color=mtmp$value[which(mtmp$variable==feat)]))
p <- p + geom_point(size=1.5)
p <- p + theme_bioskryb()
if(feat == "percent.mt") {
p <- p + scale_color_viridis_c(option="rocket",
direction=-1)
} else {
p <- p + scale_color_viridis_c(option="rocket",
direction=-1,
labels=scales::label_scientific())
}
p <- p + theme(axis.title.y.right = element_blank(),
legend.position="right",
legend.title=element_blank()) +
ggtitle(paste("PCA of cells colored by",labs[i])) +
ylab("PC 2")+xlab("PC 1")
print(p)
}
10 - Final conclusions
The main findings are as follows: [1] Several of the ensembl gene IDs are not associated with gene symbols (>20k); [2] At the sample-level, no samples (CellLines) were flagged for removal upon performing sample-level QC; [3] at the cell-level, 100 cells were flagged for removal due to doublets (9 cells), high/low UMIs (27 cells), doublets/UMIs (1 cell), high percentage of mitochondria genes (12 cells), high percentage of mitochondria genes and UMIs (1 cell), high gene counts (47 cells), high UMI/gene counts (2), and high percentage of mitochondria genes and gene counts (1 cell). Use table(seurat@meta.data$Filter) for details; [4] the data was scaled by a scaling factor of 10,000, and logarithmically transformed by a natural log; [5] dimension reduction was performed with PCA, and confounding variables include doublet (hybrid_score), mitochondrial gene percentage (percent.mt), gene count (nFeature_RNA), cell cycle phase (Phase), CellLine, and FlowCell; and [6] finally, CellLine, FlowCell, Phase, and gene counts (nFeature_RNA) are dependent variables that affect clustering of cells.
end <- Sys.time()
That’s it! Spend some time looking through the report to determine next steps.
Analysis started: 2022-04-06 09:48:19
Analysis finished: 2022-04-06 09:51:09
R Session Information
Information about R, the OS and attached or loaded packagespander::pander(sessionInfo(), compact = FALSE)
R version 4.1.1 (2021-08-10)
Platform: x86_64-w64-mingw32/x64 (64-bit)
locale: LC_COLLATE=English_United States.1252, LC_CTYPE=English_United States.1252, LC_MONETARY=English_United States.1252, LC_NUMERIC=C and LC_TIME=English_United States.1252
attached base packages:
- parallel
- stats4
- stats
- graphics
- grDevices
- utils
- datasets
- methods
- base
other attached packages:
- paletteer(v.1.4.0)
- ggridges(v.0.5.3)
- celda(v.1.8.1)
- scds(v.1.8.0)
- microViz(v.0.9.0)
- SeuratObject(v.4.0.4)
- Seurat(v.4.1.0)
- ggpointdensity(v.0.1.0)
- nord(v.1.0.0)
- unikn(v.0.4.0)
- kableExtra(v.1.3.4)
- BiocParallel(v.1.26.2)
- dplyr(v.1.0.8)
- DESeq2(v.1.32.0)
- SummarizedExperiment(v.1.22.0)
- Biobase(v.2.52.0)
- MatrixGenerics(v.1.4.3)
- matrixStats(v.0.61.0)
- GenomicRanges(v.1.44.0)
- GenomeInfoDb(v.1.28.4)
- IRanges(v.2.26.0)
- S4Vectors(v.0.30.2)
- BiocGenerics(v.0.38.0)
- FactoMineR(v.2.4)
- plyr(v.1.8.6)
- data.table(v.1.14.2)
- ggplot2(v.3.3.5)
loaded via a namespace (and not attached):
- MCMCprecision(v.0.4.0)
- scattermore(v.0.8)
- tidyr(v.1.2.0)
- bit64(v.4.0.5)
- knitr(v.1.37)
- multimode(v.1.5)
- irlba(v.2.3.5)
- DelayedArray(v.0.18.0)
- rpart(v.4.1-15)
- KEGGREST(v.1.32.0)
- RCurl(v.1.98-1.6)
- doParallel(v.1.0.17)
- generics(v.0.1.2)
- ScaledMatrix(v.1.0.0)
- cowplot(v.1.1.1)
- RSQLite(v.2.2.10)
- RANN(v.2.6.1)
- combinat(v.0.0-8)
- future(v.1.24.0)
- bit(v.4.0.4)
- spatstat.data(v.2.1-2)
- webshot(v.0.5.2)
- xml2(v.1.3.3)
- httpuv(v.1.6.5)
- assertthat(v.0.2.1)
- viridis(v.0.6.2)
- xfun(v.0.29)
- jquerylib(v.0.1.4)
- evaluate(v.0.15)
- promises(v.1.2.0.1)
- fansi(v.1.0.2)
- assertive.files(v.0.0-2)
- igraph(v.1.2.11)
- DBI(v.1.1.2)
- geneplotter(v.1.70.0)
- htmlwidgets(v.1.5.4)
- spatstat.geom(v.2.3-2)
- purrr(v.0.3.4)
- ellipsis(v.0.3.2)
- RSpectra(v.0.16-0)
- ks(v.1.13.4)
- bookdown(v.0.24)
- permute(v.0.9-7)
- annotate(v.1.70.0)
- prismatic(v.1.1.0)
- sparseMatrixStats(v.1.4.2)
- deldir(v.1.0-6)
- vctrs(v.0.3.8)
- SingleCellExperiment(v.1.14.1)
- Cairo(v.1.5-14)
- ROCR(v.1.0-11)
- abind(v.1.4-5)
- cachem(v.1.0.6)
- RcppEigen(v.0.3.3.9.1)
- withr(v.2.4.3)
- sctransform(v.0.3.3)
- vegan(v.2.5-7)
- scran(v.1.20.1)
- mclust(v.5.4.9)
- goftest(v.1.2-3)
- svglite(v.2.1.0)
- cluster(v.2.1.2)
- ape(v.5.6-2)
- lazyeval(v.0.2.2)
- crayon(v.1.5.0)
- genefilter(v.1.74.1)
- edgeR(v.3.34.1)
- labeling(v.0.4.2)
- pkgconfig(v.2.0.3)
- vipor(v.0.4.5)
- nlme(v.3.1-152)
- diptest(v.0.76-0)
- rlang(v.1.0.1)
- globals(v.0.14.0)
- lifecycle(v.1.0.1)
- miniUI(v.0.1.1.1)
- dbscan(v.1.1-10)
- enrichR(v.3.0)
- phyloseq(v.1.38.0)
- rsvd(v.1.0.5)
- ggrastr(v.1.0.1)
- polyclip(v.1.10-0)
- lmtest(v.0.9-39)
- Matrix(v.1.3-4)
- Rhdf5lib(v.1.14.2)
- zoo(v.1.8-9)
- beeswarm(v.0.4.0)
- GlobalOptions(v.0.1.2)
- png(v.0.1-7)
- viridisLite(v.0.4.0)
- rjson(v.0.2.21)
- rootSolve(v.1.8.2.3)
- bitops(v.1.0-7)
- KernSmooth(v.2.23-20)
- rhdf5filters(v.1.4.0)
- pROC(v.1.18.0)
- pander(v.0.6.4)
- Biostrings(v.2.60.2)
- DelayedMatrixStats(v.1.14.3)
- blob(v.1.2.2)
- shape(v.1.4.6)
- stringr(v.1.4.0)
- parallelly(v.1.30.0)
- spatstat.random(v.2.1-0)
- gridGraphics(v.0.5-1)
- beachmat(v.2.8.1)
- scales(v.1.1.1)
- leaps(v.3.1)
- memoise(v.2.0.1)
- magrittr(v.2.0.2)
- ica(v.1.0-2)
- zlibbioc(v.1.38.0)
- compiler(v.4.1.1)
- dqrng(v.0.3.0)
- RColorBrewer(v.1.1-2)
- clue(v.0.3-60)
- fitdistrplus(v.1.1-6)
- cli(v.3.1.1)
- ade4(v.1.7-18)
- XVector(v.0.32.0)
- listenv(v.0.8.0)
- patchwork(v.1.1.1)
- pbapply(v.1.5-0)
- MASS(v.7.3-54)
- mgcv(v.1.8-36)
- tidyselect(v.1.1.1)
- stringi(v.1.7.6)
- highr(v.0.9)
- yaml(v.2.2.2)
- BiocSingular(v.1.8.1)
- assertive.numbers(v.0.0-2)
- locfit(v.1.5-9.4)
- ggrepel(v.0.9.1)
- grid(v.4.1.1)
- sass(v.0.4.0)
- tools(v.4.1.1)
- future.apply(v.1.8.1)
- circlize(v.0.4.14)
- rstudioapi(v.0.13)
- bluster(v.1.2.1)
- foreach(v.1.5.2)
- metapod(v.1.0.0)
- gridExtra(v.2.3)
- assertive.types(v.0.0-3)
- scatterplot3d(v.0.3-41)
- rmdformats(v.1.0.3)
- farver(v.2.1.0)
- Rtsne(v.0.15)
- digest(v.0.6.27)
- FNN(v.1.1.3)
- pracma(v.2.3.8)
- shiny(v.1.7.1)
- Rcpp(v.1.0.8)
- scuttle(v.1.2.1)
- later(v.1.3.0)
- RcppAnnoy(v.0.0.19)
- httr(v.1.4.2)
- AnnotationDbi(v.1.54.1)
- ComplexHeatmap(v.2.8.0)
- assertive.properties(v.0.0-4)
- colorspace(v.2.0-2)
- rvest(v.1.0.2)
- XML(v.3.99-0.8)
- tensor(v.1.5)
- reticulate(v.1.24)
- splines(v.4.1.1)
- statmod(v.1.4.36)
- uwot(v.0.1.11)
- rematch2(v.2.1.2)
- spatstat.utils(v.2.3-0)
- scater(v.1.20.1)
- multtest(v.2.50.0)
- xgboost(v.1.5.2.1)
- plotly(v.4.10.0)
- systemfonts(v.1.0.4)
- xtable(v.1.8-4)
- jsonlite(v.1.7.3)
- assertive.base(v.0.0-9)
- rcartocolor(v.2.0.0)
- flashClust(v.1.01-2)
- R6(v.2.5.1)
- pillar(v.1.7.0)
- htmltools(v.0.5.2)
- mime(v.0.12)
- glue(v.1.6.1)
- fastmap(v.1.1.0)
- DT(v.0.20)
- BiocNeighbors(v.1.10.0)
- codetools(v.0.2-18)
- mvtnorm(v.1.1-3)
- utf8(v.1.2.2)
- lattice(v.0.20-44)
- bslib(v.0.3.1)
- spatstat.sparse(v.2.1-0)
- tibble(v.3.1.6)
- ggbeeswarm(v.0.6.0)
- multipanelfigure(v.2.1.2)
- leiden(v.0.3.9)
- magick(v.2.7.3)
- limma(v.3.48.3)
- survival(v.3.2-11)
- rmarkdown(v.2.11)
- biomformat(v.1.22.0)
- munsell(v.0.5.0)
- GetoptLong(v.1.0.5)
- rhdf5(v.2.36.0)
- GenomeInfoDbData(v.1.2.6)
- iterators(v.1.0.14)
- reshape2(v.1.4.4)
- gtable(v.0.3.0)
- spatstat.core(v.2.4-0)