knitr::opts_knit$set(root.dir = "/research/labs/neurology/fryer/m214960/10x_HIVE_PIPseq/10x/scripts/R/single_cell_with_choroid")
library(data.table) # fread()
library(dplyr) # lef_join()
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2) # ggplot()
library(gridExtra) # grid.arrange
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(harmony) # RunHarmony
## Loading required package: Rcpp
library(plotly) # plot_ly()
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(rtracklayer) # import()
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:plotly':
##
## rename
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following objects are masked from 'package:data.table':
##
## first, second
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:plotly':
##
## slice
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:data.table':
##
## shift
## Loading required package: GenomeInfoDb
library(Seurat) # CreateSeuratObject()
## Attaching SeuratObject
library(stringr)
# variables
sample_order <- c("sc.WT","sc.APP")
sample_colors <- c("cyan1","cornflowerblue")
# thresholds
nCount.min <- 500
nCount.max <- 15000
nFeature.min <- 200
complexity.cutoff <- 0.85
mt.cutoff <- 10
ribo.cutoff <- 50
hb.cutoff <- 1
ttr.cutoff <- 15
# set seed
set.seed(8)
These functions with help simultaneously save plots as a png and pdf.
saveToPDF <- function(...) {
d = dev.copy(pdf,...)
dev.off(d)
}
saveToPNG <- function(...) {
d = dev.copy(png,...)
dev.off(d)
}
prefix <- "../../../counts/"
suffix <- "/outs/filtered_feature_bc_matrix.h5"
if (file.exists("../../../rObjects/mouse_cells_merged_h5.rds")) {
mouse <- readRDS("../../../rObjects/mouse_cells_merged_h5.rds")
} else {
# individual sample objects
wt <- CreateSeuratObject(Read10X_h5(paste0(prefix,"x10_sc_WT",suffix)))
app <- CreateSeuratObject(Read10X_h5(paste0(prefix,"x10_sc_APP",suffix)))
# merge objects
mouse <- merge(x = wt,
y = app,
add.cell.ids = c("sc.WT","sc.APP"),
project = "10x Genomics scRNAseq")
# cleanup and save
remove(wt,app)
saveRDS(mouse, "../../../rObjects/mouse_cells_merged_h5.rds")
}
# preview
mouse
## An object of class Seurat
## 33885 features across 15283 samples within 1 assay
## Active assay: RNA (33885 features, 0 variable features)
# read in annotation file
if (file.exists("../../../rObjects/annotation.rds")) {
genes <- readRDS("../../../rObjects/annotation.rds")
} else {
gtf.file <- "../../../refs/genes.gtf"
genes <- rtracklayer::import(gtf.file)
genes <- as.data.frame(genes)
genes <- genes[genes$type == "gene",]
saveRDS(genes, "../../../rObjects/annotation.rds")
}
nCount_RNA = total number of transcripts (UMIs) in a single cell nFeature_RNA = number of unique transcripts (features)
# sample
barcodes <- rownames(mouse@meta.data)
samples <- str_match(barcodes, "(s[cn].[WTAP]+)_[ACGT]+")[,2]
table(samples)
## samples
## sc.APP sc.WT
## 5971 9312
mouse$sample <- factor(samples, levels = sample_order)
# cell.complexity
mouse$cell.complexity <- log10(mouse$nFeature_RNA) / log10(mouse$nCount_RNA)
# percent.mt
mt.genes <- genes[genes$seqnames == "MT", 12]
mouse$percent.mt <- PercentageFeatureSet(mouse, features = mt.genes)
mt.genes
## [1] "mt-Nd1" "mt-Nd2" "mt-Co1" "mt-Co2" "mt-Atp8" "mt-Atp6" "mt-Co3"
## [8] "mt-Nd3" "mt-Nd4l" "mt-Nd4" "mt-Nd5" "mt-Nd6" "mt-Cytb"
# percent.ribo
# ribosomal proteins begin with 'Rps' or 'Rpl' in this annotation file
# mitochondrial ribosomes start with 'Mrps' or 'Mrpl'
gene.names <- genes$gene_name
ribo <- gene.names[grep("^Rp[sl]", gene.names)]
mt.ribo <- gene.names[grep("^Mrp[sl]", gene.names)]
ribo.combined <- c(mt.ribo,ribo)
mouse$percent.ribo.protein <- PercentageFeatureSet(mouse, features = ribo.combined)
ribo.combined
## [1] "Mrpl44" "Mrps14" "Mrpl30" "Mrpl15" "Mrps9"
## [6] "Mrps2" "Mrps5" "Mrpl41" "Mrps26" "Mrpl47"
## [11] "Mrps21" "Mrpl24" "Mrps28" "Mrpl9" "Mrpl37"
## [16] "Mrpl20" "Mrps15" "Mrpl50" "Mrpl1" "Mrpl33"
## [21] "Mrps17" "Mrps18c" "Mrpl35" "Mrps25" "Mrpl19"
## [26] "Mrpl51" "Mrps35" "Mrps33" "Mrpl53" "Mrpl17"
## [31] "Mrpl46" "Mrps11" "Mrpl48" "Mrpl23" "Mrps12"
## [36] "Mrpl42" "Mrpl54" "Mrps31" "Mrpl34" "Mrpl57"
## [41] "Mrps16" "Mrpl52" "Mrpl4" "Mrps22" "Mrpl3"
## [46] "Mrps7" "Mrpl38" "Mrpl12" "Mrpl22" "Mrps23"
## [51] "Mrps24" "Mrpl58" "Mrpl27" "Mrpl10" "Mrpl45"
## [56] "Mrpl55" "Mrps27" "Mrpl36" "Mrps36" "Mrps30"
## [61] "Mrpl32" "Mrpl13" "Mrps6" "Mrpl40" "Mrpl39"
## [66] "Mrps34" "Mrps18b" "Mrpl18" "Mrpl14" "Mrps18a"
## [71] "Mrpl2" "Mrps10" "Mrpl28" "Mrpl11" "Mrpl21"
## [76] "Mrpl43" "Mrpl16" "Mrpl49" "Rpl7" "Rps6kc1"
## [81] "Rpl31" "Rpl37a" "Rps21" "Rpl12" "Rpl7a"
## [86] "Rpl35" "Rpl39" "Rpl36a" "Rps6ka3" "Rpl10"
## [91] "Rps4x" "Rps6ka6" "Rps3a1" "Rpl34" "Rpl22l1"
## [96] "Rps27" "Rpl22" "Rps6ka1" "Rps6" "Rpl11"
## [101] "Rps8" "Rps20" "Rpl5" "Rpl21" "Rplp0"
## [106] "Rpl9" "Rpl6" "Rpl32" "Rps16" "Rpl27a"
## [111] "Rps9" "Rps11" "Rpl13a" "Rpl28" "Rps19"
## [116] "Rps17" "Rps3" "Rps13" "Rpl18" "Rps15a"
## [121] "Rps5" "Rplp2" "Rpl41" "Rps26" "Rps12"
## [126] "Rps15" "Rpl18a" "Rps23rg1" "Rpl13" "Rpl36a-ps1"
## [131] "Rpl15" "Rps24" "Rpl29" "Rpl10-ps3" "Rps25"
## [136] "Rplp1" "Rpl4" "Rps27l" "Rps27rt" "Rpsa"
## [141] "Rpl14" "Rpl23" "Rpl19" "Rps27a" "Rpl27"
## [146] "Rpl23a" "Rpl26" "Rps6kb1" "Rpl38" "Rps18-ps4"
## [151] "Rps18-ps6" "Rps18-ps5" "Rps23" "Rps6kl1" "Rps7"
## [156] "Rpl10l" "Rps29" "Rpl36al" "Rps6ka5" "Rplp2-ps1"
## [161] "Rpl37" "Rpl30" "Rpl8" "Rpl3" "Rps19bp1"
## [166] "Rpl24" "Rpl39l" "Rpl35a" "Rpl36-ps4" "Rpl36"
## [171] "Rps2" "Rpl3l" "Rps6ka2" "Rps10" "Rpl10a"
## [176] "Rpl7l1" "Rps28" "Rps18" "Rps14" "Rpl17"
## [181] "Rpl9-ps6" "Rps6kb2" "Rps6ka4"
# percent.hb
# percent.hb - hemoglobin proteins begin with 'Hbb' or 'Hba' for mouse
hb.genes <- gene.names[grep("^Hb[ba]-", gene.names)]
mouse$percent.hb <- PercentageFeatureSet(mouse, features = hb.genes)
hb.genes
## [1] "Hbb-bh2" "Hbb-bh1" "Hbb-y" "Hbb-bt" "Hbb-bs" "Hba-x" "Hba-a1"
## [8] "Hba-a2"
# Percent Ttr
mouse$percent.choroid <- PercentageFeatureSet(mouse, features = c("Ttr","Enpp2"))
# Visualize the number of cell counts per sample
data <- as.data.frame(table(mouse$sample))
colnames(data) <- c("sample","frequency")
ncells1 <- ggplot(data, aes(x = sample, y = frequency, fill = sample)) +
geom_col() +
theme_classic() +
geom_text(aes(label = frequency),
position=position_dodge(width=0.9),
vjust=-0.25) +
scale_fill_manual(values = sample_colors) +
scale_y_continuous(breaks = seq(0,10000, by = 2000), limits = c(0,10000)) +
ggtitle("Raw: cells per sample") +
theme(legend.position = "none") +
theme(axis.text.x = element_text(angle = 45, hjust=1))
ncells1
# Visualize nCount_RNA
den1 <- ggplot(mouse@meta.data,
aes(color = sample,
x = nCount_RNA,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_log10() +
scale_color_manual(values = sample_colors) +
scale_fill_manual(values = sample_colors) +
xlab("nCount_RNA") +
ylab("Density") +
theme(legend.position = "none") +
geom_vline(xintercept = nCount.min) +
geom_vline(xintercept = nCount.max) +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Visualize nFeature_RNA
den2 <- ggplot(mouse@meta.data,
aes(color = sample,
x = nFeature_RNA,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_log10() +
scale_color_manual(values = sample_colors) +
theme(legend.position = "none") +
scale_fill_manual(values = sample_colors) +
xlab("nFeature_RNA") +
ylab("Density") +
geom_vline(xintercept = nFeature.min) +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Visualize cell complexity
# Quality cells are usually above 0.85
den3 <- ggplot(mouse@meta.data,
aes(color = sample,
x = cell.complexity,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_color_manual(values = sample_colors) +
theme(legend.position = "none") +
scale_fill_manual(values = sample_colors) +
xlab("Cell Complexity (log10(nFeature/nCount))") +
ylab("Density") +
geom_vline(xintercept = complexity.cutoff) +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Visualize percent.mt
den4 <- ggplot(mouse@meta.data,
aes(color = sample,
x = percent.mt,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_continuous(n.breaks = 4) +
geom_vline(xintercept = mt.cutoff) +
scale_color_manual(values = sample_colors) +
theme(legend.position = "none") +
scale_fill_manual(values = sample_colors) +
xlab("% Mitochondrial Genes") +
ylab("Density") +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Visualize percent.ribo.protein
den5 <- ggplot(mouse@meta.data,
aes(color = sample,
x = percent.ribo.protein,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_log10() +
geom_vline(xintercept = ribo.cutoff) +
scale_color_manual(values = sample_colors) +
theme(legend.position = "none") +
scale_fill_manual(values = sample_colors) +
xlab("% Ribosomal Protein Genes") +
ylab("Density") +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Visualize percent.hb
den6 <- ggplot(mouse@meta.data,
aes(color = sample,
x = percent.hb,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_log10() +
geom_vline(xintercept = hb.cutoff) +
scale_color_manual(values = sample_colors) +
theme(legend.position = "none") +
scale_fill_manual(values = sample_colors) +
xlab("% Hemoglobin Genes") +
ylab("Density") +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Arrange graphs in grid
plots <- list(den1,den2,den3,den4,den5,den6)
layout <- rbind(c(1,4),c(2,5),c(3,6))
grid <- grid.arrange(grobs = plots, layout_matrix = layout)
# nFeature, nCount, and cell.complexity violins
v1 <- VlnPlot(mouse,
features = c("nFeature_RNA", "nCount_RNA","cell.complexity"),
ncol = 3,
group.by = 'sample',
cols = sample_colors,
pt.size = 0)
v1
# percent violins
v2 <- VlnPlot(mouse,
features = c("percent.mt","percent.ribo.protein","percent.hb"),
ncol = 3,
group.by = 'sample',
cols = sample_colors,
pt.size = 0)
v2
s1 <- ggplot(
mouse@meta.data,
aes(x = nCount_RNA, y = nFeature_RNA, color = percent.mt)) +
geom_point() +
geom_smooth(method = "lm") +
scale_x_log10() +
scale_y_log10() +
theme_classic() +
geom_vline(xintercept = nCount.min) +
geom_hline(yintercept = nFeature.min) +
facet_wrap(~sample) +
scale_colour_gradient(low = "gray90", high = "black", limits =c(0,100))
s1
## `geom_smooth()` using formula 'y ~ x'
s2 <- FeatureScatter(mouse,
feature1 = "nCount_RNA",
feature2 = "percent.mt",
group.by = 'sample',
cols = sample_colors,
shuffle = TRUE)
s2
# filter
mouse.filtered <- subset(mouse,
subset = (nCount_RNA > nCount.min) &
(nCount_RNA < nCount.max) &
(nFeature_RNA > nFeature.min) &
(cell.complexity > complexity.cutoff) &
(percent.mt < mt.cutoff) &
(percent.ribo.protein < ribo.cutoff) &
(percent.hb < hb.cutoff))
# print cells removed
print(paste0(dim(mouse)[2] - dim(mouse.filtered)[2]," cells removed"))
## [1] "4650 cells removed"
Remove lowly expressed genes. We will keep genes that have at least 1 count in 10 cells.
# filter genes
counts <- GetAssayData(object = mouse.filtered, slot = "counts")
nonzero <- counts > 0 # produces logical
keep <- Matrix::rowSums(nonzero) >= 10 # sum the true/false
counts.filtered <- counts[keep,] # keep certain genes
# overwrite mouse.filtered
mouse.filtered <- CreateSeuratObject(counts.filtered,
meta.data = mouse.filtered@meta.data)
# print features removed
print(paste0(dim(counts)[1] - dim(counts.filtered)[1], " features removed"))
## [1] "14599 features removed"
Multiple passes were made to assess whether mitochondrial, ribosomal, and immunoglobulin genes should be removed. Ultimately, removal of these genes enhanced clustering.
# remove mt.genes and ribo.combined
counts <- GetAssayData(object = mouse.filtered, slot = "counts")
keep <- !rownames(counts) %in% c(mt.genes,ribo.combined)
counts.filtered <- counts[keep,]
# overwrite mouse.filtered
mouse.filtered <- CreateSeuratObject(counts.filtered,
meta.data = mouse.filtered@meta.data)
# print features removed
print(paste0(dim(counts)[1] - dim(counts.filtered)[1], " features removed"))
## [1] "192 features removed"
# cleanup data
remove(mouse,counts,counts.filtered,nonzero,data)
# Visualize the number of cell counts per sample
data <- as.data.frame(table(mouse.filtered$sample))
colnames(data) <- c("sample","frequency")
ncells2 <- ggplot(data, aes(x = sample, y = frequency, fill = sample)) +
geom_col() +
theme_classic() +
geom_text(aes(label = frequency),
position=position_dodge(width=0.9),
vjust=-0.25) +
scale_fill_manual(values = sample_colors) +
scale_y_continuous(breaks = seq(0,10000, by = 2000), limits = c(0,10000)) +
ggtitle("Filtered: cells per sample") +
theme(legend.position = "none") +
theme(axis.text.x = element_text(angle = 45, hjust=1))
# Arrange graphs in grid
plots <- list(ncells1,ncells2)
layout <- cbind(c(1),c(2))
grid <- grid.arrange(grobs = plots, layout_matrix = layout)
# Visualize nCount_RNA
den1 <- ggplot(mouse.filtered@meta.data,
aes(color = sample,
x = nCount_RNA,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_log10() +
scale_color_manual(values = sample_colors) +
scale_fill_manual(values = sample_colors) +
xlab("nCount_RNA") +
ylab("Density") +
theme(legend.position = "none") +
geom_vline(xintercept = nCount.min) +
geom_vline(xintercept = nCount.max) +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Visualize nFeature_RNA
den2 <- ggplot(mouse.filtered@meta.data,
aes(color = sample,
x = nFeature_RNA,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_log10() +
scale_color_manual(values = sample_colors) +
theme(legend.position = "none") +
scale_fill_manual(values = sample_colors) +
xlab("nFeature_RNA") +
ylab("Density") +
geom_vline(xintercept = nFeature.min) +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Visualize cell complexity
# Quality cells are usually above 0.85
den3 <- ggplot(mouse.filtered@meta.data,
aes(color = sample,
x = cell.complexity,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_color_manual(values = sample_colors) +
theme(legend.position = "none") +
scale_fill_manual(values = sample_colors) +
xlab("Cell Complexity (log10(nFeature/nCount))") +
ylab("Density") +
geom_vline(xintercept = complexity.cutoff) +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Visualize percent.mt
den4 <- ggplot(mouse.filtered@meta.data,
aes(color = sample,
x = percent.mt,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_continuous(n.breaks = 4) +
geom_vline(xintercept = mt.cutoff) +
scale_color_manual(values = sample_colors) +
theme(legend.position = "none") +
scale_fill_manual(values = sample_colors) +
xlab("% Mitochondrial Genes") +
ylab("Density") +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Visualize percent.ribo.protein
den5 <- ggplot(mouse.filtered@meta.data,
aes(color = sample,
x = percent.ribo.protein,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_log10() +
geom_vline(xintercept = ribo.cutoff) +
scale_color_manual(values = sample_colors) +
theme(legend.position = "none") +
scale_fill_manual(values = sample_colors) +
xlab("% Ribosomal Protein Genes") +
ylab("Density") +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Visualize percent.hb
den6 <- ggplot(mouse.filtered@meta.data,
aes(color = sample,
x = percent.hb,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_log10() +
geom_vline(xintercept = hb.cutoff) +
scale_color_manual(values = sample_colors) +
theme(legend.position = "none") +
scale_fill_manual(values = sample_colors) +
xlab("% Hemoglobin Genes") +
ylab("Density") +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Arrange graphs in grid
plots <- list(den1,den2,den3,den4,den5,den6)
layout <- rbind(c(1,4),c(2,5),c(3,6))
grid <- grid.arrange(grobs = plots, layout_matrix = layout)
# nFeature, nCount, and cell.complexity violins
v3 <- VlnPlot(mouse.filtered,
features = c("nFeature_RNA", "nCount_RNA","cell.complexity"),
ncol = 3,
group.by = 'sample',
cols = sample_colors,
pt.size = 0)
v3
# percent violins
v4 <- VlnPlot(mouse.filtered,
features = c("percent.mt","percent.ribo.protein","percent.hb"),
ncol = 3,
group.by = 'sample',
cols = sample_colors,
pt.size = 0)
v4
s3 <- ggplot(
mouse.filtered@meta.data,
aes(x = nCount_RNA, y = nFeature_RNA, color = percent.mt)) +
geom_point() +
geom_smooth(method = "lm") +
scale_x_log10() +
scale_y_log10() +
theme_classic() +
geom_vline(xintercept = nCount.min) +
geom_hline(yintercept = nFeature.min) +
facet_wrap(~sample) +
scale_colour_gradient(low = "gray90", high = "black", limits =c(0,100))
s3
## `geom_smooth()` using formula 'y ~ x'
s4 <- FeatureScatter(mouse.filtered,
feature1 = "nCount_RNA",
feature2 = "percent.mt",
group.by = 'sample',
cols = sample_colors,
shuffle = TRUE)
s4
# Visualize the distribution of genes detected per cell via boxplot
b1 <- ggplot(mouse.filtered@meta.data,
aes(x = sample,
y = log10(nFeature_RNA),
fill=sample)) +
geom_boxplot() +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust = 0.5, face="bold")) +
ggtitle("Unique Genes / Cell / Sample") +
scale_color_manual(values = sample_colors) +
scale_fill_manual(values = sample_colors) +
xlab("Sample")
b1
df <- data.frame(row.names = rownames(mouse.filtered))
df$rsum <- rowSums(x = mouse.filtered, slot = "counts")
df$gene_name <- rownames(df)
df <- df[order(df$rsum, decreasing = TRUE),]
head(df, 30)
## rsum gene_name
## Malat1 6509603 Malat1
## Cst3 1303525 Cst3
## Ttr 715817 Ttr
## Apoe 658170 Apoe
## Fth1 337385 Fth1
## Tmsb4x 286070 Tmsb4x
## Plp1 262627 Plp1
## Mt1 232208 Mt1
## Ckb 198068 Ckb
## Slc1a2 196143 Slc1a2
## Sparcl1 192761 Sparcl1
## Actb 185472 Actb
## Atp1a2 177356 Atp1a2
## Mt3 170888 Mt3
## Ptn 140019 Ptn
## Tpt1 133329 Tpt1
## Itm2b 129289 Itm2b
## Dbi 122474 Dbi
## Ppia 122436 Ppia
## Ptgds 122222 Ptgds
## Clu 122078 Clu
## Plpp3 120210 Plpp3
## Glul 116625 Glul
## Cd81 116025 Cd81
## Eef1a1 115613 Eef1a1
## Bsg 115261 Bsg
## Fau 114840 Fau
## C1qa 114611 C1qa
## Aldoc 114445 Aldoc
## C1qb 107951 C1qb
# log normalization
mouse.phase <- NormalizeData(mouse.filtered)
# load mouse cell cycle markers
phase.markers <- read.delim("../../../refs/mouse_cell_cycle.txt")
colnames(phase.markers)[2] <- "gene_id"
phase.markers <- left_join(phase.markers, genes[,c(10,12)], by = "gene_id")
g2m <- phase.markers[phase.markers$phase == "G2/M", 4]
g2m
## [1] "Ube2c" "Lbr" "Ctcf" "Cdc20" "Cbx5" "Kif11" "Anp32e"
## [8] "Birc5" "Cdk1" "Tmpo" "Hmmr" "Jpt1" "Pimreg" "Aurkb"
## [15] "Top2a" "Gtse1" "Rangap1" "Cdca3" "Ndc80" "Kif20b" "Cenpf"
## [22] "Nek2" "Nuf2" "Nusap1" "Bub1" "Tpx2" "Aurka" "Ect2"
## [29] "Cks1b" "Kif2c" "Cdca8" "Cenpa" "Mki67" "Ccnb2" "Kif23"
## [36] "Smc4" "G2e3" "Tubb4b" "Anln" "Tacc3" "Dlgap5" "Ckap2"
## [43] "Ncapd2" "Ttk" "Ckap5" "Cdc25c" "Hjurp" "Cenpe" "Ckap2l"
## [50] "Cdca2" "Hmgb2" "Cks2" "Psrc1" "Gas2l3"
s <- phase.markers[phase.markers$phase == "S", 4]
s
## [1] "Cdc45" "Uhrf1" "Mcm2" "Slbp" "Mcm5" "Pola1"
## [7] "Gmnn" "Cdc6" "Rrm2" "Atad2" "Dscc1" "Mcm4"
## [13] "Chaf1b" "Rfc2" "Msh2" "Fen1" "Hells" "Prim1"
## [19] "Tyms" "Mcm6" "Wdr76" "Rad51" "Pcna" "Ccne2"
## [25] "Casp8ap2" "Usp1" "Nasp" "Rpa2" "Ung" "Rad51ap1"
## [31] "Blm" "Pold3" "Rrm1" "Cenpu" "Gins2" "Tipin"
## [37] "Brip1" "Dtl" "Exo1" "Ubr7" "Clspn" "E2f8"
## [43] "Cdca7"
# write table
write.table(phase.markers,
"../../../results/single_cell_with_choroid/cellcycle/mouse_cell_cycle_phase_markers.tsv",
quote = FALSE, sep = "\t", row.names = FALSE)
# score cells
mouse.phase <- CellCycleScoring(mouse.phase,
g2m.features = g2m,
s.features = s,
set.ident = TRUE)
## Warning: The following features are not present in the object: Bub1, Cdc25c, not
## searching for symbol synonyms
mouse.filtered$phase <- mouse.phase$Phase
Find the top variable genes before performing PCA. The data is scaled since highly expressed genes usually are the most variable. This will make the mean expression zero and the variance of each gene across cells is one.
# Identify the most variable genes
mouse.phase <- FindVariableFeatures(mouse.phase, verbose = FALSE)
# Preview top 40
head(VariableFeatures(mouse.phase), 40)
## [1] "Cd74" "Vtn" "Ccl5" "Crym" "Il1b" "H2-Aa"
## [7] "H2-Eb1" "H2-Ab1" "Mgp" "S100a8" "Ccl4" "S100a9"
## [13] "Tmem212" "Ccdc153" "Ccl3" "Rgs5" "Dcn" "Snhg11"
## [19] "Lyz2" "Spp1" "Meg3" "Cst7" "Acta2" "Ccl2"
## [25] "Pdgfra" "Ccl12" "Ccnd2" "Plac9" "Apod" "Ptgds"
## [31] "Cox8b" "Serpine1" "Tnr" "Cxcl12" "Pf4" "Dlx6os1"
## [37] "Sox11" "Lpl" "Tagln" "Ndnf"
# Scale the counts
mouse.phase <- ScaleData(mouse.phase)
## Centering and scaling data matrix
If the PCA plots for each phase do not look similar you may want to regress out variation due to cell cycle phase. Otherwise, nothing needs to be done.
# Run PCA
mouse.phase <- RunPCA(mouse.phase, nfeatures.print = 10)
## PC_ 1
## Positive: Car2, Aplp1, Fez1, Csrp1, Syt11, Kif1b, Cryab, Ptprd, Mapt, Sox2ot
## Negative: Cx3cr1, Laptm5, Csf1r, C1qc, Selplg, C1qa, Ly86, Ctss, Siglech, P2ry12
## PC_ 2
## Positive: Gm21984, Mag, Ugt8a, Ermn, Cldn11, Mog, Edil3, Tspan2, Trf, Tubb4a
## Negative: Slco1c1, Id3, Tsc22d1, Pltp, Utrn, Id1, Bsg, Cldn5, Ly6c1, Flt1
## PC_ 3
## Positive: Cldn5, Flt1, Adgrl4, Abcb1a, Itm2a, Cxcl12, Slco1a4, Ptprb, Ly6a, Ly6c1
## Negative: Chchd10, Slc1a3, Aqp4, Fjx1, Cspg5, Slc6a11, Tubb2b, Id4, Timp4, Slc7a11
## PC_ 4
## Positive: Rtn1, Stmn3, Gng3, Vcan, Meg3, Fxyd6, Dpysl3, Nxph1, Adarb2, Nrxn3
## Negative: Calml4, 2900040C04Rik, Krt18, Ttr, Clic6, Ecrg4, Cab39l, Rsph1, Fam81a, Kl
## PC_ 5
## Positive: Nsg2, Meg3, Fxyd6, Dpysl3, Ly6h, Stmn3, Gng3, Rtn1, Adarb2, Calml4
## Negative: Slc1a3, Aqp4, Slc6a11, Fjx1, Mfge8, Slc7a11, Timp4, Csrp1, Itih3, Lcat
# Plot
pca1 <- DimPlot(mouse.phase,
reduction = "pca",
group.by = "Phase",
split.by = "Phase")
pca1
pca2 <- DimPlot(mouse.phase,
reduction = "pca",
group.by = "Phase",
shuffle = TRUE)
pca2
data <- as.data.frame(table(mouse.phase$Phase))
colnames(data) <- c("Phase","frequency")
ncells3 <- ggplot(data, aes(x = Phase, y = frequency, fill = Phase)) +
geom_col() +
theme_classic() +
geom_text(aes(label = frequency),
position=position_dodge(width=0.9),
vjust=-0.25) +
ggtitle("Cells per phase")
ncells3
percent.phase <- mouse.phase@meta.data %>%
group_by(sample, Phase) %>%
dplyr::count() %>%
group_by(sample) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x = sample, y = percent, fill = Phase)) +
geom_col() +
ggtitle("Percentage of phase per sample")
percent.phase
Evaluating effects of mitochondrial expression
# Check quartile values and store
summary(mouse.phase$percent.mt)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 2.808 4.453 4.600 6.350 9.993
first <- as.numeric(summary(mouse.phase$percent.mt)[2])
mean <- as.numeric(summary(mouse.phase$percent.mt)[4])
third <- as.numeric(summary(mouse.phase$percent.mt)[5])
# Turn percent.mt into factor based on quartile value
mouse.phase@meta.data$mito.factor <- cut(mouse.phase@meta.data$percent.mt,
breaks=c(-Inf, first, mean, third, Inf),
labels=c("Low","Medium","Medium high", "High"))
mouse.filtered[["mito.factor"]] <- mouse.phase$mito.factor
# Plot
pca1 <- DimPlot(mouse.phase,
reduction = "pca",
group.by = "mito.factor",
split.by = "mito.factor")
pca1
pca2 <- DimPlot(mouse.phase,
reduction = "pca",
group.by = "mito.factor",
shuffle = TRUE)
pca2
percent <- mouse.phase@meta.data %>%
group_by(sample, mito.factor) %>%
dplyr::count() %>%
group_by(sample) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x = sample, y = percent, fill = mito.factor)) +
geom_col() +
ggtitle("Mitochondrial fraction per sample")
percent
Now, we can use the SCTransform method as a more accurate method of normalizing, estimating the variance of the raw filtered data, and identifying the most variable genes. Variation in sequencing depth (total nCount_RNA per cell) is normalized using a regularized negative binomial model.
Sctransform automatically accounts for cellular sequencing depth by regressing out sequencing depth (nUMIs). However, if there are other sources of uninteresting variation identified in the data during the exploration steps we can also include these. We observed little to no effect due to cell cycle phase or percent.mito and so we chose not to regress this out of our data.
Since we have 8 samples in our dataset we want to keep them as separate objects and transform them as that is what is required for integration.
# split
mouse.split <- SplitObject(mouse.filtered, split.by = "sample")
mouse.split
## $sc.WT
## An object of class Seurat
## 19094 features across 6937 samples within 1 assay
## Active assay: RNA (19094 features, 0 variable features)
##
## $sc.APP
## An object of class Seurat
## 19094 features across 3696 samples within 1 assay
## Active assay: RNA (19094 features, 0 variable features)
# cleanup
remove(mouse.phase, mouse.filtered)
We will use a ‘for loop’ to run the SCTransform() on each sample, and regress out mitochondrial expression by specifying in the vars.to.regress argument of the SCTransform() function.
Before we run this for loop, we know that the output can generate large R objects/variables in terms of memory. If we have a large dataset, then we might need to adjust the limit for allowable object sizes within R (Default is 500 * 1024 ^ 2 = 500 Mb).
options(future.globals.maxSize = 4000 * 1024^5)
for (i in 1:length(mouse.split)) {
print(paste0("Sample ", i))
mouse.split[[i]] <- SCTransform(mouse.split[[i]], verbose = FALSE)
}
## [1] "Sample 1"
## [1] "Sample 2"
mouse.split
## $sc.WT
## An object of class Seurat
## 38050 features across 6937 samples within 2 assays
## Active assay: SCT (18956 features, 3000 variable features)
## 1 other assay present: RNA
##
## $sc.APP
## An object of class Seurat
## 37655 features across 3696 samples within 2 assays
## Active assay: SCT (18561 features, 3000 variable features)
## 1 other assay present: RNA
Condition-specific clustering of cells indicates that we need to integrate the cells across conditions to ensure that cells of the same cell type cluster together.
To integrate, use the shared highly variable genes from each condition identified using SCTransform. Then, integrate conditions to overlay cells that are similar or have a “common set of biological features” between groups.
Now, using our SCTransform object as input, let’s perform the integration across conditions.
First, we need to specify that we want to use all of the 3000 most variable genes identified by SCTransform for the integration. By default, this function selects the top 2000 genes.
# Choose the features to use when integrating multiple datasets
# will use nfeatures as 3000 as defined by running SCTransform above
var.features <- SelectIntegrationFeatures(object.list = mouse.split,
nfeatures = 3000)
# merge the mouse
mouse.merged <- merge(x = mouse.split[[1]],
y = c(mouse.split[[2]]))
# define the variable features
VariableFeatures(mouse.merged) <- var.features
# run PCA on the merged object
mouse.merged <- RunPCA(object = mouse.merged, assay = "SCT")
# harmony dimensional reduction
mouse.integrated <- RunHarmony(object = mouse.merged,
group.by.vars = "sample",
assay.use = "SCT",
reduction = "pca",
plot_convergence = TRUE)
# save and cleanup
saveRDS(mouse.integrated, "../../../rObjects/mouse_cells_integrated.rds")
remove(mouse.split, var.features, mouse.merged)
Top 20 variable features
top20 <- mouse.integrated@assays$SCT@var.features[1:20]
top20
## [1] "Plp1" "Ptgds" "Ttr" "Cldn5" "Cxcl12" "Ccl4" "Vtn" "Flt1"
## [9] "Mal" "C1qa" "Rgs5" "Ly6c1" "C1qb" "Apod" "Fos" "Ctss"
## [17] "Itm2a" "Ly6a" "Hexb" "Ccl3"
After integration, to visualize the integrated data we can use dimensionality reduction techniques, such as PCA and Uniform Manifold Approximation and Projection (UMAP). While PCA will determine all PCs, we can only plot two at a time. In contrast, UMAP will take the information from any number of top PCs to arrange the cells in this multidimensional space. It will take those distances in multidimensional space, and try to plot them in two dimensions. In this way, the distances between cells represent similarity in expression.
To generate these visualizations with the harmony output, use reduction = “harmony”
# Reset idents and levels
DefaultAssay(mouse.integrated) <- "SCT"
Idents(mouse.integrated) <- "sample"
mouse.integrated$sample <- factor(mouse.integrated$sample,
levels = sample_order)
# Plot PCA
pca1 <- DimPlot(mouse.integrated,
reduction = "harmony",
group.by = "sample",
cols = sample_colors)
pca1
pca2 <- DimPlot(mouse.integrated,
reduction = "harmony",
split.by = "sample",
group.by = "sample",
cols = sample_colors)
pca2
To overcome the extensive technical noise in the expression of any single gene for scRNA-seq data, Seurat assigns cells to clusters based on their PCA scores derived from the expression of the integrated most variable genes, with each PC essentially representing a “metagene” that combines information across a correlated gene set. Determining how many PCs to include in the clustering step is therefore important to ensure that we are capturing the majority of the variation, or cell types, present in our dataset.
# Printing out the most variable genes driving PCs
print(x = mouse.integrated[["pca"]],
dims = 1:10,
nfeatures = 10)
## PC_ 1
## Positive: C1qa, C1qb, C1qc, Ctss, Hexb, Cx3cr1, Csf1r, Tyrobp, P2ry12, Fcer1g
## Negative: Cryab, Mal, Plp1, Slc1a2, Ptgds, Car2, Cnp, Ermn, Septin4, Aplp1
## PC_ 2
## Positive: Plp1, Ermn, Mbp, Mag, Cnp, Trf, Cryab, Tubb4a, Cldn11, Ptgds
## Negative: Flt1, Cldn5, Bsg, Ly6c1, Ly6a, Cxcl12, Itm2a, Slco1a4, Pltp, Spock2
## PC_ 3
## Positive: Slc1a2, Plpp3, Atp1a2, Aldoc, Clu, Slc1a3, Gja1, Mt3, Slc4a4, Pla2g7
## Negative: Mal, Plp1, Ermn, Mbp, Mag, Apod, Trf, Cldn11, Tubb4a, Cnp
## PC_ 4
## Positive: Ttr, 2900040C04Rik, Calml4, Ecrg4, Rbp1, Cab39l, Enpp2, Atp5g1, Cdkn1c, Chchd10
## Negative: Slc1a2, Plpp3, Atp1a2, Sparcl1, Slc1a3, Gja1, Slc4a4, Aldoc, Acsl3, Ptprz1
## PC_ 5
## Positive: Rtn1, Meg3, Stmn3, Map1b, Vcan, Nrxn3, Stmn2, Celf4, Gng3, Sox4
## Negative: Slc1a2, Plpp3, Clu, Aldoc, Mt3, Gja1, Slc1a3, Mt1, Atp1a2, Acsl3
## PC_ 6
## Positive: Vtn, Rgs5, Ndufa4l2, Cald1, Plac9, Myl9, Higd1b, Kcnj8, Cox4i2, Abcc9
## Negative: Bsg, Cldn5, Slco1a4, Spock2, Ly6a, Ly6c1, Stmn2, Adgrl4, Rtn1, Itm2a
## PC_ 7
## Positive: Pdgfra, Tnr, Lhfpl3, Olig1, C1ql1, Opcml, Cntn1, Cspg4, Gpr17, Cacng4
## Negative: Stmn1, Tmsb10, Dlx6os1, Meis2, Igfbpl1, Stmn2, Dlx1, Sox11, Sp9, Ccnd2
## PC_ 8
## Positive: Fos, Egr1, Junb, Zfp36, Btg2, Jun, Ier2, Atf3, Nfkbia, Dusp1
## Negative: P2ry12, Ctsd, Hexb, Gpr34, Fcrls, Trem2, C1qb, C1qa, C1qc, Ctss
## PC_ 9
## Positive: Jun, Egr1, Zfp36, Fos, Btg2, Kctd12, Atf3, Ier2, Junb, 2900040C04Rik
## Negative: Cd52, Tmsb4x, S100a11, H2-D1, S100a6, Vim, Crip1, Ptprc, Fxyd5, Rac2
## PC_ 10
## Positive: Vcan, Pdgfra, Tnr, Lhfpl3, Cacng4, Olig1, C1ql1, Tmsb10, Nnat, Cspg4
## Negative: Snap25, Adcy1, Syt1, Scn2a, Sphkap, Neurod1, Cbln3, Chgb, Snhg11, Gabra6
Quantitative approach to an elbow plot
- The point where the principal components only contribute 5% of standard deviation and the principal components cumulatively contribute 90% of the standard deviation.
- The point where the percent change in variation between the consecutive PCs is less than 0.1%.
First metric
# Determine percent of variation associated with each PC
stdv <- mouse.integrated[["pca"]]@stdev
sum.stdv <- sum(mouse.integrated[["pca"]]@stdev)
percent.stdv <- (stdv / sum.stdv) * 100
# Calculate cumulative percents for each PC
cumulative <- cumsum(percent.stdv)
# Determine which PC exhibits cumulative percent greater than 90% and
# and % variation associated with the PC as less than 5
co1 <- which(cumulative > 90 & percent.stdv < 5)[1]
co1
## [1] 39
Second metric
# Determine the difference between variation of PC and subsequent PC
co2 <- sort(which(
(percent.stdv[1:length(percent.stdv) - 1] -
percent.stdv[2:length(percent.stdv)]) > 0.1),
decreasing = T)[1] + 1
# last point where change of % of variation is more than 0.1%.
co2
## [1] 14
Choose the minimum of these two metrics as the PCs covering the majority of the variation in the data.
# Minimum of the two calculation
min.pc <- min(co1, co2)
min.pc
## [1] 14
Use min.pc we just calculated to generate the clusters. We can plot the elbow plot again and overlay the information determined using our metrics:
# Create a dataframe with values
plot_df <- data.frame(pct = percent.stdv,
cumu = cumulative,
rank = 1:length(percent.stdv))
# Elbow plot to visualize
ggplot(plot_df, aes(cumulative, percent.stdv, label = rank, color = rank > min.pc)) +
geom_text() +
geom_vline(xintercept = 90, color = "grey") +
geom_hline(yintercept = min(percent.stdv[percent.stdv > 5]), color = "grey") +
theme_bw()
# Run UMAP
mouse.integrated <- RunUMAP(mouse.integrated,
dims = 1:min.pc,
reduction = "harmony",
n.components = 3) # set to 3 to use with VR
# plot UMAP
DimPlot(mouse.integrated,
shuffle = TRUE)
Seurat uses a graph-based clustering approach, which embeds cells in a graph structure, using a K-nearest neighbor (KNN) graph (by default), with edges drawn between cells with similar gene expression patterns. Then, it attempts to partition this graph into highly interconnected ‘quasi-cliques’ or ‘communities’ [Seurat - Guided Clustering Tutorial].
We will use the FindClusters() function to perform the graph-based clustering. The resolution is an important argument that sets the “granularity” of the downstream clustering and will need to be optimized for every individual experiment. For datasets of 3,000 - 5,000 cells, the resolution set between 0.4-1.4 generally yields good clustering. Increased resolution values lead to a greater number of clusters, which is often required for larger datasets.
The FindClusters() function allows us to enter a series of resolutions and will calculate the “granularity” of the clustering. This is very helpful for testing which resolution works for moving forward without having to run the function for each resolution.
# Determine the K-nearest neighbor graph
mouse.unannotated <- FindNeighbors(object = mouse.integrated,
assay = "SCT", # set as default after SCTransform
reduction = "harmony",
dims = 1:min.pc)
# Determine the clusters for various resolutions
mouse.unannotated <- FindClusters(object = mouse.unannotated,
algorithm = 1, # 1 = Louvain
resolution = seq(0.1,0.8,by=0.1))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10633
## Number of edges: 342985
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9798
## Number of communities: 12
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10633
## Number of edges: 342985
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9660
## Number of communities: 13
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10633
## Number of edges: 342985
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9551
## Number of communities: 16
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10633
## Number of edges: 342985
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9457
## Number of communities: 18
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10633
## Number of edges: 342985
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9371
## Number of communities: 19
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10633
## Number of edges: 342985
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9289
## Number of communities: 21
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10633
## Number of edges: 342985
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9218
## Number of communities: 22
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10633
## Number of edges: 342985
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9149
## Number of communities: 23
## Elapsed time: 0 seconds
# 0.1
DimPlot(mouse.unannotated,
group.by = "SCT_snn_res.0.1",
label = TRUE)
# 0.2
DimPlot(mouse.unannotated,
group.by = "SCT_snn_res.0.2",
label = TRUE)
# 0.3
DimPlot(mouse.unannotated,
group.by = "SCT_snn_res.0.3",
label = TRUE)
# 0.4
DimPlot(mouse.unannotated,
group.by = "SCT_snn_res.0.4",
label = TRUE)
# 0.5
DimPlot(mouse.unannotated,
group.by = "SCT_snn_res.0.5",
label = TRUE)
# 0.6
DimPlot(mouse.unannotated,
group.by = "SCT_snn_res.0.6",
label = TRUE)
# 0.7
DimPlot(mouse.unannotated,
group.by = "SCT_snn_res.0.7",
label = TRUE)
# 0.8
DimPlot(mouse.unannotated,
group.by = "SCT_snn_res.0.8",
label = TRUE)
# set params
mouse.unannotated$seurat_clusters <- mouse.unannotated$SCT_snn_res.0.6
Idents(mouse.unannotated) <- "seurat_clusters"
# save
saveRDS(mouse.unannotated, paste0("../../../rObjects/mouse_cells_unannotated.rds"))
embeddings <- mouse.unannotated@reductions$umap@cell.embeddings
embeddings <- cbind(embeddings, as.character(mouse.unannotated$seurat_clusters))
colnames(embeddings)[4] <- "seurat_clusters"
embeddings <- as.data.frame(embeddings)
three.dim <- plot_ly(embeddings,
x = ~UMAP_1,
y = ~UMAP_2,
z = ~UMAP_3,
color = ~seurat_clusters,
size = 1)
three.dim <- three.dim %>% add_markers()
three.dim <- three.dim %>% layout(scene = list(xaxis = list(title = 'UMAP_1'),
yaxis = list(title = 'UMAP_2'),
zaxis = list(title = 'UMAP_3')))
three.dim
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors