knitr::opts_knit$set(root.dir = "/research/labs/neurology/fryer/m214960/aducanumab/scripts/R")
# load libraries
library(cowplot) # plot_grid()
library(dplyr) # left_join()
library(ggplot2) # ggplot()
library(gridExtra) # grid.arrange()
library(harmony) # RunHarmony()
library(parallel) # detectCores()
library(plotly) # plot_ly()
library(Seurat) # Read10X_h5()
library(ShinyCell) # makeShinyApp()
library(stringr) # str_match()
# variables
sample_colors <- "red"
# thresholds
nCount.min <- 200
nCount.max <- 10000
nFeature.min <- 100
complexity.cutoff <- 0.8
mt.cutoff <- 20
ribo.cutoff <- 20
hb.cutoff <- 3
# set seed
set.seed(8)
# work in parallel
options(mc.cores = detectCores() - 1)
# Load counts
prefix <- "../../cellbender/"
suffix <- "_fpr_0.05_filtered.h5"
mouse <- CreateSeuratObject(Read10X_h5(paste0(prefix,"939B_IgG_Female",suffix)))
# Load annotation file
if (file.exists("../../rObjects/annotation.rds")) {
genes <- readRDS("../../rObjects/annotation.rds")
} else {
gtf.file <- "../../refs/mouse_genes.gtf"
genes <- rtracklayer::import(gtf.file)
genes <- as.data.frame(genes)
genes <- genes[genes$type == "gene",]
saveRDS(genes, "../../rObjects/annotation.rds")
}
# cell.complexity
mouse$cell.complexity <- log10(mouse$nFeature_RNA) / log10(mouse$nCount_RNA)
# percent.mt
mt.genes <- genes[genes$seqnames == "chrM",13]
mouse$percent.mt <- PercentageFeatureSet(mouse, features = mt.genes)
mt.genes
## [1] "mt-Nd1" "mt-Nd2" "mt-Co1" "mt-Co2" "mt-Atp8" "mt-Atp6" "mt-Co3"
## [8] "mt-Nd3" "mt-Nd4l" "mt-Nd4" "mt-Nd5" "mt-Nd6" "mt-Cytb"
# percent.ribo
# ribosomal proteins begin with 'Rps' or 'Rpl' in this annotation file
# mitochondrial ribosomes start with 'Mrps' or 'Mrpl'
gene.names <- genes$gene_name
ribo <- gene.names[grep("^Rp[sl]", gene.names)]
mt.ribo <- gene.names[grep("^Mrp[sl]", gene.names)]
ribo.combined <- c(mt.ribo,ribo)
mouse$percent.ribo.protein <- PercentageFeatureSet(mouse, features = ribo.combined)
ribo.combined
## [1] "Mrpl15" "Mrpl30" "Mrps9" "Mrpl44" "Mrps14"
## [6] "Mrpl41" "Mrps2" "Mrps5" "Mrps26" "Mrps28"
## [11] "Mrpl47" "Mrpl24" "Mrpl9" "Mrps21" "Mrpl50"
## [16] "Mrpl37" "Mrps15" "Mrpl20" "Mrpl33" "Mrpl1"
## [21] "Mrps18c" "Mrps17" "Mrps33" "Mrpl35" "Mrpl19"
## [26] "Mrpl53" "Mrps25" "Mrpl51" "Mrps35" "Mrps12"
## [31] "Mrpl46" "Mrps11" "Mrpl48" "Mrpl17" "Mrpl23"
## [36] "Mrps31" "Mrpl34" "Mrpl4" "Mrps22" "Mrpl3"
## [41] "Mrpl54" "Mrpl42" "Mrps24" "Mrpl22" "Mrpl55"
## [46] "Mrps23" "Mrpl27" "Mrpl10" "Mrpl45" "Mrpl58"
## [51] "Mrps7" "Mrpl38" "Mrpl12" "Mrpl32" "Mrpl36"
## [56] "Mrps27" "Mrps36" "Mrps30" "Mrps16" "Mrpl52"
## [61] "Mrpl57" "Mrpl13" "Mrpl40" "Mrpl39" "Mrps6"
## [66] "Mrpl18" "Mrps34" "Mrpl28" "Mrps18b" "Mrpl14"
## [71] "Mrps18a" "Mrpl2" "Mrps10" "Mrpl21" "Mrpl11"
## [76] "Mrpl49" "Mrpl16" "Mrpl43" "Rpl7" "Rpl31"
## [81] "Rpl37a" "Rps6kc1" "Rpl7a" "Rpl12" "Rpl35"
## [86] "Rps21" "Rpl22l1" "Rps3a1" "Rps27" "Rpl34"
## [91] "Rps20" "Rps6" "Rps8" "Rps6ka1" "Rpl11"
## [96] "Rpl22" "Rpl9" "Rpl5" "Rplp0" "Rpl6"
## [101] "Rpl21" "Rpl32" "Rps9" "Rpl28" "Rps5"
## [106] "Rps19" "Rps16" "Rps11" "Rpl13a" "Rpl18"
## [111] "Rps17" "Rps3" "Rpl27a" "Rps13" "Rps15a"
## [116] "Rplp2" "Rpl18a" "Rpl13" "Rps25" "Rpl10-ps3"
## [121] "Rplp1" "Rpl4" "Rps27l" "Rpl29" "Rps27rt"
## [126] "Rpsa" "Rpl14" "Rps12" "Rps15" "Rpl41"
## [131] "Rps26" "Rps27a" "Rpl26" "Rpl23a" "Rpl9-ps1"
## [136] "Rps6kb1" "Rpl23" "Rpl19" "Rpl27" "Rpl38"
## [141] "Rps7" "Rpl10l" "Rps29" "Rpl36al" "Rps6kl1"
## [146] "Rps6ka5" "Rps23" "Rpl15" "Rps24" "Rpl36a-ps1"
## [151] "Rpl37" "Rpl30" "Rpl8" "Rpl3" "Rps19bp1"
## [156] "Rpl39l" "Rpl35a" "Rpl24" "Rps6ka2" "Rps2"
## [161] "Rpl3l" "Rps10" "Rpl10a" "Rps28" "Rps18"
## [166] "Rpl7l1" "Rpl36" "Rpl36-ps4" "Rps14" "Rpl17"
## [171] "Rps6kb2" "Rps6ka4" "Rpl9-ps6" "Rpl39" "Rpl10"
## [176] "Rps4x" "Rps6ka6" "Rpl36a" "Rps6ka3"
# percent.hb
# percent.hb - hemoglobin proteins begin with 'Hbb' or 'Hba' for mouse
hb.genes <- gene.names[grep("^Hb[ba]-", gene.names)]
mouse$percent.hb <- PercentageFeatureSet(mouse, features = hb.genes)
hb.genes
## [1] "Hbb-bt" "Hbb-bs" "Hbb-bh2" "Hbb-bh1" "Hbb-y" "Hba-x" "Hba-a1"
## [8] "Hba-a2"
# sample
mouse$sample <- "IgG.F.939B "
# Visualize the number of cell counts per sample
data <- as.data.frame(table(mouse$sample))
colnames(data) <- c("sample","frequency")
ncells1 <- ggplot(data, aes(x = sample, y = frequency, fill = sample)) +
geom_col() +
theme_classic() +
geom_text(aes(label = frequency),
position=position_dodge(width=0.9),
vjust=-0.25) +
scale_fill_manual(values = sample_colors) +
ggtitle("Raw: cells per sample") +
theme(legend.position = "none") +
scale_y_continuous(breaks = seq(0,10000, by = 2000), limits = c(0,10000)) +
theme(axis.text.x = element_text(angle = 45, hjust=1))
ncells1
# Visualize nCount_RNA
den1 <- ggplot(mouse@meta.data,
aes(color = sample,
x = nCount_RNA,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_log10() +
scale_color_manual(values = sample_colors) +
scale_fill_manual(values = sample_colors) +
xlab("nCount_RNA") +
ylab("Density") +
theme(legend.position = "none") +
geom_vline(xintercept = nCount.min) +
geom_vline(xintercept = nCount.max) +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Visualize nFeature_RNA
den2 <- ggplot(mouse@meta.data,
aes(color = sample,
x = nFeature_RNA,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_log10() +
scale_color_manual(values = sample_colors) +
theme(legend.position = "none") +
scale_fill_manual(values = sample_colors) +
xlab("nFeature_RNA") +
ylab("Density") +
geom_vline(xintercept = nFeature.min) +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Visualize cell complexity
# Quality cells are usually above 0.85
den3 <- ggplot(mouse@meta.data,
aes(color = sample,
x = cell.complexity,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_color_manual(values = sample_colors) +
theme(legend.position = "none") +
scale_fill_manual(values = sample_colors) +
xlab("Cell Complexity (log10(nFeature/nCount))") +
ylab("Density") +
geom_vline(xintercept = complexity.cutoff) +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Visualize percent.mt
den4 <- ggplot(mouse@meta.data,
aes(color = sample,
x = percent.mt,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_continuous(n.breaks = 4) +
geom_vline(xintercept = mt.cutoff) +
scale_color_manual(values = sample_colors) +
theme(legend.position = "none") +
scale_fill_manual(values = sample_colors) +
xlab("% Mitochondrial Genes") +
ylab("Density") +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Visualize percent.ribo.protein
den5 <- ggplot(mouse@meta.data,
aes(color = sample,
x = percent.ribo.protein,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_log10() +
geom_vline(xintercept = ribo.cutoff) +
scale_color_manual(values = sample_colors) +
theme(legend.position = "none") +
scale_fill_manual(values = sample_colors) +
xlab("% Ribosomal Protein Genes") +
ylab("Density") +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Visualize percent.hb
den6 <- ggplot(mouse@meta.data,
aes(color = sample,
x = percent.hb,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_log10() +
geom_vline(xintercept = hb.cutoff) +
scale_color_manual(values = sample_colors) +
theme(legend.position = "none") +
scale_fill_manual(values = sample_colors) +
xlab("% Hemoglobin Genes") +
ylab("Density") +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Arrange graphs in grid
plots <- list(den1,den2,den3,den4,den5,den6)
layout <- rbind(c(1,4),c(2,5),c(3,6))
grid <- grid.arrange(grobs = plots, layout_matrix = layout)
# nFeature, nCount, and cell.complexity violins
v1 <- VlnPlot(mouse,
features = c("nFeature_RNA", "nCount_RNA","cell.complexity"),
ncol = 3,
group.by = 'sample',
cols = sample_colors,
pt.size = 0)
v1
# percent violins
v2 <- VlnPlot(mouse,
features = c("percent.mt","percent.ribo.protein","percent.hb"),
ncol = 3,
group.by = 'sample',
cols = sample_colors,
pt.size = 0)
v2
s1 <- ggplot(
mouse@meta.data,
aes(x = nCount_RNA, y = nFeature_RNA, color = percent.mt)) +
geom_point() +
geom_smooth(method = "lm") +
scale_x_log10() +
scale_y_log10() +
theme_classic() +
geom_vline(xintercept = nCount.min) +
geom_hline(yintercept = nFeature.min) +
facet_wrap(~sample) +
scale_colour_gradient(low = "gray90", high = "black", limits =c(0,100))
s1
## `geom_smooth()` using formula 'y ~ x'
s2 <- FeatureScatter(mouse,
feature1 = "nCount_RNA",
feature2 = "percent.mt",
group.by = 'sample',
cols = sample_colors,
shuffle = TRUE)
s2
# filter
mouse.filtered <- subset(mouse,
subset = (nCount_RNA > nCount.min) &
(nCount_RNA < nCount.max) &
(nFeature_RNA > nFeature.min) &
(cell.complexity > complexity.cutoff) &
(percent.mt < mt.cutoff) &
(percent.ribo.protein < ribo.cutoff) &
(percent.hb < hb.cutoff))
# print cells removed
print(paste0(dim(mouse)[2] - dim(mouse.filtered)[2]," cells removed"))
## [1] "2115 cells removed"
Remove lowly expressed genes. We will keep genes that have at least 1 count in 10 cells.
# filter genes
counts <- GetAssayData(object = mouse.filtered, slot = "counts")
nonzero <- counts > 0 # produces logical
keep <- Matrix::rowSums(nonzero) >= 10 # sum the true/false
counts.filtered <- counts[keep,] # keep certain genes
# overwrite mouse.filtered
mouse.filtered <- CreateSeuratObject(counts.filtered,
meta.data = mouse.filtered@meta.data)
# print features removed
print(paste0(dim(counts)[1] - dim(counts.filtered)[1], " features removed"))
## [1] "23809 features removed"
Remove mitochondrial genes.
# remove mt.genes
counts <- GetAssayData(object = mouse.filtered, slot = "counts")
keep <- !rownames(counts) %in% mt.genes # false when mt.gene
counts.filtered <- counts[keep,]
# overwrite mouse.filtered
mouse.filtered <- CreateSeuratObject(counts.filtered,
meta.data = mouse.filtered@meta.data)
# print features removed
print(paste0(dim(counts)[1] - dim(counts.filtered)[1], " features removed"))
## [1] "13 features removed"
# cleanup data
remove(mouse,counts,counts.filtered,nonzero)
# Visualize the number of cell counts per sample
data <- as.data.frame(table(mouse.filtered$sample))
colnames(data) <- c("sample","frequency")
ncells2 <- ggplot(data, aes(x = sample, y = frequency, fill = sample)) +
geom_col() +
theme_classic() +
geom_text(aes(label = frequency),
position=position_dodge(width=0.9),
vjust=-0.25) +
scale_fill_manual(values = sample_colors) +
scale_y_continuous(breaks = seq(0,10000, by = 2000), limits = c(0,10000)) +
ggtitle("Filtered: cells per sample") +
theme(legend.position = "none") +
theme(axis.text.x = element_text(angle = 45, hjust=1))
# Arrange graphs in grid
plots <- list(ncells1,ncells2)
layout <- cbind(c(1),c(2))
grid <- grid.arrange(grobs = plots, layout_matrix = layout)
# Visualize nCount_RNA
den1 <- ggplot(mouse.filtered@meta.data,
aes(color = sample,
x = nCount_RNA,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_log10() +
scale_color_manual(values = sample_colors) +
scale_fill_manual(values = sample_colors) +
xlab("nCount_RNA") +
ylab("Density") +
theme(legend.position = "none") +
geom_vline(xintercept = nCount.min) +
geom_vline(xintercept = nCount.max) +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Visualize nFeature_RNA
den2 <- ggplot(mouse.filtered@meta.data,
aes(color = sample,
x = nFeature_RNA,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_log10() +
scale_color_manual(values = sample_colors) +
theme(legend.position = "none") +
scale_fill_manual(values = sample_colors) +
xlab("nFeature_RNA") +
ylab("Density") +
geom_vline(xintercept = nFeature.min) +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Visualize cell complexity
# Quality cells are usually above 0.85
den3 <- ggplot(mouse.filtered@meta.data,
aes(color = sample,
x = cell.complexity,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_color_manual(values = sample_colors) +
theme(legend.position = "none") +
scale_fill_manual(values = sample_colors) +
xlab("Cell Complexity (log10(nFeature/nCount))") +
ylab("Density") +
geom_vline(xintercept = complexity.cutoff) +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Visualize percent.mt
den4 <- ggplot(mouse.filtered@meta.data,
aes(color = sample,
x = percent.mt,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_continuous(n.breaks = 4) +
geom_vline(xintercept = mt.cutoff) +
scale_color_manual(values = sample_colors) +
theme(legend.position = "none") +
scale_fill_manual(values = sample_colors) +
xlab("% Mitochondrial Genes") +
ylab("Density") +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Visualize percent.ribo.protein
den5 <- ggplot(mouse.filtered@meta.data,
aes(color = sample,
x = percent.ribo.protein,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_log10() +
geom_vline(xintercept = ribo.cutoff) +
scale_color_manual(values = sample_colors) +
theme(legend.position = "none") +
scale_fill_manual(values = sample_colors) +
xlab("% Ribosomal Protein Genes") +
ylab("Density") +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Visualize percent.hb
den6 <- ggplot(mouse.filtered@meta.data,
aes(color = sample,
x = percent.hb,
fill = sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_log10() +
geom_vline(xintercept = hb.cutoff) +
scale_color_manual(values = sample_colors) +
theme(legend.position = "none") +
scale_fill_manual(values = sample_colors) +
xlab("% Hemoglobin Genes") +
ylab("Density") +
theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))
# Arrange graphs in grid
plots <- list(den1,den2,den3,den4,den5,den6)
layout <- rbind(c(1,4),c(2,5),c(3,6))
grid <- grid.arrange(grobs = plots, layout_matrix = layout)
# nFeature, nCount, and cell.complexity violins
v3 <- VlnPlot(mouse.filtered,
features = c("nFeature_RNA", "nCount_RNA","cell.complexity"),
ncol = 3,
group.by = 'sample',
cols = sample_colors,
pt.size = 0)
v3
# percent violins
v4 <- VlnPlot(mouse.filtered,
features = c("percent.mt","percent.ribo.protein","percent.hb"),
ncol = 3,
group.by = 'sample',
cols = sample_colors,
pt.size = 0)
v4
s3 <- ggplot(
mouse.filtered@meta.data,
aes(x = nCount_RNA, y = nFeature_RNA, color = percent.mt)) +
geom_point() +
geom_smooth(method = "lm") +
scale_x_log10() +
scale_y_log10() +
theme_classic() +
geom_vline(xintercept = nCount.min) +
geom_hline(yintercept = nFeature.min) +
facet_wrap(~sample) +
scale_colour_gradient(low = "gray90", high = "black", limits =c(0,100))
s3
## `geom_smooth()` using formula 'y ~ x'
s4 <- FeatureScatter(mouse.filtered,
feature1 = "nCount_RNA",
feature2 = "percent.mt",
group.by = 'sample',
cols = sample_colors,
shuffle = TRUE)
s4
# Visualize the distribution of genes detected per cell via boxplot
b1 <- ggplot(mouse.filtered@meta.data,
aes(x = sample,
y = log10(nFeature_RNA),
fill=sample)) +
geom_boxplot() +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust = 0.5, face="bold")) +
ggtitle("Unique Genes / Cell / Sample") +
scale_color_manual(values = sample_colors) +
scale_fill_manual(values = sample_colors) +
xlab("Sample")
b1
# SCT
mouse.sct <- SCTransform(mouse.filtered, verbose = FALSE)
# Cluster with the SCT workflow
mouse.sct <- RunPCA(object = mouse.sct, assay = "SCT")
## PC_ 1
## Positive: Meg3, Snhg11, Gm26917, Cdr1os, Malat1, Syne1, Luc7l3, Trank1, Chd5, Map1b
## Rbm25, Pclo, Srrm2, Gria2, Matk, Ankrd11, Snhg14, Arpp21, Ahi1, Pnisr
## Mical2, Srsf11, Arglu1, Kmt2a, Lars2, Map1a, Syt1, Ryr2, Unc13a, Ttc3
## Negative: Rps29, Apoe, Fau, Rplp1, Tmsb4x, Rpl41, Rps21, Eef1a1, Tyrobp, Rps24
## Rpl37a, Tpt1, Rps28, Rpl38, Actb, Rpl37, Rpl39, Rpl13, B2m, Rps27
## Rpl21, Rpl26, Rpl32, Rpl19, Ctsb, Rpl35a, Lyz2, Rps12, Rpl23, Rpl30
## PC_ 2
## Positive: Malat1, Tsix, Pld4, Gm43221, Slc40a1, Sipa1, Clec4a3, Rgl2, Tcf7l2, Slc12a6
## Lrp10, Tra2b, Map4k4, A830008E24Rik, Chd9, Ap4m1, Vav2, Htra2, Tmem131, Slc25a37
## Ms4a6b, Mttp, 4833411C07Rik, Tgfa, Ube2d3, Ireb2, Brd1, Nuak1, Tardbp, Knop1
## Negative: Fth1, Rpl41, Rpl38, Rplp1, Rps29, Rps24, Tpt1, Rps28, Rpl26, Rpl37a
## Rpl19, Rpl13, Fau, Rpl37, Rps14, Rpl32, Rps16, Rps27, Rpl21, Rpl17
## Eef1a1, Rpl9, Rpl39, Rps4x, Cox8a, Rpl34, Mt1, Rps20, Rps3a1, Rps23
## PC_ 3
## Positive: Lyz2, Meg3, Tyrobp, Cst7, B2m, Snhg11, Ctsb, Chd5, Trank1, Srrm2
## Cd63, Cd52, H2-D1, Lgals3bp, Cdr1os, Arpp21, Rbm25, Matk, Ryr2, Pclo
## Ank, Gm26917, Lpl, Selenop, Ahi1, Cacna1e, Ccl6, Fau, Apoe, Srsf11
## Negative: Malat1, Atp1a2, Slc1a2, Ckb, Ddhd1, Gnao1, Mbp, Mt3, Plp1, Ptn
## Bcan, Mt1, Sparcl1, Gpm6b, Appl2, Dbi, Igsf11, Msi2, Ptgds, Myo6
## Grin2c, Zbtb20, Clu, Ptprz1, Trpm3, Daam2, Fth1, Gm3764, Cpe, Sox2ot
## PC_ 4
## Positive: Atp1a2, Gm26917, Lars2, Luc7l3, Syne1, Bcan, Rbm25, Prnp, Chd5, Col16a1
## Dst, Slc1a2, Ncam1, Vegfa, Slc4a4, Plcd4, Daam2, Grin2c, Igsf11, Spag9
## Itih3, Myo6, Ankrd11, Gnao1, Safb2, Gm3764, Slc30a10, Fam193b, Appl2, Ptprz1
## Negative: Malat1, Snhg14, Pabpn1, Fbxw7, Stxbp5l, Abca5, Mir124a-1hg, Gria2, Gm17167, Gria1
## Rbfox3, Ubr5, Prkag2, Gria3, Celf5, Aifm3, Czib, Celf4, Car11, Plk2
## Kcnq1ot1, Cadps, Sorbs2, Kcnk9, Ank3, Ntrk3, Kcnip4, Sacs, 9330159F19Rik, Unc80
## PC_ 5
## Positive: Atp1a2, Malat1, Ddhd1, Gnao1, Bcan, Ntm, Tnik, Lyz2, Tmsb4x, Cst7
## Tyrobp, Rps24, Utp14b, Gm3764, Itih3, Rplp1, Cd52, Rps12, Appl2, Ptprz1
## Rpl21, Rps23, Rps29, Rpl13, Mir100hg, B2m, Daam2, Washc2, Grin2c, Rpl37a
## Negative: Cryab, Dbi, Mt3, Prnp, Enpp2, Pebp1, Mpc1, Ptgds, Dynll1, Chd5
## Car2, Ndufb6, Mbp, Hsp90aa1, App, Map1b, Ubb, Atp5k, Chchd10, Ndufa6
## Calm1, Ndufa4, Etfb, Pam16, Uqcrq, Vdac3, Atp5md, Atp5f1, Chchd2, Fth1
# Plot PCA
p1 <- DimPlot(object = mouse.sct,
reduction = "pca",
group.by = "sample",
cols = sample_colors,
shuffle = TRUE) + NoLegend()
p1
# Cleanup
remove(mouse.filtered)
First metric
# Determine percent of variation associated with each PC
stdv <- mouse.sct[["pca"]]@stdev
sum.stdv <- sum(mouse.sct[["pca"]]@stdev)
percent.stdv <- (stdv / sum.stdv) * 100
# Calculate cumulative percents for each PC
cumulative <- cumsum(percent.stdv)
# Determine which PC exhibits cumulative percent greater than 90% and
# and % variation associated with the PC as less than 5
co1 <- which(cumulative > 90 & percent.stdv < 5)[1]
co1
## [1] 44
Second metric
# Determine the difference between variation of PC and subsequent PC
co2 <- sort(which(
(percent.stdv[1:length(percent.stdv) - 1] -
percent.stdv[2:length(percent.stdv)]) > 0.1),
decreasing = T)[1] + 1
# last point where change of % of variation is more than 0.1%.
co2
## [1] 7
Choose the minimum of these two metrics as the PCs covering the majority of the variation in the data.
# Minimum of the two calculation
min.pc <- min(co1, co2)
min.pc
## [1] 7
# Run UMAP
mouse.sct <- RunUMAP(mouse.sct,
dims = 1:min.pc,
reduction = "pca",
n.components = 3) # set to 3 to use with VR
# Plot UMAP
DimPlot(mouse.sct)
# Determine the K-nearest neighbor graph
mouse.unannotated <- FindNeighbors(object = mouse.sct,
assay = "SCT", # set as default after SCTransform
reduction = "pca",
dims = 1:min.pc)
# Determine the clusters for various resolutions
mouse.unannotated <- FindClusters(object = mouse.unannotated,
algorithm = 1, # 1 = Louvain
resolution = seq(0.1,0.5,by=0.1))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 712
## Number of edges: 21016
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9554
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 712
## Number of edges: 21016
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9276
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 712
## Number of edges: 21016
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9006
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 712
## Number of edges: 21016
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8777
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 712
## Number of edges: 21016
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8547
## Number of communities: 6
## Elapsed time: 0 seconds
mouse.unannotated$seurat_clusters <- mouse.unannotated$SCT_snn_res.0.5
# 0.1
DimPlot(mouse.unannotated,
group.by = "SCT_snn_res.0.1",
label = TRUE)
# 0.2
DimPlot(mouse.unannotated,
group.by = "SCT_snn_res.0.2",
label = TRUE)
# 0.3
DimPlot(mouse.unannotated,
group.by = "SCT_snn_res.0.3",
label = TRUE)
# 0.4
DimPlot(mouse.unannotated,
group.by = "SCT_snn_res.0.4",
label = TRUE)
# 0.5
DimPlot(mouse.unannotated,
group.by = "SCT_snn_res.0.5",
label = TRUE)
embeddings <- mouse.unannotated@reductions$umap@cell.embeddings
embeddings <- cbind(embeddings, as.character(mouse.unannotated$seurat_clusters))
colnames(embeddings)[4] <- "seurat_clusters"
embeddings <- as.data.frame(embeddings)
embeddings$seurat_clusters <- factor(embeddings$seurat_clusters,
levels = c("0","1","2","3","4","5"))
cluster_colors <- c("gray","red1","orange","yellow","green",
"blue")
three.dim <- plot_ly(embeddings,
x = ~UMAP_1,
y = ~UMAP_2,
z = ~UMAP_3,
color = ~seurat_clusters,
colors = cluster_colors,
size = 1)
three.dim <- three.dim %>% add_markers()
three.dim <- three.dim %>% layout(scene = list(xaxis = list(title = 'UMAP_1'),
yaxis = list(title = 'UMAP_2'),
zaxis = list(title = 'UMAP_3')))
three.dim
DefaultAssay(mouse.unannotated) <- "RNA"
mouse.unannotated <- NormalizeData(mouse.unannotated)
Idents(mouse.unannotated) <- "SCT_snn_res.0.5"
VlnPlot(mouse.unannotated,
features = "Gfap",
cols = cluster_colors,
group.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Gja1",
cols = cluster_colors,
group.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Cd79b",
cols = cluster_colors,
group.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Pecam1",
cols = cluster_colors,
group.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Tmem119",
group.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Itgam", # aka Cd11b
group.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Cd14",
group.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Cd68",
group.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Ccr5",
group.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Cspg5",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##
## This message will be shown once per session.
mouse.unannotated@assays$RNA@var.features <- mouse.unannotated@assays$SCT@var.features
metadata <- mouse.unannotated@meta.data
metadata <- metadata[,c(15,16,2:14)]
mouse.unannotated@meta.data <- metadata
mouse.unannotated@assays$SCT@meta.features <- metadata
mouse.unannotated@assays$RNA@meta.features <- metadata
# make shiny folder
DefaultAssay(mouse.unannotated) <- "RNA"
Idents(mouse.unannotated) <- mouse.unannotated$seurat_clusters
sc.config <- createConfig(mouse.unannotated)
makeShinyApp(mouse.unannotated, sc.config, gene.mapping = TRUE,
shiny.title = "IgG_F_939B ")