Setup

Set working directory

knitr::opts_knit$set(root.dir = "/research/labs/neurology/fryer/m214960/aducanumab/scripts/R")

Load libraries

# load libraries
library(cowplot)    # plot_grid()
library(dplyr)      # left_join()
library(ggplot2)    # ggplot()
library(gridExtra)  # grid.arrange()
library(harmony)    # RunHarmony()
library(parallel)   # detectCores()
library(plotly)     # plot_ly()
library(Seurat)     # Read10X_h5()
library(ShinyCell)  # makeShinyApp()
library(stringr)    # str_match()

Set variables and thresholds

# variables
sample_colors <- "purple"

# thresholds
nCount.min <- 200
nCount.max <- 10000
nFeature.min <- 100
complexity.cutoff <- 0.8
mt.cutoff <- 20
ribo.cutoff <- 20
hb.cutoff <- 3

# set seed
set.seed(8)

# work in parallel
options(mc.cores = detectCores() - 1)

Load data

# Load counts
prefix <- "../../cellbender/"
suffix <- "_fpr_0.05_filtered.h5"
mouse <- CreateSeuratObject(Read10X_h5(paste0(prefix,"705A_Adu_Male",suffix)))

# Load annotation file, GENCODE GRCm38 version M23 (Ensembl 98)
if (file.exists("../../rObjects/annotation.rds")) {
  genes <- readRDS("../../rObjects/annotation.rds")
} else {
  gtf.file <- "../../refs/mouse_genes.gtf"
  genes <- rtracklayer::import(gtf.file)
  genes <- as.data.frame(genes)
  genes <- genes[genes$type == "gene",]
  saveRDS(genes, "../../rObjects/annotation.rds")
}

Metadata columns

# cell.complexity
mouse$cell.complexity <- log10(mouse$nFeature_RNA) / log10(mouse$nCount_RNA)

# percent.mt
mt.genes <- genes[genes$seqnames == "chrM",13]
mouse$percent.mt <- PercentageFeatureSet(mouse, features = mt.genes)
mt.genes
##  [1] "mt-Nd1"  "mt-Nd2"  "mt-Co1"  "mt-Co2"  "mt-Atp8" "mt-Atp6" "mt-Co3" 
##  [8] "mt-Nd3"  "mt-Nd4l" "mt-Nd4"  "mt-Nd5"  "mt-Nd6"  "mt-Cytb"
# percent.ribo
# ribosomal proteins begin with 'Rps' or 'Rpl' in this annotation file
# mitochondrial ribosomes start with 'Mrps' or 'Mrpl'
gene.names <- genes$gene_name
ribo <- gene.names[grep("^Rp[sl]", gene.names)]
mt.ribo <- gene.names[grep("^Mrp[sl]", gene.names)]
ribo.combined <- c(mt.ribo,ribo)
mouse$percent.ribo.protein <- PercentageFeatureSet(mouse, features = ribo.combined)
ribo.combined
##   [1] "Mrpl15"     "Mrpl30"     "Mrps9"      "Mrpl44"     "Mrps14"    
##   [6] "Mrpl41"     "Mrps2"      "Mrps5"      "Mrps26"     "Mrps28"    
##  [11] "Mrpl47"     "Mrpl24"     "Mrpl9"      "Mrps21"     "Mrpl50"    
##  [16] "Mrpl37"     "Mrps15"     "Mrpl20"     "Mrpl33"     "Mrpl1"     
##  [21] "Mrps18c"    "Mrps17"     "Mrps33"     "Mrpl35"     "Mrpl19"    
##  [26] "Mrpl53"     "Mrps25"     "Mrpl51"     "Mrps35"     "Mrps12"    
##  [31] "Mrpl46"     "Mrps11"     "Mrpl48"     "Mrpl17"     "Mrpl23"    
##  [36] "Mrps31"     "Mrpl34"     "Mrpl4"      "Mrps22"     "Mrpl3"     
##  [41] "Mrpl54"     "Mrpl42"     "Mrps24"     "Mrpl22"     "Mrpl55"    
##  [46] "Mrps23"     "Mrpl27"     "Mrpl10"     "Mrpl45"     "Mrpl58"    
##  [51] "Mrps7"      "Mrpl38"     "Mrpl12"     "Mrpl32"     "Mrpl36"    
##  [56] "Mrps27"     "Mrps36"     "Mrps30"     "Mrps16"     "Mrpl52"    
##  [61] "Mrpl57"     "Mrpl13"     "Mrpl40"     "Mrpl39"     "Mrps6"     
##  [66] "Mrpl18"     "Mrps34"     "Mrpl28"     "Mrps18b"    "Mrpl14"    
##  [71] "Mrps18a"    "Mrpl2"      "Mrps10"     "Mrpl21"     "Mrpl11"    
##  [76] "Mrpl49"     "Mrpl16"     "Mrpl43"     "Rpl7"       "Rpl31"     
##  [81] "Rpl37a"     "Rps6kc1"    "Rpl7a"      "Rpl12"      "Rpl35"     
##  [86] "Rps21"      "Rpl22l1"    "Rps3a1"     "Rps27"      "Rpl34"     
##  [91] "Rps20"      "Rps6"       "Rps8"       "Rps6ka1"    "Rpl11"     
##  [96] "Rpl22"      "Rpl9"       "Rpl5"       "Rplp0"      "Rpl6"      
## [101] "Rpl21"      "Rpl32"      "Rps9"       "Rpl28"      "Rps5"      
## [106] "Rps19"      "Rps16"      "Rps11"      "Rpl13a"     "Rpl18"     
## [111] "Rps17"      "Rps3"       "Rpl27a"     "Rps13"      "Rps15a"    
## [116] "Rplp2"      "Rpl18a"     "Rpl13"      "Rps25"      "Rpl10-ps3" 
## [121] "Rplp1"      "Rpl4"       "Rps27l"     "Rpl29"      "Rps27rt"   
## [126] "Rpsa"       "Rpl14"      "Rps12"      "Rps15"      "Rpl41"     
## [131] "Rps26"      "Rps27a"     "Rpl26"      "Rpl23a"     "Rpl9-ps1"  
## [136] "Rps6kb1"    "Rpl23"      "Rpl19"      "Rpl27"      "Rpl38"     
## [141] "Rps7"       "Rpl10l"     "Rps29"      "Rpl36al"    "Rps6kl1"   
## [146] "Rps6ka5"    "Rps23"      "Rpl15"      "Rps24"      "Rpl36a-ps1"
## [151] "Rpl37"      "Rpl30"      "Rpl8"       "Rpl3"       "Rps19bp1"  
## [156] "Rpl39l"     "Rpl35a"     "Rpl24"      "Rps6ka2"    "Rps2"      
## [161] "Rpl3l"      "Rps10"      "Rpl10a"     "Rps28"      "Rps18"     
## [166] "Rpl7l1"     "Rpl36"      "Rpl36-ps4"  "Rps14"      "Rpl17"     
## [171] "Rps6kb2"    "Rps6ka4"    "Rpl9-ps6"   "Rpl39"      "Rpl10"     
## [176] "Rps4x"      "Rps6ka6"    "Rpl36a"     "Rps6ka3"
# percent.hb
# percent.hb - hemoglobin proteins begin with 'Hbb' or 'Hba' for mouse
hb.genes <- gene.names[grep("^Hb[ba]-", gene.names)]
mouse$percent.hb <- PercentageFeatureSet(mouse, features = hb.genes)
hb.genes
## [1] "Hbb-bt"  "Hbb-bs"  "Hbb-bh2" "Hbb-bh1" "Hbb-y"   "Hba-x"   "Hba-a1" 
## [8] "Hba-a2"
# sample
mouse$sample <- "Adu.M.705.A"

Pre-filtering QC

Number of cells

# Visualize the number of cell counts per sample
data <- as.data.frame(table(mouse$sample))
colnames(data) <- c("sample","frequency")

ncells1 <- ggplot(data, aes(x = sample, y = frequency, fill = sample)) + 
  geom_col() +
  theme_classic() +
  geom_text(aes(label = frequency), 
            position=position_dodge(width=0.9), 
            vjust=-0.25) +
  scale_fill_manual(values = sample_colors) + 
  ggtitle("Raw: cells per sample") +
  theme(legend.position =  "none") + 
  scale_y_continuous(breaks = seq(0,8000, by = 2000), limits = c(0,8000)) +
  theme(axis.text.x = element_text(angle = 45, hjust=1))
ncells1

Density plots

# Visualize nCount_RNA
den1 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = nCount_RNA,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  scale_color_manual(values = sample_colors) +
  scale_fill_manual(values = sample_colors) +
  xlab("nCount_RNA") +
  ylab("Density") +
  theme(legend.position =  "none") +
  geom_vline(xintercept = nCount.min) +
  geom_vline(xintercept = nCount.max) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize nFeature_RNA
den2 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = nFeature_RNA,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("nFeature_RNA") +
  ylab("Density") +
  geom_vline(xintercept = nFeature.min) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize cell complexity
# Quality cells are usually above 0.85
den3 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = cell.complexity,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("Cell Complexity (log10(nFeature/nCount))") +
  ylab("Density") +
  geom_vline(xintercept = complexity.cutoff) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.mt
den4 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = percent.mt,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_continuous(n.breaks = 4) +
  geom_vline(xintercept = mt.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Mitochondrial Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.ribo.protein
den5 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = percent.ribo.protein,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  geom_vline(xintercept = ribo.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Ribosomal Protein Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.hb
den6 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = percent.hb,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  geom_vline(xintercept = hb.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Hemoglobin Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Arrange graphs in grid
plots <- list(den1,den2,den3,den4,den5,den6)
layout <- rbind(c(1,4),c(2,5),c(3,6))
grid <- grid.arrange(grobs = plots, layout_matrix = layout)

Violin plots

# nFeature, nCount, and cell.complexity violins
v1 <- VlnPlot(mouse,
              features = c("nFeature_RNA", "nCount_RNA","cell.complexity"),
              ncol = 3,
              group.by = 'sample',
              cols = sample_colors,
              pt.size = 0)
v1

#  percent violins
v2 <- VlnPlot(mouse,
              features = c("percent.mt","percent.ribo.protein","percent.hb"),
              ncol = 3,
              group.by = 'sample',
              cols = sample_colors,
              pt.size = 0)
v2

Scatter plots

s1 <- ggplot(
  mouse@meta.data,
  aes(x = nCount_RNA, y = nFeature_RNA, color = percent.mt)) + 
  geom_point() + 
  geom_smooth(method = "lm") +
    scale_x_log10() +       
  scale_y_log10() + 
  theme_classic() +
  geom_vline(xintercept = nCount.min) + 
  geom_hline(yintercept = nFeature.min) + 
  facet_wrap(~sample) +
  scale_colour_gradient(low = "gray90", high = "black", limits =c(0,100))
s1
## `geom_smooth()` using formula 'y ~ x'

s2 <- FeatureScatter(mouse,
                     feature1 = "nCount_RNA",
                     feature2 = "percent.mt",
                     group.by = 'sample',
                     cols = sample_colors,
                     shuffle = TRUE)
s2

Filtering

Cell-level filtering

# filter
mouse.filtered <- subset(mouse,
                        subset = (nCount_RNA > nCount.min) &
                          (nCount_RNA < nCount.max) &
                          (nFeature_RNA > nFeature.min) &
                          (cell.complexity > complexity.cutoff) &
                          (percent.mt < mt.cutoff) &
                          (percent.ribo.protein < ribo.cutoff) &
                          (percent.hb < hb.cutoff))

# print cells removed
print(paste0(dim(mouse)[2] - dim(mouse.filtered)[2]," cells removed"))
## [1] "730 cells removed"

Gene-level filtering

Remove lowly expressed genes. We will keep genes that have at least 1 count in 10 cells.

# filter genes
counts <- GetAssayData(object = mouse.filtered, slot = "counts")
nonzero <- counts > 0  # produces logical
keep <- Matrix::rowSums(nonzero) >= 10  # sum the true/false
counts.filtered <- counts[keep,]  # keep certain genes

# overwrite mouse.filtered
mouse.filtered <- CreateSeuratObject(counts.filtered, 
                                    meta.data = mouse.filtered@meta.data)

# print features removed
print(paste0(dim(counts)[1] - dim(counts.filtered)[1], " features removed"))
## [1] "15115 features removed"

Remove mitochondrial genes.

# remove mt.genes
counts <- GetAssayData(object = mouse.filtered, slot = "counts")
keep <- !rownames(counts) %in% mt.genes # false when mt.gene
counts.filtered <- counts[keep,]

# overwrite mouse.filtered
mouse.filtered <- CreateSeuratObject(counts.filtered, 
                                    meta.data = mouse.filtered@meta.data)

# print features removed
print(paste0(dim(counts)[1] - dim(counts.filtered)[1], " features removed"))
## [1] "13 features removed"
# cleanup data
remove(mouse,counts,counts.filtered,nonzero)

Post-filtering QC

Number of cells

# Visualize the number of cell counts per sample
data <- as.data.frame(table(mouse.filtered$sample))
colnames(data) <- c("sample","frequency")

ncells2 <- ggplot(data, aes(x = sample, y = frequency, fill = sample)) + 
  geom_col() +
  theme_classic() +
  geom_text(aes(label = frequency), 
            position=position_dodge(width=0.9), 
            vjust=-0.25) +
  scale_fill_manual(values = sample_colors) + 
  scale_y_continuous(breaks = seq(0,8000, by = 2000), limits = c(0,8000)) +
  ggtitle("Filtered: cells per sample") +
  theme(legend.position =  "none") + 
  theme(axis.text.x = element_text(angle = 45, hjust=1))

# Arrange graphs in grid
plots <- list(ncells1,ncells2)
layout <- cbind(c(1),c(2))
grid <- grid.arrange(grobs = plots, layout_matrix = layout)

Density plots

# Visualize nCount_RNA
den1 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = nCount_RNA,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  scale_color_manual(values = sample_colors) +
  scale_fill_manual(values = sample_colors) +
  xlab("nCount_RNA") +
  ylab("Density") +
  theme(legend.position =  "none") +
  geom_vline(xintercept = nCount.min) +
  geom_vline(xintercept = nCount.max) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize nFeature_RNA
den2 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = nFeature_RNA,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("nFeature_RNA") +
  ylab("Density") +
  geom_vline(xintercept = nFeature.min) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize cell complexity
# Quality cells are usually above 0.85
den3 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = cell.complexity,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("Cell Complexity (log10(nFeature/nCount))") +
  ylab("Density") +
  geom_vline(xintercept = complexity.cutoff) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.mt
den4 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = percent.mt,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_continuous(n.breaks = 4) +
  geom_vline(xintercept = mt.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Mitochondrial Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.ribo.protein
den5 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = percent.ribo.protein,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  geom_vline(xintercept = ribo.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Ribosomal Protein Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.hb
den6 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = percent.hb,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  geom_vline(xintercept = hb.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Hemoglobin Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Arrange graphs in grid
plots <- list(den1,den2,den3,den4,den5,den6)
layout <- rbind(c(1,4),c(2,5),c(3,6))
grid <- grid.arrange(grobs = plots, layout_matrix = layout)

Violin plots

# nFeature, nCount, and cell.complexity violins
v3 <- VlnPlot(mouse.filtered,
              features = c("nFeature_RNA", "nCount_RNA","cell.complexity"),
              ncol = 3,
              group.by = 'sample',
              cols = sample_colors,
              pt.size = 0)
v3

#  percent violins
v4 <- VlnPlot(mouse.filtered,
              features = c("percent.mt","percent.ribo.protein","percent.hb"),
              ncol = 3,
              group.by = 'sample',
              cols = sample_colors,
              pt.size = 0)
v4

Scatter plots

s3 <- ggplot(
  mouse.filtered@meta.data,
  aes(x = nCount_RNA, y = nFeature_RNA, color = percent.mt)) + 
  geom_point() + 
  geom_smooth(method = "lm") +
    scale_x_log10() +       
  scale_y_log10() + 
  theme_classic() +
  geom_vline(xintercept = nCount.min) + 
  geom_hline(yintercept = nFeature.min) + 
  facet_wrap(~sample) +
  scale_colour_gradient(low = "gray90", high = "black", limits =c(0,100))
s3
## `geom_smooth()` using formula 'y ~ x'

s4 <- FeatureScatter(mouse.filtered,
                     feature1 = "nCount_RNA",
                     feature2 = "percent.mt",
                     group.by = 'sample',
                     cols = sample_colors,
                     shuffle = TRUE)
s4

Box plot

# Visualize the distribution of genes detected per cell via boxplot
b1 <- ggplot(mouse.filtered@meta.data,
       aes(x = sample, 
           y = log10(nFeature_RNA), 
           fill=sample)) + 
  geom_boxplot() + 
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
  theme(plot.title = element_text(hjust = 0.5, face="bold")) +
  ggtitle("Unique Genes / Cell / Sample") +
  scale_color_manual(values = sample_colors) +
  scale_fill_manual(values = sample_colors) +
  xlab("Sample")
b1

PCA, UMAP, Cluster

PCA

# SCT
mouse.sct <- SCTransform(mouse.filtered, verbose = FALSE)

# Cluster with the SCT workflow
mouse.sct <- RunPCA(object = mouse.sct, assay = "SCT")
## PC_ 1 
## Positive:  Cst3, C1qa, C1qb, Hexb, Ctss, C1qc, Ctsd, Itm2b, Tmsb4x, Trem2 
##     Fcer1g, Cx3cr1, Tyrobp, Ctsb, Cd81, Cd9, Csf1r, Ctsz, Laptm5, Ctsl 
##     Lgmn, Actb, Gpr34, Fau, Ly86, P2ry12, Eef1a1, Sparc, B2m, Fth1 
## Negative:  Meg3, Snhg11, R3hdm1, Ptprd, Gria2, Ahi1, Kcnq1ot1, Rian, Nrxn1, Syt1 
##     Gm26917, Snhg14, Arpp21, Syne1, Opcml, Erc2, Pde10a, Dlgap1, Celf2, Kalrn 
##     Epha5, Sorbs2, Dclk1, Ryr2, Atp2b1, Lrp1b, Tcf4, Phactr1, Nrxn3, Slc1a2 
## PC_ 2 
## Positive:  Hexb, Ctss, C1qa, C1qb, Ctsd, C1qc, Cst3, Trem2, Cx3cr1, Itm2b 
##     Gpr34, Csf1r, Lgmn, Cd81, Ctsl, Laptm5, Cd9, Ctsz, Fcer1g, Lpcat2 
##     Olfml3, Ifngr1, Fcrls, Lamp1, Tgfbr1, Serinc3, Mpeg1, Ly86, Selplg, Vsir 
## Negative:  Fth1, Rpl13, Eef1a1, Rps24, Rpl19, Fau, Rps29, Rpl9, Tmsb4x, Rplp1 
##     Rps3a1, Ppia, Rps4x, Tpt1, Rps27a, Rps5, Rpl3, Rps20, Rpl18a, Rps19 
##     Rps3, Rpl30, Rpl11, Rpsa, Rpl21, Cox8a, Ftl1, Rps10, Rps7, Rps2 
## PC_ 3 
## Positive:  Meg3, Snhg11, R3hdm1, Rian, Ahi1, Syt1, Arpp21, Snhg14, Celf2, Erc2 
##     Kalrn, Pde10a, Atp2b1, Sorbs2, Opcml, Kcnq1ot1, Ryr2, Epha5, Nrxn3, Dlgap1 
##     Phactr1, Mef2c, Homer1, Rgs9, Ptprd, Lingo2, Tmsb4x, A830018L16Rik, Cacna2d3, Gria1 
## Negative:  Slc1a2, Atp1a2, Neat1, Qk, Ptprz1, Slc1a3, Plp1, Bcan, Appl2, Daam2 
##     Ntsr2, Ndrg2, Mbp, Apoe, Sparcl1, Acsl3, Gm6145, Gm3764, Nwd1, Prex2 
##     Prdx6, Scd2, Plpp3, Slc7a11, Vegfa, C4b, Cspg5, Phkg1, Ptn, Sash1 
## PC_ 4 
## Positive:  Slc1a2, Atp1a2, Ntsr2, Ptprz1, Slc1a3, Gm3764, Bcan, Gm6145, Nwd1, Prex2 
##     Ndrg2, Cspg5, Phkg1, Slc7a11, Appl2, Vegfa, Slc4a4, Fgfr3, Acsl3, Prdx6 
##     Plpp3, Grin2c, Dtna, Slc7a10, Gpc5, Plcd4, Atp13a4, Msi2, Dclk1, Nrxn1 
## Negative:  Plp1, Mbp, Neat1, Mag, Gjc3, Enpp2, Phldb1, Mobp, Sept4, Aspa 
##     Ptgds, Gatm, Hapln2, Trf, Ccp110, Car2, C030029H02Rik, Aatk, Plekhh1, Pex5l 
##     Map7, Ugt8a, Mal, Anln, Ermn, Sox2ot, 1700047M11Rik, Erbin, Cntn2, Apod 
## PC_ 5 
## Positive:  Apoe, Cd63, Cd9, Ctsl, Mt3, Dbi, Lyz2, Cox8a, Ctsz, Ctsb 
##     Cox6c, Cst7, Bsg, Trem2, Lamp1, Cox4i1, Chchd10, Ndufa13, Timp2, Clu 
##     Gpx4, Selenow, Gm42418, Fth1, Gnas, Calm3, Camk2n1, Cyba, Ndufa4, Calm1 
## Negative:  Tmsb4x, Marcks, Actb, P2ry12, Rgs10, Cx3cr1, Kctd12, Rps9, Basp1, Mafb 
##     Rhob, Gng10, Pfn1, Aif1, Qk, Fau, Capza2, Rps4x, Neat1, Plp1 
##     Maf, Rps29, Rap1b, Txnip, Sall1, Cyth4, Rpl18a, Scoc, Selplg, Epb41l2
# Plot PCA
p1 <- DimPlot(object = mouse.sct, 
              reduction = "pca",
              group.by = "sample",
              cols = sample_colors,
              shuffle = TRUE) + NoLegend()
p1

# Cleanup
remove(mouse.filtered)

Significant PCs

First metric

# Determine percent of variation associated with each PC
stdv <- mouse.sct[["pca"]]@stdev
sum.stdv <- sum(mouse.sct[["pca"]]@stdev)
percent.stdv <- (stdv / sum.stdv) * 100

# Calculate cumulative percents for each PC
cumulative <- cumsum(percent.stdv)

# Determine which PC exhibits cumulative percent greater than 90% and
# and % variation associated with the PC as less than 5
co1 <- which(cumulative > 90 & percent.stdv < 5)[1]
co1
## [1] 41

Second metric

# Determine the difference between variation of PC and subsequent PC
co2 <- sort(which(
  (percent.stdv[1:length(percent.stdv) - 1] - 
     percent.stdv[2:length(percent.stdv)]) > 0.1), 
  decreasing = T)[1] + 1

# last point where change of % of variation is more than 0.1%.
co2
## [1] 14

Choose the minimum of these two metrics as the PCs covering the majority of the variation in the data.

# Minimum of the two calculation
min.pc <- min(co1, co2)
min.pc
## [1] 14

UMAP

# Run UMAP
mouse.sct <- RunUMAP(mouse.sct,
                     dims = 1:min.pc,
                     reduction = "pca",
                     n.components = 3) # set to 3 to use with VR

# Plot UMAP
DimPlot(mouse.sct)

Find clusters

# Determine the K-nearest neighbor graph
mouse.unannotated <- FindNeighbors(object = mouse.sct,
                                   assay = "SCT", # set as default after SCTransform
                                   reduction = "pca",
                                   dims = 1:min.pc)

# Determine the clusters for various resolutions
mouse.unannotated <- FindClusters(object = mouse.unannotated,
                                  algorithm = 1, # 1 = Louvain
                                  resolution = seq(0.1,0.5,by=0.1))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6005
## Number of edges: 198975
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9666
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6005
## Number of edges: 198975
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9503
## Number of communities: 10
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6005
## Number of edges: 198975
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9351
## Number of communities: 12
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6005
## Number of edges: 198975
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9235
## Number of communities: 13
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6005
## Number of edges: 198975
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9133
## Number of communities: 14
## Elapsed time: 0 seconds
mouse.unannotated$seurat_clusters <- mouse.unannotated$SCT_snn_res.0.5

Explore resolutions

# 0.1
DimPlot(mouse.unannotated,
        group.by = "SCT_snn_res.0.1",
        label = TRUE)

# 0.2
DimPlot(mouse.unannotated,
        group.by = "SCT_snn_res.0.2",
        label = TRUE)

# 0.3
DimPlot(mouse.unannotated,
        group.by = "SCT_snn_res.0.3",
        label = TRUE)

# 0.4
DimPlot(mouse.unannotated,
        group.by = "SCT_snn_res.0.4",
        label = TRUE)

# 0.5
DimPlot(mouse.unannotated,
        group.by = "SCT_snn_res.0.5",
        label = TRUE)

3D UMAP

embeddings <- mouse.unannotated@reductions$umap@cell.embeddings
embeddings <- cbind(embeddings, as.character(mouse.unannotated$seurat_clusters))
colnames(embeddings)[4] <- "seurat_clusters"
embeddings <- as.data.frame(embeddings)
embeddings$seurat_clusters <- factor(embeddings$seurat_clusters,
                                     levels = c("0","1","2","3","4","5","6",
                                                "7","8","9","10","11","12","13"))
cluster_colors <- c("brown","gray40","gray","red1","darkorange2","yellow","chartreuse1",
                    "forestgreen","cyan","steelblue","blue",
                    "plum1", "magenta1","purple1")

three.dim <- plot_ly(embeddings,
                     x = ~UMAP_1, 
                     y = ~UMAP_2, 
                     z = ~UMAP_3, 
                     color = ~seurat_clusters,
                     colors = cluster_colors,
                     size = 1) 
three.dim <- three.dim %>% add_markers() 
three.dim <- three.dim %>% layout(scene = list(xaxis = list(title = 'UMAP_1'), 
                                     yaxis = list(title = 'UMAP_2'), 
                                     zaxis = list(title = 'UMAP_3')))
three.dim

Potential Markers

Astrocytes

DefaultAssay(mouse.unannotated) <- "RNA"
mouse.unannotated <- NormalizeData(mouse.unannotated)
Idents(mouse.unannotated) <- "SCT_snn_res.0.5"
VlnPlot(mouse.unannotated,
        features = "Aqp4",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Gfap",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Gja1",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

B-cell

  • Cd19: expressed in B-cells and follicular dendritic cells
  • Fcrla: a B-cell specific protein in mice
  • Cd79a & Cd79b: together form BCR complex
  • Ms4a1: aka Cd20, expressed beginning at pro-B phase and progressively increases in concentration until maturity
  • Clusters 11, 20, and 21 are B cell clusters
VlnPlot(mouse.unannotated,
        features = "Cd79b",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Cd80",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

Endothelial cells

VlnPlot(mouse.unannotated,
        features = "Pecam1",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Cdh5",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Kdr",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Flt1",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Plvap",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

Fibroblasts

VlnPlot(mouse.unannotated,
        features = "Col1a1",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Col1a2",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Dcn",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Ogn",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Gas1",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

Mast cells

VlnPlot(mouse.unannotated,
        features = "Kit", # aka Cd117
        group.by = "SCT_snn_res.0.5")

Macrophage/Microglia

VlnPlot(mouse.unannotated,
        features = "Tmem119",
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Itgam", # aka Cd11b
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Cd14",
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Cd68",
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Ccr5",
        group.by = "SCT_snn_res.0.5")

Neurons

VlnPlot(mouse.unannotated,
        features = "Gad1",
        cols = cluster_colors,
        split.by = "seurat_clusters")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

VlnPlot(mouse.unannotated,
        features = "Gad2",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Slc32a1",
        cols = cluster_colors,
        split.by = "seurat_clusters")

Oligodendrocyte precursor cells

VlnPlot(mouse.unannotated,
        features = "Cspg5",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Gpr17",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Olig1",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

T-cell

  • Trac/Cd3d/Cd3e/Cd3g components of the TCR
  • Clusters 7 and 24 are T cells
VlnPlot(mouse.unannotated,
        features = "Cd3d",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

Shiny App

Cleanup object

mouse.unannotated@assays$RNA@var.features <- mouse.unannotated@assays$SCT@var.features
metadata <- mouse.unannotated@meta.data
metadata <- metadata[,c(15,16,2:14)]
mouse.unannotated@meta.data <- metadata
mouse.unannotated@assays$SCT@meta.features <- metadata
mouse.unannotated@assays$RNA@meta.features <- metadata

Output directory

# make shiny folder
DefaultAssay(mouse.unannotated) <- "RNA"
Idents(mouse.unannotated) <- mouse.unannotated$seurat_clusters
sc.config <- createConfig(mouse.unannotated)
makeShinyApp(mouse.unannotated, sc.config, gene.mapping = TRUE,
             shiny.title = "Adu_M_705A")