Setup

Set working directory

knitr::opts_knit$set(root.dir = "/research/labs/neurology/fryer/m214960/aducanumab/scripts/R")

Load libraries

# load libraries
library(cowplot)    # plot_grid()
library(dplyr)      # left_join()
library(ggplot2)    # ggplot()
library(gridExtra)  # grid.arrange()
library(harmony)    # RunHarmony()
library(parallel)   # detectCores()
library(plotly)     # plot_ly()
library(Seurat)     # Read10X_h5()
library(ShinyCell)  # makeShinyApp()
library(stringr)    # str_match()

Set variables and thresholds

# variables
sample_colors <- "orange"

# thresholds
nCount.min <- 200
nCount.max <- 10000
nFeature.min <- 100
complexity.cutoff <- 0.8
mt.cutoff <- 20
ribo.cutoff <- 20
hb.cutoff <- 3

# set seed
set.seed(8)

# work in parallel
options(mc.cores = detectCores() - 1)

Load data

# Load counts
prefix <- "../../cellbender/"
suffix <- "_fpr_0.05_filtered.h5"
mouse <- CreateSeuratObject(Read10X_h5(paste0(prefix,"959B_IgG_Female",suffix)))

# Load annotation file
if (file.exists("../../rObjects/annotation.rds")) {
  genes <- readRDS("../../rObjects/annotation.rds")
} else {
  gtf.file <- "../../refs/mouse_genes.gtf"
  genes <- rtracklayer::import(gtf.file)
  genes <- as.data.frame(genes)
  genes <- genes[genes$type == "gene",]
  saveRDS(genes, "../../rObjects/annotation.rds")
}

Metadata columns

# cell.complexity
mouse$cell.complexity <- log10(mouse$nFeature_RNA) / log10(mouse$nCount_RNA)

# percent.mt
mt.genes <- genes[genes$seqnames == "chrM",13]
mouse$percent.mt <- PercentageFeatureSet(mouse, features = mt.genes)
mt.genes
##  [1] "mt-Nd1"  "mt-Nd2"  "mt-Co1"  "mt-Co2"  "mt-Atp8" "mt-Atp6" "mt-Co3" 
##  [8] "mt-Nd3"  "mt-Nd4l" "mt-Nd4"  "mt-Nd5"  "mt-Nd6"  "mt-Cytb"
# percent.ribo
# ribosomal proteins begin with 'Rps' or 'Rpl' in this annotation file
# mitochondrial ribosomes start with 'Mrps' or 'Mrpl'
gene.names <- genes$gene_name
ribo <- gene.names[grep("^Rp[sl]", gene.names)]
mt.ribo <- gene.names[grep("^Mrp[sl]", gene.names)]
ribo.combined <- c(mt.ribo,ribo)
mouse$percent.ribo.protein <- PercentageFeatureSet(mouse, features = ribo.combined)
ribo.combined
##   [1] "Mrpl15"     "Mrpl30"     "Mrps9"      "Mrpl44"     "Mrps14"    
##   [6] "Mrpl41"     "Mrps2"      "Mrps5"      "Mrps26"     "Mrps28"    
##  [11] "Mrpl47"     "Mrpl24"     "Mrpl9"      "Mrps21"     "Mrpl50"    
##  [16] "Mrpl37"     "Mrps15"     "Mrpl20"     "Mrpl33"     "Mrpl1"     
##  [21] "Mrps18c"    "Mrps17"     "Mrps33"     "Mrpl35"     "Mrpl19"    
##  [26] "Mrpl53"     "Mrps25"     "Mrpl51"     "Mrps35"     "Mrps12"    
##  [31] "Mrpl46"     "Mrps11"     "Mrpl48"     "Mrpl17"     "Mrpl23"    
##  [36] "Mrps31"     "Mrpl34"     "Mrpl4"      "Mrps22"     "Mrpl3"     
##  [41] "Mrpl54"     "Mrpl42"     "Mrps24"     "Mrpl22"     "Mrpl55"    
##  [46] "Mrps23"     "Mrpl27"     "Mrpl10"     "Mrpl45"     "Mrpl58"    
##  [51] "Mrps7"      "Mrpl38"     "Mrpl12"     "Mrpl32"     "Mrpl36"    
##  [56] "Mrps27"     "Mrps36"     "Mrps30"     "Mrps16"     "Mrpl52"    
##  [61] "Mrpl57"     "Mrpl13"     "Mrpl40"     "Mrpl39"     "Mrps6"     
##  [66] "Mrpl18"     "Mrps34"     "Mrpl28"     "Mrps18b"    "Mrpl14"    
##  [71] "Mrps18a"    "Mrpl2"      "Mrps10"     "Mrpl21"     "Mrpl11"    
##  [76] "Mrpl49"     "Mrpl16"     "Mrpl43"     "Rpl7"       "Rpl31"     
##  [81] "Rpl37a"     "Rps6kc1"    "Rpl7a"      "Rpl12"      "Rpl35"     
##  [86] "Rps21"      "Rpl22l1"    "Rps3a1"     "Rps27"      "Rpl34"     
##  [91] "Rps20"      "Rps6"       "Rps8"       "Rps6ka1"    "Rpl11"     
##  [96] "Rpl22"      "Rpl9"       "Rpl5"       "Rplp0"      "Rpl6"      
## [101] "Rpl21"      "Rpl32"      "Rps9"       "Rpl28"      "Rps5"      
## [106] "Rps19"      "Rps16"      "Rps11"      "Rpl13a"     "Rpl18"     
## [111] "Rps17"      "Rps3"       "Rpl27a"     "Rps13"      "Rps15a"    
## [116] "Rplp2"      "Rpl18a"     "Rpl13"      "Rps25"      "Rpl10-ps3" 
## [121] "Rplp1"      "Rpl4"       "Rps27l"     "Rpl29"      "Rps27rt"   
## [126] "Rpsa"       "Rpl14"      "Rps12"      "Rps15"      "Rpl41"     
## [131] "Rps26"      "Rps27a"     "Rpl26"      "Rpl23a"     "Rpl9-ps1"  
## [136] "Rps6kb1"    "Rpl23"      "Rpl19"      "Rpl27"      "Rpl38"     
## [141] "Rps7"       "Rpl10l"     "Rps29"      "Rpl36al"    "Rps6kl1"   
## [146] "Rps6ka5"    "Rps23"      "Rpl15"      "Rps24"      "Rpl36a-ps1"
## [151] "Rpl37"      "Rpl30"      "Rpl8"       "Rpl3"       "Rps19bp1"  
## [156] "Rpl39l"     "Rpl35a"     "Rpl24"      "Rps6ka2"    "Rps2"      
## [161] "Rpl3l"      "Rps10"      "Rpl10a"     "Rps28"      "Rps18"     
## [166] "Rpl7l1"     "Rpl36"      "Rpl36-ps4"  "Rps14"      "Rpl17"     
## [171] "Rps6kb2"    "Rps6ka4"    "Rpl9-ps6"   "Rpl39"      "Rpl10"     
## [176] "Rps4x"      "Rps6ka6"    "Rpl36a"     "Rps6ka3"
# percent.hb
# percent.hb - hemoglobin proteins begin with 'Hbb' or 'Hba' for mouse
hb.genes <- gene.names[grep("^Hb[ba]-", gene.names)]
mouse$percent.hb <- PercentageFeatureSet(mouse, features = hb.genes)
hb.genes
## [1] "Hbb-bt"  "Hbb-bs"  "Hbb-bh2" "Hbb-bh1" "Hbb-y"   "Hba-x"   "Hba-a1" 
## [8] "Hba-a2"
# sample
mouse$sample <- "IgG.F.959B "

Pre-filtering QC

Number of cells

# Visualize the number of cell counts per sample
data <- as.data.frame(table(mouse$sample))
colnames(data) <- c("sample","frequency")

ncells1 <- ggplot(data, aes(x = sample, y = frequency, fill = sample)) + 
  geom_col() +
  theme_classic() +
  geom_text(aes(label = frequency), 
            position=position_dodge(width=0.9), 
            vjust=-0.25) +
  scale_fill_manual(values = sample_colors) + 
  ggtitle("Raw: cells per sample") +
  theme(legend.position =  "none") + 
  scale_y_continuous(breaks = seq(0,10000, by = 2000), limits = c(0,10000)) +
  theme(axis.text.x = element_text(angle = 45, hjust=1))
ncells1

Density plots

# Visualize nCount_RNA
den1 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = nCount_RNA,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  scale_color_manual(values = sample_colors) +
  scale_fill_manual(values = sample_colors) +
  xlab("nCount_RNA") +
  ylab("Density") +
  theme(legend.position =  "none") +
  geom_vline(xintercept = nCount.min) +
  geom_vline(xintercept = nCount.max) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize nFeature_RNA
den2 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = nFeature_RNA,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("nFeature_RNA") +
  ylab("Density") +
  geom_vline(xintercept = nFeature.min) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize cell complexity
# Quality cells are usually above 0.85
den3 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = cell.complexity,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("Cell Complexity (log10(nFeature/nCount))") +
  ylab("Density") +
  geom_vline(xintercept = complexity.cutoff) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.mt
den4 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = percent.mt,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_continuous(n.breaks = 4) +
  geom_vline(xintercept = mt.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Mitochondrial Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.ribo.protein
den5 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = percent.ribo.protein,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  geom_vline(xintercept = ribo.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Ribosomal Protein Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.hb
den6 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = percent.hb,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  geom_vline(xintercept = hb.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Hemoglobin Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Arrange graphs in grid
plots <- list(den1,den2,den3,den4,den5,den6)
layout <- rbind(c(1,4),c(2,5),c(3,6))
grid <- grid.arrange(grobs = plots, layout_matrix = layout)

Violin plots

# nFeature, nCount, and cell.complexity violins
v1 <- VlnPlot(mouse,
              features = c("nFeature_RNA", "nCount_RNA","cell.complexity"),
              ncol = 3,
              group.by = 'sample',
              cols = sample_colors,
              pt.size = 0)
v1

#  percent violins
v2 <- VlnPlot(mouse,
              features = c("percent.mt","percent.ribo.protein","percent.hb"),
              ncol = 3,
              group.by = 'sample',
              cols = sample_colors,
              pt.size = 0)
v2

Scatter plots

s1 <- ggplot(
  mouse@meta.data,
  aes(x = nCount_RNA, y = nFeature_RNA, color = percent.mt)) + 
  geom_point() + 
  geom_smooth(method = "lm") +
    scale_x_log10() +       
  scale_y_log10() + 
  theme_classic() +
  geom_vline(xintercept = nCount.min) + 
  geom_hline(yintercept = nFeature.min) + 
  facet_wrap(~sample) +
  scale_colour_gradient(low = "gray90", high = "black", limits =c(0,100))
s1
## `geom_smooth()` using formula 'y ~ x'

s2 <- FeatureScatter(mouse,
                     feature1 = "nCount_RNA",
                     feature2 = "percent.mt",
                     group.by = 'sample',
                     cols = sample_colors,
                     shuffle = TRUE)
s2

Filtering

Cell-level filtering

# filter
mouse.filtered <- subset(mouse,
                        subset = (nCount_RNA > nCount.min) &
                          (nCount_RNA < nCount.max) &
                          (nFeature_RNA > nFeature.min) &
                          (cell.complexity > complexity.cutoff) &
                          (percent.mt < mt.cutoff) &
                          (percent.ribo.protein < ribo.cutoff) &
                          (percent.hb < hb.cutoff))

# print cells removed
print(paste0(dim(mouse)[2] - dim(mouse.filtered)[2]," cells removed"))
## [1] "3758 cells removed"

Gene-level filtering

Remove lowly expressed genes. We will keep genes that have at least 1 count in 10 cells.

# filter genes
counts <- GetAssayData(object = mouse.filtered, slot = "counts")
nonzero <- counts > 0  # produces logical
keep <- Matrix::rowSums(nonzero) >= 10  # sum the true/false
counts.filtered <- counts[keep,]  # keep certain genes

# overwrite mouse.filtered
mouse.filtered <- CreateSeuratObject(counts.filtered, 
                                    meta.data = mouse.filtered@meta.data)

# print features removed
print(paste0(dim(counts)[1] - dim(counts.filtered)[1], " features removed"))
## [1] "21925 features removed"

Remove mitochondrial genes.

# remove mt.genes
counts <- GetAssayData(object = mouse.filtered, slot = "counts")
keep <- !rownames(counts) %in% mt.genes # false when mt.gene
counts.filtered <- counts[keep,]

# overwrite mouse.filtered
mouse.filtered <- CreateSeuratObject(counts.filtered, 
                                    meta.data = mouse.filtered@meta.data)

# print features removed
print(paste0(dim(counts)[1] - dim(counts.filtered)[1], " features removed"))
## [1] "13 features removed"
# cleanup data
remove(mouse,counts,counts.filtered,nonzero)

Post-filtering QC

Number of cells

# Visualize the number of cell counts per sample
data <- as.data.frame(table(mouse.filtered$sample))
colnames(data) <- c("sample","frequency")

ncells2 <- ggplot(data, aes(x = sample, y = frequency, fill = sample)) + 
  geom_col() +
  theme_classic() +
  geom_text(aes(label = frequency), 
            position=position_dodge(width=0.9), 
            vjust=-0.25) +
  scale_fill_manual(values = sample_colors) + 
  scale_y_continuous(breaks = seq(0,10000, by = 2000), limits = c(0,10000)) +
  ggtitle("Filtered: cells per sample") +
  theme(legend.position =  "none") + 
  theme(axis.text.x = element_text(angle = 45, hjust=1))

# Arrange graphs in grid
plots <- list(ncells1,ncells2)
layout <- cbind(c(1),c(2))
grid <- grid.arrange(grobs = plots, layout_matrix = layout)

Density plots

# Visualize nCount_RNA
den1 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = nCount_RNA,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  scale_color_manual(values = sample_colors) +
  scale_fill_manual(values = sample_colors) +
  xlab("nCount_RNA") +
  ylab("Density") +
  theme(legend.position =  "none") +
  geom_vline(xintercept = nCount.min) +
  geom_vline(xintercept = nCount.max) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize nFeature_RNA
den2 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = nFeature_RNA,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("nFeature_RNA") +
  ylab("Density") +
  geom_vline(xintercept = nFeature.min) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize cell complexity
# Quality cells are usually above 0.85
den3 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = cell.complexity,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("Cell Complexity (log10(nFeature/nCount))") +
  ylab("Density") +
  geom_vline(xintercept = complexity.cutoff) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.mt
den4 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = percent.mt,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_continuous(n.breaks = 4) +
  geom_vline(xintercept = mt.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Mitochondrial Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.ribo.protein
den5 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = percent.ribo.protein,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  geom_vline(xintercept = ribo.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Ribosomal Protein Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.hb
den6 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = percent.hb,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  geom_vline(xintercept = hb.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Hemoglobin Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Arrange graphs in grid
plots <- list(den1,den2,den3,den4,den5,den6)
layout <- rbind(c(1,4),c(2,5),c(3,6))
grid <- grid.arrange(grobs = plots, layout_matrix = layout)

Violin plots

# nFeature, nCount, and cell.complexity violins
v3 <- VlnPlot(mouse.filtered,
              features = c("nFeature_RNA", "nCount_RNA","cell.complexity"),
              ncol = 3,
              group.by = 'sample',
              cols = sample_colors,
              pt.size = 0)
v3

#  percent violins
v4 <- VlnPlot(mouse.filtered,
              features = c("percent.mt","percent.ribo.protein","percent.hb"),
              ncol = 3,
              group.by = 'sample',
              cols = sample_colors,
              pt.size = 0)
v4

Scatter plots

s3 <- ggplot(
  mouse.filtered@meta.data,
  aes(x = nCount_RNA, y = nFeature_RNA, color = percent.mt)) + 
  geom_point() + 
  geom_smooth(method = "lm") +
    scale_x_log10() +       
  scale_y_log10() + 
  theme_classic() +
  geom_vline(xintercept = nCount.min) + 
  geom_hline(yintercept = nFeature.min) + 
  facet_wrap(~sample) +
  scale_colour_gradient(low = "gray90", high = "black", limits =c(0,100))
s3
## `geom_smooth()` using formula 'y ~ x'

s4 <- FeatureScatter(mouse.filtered,
                     feature1 = "nCount_RNA",
                     feature2 = "percent.mt",
                     group.by = 'sample',
                     cols = sample_colors,
                     shuffle = TRUE)
s4

Box plot

# Visualize the distribution of genes detected per cell via boxplot
b1 <- ggplot(mouse.filtered@meta.data,
       aes(x = sample, 
           y = log10(nFeature_RNA), 
           fill=sample)) + 
  geom_boxplot() + 
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
  theme(plot.title = element_text(hjust = 0.5, face="bold")) +
  ggtitle("Unique Genes / Cell / Sample") +
  scale_color_manual(values = sample_colors) +
  scale_fill_manual(values = sample_colors) +
  xlab("Sample")
b1

PCA, UMAP, Cluster

SCTransform and PCA

# SCT
mouse.sct <- SCTransform(mouse.filtered, verbose = FALSE)

# Cluster with the SCT workflow
mouse.sct <- RunPCA(object = mouse.sct, assay = "SCT")
## PC_ 1 
## Positive:  Apoe, Tmsb4x, Rps29, Fau, Trem2, Tyrobp, Rplp1, Actb, Eef1a1, Tpt1 
##     B2m, Rps24, Rpl41, Cd9, Ctsl, Rpl13, Rpl37a, Rpl37, Lyz2, Rpl23 
##     Rps28, Rpl35a, Rps23, Rpl39, Fth1, Rpl21, Rpl19, Rpl30, Ppia, Rpl32 
## Negative:  Meg3, Snhg11, Gm26917, Cdr1os, Syne1, Chd5, Pclo, Trank1, Luc7l3, Map1b 
##     Rbm25, Arpp21, Kmt2a, Ahi1, Syt1, Ankrd11, Gria2, Map1a, Srsf11, Matk 
##     Srrm2, Pnisr, Tnrc6c, Plec, Snhg14, Srek1, Ryr2, Mical2, Chd3, Brd9 
## PC_ 2 
## Positive:  Trem2, Cd9, Ctsl, H2-D1, Mir99ahg, Cd63, Lgals3bp, Nav2, Lyz2, B2m 
##     Dip2b, Clk4, Cln3, Taf1d, Ank, Angel2, Upf2, Mdm4, Marcks, Gm47283 
##     Cst7, Ighm, Tpcn1, E2f3, Myo18b, Madd, Tyrobp, Neat1, Chd7, Foxp1 
## Negative:  Mt3, Fth1, Cox8a, Dbi, Rpl41, Chchd2, Cox6c, Cox4i1, Mt1, Gapdh 
##     Uqcr11, Atp5md, Cox6b1, Cox5b, Chchd10, Ndufa4, Cox7c, Rpl38, S100a1, Selenow 
##     Gpx4, Atp5j, Ckb, Atp5h, Ndufb1-ps, Atp5l, Ppia, Elob, Uqcrh, Calm1 
## PC_ 3 
## Positive:  Tmsb4x, Fau, Snhg11, Rplp1, Tyrobp, Rps29, Rps24, Meg3, Cdr1os, Rps28 
##     Cst7, Fth1, Chd5, Lyz2, Rpl35a, Eef1a1, Trank1, Rpl32, Rps23, Tpt1 
##     B2m, Actb, Rpl37a, Cd52, Rpl23, Rps4x, Ftl1, Rpl19, Rpl21, Rps3 
## Negative:  Atp1a2, Bcan, Slc1a2, Sparcl1, Gnao1, Slc4a4, Ptprz1, Ntm, Msi2, Slc1a3 
##     Gm3764, Appl2, Vegfa, Grin2c, Slc6a1, Zbtb20, Ddhd1, Daam2, Mir100hg, Ntsr2 
##     Clu, Acsl3, Mt3, Plpp3, Prex2, Cpe, Neat1, Itih3, Grk3, Nwd1 
## PC_ 4 
## Positive:  Apoe, Slc1a2, Rplp1, Rps24, Cpe, Rps29, Fth1, Sparcl1, Atp1a2, Mt1 
##     Rpl37, Aldoc, Fau, Glul, Tmsb4x, Rps28, Rpl37a, Scd2, Rpl35a, Gpm6b 
##     Rps3, Rpl13, Rpl9, Rpl32, Rpl21, Rps3a1, Rpl17, Rpl19, Rpsa, Rps23 
## Negative:  Calml4, Rbp1, Cab39l, 2900040C04Rik, Ttr, Atp5g1, Rdh5, Cdkn1c, Hist1h2bc, Mdh1 
##     Dstn, Enpp2, Uqcrb, Kl, Acaa2, Car2, Stk39, Ndufa11, Lgals1, Ezr 
##     Mcee, Prdx5, Pcp4l1, Clic6, Ecrg4, Cul4b, Chchd10, Rsph1, Ptgds, Cox8b 
## PC_ 5 
## Positive:  Celf4, Cpe, Tuba1a, Nrgn, Prnp, Pcsk1n, Tcf4, Nfib, Rtn1, Nsg2 
##     Snap25, Fbxw7, Selenow, Camk2a, Dlx6os1, Atp1b1, Mir124a-1hg, Sorbs2, Nrxn3, Phactr1 
##     Kif5c, Aldoc, Camk2n1, Thra, Elavl3, 2900097C17Rik, Kcnq1ot1, Dhx57, Bcl11a, Camk2b 
## Negative:  Atp1a2, Syne1, Gm26917, Luc7l3, Vegfa, Ddhd1, Bcan, Daam2, Srek1, Cst7 
##     Grin2c, Apoe, Ankrd11, Kmt2a, Pnisr, Grk3, Myo6, C4b, Gnao1, Lyz2 
##     Rbm25, Nwd1, Ntm, Tnik, Tyrobp, Plcd4, Utp14b, Dst, Arglu1, Srsf11
# Plot PCA
p1 <- DimPlot(object = mouse.sct, 
              reduction = "pca",
              group.by = "sample",
              cols = sample_colors,
              shuffle = TRUE) + NoLegend()
p1

# Cleanup
remove(mouse.filtered)

Significant PCs

First metric

# Determine percent of variation associated with each PC
stdv <- mouse.sct[["pca"]]@stdev
sum.stdv <- sum(mouse.sct[["pca"]]@stdev)
percent.stdv <- (stdv / sum.stdv) * 100

# Calculate cumulative percents for each PC
cumulative <- cumsum(percent.stdv)

# Determine which PC exhibits cumulative percent greater than 90% and
# and % variation associated with the PC as less than 5
co1 <- which(cumulative > 90 & percent.stdv < 5)[1]
co1
## [1] 44

Second metric

# Determine the difference between variation of PC and subsequent PC
co2 <- sort(which(
  (percent.stdv[1:length(percent.stdv) - 1] - 
     percent.stdv[2:length(percent.stdv)]) > 0.1), 
  decreasing = T)[1] + 1

# last point where change of % of variation is more than 0.1%.
co2
## [1] 9

Choose the minimum of these two metrics as the PCs covering the majority of the variation in the data.

# Minimum of the two calculation
min.pc <- min(co1, co2)
min.pc
## [1] 9

UMAP

# Run UMAP
mouse.sct <- RunUMAP(mouse.sct,
                     dims = 1:min.pc,
                     reduction = "pca",
                     n.components = 3) # set to 3 to use with VR

# Plot UMAP
DimPlot(mouse.sct)

Find clusters

# Determine the K-nearest neighbor graph
mouse.unannotated <- FindNeighbors(object = mouse.sct,
                                   assay = "SCT", # set as default after SCTransform
                                   reduction = "pca",
                                   dims = 1:min.pc)

# Determine the clusters for various resolutions
mouse.unannotated <- FindClusters(object = mouse.unannotated,
                                  algorithm = 1, # 1 = Louvain
                                  resolution = seq(0.1,0.5,by=0.1))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1106
## Number of edges: 34907
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9499
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1106
## Number of edges: 34907
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9241
## Number of communities: 5
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1106
## Number of edges: 34907
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8983
## Number of communities: 5
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1106
## Number of edges: 34907
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8748
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1106
## Number of edges: 34907
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8516
## Number of communities: 6
## Elapsed time: 0 seconds
mouse.unannotated$seurat_clusters <- mouse.unannotated$SCT_snn_res.0.5

Explore resolutions

# 0.1
DimPlot(mouse.unannotated,
        group.by = "SCT_snn_res.0.1",
        label = TRUE)

# 0.2
DimPlot(mouse.unannotated,
        group.by = "SCT_snn_res.0.2",
        label = TRUE)

# 0.3
DimPlot(mouse.unannotated,
        group.by = "SCT_snn_res.0.3",
        label = TRUE)

# 0.4
DimPlot(mouse.unannotated,
        group.by = "SCT_snn_res.0.4",
        label = TRUE)

# 0.5
DimPlot(mouse.unannotated,
        group.by = "SCT_snn_res.0.5",
        label = TRUE)

3D UMAP

embeddings <- mouse.unannotated@reductions$umap@cell.embeddings
embeddings <- cbind(embeddings, as.character(mouse.unannotated$seurat_clusters))
colnames(embeddings)[4] <- "seurat_clusters"
embeddings <- as.data.frame(embeddings)
embeddings$seurat_clusters <- factor(embeddings$seurat_clusters,
                                     levels = c("0","1","2","3","4","5"))
cluster_colors <- c("gray","red1","orange","yellow","green",
                    "blue")

three.dim <- plot_ly(embeddings,
                     x = ~UMAP_1, 
                     y = ~UMAP_2, 
                     z = ~UMAP_3, 
                     color = ~seurat_clusters,
                     colors = cluster_colors,
                     size = 1) 
three.dim <- three.dim %>% add_markers() 
three.dim <- three.dim %>% layout(scene = list(xaxis = list(title = 'UMAP_1'), 
                                     yaxis = list(title = 'UMAP_2'), 
                                     zaxis = list(title = 'UMAP_3')))
three.dim

Potential Markers

Astrocytes

DefaultAssay(mouse.unannotated) <- "RNA"
mouse.unannotated <- NormalizeData(mouse.unannotated)
Idents(mouse.unannotated) <- "SCT_snn_res.0.5"
VlnPlot(mouse.unannotated,
        features = "Gfap",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Gja1",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

B-cell

  • Cd19: expressed in B-cells and follicular dendritic cells
  • Fcrla: a B-cell specific protein in mice
  • Cd79a & Cd79b: together form BCR complex
  • Ms4a1: aka Cd20, expressed beginning at pro-B phase and progressively increases in concentration until maturity
  • Clusters 11, 20, and 21 are B cell clusters
VlnPlot(mouse.unannotated,
        features = "Cd79b",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

Endothelial cells

VlnPlot(mouse.unannotated,
        features = "Pecam1",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

Fibroblasts

Mast cells

Macrophage/Microglia

VlnPlot(mouse.unannotated,
        features = "Tmem119",
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Itgam", # aka Cd11b
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Cd14",
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Cd68",
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Ccr5",
        group.by = "SCT_snn_res.0.5")

Neurons

Oligodendrocyte precursor cells

VlnPlot(mouse.unannotated,
        features = "Cspg5",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

T-cell

  • Trac/Cd3d/Cd3e/Cd3g components of the TCR

Shiny App

Cleanup object

mouse.unannotated@assays$RNA@var.features <- mouse.unannotated@assays$SCT@var.features
metadata <- mouse.unannotated@meta.data
metadata <- metadata[,c(15,16,2:14)]
mouse.unannotated@meta.data <- metadata
mouse.unannotated@assays$SCT@meta.features <- metadata
mouse.unannotated@assays$RNA@meta.features <- metadata

Output directory

# make shiny folder
DefaultAssay(mouse.unannotated) <- "RNA"
Idents(mouse.unannotated) <- mouse.unannotated$seurat_clusters
sc.config <- createConfig(mouse.unannotated)
makeShinyApp(mouse.unannotated, sc.config, gene.mapping = TRUE,
             shiny.title = "IgG_F_959B ")