Setup

Set working directory

knitr::opts_knit$set(root.dir = "/research/labs/neurology/fryer/m214960/aducanumab/scripts/R")

Load libraries

# load libraries
library(cowplot)    # plot_grid()
library(dplyr)      # left_join()
library(ggplot2)    # ggplot()
library(gridExtra)  # grid.arrange()
library(harmony)    # RunHarmony()
library(parallel)   # detectCores()
library(plotly)     # plot_ly()
library(Seurat)     # Read10X_h5()
library(ShinyCell)  # makeShinyApp()
library(stringr)    # str_match()

Set variables and thresholds

# variables
sample_colors <- "yellow"

# thresholds
nCount.min <- 200
nCount.max <- 10000
nFeature.min <- 100
complexity.cutoff <- 0.8
mt.cutoff <- 20
ribo.cutoff <- 20
hb.cutoff <- 3

# set seed
set.seed(8)

# work in parallel
options(mc.cores = detectCores() - 1)

Load data

# Load counts
prefix <- "../../cellbender/"
suffix <- "_fpr_0.05_filtered.h5"
mouse <- CreateSeuratObject(Read10X_h5(paste0(prefix,"823A_IgG_Male",suffix)))

# Load annotation file
if (file.exists("../../rObjects/annotation.rds")) {
  genes <- readRDS("../../rObjects/annotation.rds")
} else {
  gtf.file <- "../../refs/mouse_genes.gtf"
  genes <- rtracklayer::import(gtf.file)
  genes <- as.data.frame(genes)
  genes <- genes[genes$type == "gene",]
  saveRDS(genes, "../../rObjects/annotation.rds")
}

Metadata columns

# cell.complexity
mouse$cell.complexity <- log10(mouse$nFeature_RNA) / log10(mouse$nCount_RNA)

# percent.mt
mt.genes <- genes[genes$seqnames == "chrM",13]
mouse$percent.mt <- PercentageFeatureSet(mouse, features = mt.genes)
mt.genes
##  [1] "mt-Nd1"  "mt-Nd2"  "mt-Co1"  "mt-Co2"  "mt-Atp8" "mt-Atp6" "mt-Co3" 
##  [8] "mt-Nd3"  "mt-Nd4l" "mt-Nd4"  "mt-Nd5"  "mt-Nd6"  "mt-Cytb"
# percent.ribo
# ribosomal proteins begin with 'Rps' or 'Rpl' in this annotation file
# mitochondrial ribosomes start with 'Mrps' or 'Mrpl'
gene.names <- genes$gene_name
ribo <- gene.names[grep("^Rp[sl]", gene.names)]
mt.ribo <- gene.names[grep("^Mrp[sl]", gene.names)]
ribo.combined <- c(mt.ribo,ribo)
mouse$percent.ribo.protein <- PercentageFeatureSet(mouse, features = ribo.combined)
ribo.combined
##   [1] "Mrpl15"     "Mrpl30"     "Mrps9"      "Mrpl44"     "Mrps14"    
##   [6] "Mrpl41"     "Mrps2"      "Mrps5"      "Mrps26"     "Mrps28"    
##  [11] "Mrpl47"     "Mrpl24"     "Mrpl9"      "Mrps21"     "Mrpl50"    
##  [16] "Mrpl37"     "Mrps15"     "Mrpl20"     "Mrpl33"     "Mrpl1"     
##  [21] "Mrps18c"    "Mrps17"     "Mrps33"     "Mrpl35"     "Mrpl19"    
##  [26] "Mrpl53"     "Mrps25"     "Mrpl51"     "Mrps35"     "Mrps12"    
##  [31] "Mrpl46"     "Mrps11"     "Mrpl48"     "Mrpl17"     "Mrpl23"    
##  [36] "Mrps31"     "Mrpl34"     "Mrpl4"      "Mrps22"     "Mrpl3"     
##  [41] "Mrpl54"     "Mrpl42"     "Mrps24"     "Mrpl22"     "Mrpl55"    
##  [46] "Mrps23"     "Mrpl27"     "Mrpl10"     "Mrpl45"     "Mrpl58"    
##  [51] "Mrps7"      "Mrpl38"     "Mrpl12"     "Mrpl32"     "Mrpl36"    
##  [56] "Mrps27"     "Mrps36"     "Mrps30"     "Mrps16"     "Mrpl52"    
##  [61] "Mrpl57"     "Mrpl13"     "Mrpl40"     "Mrpl39"     "Mrps6"     
##  [66] "Mrpl18"     "Mrps34"     "Mrpl28"     "Mrps18b"    "Mrpl14"    
##  [71] "Mrps18a"    "Mrpl2"      "Mrps10"     "Mrpl21"     "Mrpl11"    
##  [76] "Mrpl49"     "Mrpl16"     "Mrpl43"     "Rpl7"       "Rpl31"     
##  [81] "Rpl37a"     "Rps6kc1"    "Rpl7a"      "Rpl12"      "Rpl35"     
##  [86] "Rps21"      "Rpl22l1"    "Rps3a1"     "Rps27"      "Rpl34"     
##  [91] "Rps20"      "Rps6"       "Rps8"       "Rps6ka1"    "Rpl11"     
##  [96] "Rpl22"      "Rpl9"       "Rpl5"       "Rplp0"      "Rpl6"      
## [101] "Rpl21"      "Rpl32"      "Rps9"       "Rpl28"      "Rps5"      
## [106] "Rps19"      "Rps16"      "Rps11"      "Rpl13a"     "Rpl18"     
## [111] "Rps17"      "Rps3"       "Rpl27a"     "Rps13"      "Rps15a"    
## [116] "Rplp2"      "Rpl18a"     "Rpl13"      "Rps25"      "Rpl10-ps3" 
## [121] "Rplp1"      "Rpl4"       "Rps27l"     "Rpl29"      "Rps27rt"   
## [126] "Rpsa"       "Rpl14"      "Rps12"      "Rps15"      "Rpl41"     
## [131] "Rps26"      "Rps27a"     "Rpl26"      "Rpl23a"     "Rpl9-ps1"  
## [136] "Rps6kb1"    "Rpl23"      "Rpl19"      "Rpl27"      "Rpl38"     
## [141] "Rps7"       "Rpl10l"     "Rps29"      "Rpl36al"    "Rps6kl1"   
## [146] "Rps6ka5"    "Rps23"      "Rpl15"      "Rps24"      "Rpl36a-ps1"
## [151] "Rpl37"      "Rpl30"      "Rpl8"       "Rpl3"       "Rps19bp1"  
## [156] "Rpl39l"     "Rpl35a"     "Rpl24"      "Rps6ka2"    "Rps2"      
## [161] "Rpl3l"      "Rps10"      "Rpl10a"     "Rps28"      "Rps18"     
## [166] "Rpl7l1"     "Rpl36"      "Rpl36-ps4"  "Rps14"      "Rpl17"     
## [171] "Rps6kb2"    "Rps6ka4"    "Rpl9-ps6"   "Rpl39"      "Rpl10"     
## [176] "Rps4x"      "Rps6ka6"    "Rpl36a"     "Rps6ka3"
# percent.hb
# percent.hb - hemoglobin proteins begin with 'Hbb' or 'Hba' for mouse
hb.genes <- gene.names[grep("^Hb[ba]-", gene.names)]
mouse$percent.hb <- PercentageFeatureSet(mouse, features = hb.genes)
hb.genes
## [1] "Hbb-bt"  "Hbb-bs"  "Hbb-bh2" "Hbb-bh1" "Hbb-y"   "Hba-x"   "Hba-a1" 
## [8] "Hba-a2"
# sample
mouse$sample <- "IgG.M.823A"

Pre-filtering QC

Number of cells

# Visualize the number of cell counts per sample
data <- as.data.frame(table(mouse$sample))
colnames(data) <- c("sample","frequency")

ncells1 <- ggplot(data, aes(x = sample, y = frequency, fill = sample)) + 
  geom_col() +
  theme_classic() +
  geom_text(aes(label = frequency), 
            position=position_dodge(width=0.9), 
            vjust=-0.25) +
  scale_fill_manual(values = sample_colors) + 
  ggtitle("Raw: cells per sample") +
  theme(legend.position =  "none") + 
  scale_y_continuous(breaks = seq(0,20000, by = 2000), limits = c(0,20000)) +
  theme(axis.text.x = element_text(angle = 45, hjust=1))
ncells1

Density plots

# Visualize nCount_RNA
den1 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = nCount_RNA,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  scale_color_manual(values = sample_colors) +
  scale_fill_manual(values = sample_colors) +
  xlab("nCount_RNA") +
  ylab("Density") +
  theme(legend.position =  "none") +
  geom_vline(xintercept = nCount.min) +
  geom_vline(xintercept = nCount.max) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize nFeature_RNA
den2 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = nFeature_RNA,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("nFeature_RNA") +
  ylab("Density") +
  geom_vline(xintercept = nFeature.min) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize cell complexity
# Quality cells are usually above 0.85
den3 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = cell.complexity,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("Cell Complexity (log10(nFeature/nCount))") +
  ylab("Density") +
  geom_vline(xintercept = complexity.cutoff) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.mt
den4 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = percent.mt,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_continuous(n.breaks = 4) +
  geom_vline(xintercept = mt.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Mitochondrial Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.ribo.protein
den5 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = percent.ribo.protein,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  geom_vline(xintercept = ribo.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Ribosomal Protein Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.hb
den6 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = percent.hb,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  geom_vline(xintercept = hb.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Hemoglobin Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Arrange graphs in grid
plots <- list(den1,den2,den3,den4,den5,den6)
layout <- rbind(c(1,4),c(2,5),c(3,6))
grid <- grid.arrange(grobs = plots, layout_matrix = layout)

Violin plots

# nFeature, nCount, and cell.complexity violins
v1 <- VlnPlot(mouse,
              features = c("nFeature_RNA", "nCount_RNA","cell.complexity"),
              ncol = 3,
              group.by = 'sample',
              cols = sample_colors,
              pt.size = 0)
v1

#  percent violins
v2 <- VlnPlot(mouse,
              features = c("percent.mt","percent.ribo.protein","percent.hb"),
              ncol = 3,
              group.by = 'sample',
              cols = sample_colors,
              pt.size = 0)
v2

Scatter plots

s1 <- ggplot(
  mouse@meta.data,
  aes(x = nCount_RNA, y = nFeature_RNA, color = percent.mt)) + 
  geom_point() + 
  geom_smooth(method = "lm") +
    scale_x_log10() +       
  scale_y_log10() + 
  theme_classic() +
  geom_vline(xintercept = nCount.min) + 
  geom_hline(yintercept = nFeature.min) + 
  facet_wrap(~sample) +
  scale_colour_gradient(low = "gray90", high = "black", limits =c(0,100))
s1
## `geom_smooth()` using formula 'y ~ x'

s2 <- FeatureScatter(mouse,
                     feature1 = "nCount_RNA",
                     feature2 = "percent.mt",
                     group.by = 'sample',
                     cols = sample_colors,
                     shuffle = TRUE)
s2

Filtering

Cell-level filtering

# filter
mouse.filtered <- subset(mouse,
                        subset = (nCount_RNA > nCount.min) &
                          (nCount_RNA < nCount.max) &
                          (nFeature_RNA > nFeature.min) &
                          (cell.complexity > complexity.cutoff) &
                          (percent.mt < mt.cutoff) &
                          (percent.ribo.protein < ribo.cutoff) &
                          (percent.hb < hb.cutoff))

# print cells removed
print(paste0(dim(mouse)[2] - dim(mouse.filtered)[2]," cells removed"))
## [1] "1565 cells removed"

Gene-level filtering

Remove lowly expressed genes. We will keep genes that have at least 1 count in 10 cells.

# filter genes
counts <- GetAssayData(object = mouse.filtered, slot = "counts")
nonzero <- counts > 0  # produces logical
keep <- Matrix::rowSums(nonzero) >= 10  # sum the true/false
counts.filtered <- counts[keep,]  # keep certain genes

# overwrite mouse.filtered
mouse.filtered <- CreateSeuratObject(counts.filtered, 
                                    meta.data = mouse.filtered@meta.data)

# print features removed
print(paste0(dim(counts)[1] - dim(counts.filtered)[1], " features removed"))
## [1] "15083 features removed"

Remove mitochondrial genes.

# remove mt.genes
counts <- GetAssayData(object = mouse.filtered, slot = "counts")
keep <- !rownames(counts) %in% mt.genes # false when mt.gene
counts.filtered <- counts[keep,]

# overwrite mouse.filtered
mouse.filtered <- CreateSeuratObject(counts.filtered, 
                                    meta.data = mouse.filtered@meta.data)

# print features removed
print(paste0(dim(counts)[1] - dim(counts.filtered)[1], " features removed"))
## [1] "13 features removed"
# cleanup data
remove(mouse,counts,counts.filtered,nonzero)

Post-filtering QC

Number of cells

# Visualize the number of cell counts per sample
data <- as.data.frame(table(mouse.filtered$sample))
colnames(data) <- c("sample","frequency")

ncells2 <- ggplot(data, aes(x = sample, y = frequency, fill = sample)) + 
  geom_col() +
  theme_classic() +
  geom_text(aes(label = frequency), 
            position=position_dodge(width=0.9), 
            vjust=-0.25) +
  scale_fill_manual(values = sample_colors) + 
  scale_y_continuous(breaks = seq(0,20000, by = 2000), limits = c(0,20000)) +
  ggtitle("Filtered: cells per sample") +
  theme(legend.position =  "none") + 
  theme(axis.text.x = element_text(angle = 45, hjust=1))

# Arrange graphs in grid
plots <- list(ncells1,ncells2)
layout <- cbind(c(1),c(2))
grid <- grid.arrange(grobs = plots, layout_matrix = layout)

Density plots

# Visualize nCount_RNA
den1 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = nCount_RNA,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  scale_color_manual(values = sample_colors) +
  scale_fill_manual(values = sample_colors) +
  xlab("nCount_RNA") +
  ylab("Density") +
  theme(legend.position =  "none") +
  geom_vline(xintercept = nCount.min) +
  geom_vline(xintercept = nCount.max) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize nFeature_RNA
den2 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = nFeature_RNA,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("nFeature_RNA") +
  ylab("Density") +
  geom_vline(xintercept = nFeature.min) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize cell complexity
# Quality cells are usually above 0.85
den3 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = cell.complexity,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("Cell Complexity (log10(nFeature/nCount))") +
  ylab("Density") +
  geom_vline(xintercept = complexity.cutoff) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.mt
den4 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = percent.mt,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_continuous(n.breaks = 4) +
  geom_vline(xintercept = mt.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Mitochondrial Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.ribo.protein
den5 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = percent.ribo.protein,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  geom_vline(xintercept = ribo.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Ribosomal Protein Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.hb
den6 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = percent.hb,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  geom_vline(xintercept = hb.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Hemoglobin Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Arrange graphs in grid
plots <- list(den1,den2,den3,den4,den5,den6)
layout <- rbind(c(1,4),c(2,5),c(3,6))
grid <- grid.arrange(grobs = plots, layout_matrix = layout)

Violin plots

# nFeature, nCount, and cell.complexity violins
v3 <- VlnPlot(mouse.filtered,
              features = c("nFeature_RNA", "nCount_RNA","cell.complexity"),
              ncol = 3,
              group.by = 'sample',
              cols = sample_colors,
              pt.size = 0)
v3

#  percent violins
v4 <- VlnPlot(mouse.filtered,
              features = c("percent.mt","percent.ribo.protein","percent.hb"),
              ncol = 3,
              group.by = 'sample',
              cols = sample_colors,
              pt.size = 0)
v4

Scatter plots

s3 <- ggplot(
  mouse.filtered@meta.data,
  aes(x = nCount_RNA, y = nFeature_RNA, color = percent.mt)) + 
  geom_point() + 
  geom_smooth(method = "lm") +
    scale_x_log10() +       
  scale_y_log10() + 
  theme_classic() +
  geom_vline(xintercept = nCount.min) + 
  geom_hline(yintercept = nFeature.min) + 
  facet_wrap(~sample) +
  scale_colour_gradient(low = "gray90", high = "black", limits =c(0,100))
s3
## `geom_smooth()` using formula 'y ~ x'

s4 <- FeatureScatter(mouse.filtered,
                     feature1 = "nCount_RNA",
                     feature2 = "percent.mt",
                     group.by = 'sample',
                     cols = sample_colors,
                     shuffle = TRUE)
s4

Box plot

# Visualize the distribution of genes detected per cell via boxplot
b1 <- ggplot(mouse.filtered@meta.data,
       aes(x = sample, 
           y = log10(nFeature_RNA), 
           fill=sample)) + 
  geom_boxplot() + 
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
  theme(plot.title = element_text(hjust = 0.5, face="bold")) +
  ggtitle("Unique Genes / Cell / Sample") +
  scale_color_manual(values = sample_colors) +
  scale_fill_manual(values = sample_colors) +
  xlab("Sample")
b1

PCA, UMAP, Cluster

SCTransform and PCA

# SCT
mouse.sct <- SCTransform(mouse.filtered, verbose = FALSE)

# Cluster with the SCT workflow
mouse.sct <- RunPCA(object = mouse.sct, assay = "SCT")
## PC_ 1 
## Positive:  Cst3, C1qb, Ctss, C1qa, Hexb, Ctsd, C1qc, Itm2b, Trem2, Fcer1g 
##     Tyrobp, Cx3cr1, Ctsb, Tmsb4x, Cd9, Ctsz, Cd81, Ctsl, Csf1r, Laptm5 
##     Gpr34, Lgmn, Apoe, Lamp1, Sparc, Fth1, P2ry12, Fcrls, Ly86, Fau 
## Negative:  R3hdm1, Gria2, Ptprd, Ahi1, Rian, Arpp21, Gm26917, Syne1, Epha5, Sorbs2 
##     Dclk1, Opcml, Auts2, Tcf4, Atp2b1, Erc2, Nrxn3, Lrp1b, Ntrk2, Pde10a 
##     Phactr1, Homer1, Ntm, Negr1, Lsamp, Gm45159, Mir100hg, Gnao1, Gm3764, Cit 
## PC_ 2 
## Positive:  Fth1, Ppia, Cox8a, Rpl13, Eef1a1, Rpl41, Cox4i1, Rpl19, Tpt1, Ubb 
##     Rps24, Cox6c, Chchd2, Rpl9, Dbi, Rps27a, Gpx4, Fau, Mt3, Rpl3 
##     Rps20, Rplp1, Atp5h, Rpl11, Rps29, Rps10, Ndufa4, Rps3a1, Rpl21, Gm42418 
## Negative:  Cst3, Ctss, Hexb, C1qb, C1qa, C1qc, Ctsd, Cx3cr1, Trem2, Gpr34 
##     Itm2b, Csf1r, Lgmn, Fcer1g, Fcrls, Laptm5, Lpcat2, Ctsz, Olfml3, P2ry12 
##     Cd9, Cd81, Ctsl, Selplg, Serinc3, Ifngr1, Tgfbr1, Cyba, Lamp1, Tmem119 
## PC_ 3 
## Positive:  Slc1a2, Atp1a2, Slc1a3, Ptprz1, Apoe, Qk, Bcan, Ntsr2, Appl2, Sparcl1 
##     Ndrg2, Acsl3, Gm3764, Vegfa, Gm6145, Daam2, Slc7a11, Cspg5, Nwd1, Prex2 
##     Plpp3, Phkg1, Fgfr3, Glul, Sat1, Neat1, Prdx6, Hnrnpa2b1, Son, Gnao1 
## Negative:  R3hdm1, Ahi1, Ptprd, Arpp21, Rian, Sorbs2, Atp2b1, Mef2c, Opcml, Erc2 
##     Epha5, Pde10a, Homer1, Nrxn3, Phactr1, Auts2, Negr1, Gm45159, Tmsb4x, Tpt1 
##     Csmd3, A830036E02Rik, Fau, Cacna2d3, Rgs9, Eef1a1, Ftl1, Ppia, Cit, Tafa1 
## PC_ 4 
## Positive:  Tmsb4x, Rplp1, Rps4x, Rpl13, Rps24, Fau, Rps29, Gm42418, Rps3a1, Rpl27a 
##     Rpsa, Rps3, Rpl30, Rps9, Rpl21, Rpl18a, Ftl1, Rps5, Rplp0, Rpl35a 
##     Actb, Rps11, Rpl23, Rpl19, Rps23, Rpl37, Rps12, Rpl9, Rps10, Rpl11 
## Negative:  Ttr, Enpp2, Ecrg4, Chchd10, Atp5g1, Ptgds, Dbi, Kl, Rbp1, Igfbp2 
##     Prdx5, Ndufa11, Bsg, Ndufa4, 2900040C04Rik, Calml4, Hist1h2bc, Mt3, Tubb4b, Arl6ip1 
##     Mdh1, Atp5h, Sostdc1, Cox7b, Uqcr10, Map1lc3a, Atp5l, Cox6c, Uqcrq, Cryab 
## PC_ 5 
## Positive:  Apoe, Cst7, Fth1, Camk2n1, Lyz2, Ctsb, Selenow, Ctsz, Pcsk1n, Cpe 
##     Cox8a, Trem2, Cd9, Ctsd, Ctsl, Gm42418, Aldoc, Mt1, Mt3, Tyrobp 
##     Cd63, Npc2, Ckb, Hexa, Syngr1, Timp2, Ramp1, 2900097C17Rik, Ndufb9, Cox4i1 
## Negative:  Tmsb4x, Flt1, Rgs5, Actb, Cldn5, Cxcl12, Bsg, Slco1a4, Ptprb, Adgrl4 
##     Vtn, Ly6a, Pltp, Ly6c1, Itm2a, Igfbp7, Ahnak, Abcb1a, Egfl7, Adgrf5 
##     Ptma, Hspb1, Klf2, Epas1, Pecam1, Id1, Ramp2, Cgnl1, Eng, Kdr
# Plot PCA
p1 <- DimPlot(object = mouse.sct, 
              reduction = "pca",
              group.by = "sample",
              cols = sample_colors,
              shuffle = TRUE) + NoLegend()
p1

# Cleanup
remove(mouse.filtered)

Significant PCs

First metric

# Determine percent of variation associated with each PC
stdv <- mouse.sct[["pca"]]@stdev
sum.stdv <- sum(mouse.sct[["pca"]]@stdev)
percent.stdv <- (stdv / sum.stdv) * 100

# Calculate cumulative percents for each PC
cumulative <- cumsum(percent.stdv)

# Determine which PC exhibits cumulative percent greater than 90% and
# and % variation associated with the PC as less than 5
co1 <- which(cumulative > 90 & percent.stdv < 5)[1]
co1
## [1] 42

Second metric

# Determine the difference between variation of PC and subsequent PC
co2 <- sort(which(
  (percent.stdv[1:length(percent.stdv) - 1] - 
     percent.stdv[2:length(percent.stdv)]) > 0.1), 
  decreasing = T)[1] + 1

# last point where change of % of variation is more than 0.1%.
co2
## [1] 10

Choose the minimum of these two metrics as the PCs covering the majority of the variation in the data.

# Minimum of the two calculation
min.pc <- min(co1, co2)
min.pc
## [1] 10

UMAP

# Run UMAP
mouse.sct <- RunUMAP(mouse.sct,
                     dims = 1:min.pc,
                     reduction = "pca",
                     n.components = 3) # set to 3 to use with VR

# Plot UMAP
DimPlot(mouse.sct)

Find clusters

# Determine the K-nearest neighbor graph
mouse.unannotated <- FindNeighbors(object = mouse.sct,
                                   assay = "SCT", # set as default after SCTransform
                                   reduction = "pca",
                                   dims = 1:min.pc)

# Determine the clusters for various resolutions
mouse.unannotated <- FindClusters(object = mouse.unannotated,
                                  algorithm = 1, # 1 = Louvain
                                  resolution = seq(0.1,0.5,by=0.1))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 12340
## Number of edges: 396801
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9546
## Number of communities: 5
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 12340
## Number of edges: 396801
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9307
## Number of communities: 7
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 12340
## Number of edges: 396801
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9140
## Number of communities: 11
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 12340
## Number of edges: 396801
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8984
## Number of communities: 12
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 12340
## Number of edges: 396801
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8874
## Number of communities: 14
## Elapsed time: 1 seconds
mouse.unannotated$seurat_clusters <- mouse.unannotated$SCT_snn_res.0.5

Explore resolutions

# 0.1
DimPlot(mouse.unannotated,
        group.by = "SCT_snn_res.0.1",
        label = TRUE)

# 0.2
DimPlot(mouse.unannotated,
        group.by = "SCT_snn_res.0.2",
        label = TRUE)

# 0.3
DimPlot(mouse.unannotated,
        group.by = "SCT_snn_res.0.3",
        label = TRUE)

# 0.4
DimPlot(mouse.unannotated,
        group.by = "SCT_snn_res.0.4",
        label = TRUE)

# 0.5
DimPlot(mouse.unannotated,
        group.by = "SCT_snn_res.0.5",
        label = TRUE)

3D UMAP

embeddings <- mouse.unannotated@reductions$umap@cell.embeddings
embeddings <- cbind(embeddings, as.character(mouse.unannotated$seurat_clusters))
colnames(embeddings)[4] <- "seurat_clusters"
embeddings <- as.data.frame(embeddings)
embeddings$seurat_clusters <- factor(embeddings$seurat_clusters,
                                     levels = c("0","1","2","3","4","5","6","7",
                                                "8","9","10","11","12","13"))
cluster_colors <- c("chocolate4","gray40","gray","red1","orange","yellow","chartreuse1","forestgreen",
                    "cyan","steelblue","blue","plum1", "magenta1","purple")

three.dim <- plot_ly(embeddings,
                     x = ~UMAP_1, 
                     y = ~UMAP_2, 
                     z = ~UMAP_3, 
                     color = ~seurat_clusters,
                     colors = cluster_colors,
                     size = 1) 
three.dim <- three.dim %>% add_markers() 
three.dim <- three.dim %>% layout(scene = list(xaxis = list(title = 'UMAP_1'), 
                                     yaxis = list(title = 'UMAP_2'), 
                                     zaxis = list(title = 'UMAP_3')))
three.dim

Potential Markers

Astrocytes

DefaultAssay(mouse.unannotated) <- "RNA"
mouse.unannotated <- NormalizeData(mouse.unannotated)
Idents(mouse.unannotated) <- "SCT_snn_res.0.5"
VlnPlot(mouse.unannotated,
        features = "Aqp4",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Gfap",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Gja1",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

B-cell

  • Cd19: expressed in B-cells and follicular dendritic cells
  • Fcrla: a B-cell specific protein in mice
  • Cd79a & Cd79b: together form BCR complex
  • Ms4a1: aka Cd20, expressed beginning at pro-B phase and progressively increases in concentration until maturity
  • Clusters 11, 20, and 21 are B cell clusters
VlnPlot(mouse.unannotated,
        features = "Cd79b",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Cd80",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

Endothelial cells

VlnPlot(mouse.unannotated,
        features = "Pecam1",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Kdr",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Flt1",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

Fibroblasts

VlnPlot(mouse.unannotated,
        features = "Col1a2",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

Mast cells

VlnPlot(mouse.unannotated,
        features = "Kit", # aka Cd117
        group.by = "SCT_snn_res.0.5")

Macrophage/Microglia

VlnPlot(mouse.unannotated,
        features = "Tmem119",
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Itgam", # aka Cd11b
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Cd14",
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Cd68",
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Ccr5",
        group.by = "SCT_snn_res.0.5")

Neurons

VlnPlot(mouse.unannotated,
        features = "Gad1",
        cols = cluster_colors,
        split.by = "seurat_clusters")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

VlnPlot(mouse.unannotated,
        features = "Gad2",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Slc32a1",
        cols = cluster_colors,
        split.by = "seurat_clusters")

Oligodendrocyte precursor cells

VlnPlot(mouse.unannotated,
        features = "Cspg5",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Olig1",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

T-cell

  • Trac/Cd3d/Cd3e/Cd3g components of the TCR

Shiny App

Cleanup object

mouse.unannotated@assays$RNA@var.features <- mouse.unannotated@assays$SCT@var.features
metadata <- mouse.unannotated@meta.data
metadata <- metadata[,c(15,16,2:14)]
mouse.unannotated@meta.data <- metadata
mouse.unannotated@assays$SCT@meta.features <- metadata
mouse.unannotated@assays$RNA@meta.features <- metadata

Output directory

# make shiny folder
DefaultAssay(mouse.unannotated) <- "RNA"
Idents(mouse.unannotated) <- mouse.unannotated$seurat_clusters
sc.config <- createConfig(mouse.unannotated)
makeShinyApp(mouse.unannotated, sc.config, gene.mapping = TRUE,
             shiny.title = "IgG_M_823A")