Setup

Set working directory

knitr::opts_knit$set(root.dir = "/research/labs/neurology/fryer/m214960/aducanumab/scripts/R")

Load libraries

# load libraries
library(cowplot)    # plot_grid()
library(dplyr)      # left_join()
library(ggplot2)    # ggplot()
library(gridExtra)  # grid.arrange()
library(harmony)    # RunHarmony()
library(parallel)   # detectCores()
library(plotly)     # plot_ly()
library(Seurat)     # Read10X_h5()
library(ShinyCell)  # makeShinyApp()
library(stringr)    # str_match()

Set variables and thresholds

# variables
sample_colors <- "blue"

# thresholds
nCount.min <- 200
nCount.max <- 10000
nFeature.min <- 100
complexity.cutoff <- 0.8
mt.cutoff <- 20
ribo.cutoff <- 20
hb.cutoff <- 3

# set seed
set.seed(8)

# work in parallel
options(mc.cores = detectCores() - 1)

Load data

# Load counts
prefix <- "../../cellbender/"
suffix <- "_fpr_0.05_filtered.h5"
mouse <- CreateSeuratObject(Read10X_h5(paste0(prefix,"738B_Adu_Female",suffix)))

# Load annotation file, GENCODE GRCm38 version M23 (Ensembl 98)
if (file.exists("../../rObjects/annotation.rds")) {
  genes <- readRDS("../../rObjects/annotation.rds")
} else {
  gtf.file <- "../../refs/mouse_genes.gtf"
  genes <- rtracklayer::import(gtf.file)
  genes <- as.data.frame(genes)
  genes <- genes[genes$type == "gene",]
  saveRDS(genes, "../../rObjects/annotation.rds")
}

Metadata columns

# cell.complexity
mouse$cell.complexity <- log10(mouse$nFeature_RNA) / log10(mouse$nCount_RNA)

# percent.mt
mt.genes <- genes[genes$seqnames == "chrM",13]
mouse$percent.mt <- PercentageFeatureSet(mouse, features = mt.genes)
mt.genes
##  [1] "mt-Nd1"  "mt-Nd2"  "mt-Co1"  "mt-Co2"  "mt-Atp8" "mt-Atp6" "mt-Co3" 
##  [8] "mt-Nd3"  "mt-Nd4l" "mt-Nd4"  "mt-Nd5"  "mt-Nd6"  "mt-Cytb"
# percent.ribo
# ribosomal proteins begin with 'Rps' or 'Rpl' in this annotation file
# mitochondrial ribosomes start with 'Mrps' or 'Mrpl'
gene.names <- genes$gene_name
ribo <- gene.names[grep("^Rp[sl]", gene.names)]
mt.ribo <- gene.names[grep("^Mrp[sl]", gene.names)]
ribo.combined <- c(mt.ribo,ribo)
mouse$percent.ribo.protein <- PercentageFeatureSet(mouse, features = ribo.combined)
ribo.combined
##   [1] "Mrpl15"     "Mrpl30"     "Mrps9"      "Mrpl44"     "Mrps14"    
##   [6] "Mrpl41"     "Mrps2"      "Mrps5"      "Mrps26"     "Mrps28"    
##  [11] "Mrpl47"     "Mrpl24"     "Mrpl9"      "Mrps21"     "Mrpl50"    
##  [16] "Mrpl37"     "Mrps15"     "Mrpl20"     "Mrpl33"     "Mrpl1"     
##  [21] "Mrps18c"    "Mrps17"     "Mrps33"     "Mrpl35"     "Mrpl19"    
##  [26] "Mrpl53"     "Mrps25"     "Mrpl51"     "Mrps35"     "Mrps12"    
##  [31] "Mrpl46"     "Mrps11"     "Mrpl48"     "Mrpl17"     "Mrpl23"    
##  [36] "Mrps31"     "Mrpl34"     "Mrpl4"      "Mrps22"     "Mrpl3"     
##  [41] "Mrpl54"     "Mrpl42"     "Mrps24"     "Mrpl22"     "Mrpl55"    
##  [46] "Mrps23"     "Mrpl27"     "Mrpl10"     "Mrpl45"     "Mrpl58"    
##  [51] "Mrps7"      "Mrpl38"     "Mrpl12"     "Mrpl32"     "Mrpl36"    
##  [56] "Mrps27"     "Mrps36"     "Mrps30"     "Mrps16"     "Mrpl52"    
##  [61] "Mrpl57"     "Mrpl13"     "Mrpl40"     "Mrpl39"     "Mrps6"     
##  [66] "Mrpl18"     "Mrps34"     "Mrpl28"     "Mrps18b"    "Mrpl14"    
##  [71] "Mrps18a"    "Mrpl2"      "Mrps10"     "Mrpl21"     "Mrpl11"    
##  [76] "Mrpl49"     "Mrpl16"     "Mrpl43"     "Rpl7"       "Rpl31"     
##  [81] "Rpl37a"     "Rps6kc1"    "Rpl7a"      "Rpl12"      "Rpl35"     
##  [86] "Rps21"      "Rpl22l1"    "Rps3a1"     "Rps27"      "Rpl34"     
##  [91] "Rps20"      "Rps6"       "Rps8"       "Rps6ka1"    "Rpl11"     
##  [96] "Rpl22"      "Rpl9"       "Rpl5"       "Rplp0"      "Rpl6"      
## [101] "Rpl21"      "Rpl32"      "Rps9"       "Rpl28"      "Rps5"      
## [106] "Rps19"      "Rps16"      "Rps11"      "Rpl13a"     "Rpl18"     
## [111] "Rps17"      "Rps3"       "Rpl27a"     "Rps13"      "Rps15a"    
## [116] "Rplp2"      "Rpl18a"     "Rpl13"      "Rps25"      "Rpl10-ps3" 
## [121] "Rplp1"      "Rpl4"       "Rps27l"     "Rpl29"      "Rps27rt"   
## [126] "Rpsa"       "Rpl14"      "Rps12"      "Rps15"      "Rpl41"     
## [131] "Rps26"      "Rps27a"     "Rpl26"      "Rpl23a"     "Rpl9-ps1"  
## [136] "Rps6kb1"    "Rpl23"      "Rpl19"      "Rpl27"      "Rpl38"     
## [141] "Rps7"       "Rpl10l"     "Rps29"      "Rpl36al"    "Rps6kl1"   
## [146] "Rps6ka5"    "Rps23"      "Rpl15"      "Rps24"      "Rpl36a-ps1"
## [151] "Rpl37"      "Rpl30"      "Rpl8"       "Rpl3"       "Rps19bp1"  
## [156] "Rpl39l"     "Rpl35a"     "Rpl24"      "Rps6ka2"    "Rps2"      
## [161] "Rpl3l"      "Rps10"      "Rpl10a"     "Rps28"      "Rps18"     
## [166] "Rpl7l1"     "Rpl36"      "Rpl36-ps4"  "Rps14"      "Rpl17"     
## [171] "Rps6kb2"    "Rps6ka4"    "Rpl9-ps6"   "Rpl39"      "Rpl10"     
## [176] "Rps4x"      "Rps6ka6"    "Rpl36a"     "Rps6ka3"
# percent.hb
# percent.hb - hemoglobin proteins begin with 'Hbb' or 'Hba' for mouse
hb.genes <- gene.names[grep("^Hb[ba]-", gene.names)]
mouse$percent.hb <- PercentageFeatureSet(mouse, features = hb.genes)
hb.genes
## [1] "Hbb-bt"  "Hbb-bs"  "Hbb-bh2" "Hbb-bh1" "Hbb-y"   "Hba-x"   "Hba-a1" 
## [8] "Hba-a2"
# sample
mouse$sample <- "Adu.F.738B"

Pre-filtering QC

Number of cells

# Visualize the number of cell counts per sample
data <- as.data.frame(table(mouse$sample))
colnames(data) <- c("sample","frequency")

ncells1 <- ggplot(data, aes(x = sample, y = frequency, fill = sample)) + 
  geom_col() +
  theme_classic() +
  geom_text(aes(label = frequency), 
            position=position_dodge(width=0.9), 
            vjust=-0.25) +
  scale_fill_manual(values = sample_colors) + 
  ggtitle("Raw: cells per sample") +
  theme(legend.position =  "none") + 
  scale_y_continuous(breaks = seq(0,8000, by = 2000), limits = c(0,8000)) +
  theme(axis.text.x = element_text(angle = 45, hjust=1))
ncells1

Density plots

# Visualize nCount_RNA
den1 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = nCount_RNA,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  scale_color_manual(values = sample_colors) +
  scale_fill_manual(values = sample_colors) +
  xlab("nCount_RNA") +
  ylab("Density") +
  theme(legend.position =  "none") +
  geom_vline(xintercept = nCount.min) +
  geom_vline(xintercept = nCount.max) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize nFeature_RNA
den2 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = nFeature_RNA,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("nFeature_RNA") +
  ylab("Density") +
  geom_vline(xintercept = nFeature.min) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize cell complexity
# Quality cells are usually above 0.85
den3 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = cell.complexity,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("Cell Complexity (log10(nFeature/nCount))") +
  ylab("Density") +
  geom_vline(xintercept = complexity.cutoff) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.mt
den4 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = percent.mt,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_continuous(n.breaks = 4) +
  geom_vline(xintercept = mt.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Mitochondrial Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.ribo.protein
den5 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = percent.ribo.protein,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  geom_vline(xintercept = ribo.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Ribosomal Protein Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.hb
den6 <- ggplot(mouse@meta.data,
       aes(color = sample,
           x = percent.hb,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  geom_vline(xintercept = hb.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Hemoglobin Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Arrange graphs in grid
plots <- list(den1,den2,den3,den4,den5,den6)
layout <- rbind(c(1,4),c(2,5),c(3,6))
grid <- grid.arrange(grobs = plots, layout_matrix = layout)

Violin plots

# nFeature, nCount, and cell.complexity violins
v1 <- VlnPlot(mouse,
              features = c("nFeature_RNA", "nCount_RNA","cell.complexity"),
              ncol = 3,
              group.by = 'sample',
              cols = sample_colors,
              pt.size = 0)
v1

#  percent violins
v2 <- VlnPlot(mouse,
              features = c("percent.mt","percent.ribo.protein","percent.hb"),
              ncol = 3,
              group.by = 'sample',
              cols = sample_colors,
              pt.size = 0)
v2

Scatter plots

s1 <- ggplot(
  mouse@meta.data,
  aes(x = nCount_RNA, y = nFeature_RNA, color = percent.mt)) + 
  geom_point() + 
  geom_smooth(method = "lm") +
    scale_x_log10() +       
  scale_y_log10() + 
  theme_classic() +
  geom_vline(xintercept = nCount.min) + 
  geom_hline(yintercept = nFeature.min) + 
  facet_wrap(~sample) +
  scale_colour_gradient(low = "gray90", high = "black", limits =c(0,100))
s1
## `geom_smooth()` using formula 'y ~ x'

s2 <- FeatureScatter(mouse,
                     feature1 = "nCount_RNA",
                     feature2 = "percent.mt",
                     group.by = 'sample',
                     cols = sample_colors,
                     shuffle = TRUE)
s2

Filtering

Cell-level filtering

# filter
mouse.filtered <- subset(mouse,
                        subset = (nCount_RNA > nCount.min) &
                          (nCount_RNA < nCount.max) &
                          (nFeature_RNA > nFeature.min) &
                          (cell.complexity > complexity.cutoff) &
                          (percent.mt < mt.cutoff) &
                          (percent.ribo.protein < ribo.cutoff) &
                          (percent.hb < hb.cutoff))

# print cells removed
print(paste0(dim(mouse)[2] - dim(mouse.filtered)[2]," cells removed"))
## [1] "2539 cells removed"

Gene-level filtering

Remove lowly expressed genes. We will keep genes that have at least 1 count in 10 cells.

# filter genes
counts <- GetAssayData(object = mouse.filtered, slot = "counts")
nonzero <- counts > 0  # produces logical
keep <- Matrix::rowSums(nonzero) >= 10  # sum the true/false
counts.filtered <- counts[keep,]  # keep certain genes

# overwrite mouse.filtered
mouse.filtered <- CreateSeuratObject(counts.filtered, 
                                    meta.data = mouse.filtered@meta.data)

# print features removed
print(paste0(dim(counts)[1] - dim(counts.filtered)[1], " features removed"))
## [1] "20023 features removed"

Remove mitochondrial genes.

# remove mt.genes
counts <- GetAssayData(object = mouse.filtered, slot = "counts")
keep <- !rownames(counts) %in% mt.genes # false when mt.gene
counts.filtered <- counts[keep,]

# overwrite mouse.filtered
mouse.filtered <- CreateSeuratObject(counts.filtered, 
                                    meta.data = mouse.filtered@meta.data)

# print features removed
print(paste0(dim(counts)[1] - dim(counts.filtered)[1], " features removed"))
## [1] "13 features removed"
# cleanup data
remove(mouse,counts,counts.filtered,nonzero)

Post-filtering QC

Number of cells

# Visualize the number of cell counts per sample
data <- as.data.frame(table(mouse.filtered$sample))
colnames(data) <- c("sample","frequency")

ncells2 <- ggplot(data, aes(x = sample, y = frequency, fill = sample)) + 
  geom_col() +
  theme_classic() +
  geom_text(aes(label = frequency), 
            position=position_dodge(width=0.9), 
            vjust=-0.25) +
  scale_fill_manual(values = sample_colors) + 
  scale_y_continuous(breaks = seq(0,8000, by = 2000), limits = c(0,8000)) +
  ggtitle("Filtered: cells per sample") +
  theme(legend.position =  "none") + 
  theme(axis.text.x = element_text(angle = 45, hjust=1))

# Arrange graphs in grid
plots <- list(ncells1,ncells2)
layout <- cbind(c(1),c(2))
grid <- grid.arrange(grobs = plots, layout_matrix = layout)

Density plots

# Visualize nCount_RNA
den1 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = nCount_RNA,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  scale_color_manual(values = sample_colors) +
  scale_fill_manual(values = sample_colors) +
  xlab("nCount_RNA") +
  ylab("Density") +
  theme(legend.position =  "none") +
  geom_vline(xintercept = nCount.min) +
  geom_vline(xintercept = nCount.max) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize nFeature_RNA
den2 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = nFeature_RNA,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("nFeature_RNA") +
  ylab("Density") +
  geom_vline(xintercept = nFeature.min) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize cell complexity
# Quality cells are usually above 0.85
den3 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = cell.complexity,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("Cell Complexity (log10(nFeature/nCount))") +
  ylab("Density") +
  geom_vline(xintercept = complexity.cutoff) +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.mt
den4 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = percent.mt,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_continuous(n.breaks = 4) +
  geom_vline(xintercept = mt.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Mitochondrial Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.ribo.protein
den5 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = percent.ribo.protein,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  geom_vline(xintercept = ribo.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Ribosomal Protein Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Visualize percent.hb
den6 <- ggplot(mouse.filtered@meta.data,
       aes(color = sample,
           x = percent.hb,
           fill = sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  geom_vline(xintercept = hb.cutoff) +
  scale_color_manual(values = sample_colors) +
  theme(legend.position =  "none") +
  scale_fill_manual(values = sample_colors) +
  xlab("% Hemoglobin Genes") +
  ylab("Density") +
  theme(legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size=9))

# Arrange graphs in grid
plots <- list(den1,den2,den3,den4,den5,den6)
layout <- rbind(c(1,4),c(2,5),c(3,6))
grid <- grid.arrange(grobs = plots, layout_matrix = layout)

Violin plots

# nFeature, nCount, and cell.complexity violins
v3 <- VlnPlot(mouse.filtered,
              features = c("nFeature_RNA", "nCount_RNA","cell.complexity"),
              ncol = 3,
              group.by = 'sample',
              cols = sample_colors,
              pt.size = 0)
v3

#  percent violins
v4 <- VlnPlot(mouse.filtered,
              features = c("percent.mt","percent.ribo.protein","percent.hb"),
              ncol = 3,
              group.by = 'sample',
              cols = sample_colors,
              pt.size = 0)
v4

Scatter plots

s3 <- ggplot(
  mouse.filtered@meta.data,
  aes(x = nCount_RNA, y = nFeature_RNA, color = percent.mt)) + 
  geom_point() + 
  geom_smooth(method = "lm") +
    scale_x_log10() +       
  scale_y_log10() + 
  theme_classic() +
  geom_vline(xintercept = nCount.min) + 
  geom_hline(yintercept = nFeature.min) + 
  facet_wrap(~sample) +
  scale_colour_gradient(low = "gray90", high = "black", limits =c(0,100))
s3
## `geom_smooth()` using formula 'y ~ x'

s4 <- FeatureScatter(mouse.filtered,
                     feature1 = "nCount_RNA",
                     feature2 = "percent.mt",
                     group.by = 'sample',
                     cols = sample_colors,
                     shuffle = TRUE)
s4

Box plot

# Visualize the distribution of genes detected per cell via boxplot
b1 <- ggplot(mouse.filtered@meta.data,
       aes(x = sample, 
           y = log10(nFeature_RNA), 
           fill=sample)) + 
  geom_boxplot() + 
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
  theme(plot.title = element_text(hjust = 0.5, face="bold")) +
  ggtitle("Unique Genes / Cell / Sample") +
  scale_color_manual(values = sample_colors) +
  scale_fill_manual(values = sample_colors) +
  xlab("Sample")
b1

PCA, UMAP, Cluster

PCA

# SCT
mouse.sct <- SCTransform(mouse.filtered, verbose = FALSE)

# Cluster with the SCT workflow
mouse.sct <- RunPCA(object = mouse.sct, assay = "SCT")
## PC_ 1 
## Positive:  Snhg11, Cdr1os, Syne1, Gm26917, Matk, Chd5, Trank1, Pclo, Map1b, Snhg14 
##     Syt1, Gria2, Ahi1, Arpp21, Luc7l3, Rbm25, Ryr2, Dnm1, Peg3, Ankrd11 
##     Lmtk3, Unc13a, Ttc3, Kif1a, Nrxn1, Slc4a3, Ank3, Kalrn, Mical3, Unc80 
## Negative:  Apoe, Tmsb4x, Rps29, Tyrobp, Fth1, B2m, Ctsb, Ctsl, Fau, Actb 
##     Ctsd, Cd9, Ctsz, Lyz2, Rplp1, Rpl41, Eef1a1, Rps21, Tpt1, Rpl37a 
##     Rpl39, Cst7, Rps28, Rps24, Rpl37, Rpl23, Rpl35a, Cd63, Rps27, Rpl13 
## PC_ 2 
## Positive:  Cd9, Ctsl, Ctsz, Ctsd, Ctsb, B2m, Gpr84, Lyz2, Cd63, Cst7 
##     Lpl, Ccl12, Tyrobp, Csf1, Apoe, Ccl4, Malt1, Ccl3, Cd74, Dcaf17 
##     Ccl6, Zcchc2, Itgax, Upf2, Cep170, Lilrb4a, Chd2, Cd52, Col6a3, Lsg1 
## Negative:  Mt3, Dbi, Cox8a, Cox6c, Ttr, Clu, Rpl41, Enpp2, Atp5e, Chchd2 
##     Chchd10, Ecrg4, Atp5l, Gpx4, Uqcr11, Atp5md, Atp5g1, Ptgds, Ndufa4, Atp5h 
##     Fxyd1, Uqcrq, Cox7b, Uqcr10, Atp5j2, Cox7c, Uqcrh, Cox4i1, Ubb, Ppia 
## PC_ 3 
## Positive:  Tmsb4x, Rps29, Snhg11, Rplp1, Fau, Rps28, Rpl39, Rps24, Rpl35a, Tyrobp 
##     Rpl23, Actb, Eef1a1, Rps4x, Rps12, Rpl37, Fth1, Rps3a1, Tpt1, Rpl13 
##     Rpl37a, Rps27, Rpl21, Rpl32, Apoe, Cdr1os, Rpl27a, Trank1, Lyz2, Ftl1 
## Negative:  Atp1a2, Slc1a2, Gnao1, Bcan, Ddhd1, Ptprz1, Grin2c, Gm3764, Daam2, Utp14b 
##     Slc4a4, Vegfa, Sparcl1, Nwd1, Appl2, Clu, Plcd4, Clmn, Mir100hg, Ntsr2 
##     Cspg5, Plpp3, Slc7a10, Itih3, Msi2, Myo6, Myo10, Trpm3, Mt3, Ndrg2 
## PC_ 4 
## Positive:  Atp1a2, Tmsb4x, Slc1a2, Rps29, Rplp1, Fth1, Fau, Rps28, Gnao1, Rpl39 
##     Sparcl1, Rps24, Rpl37, Bcan, Rpl35a, Rps4x, Rpl23, Rps3a1, Rps12, Rpl32 
##     Rpl13, Actb, Slc4a4, Eef1a1, Ddhd1, Apoe, Ptprz1, Rpl21, Rps27, Rpl37a 
## Negative:  Ecrg4, Ttr, Enpp2, Chchd10, 2900040C04Rik, Kl, Fxyd1, Rbp1, Calml4, Ptgds 
##     Folr1, Igfbp2, Sostdc1, Prlr, Cab39l, Atp5g1, Ndufa4, Clic6, Kcnj13, Ndufc1 
##     Atp5l, Cdkn1c, Car2, Cox8b, Uqcr11, Tubb4b, Atp5md, Uqcrb, Cryab, Slc22a17 
## PC_ 5 
## Positive:  Tmsb4x, Actb, Rps29, Rplp1, Tpt1, Rps24, Rpl39, Rps28, Rps27, Rpl32 
##     Rpl19, Rps26, Rps4x, Rpl35a, Rps20, Tmsb10, Rps12, Rps3a1, Eef1a1, Rps8 
##     Rpl37, Il7r, Ckb, Fau, Rpl13, Ltb, Rpl28, Rps23, Rpl34, Rpl37a 
## Negative:  Apoe, Cst7, Lyz2, Ctsl, Ctsb, Cd63, Cd74, Ctsz, Cd9, Tyrobp 
##     Ctsd, Lpl, Itgax, Csf1, B2m, Spp1, H2-Ab1, Ccl6, Ccl3, H2-Aa 
##     Cd52, Prnp, Rps21, Gnas, Fabp5, H2-Eb1, Ccl4, Atp1a3, Snhg11, Cybb
# Plot PCA
p1 <- DimPlot(object = mouse.sct, 
              reduction = "pca",
              group.by = "sample",
              cols = sample_colors,
              shuffle = TRUE) + NoLegend()
p1

# Cleanup
remove(mouse.filtered)

Significant PCs

First metric

# Determine percent of variation associated with each PC
stdv <- mouse.sct[["pca"]]@stdev
sum.stdv <- sum(mouse.sct[["pca"]]@stdev)
percent.stdv <- (stdv / sum.stdv) * 100

# Calculate cumulative percents for each PC
cumulative <- cumsum(percent.stdv)

# Determine which PC exhibits cumulative percent greater than 90% and
# and % variation associated with the PC as less than 5
co1 <- which(cumulative > 90 & percent.stdv < 5)[1]
co1
## [1] 44

Second metric

# Determine the difference between variation of PC and subsequent PC
co2 <- sort(which(
  (percent.stdv[1:length(percent.stdv) - 1] - 
     percent.stdv[2:length(percent.stdv)]) > 0.1), 
  decreasing = T)[1] + 1

# last point where change of % of variation is more than 0.1%.
co2
## [1] 10

Choose the minimum of these two metrics as the PCs covering the majority of the variation in the data.

# Minimum of the two calculation
min.pc <- min(co1, co2)
min.pc
## [1] 10

UMAP

# Run UMAP
mouse.sct <- RunUMAP(mouse.sct,
                     dims = 1:min.pc,
                     reduction = "pca",
                     n.components = 3) # set to 3 to use with VR

# Plot UMAP
DimPlot(mouse.sct)

Find clusters

# Determine the K-nearest neighbor graph
mouse.unannotated <- FindNeighbors(object = mouse.sct,
                                   assay = "SCT", # set as default after SCTransform
                                   reduction = "pca",
                                   dims = 1:min.pc)

# Determine the clusters for various resolutions
mouse.unannotated <- FindClusters(object = mouse.unannotated,
                                  algorithm = 1, # 1 = Louvain
                                  resolution = seq(0.1,0.5,by=0.1))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2382
## Number of edges: 76096
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9542
## Number of communities: 5
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2382
## Number of edges: 76096
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9244
## Number of communities: 5
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2382
## Number of edges: 76096
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8970
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2382
## Number of edges: 76096
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8696
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2382
## Number of edges: 76096
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8501
## Number of communities: 8
## Elapsed time: 0 seconds
mouse.unannotated$seurat_clusters <- mouse.unannotated$SCT_snn_res.0.5

Explore resolutions

# 0.1
DimPlot(mouse.unannotated,
        group.by = "SCT_snn_res.0.1",
        label = TRUE)

# 0.2
DimPlot(mouse.unannotated,
        group.by = "SCT_snn_res.0.2",
        label = TRUE)

# 0.3
DimPlot(mouse.unannotated,
        group.by = "SCT_snn_res.0.3",
        label = TRUE)

# 0.4
DimPlot(mouse.unannotated,
        group.by = "SCT_snn_res.0.4",
        label = TRUE)

# 0.5
DimPlot(mouse.unannotated,
        group.by = "SCT_snn_res.0.5",
        label = TRUE)

3D UMAP

embeddings <- mouse.unannotated@reductions$umap@cell.embeddings
embeddings <- cbind(embeddings, as.character(mouse.unannotated$seurat_clusters))
colnames(embeddings)[4] <- "seurat_clusters"
embeddings <- as.data.frame(embeddings)
embeddings$seurat_clusters <- factor(embeddings$seurat_clusters,
                                     levels = c("0","1","2","3","4","5","6","7"))
cluster_colors <- c("gray","red1","yellow","chartreuse1",
                    "cyan","blue","plum1", "magenta1")

three.dim <- plot_ly(embeddings,
                     x = ~UMAP_1, 
                     y = ~UMAP_2, 
                     z = ~UMAP_3, 
                     color = ~seurat_clusters,
                     colors = cluster_colors,
                     size = 1) 
three.dim <- three.dim %>% add_markers() 
three.dim <- three.dim %>% layout(scene = list(xaxis = list(title = 'UMAP_1'), 
                                     yaxis = list(title = 'UMAP_2'), 
                                     zaxis = list(title = 'UMAP_3')))
three.dim

Potential Markers

Astrocytes

DefaultAssay(mouse.unannotated) <- "RNA"
mouse.unannotated <- NormalizeData(mouse.unannotated)
Idents(mouse.unannotated) <- "SCT_snn_res.0.5"
VlnPlot(mouse.unannotated,
        features = "Aqp4",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Gfap",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Gja1",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

B-cell

  • Cd19: expressed in B-cells and follicular dendritic cells
  • Fcrla: a B-cell specific protein in mice
  • Cd79a & Cd79b: together form BCR complex
  • Ms4a1: aka Cd20, expressed beginning at pro-B phase and progressively increases in concentration until maturity
  • Clusters 11, 20, and 21 are B cell clusters
VlnPlot(mouse.unannotated,
        features = "Cd79b",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Cd80",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

Endothelial cells

VlnPlot(mouse.unannotated,
        features = "Pecam1",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Kdr",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Flt1",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

Fibroblasts

VlnPlot(mouse.unannotated,
        features = "Col1a2",
        cols = cluster_colors,
        group.by = "SCT_snn_res.0.5")

Mast cells

VlnPlot(mouse.unannotated,
        features = "Kit", # aka Cd117
        group.by = "SCT_snn_res.0.5")

Macrophage/Microglia

VlnPlot(mouse.unannotated,
        features = "Tmem119",
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Itgam", # aka Cd11b
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Cd14",
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Cd68",
        group.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Ccr5",
        group.by = "SCT_snn_res.0.5")

Neurons

VlnPlot(mouse.unannotated,
        features = "Gad1",
        cols = cluster_colors,
        split.by = "seurat_clusters")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

VlnPlot(mouse.unannotated,
        features = "Gad2",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Slc32a1",
        cols = cluster_colors,
        split.by = "seurat_clusters")

Oligodendrocyte precursor cells

VlnPlot(mouse.unannotated,
        features = "Cspg5",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Olig1",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

T-cell

  • Trac/Cd3d/Cd3e/Cd3g components of the TCR

Shiny App

Cleanup object

mouse.unannotated@assays$RNA@var.features <- mouse.unannotated@assays$SCT@var.features
metadata <- mouse.unannotated@meta.data
metadata <- metadata[,c(15,16,2:14)]
mouse.unannotated@meta.data <- metadata
mouse.unannotated@assays$SCT@meta.features <- metadata
mouse.unannotated@assays$RNA@meta.features <- metadata

Output directory

# make shiny folder
DefaultAssay(mouse.unannotated) <- "RNA"
Idents(mouse.unannotated) <- mouse.unannotated$seurat_clusters
sc.config <- createConfig(mouse.unannotated)
makeShinyApp(mouse.unannotated, sc.config, gene.mapping = TRUE,
             shiny.title = "Adu_F_738B")