Pipseq chemistry kit version 4

Sample NPID control female NA04-324

Libraries & paths

library(dplyr)
library(Seurat)
library(patchwork)
library(knitr)
library(dittoSeq)
library(reshape2)
Sys.setenv(RSTUDIO_PANDOC="/usr/local/biotools/pandoc/3.1.2/bin")

sampleID <- c("v4")
sample_color.panel <- c("#B4AF46")
color.panel <- dittoColors()

Read in data

See script 01 for pre-processing and QC filtering. script 02 runs FindAllMarkers. Output is Wilcox and ROC.

dataObject <- readRDS(file = paste0("../../rObjects/", sampleID, ".filtered.rds"))
markers.strict <- readRDS(file = paste0("../../rObjects/markers/", 
                          sampleID, "_FindAllMarkers_strict_logFC1_FDR0.05.rds"))
ROC.markers <- readRDS(file = paste0("../../rObjects/markers/", 
                       sampleID, "_ROC_markers.rds"))

Unannotated

UMAP

ditto_umap <- dittoDimPlot(object = dataObject,
             var = "seurat_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = TRUE)
ditto_umap

Nuclei count per cluster

count_per_cluster <- FetchData(dataObject,
                               vars = c("ident", "orig.ident")) %>%
  dplyr::count(ident, orig.ident) %>%
  tidyr::spread(ident, n)
count_per_cluster
##   orig.ident    0    1    2    3    4   5   6   7   8   9  10  11  12  13  14
## 1         v4 2488 2360 2041 1944 1720 907 774 536 472 461 383 357 256 249 185
##    15  16
## 1 136 128
count_melt <- reshape2::melt(count_per_cluster)
colnames(count_melt) <- c("ident", "cluster", "number of nuclei")
count_max <- count_melt[which.max(count_melt$`number of nuclei`), ]
count_max_value <- count_max$`number of nuclei`
cellmax <- count_max_value + 200 # so that the figure doesn't cut off the text
count_bar <- ggplot(count_melt, aes(x = factor(cluster), y = `number of nuclei`, fill = `ident`)) +
  geom_bar(
    stat = "identity",
    colour = "black",
    width = 1,
    position = position_dodge(width = 0.8)
  ) +
  geom_text(
    aes(label = `number of nuclei`),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    angle = 45,
    hjust = -.01
  ) +
  theme_classic() + scale_fill_manual(values = sample_color.panel) +
  ggtitle("Number of nuclei per cluster") +  xlab("cluster") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(limits = c(0, cellmax))
count_bar

Cluster tree

dataObject <- BuildClusterTree(
  object = dataObject,
  dims = 1:30,
  reorder = FALSE,
  reorder.numeric = FALSE
)

tree <- dataObject@tools$BuildClusterTree
tree$tip.label <- paste0("Cluster ", tree$tip.label)
nClusters <- length(tree$tip.label)

tree_graph <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = color.panel[1:nClusters],
                        shape = 16,
                        size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0, 2.5, 0, 0), 'cm'))
tree_graph

Finding differentially expressed features (cluster biomarkers)

Seurat can help you find markers that define clusters via differential expression (DE). By default, it identifies positive and negative markers of a single cluster (specified in ident.1), compared to all other cells. FindAllMarkers() automates this process for all clusters, but you can also test groups of clusters vs. each other, or against all cells.

The default is wilcox which identifies differentially expressed genes between two groups of cells using a Wilcoxon Rank Sum test. ## FindAllMarkers default wilxcon

# find markers for every cluster compared to all remaining cells, 
# report only the positive ones
all.markers <- FindAllMarkers(dataObject, only.pos = TRUE)

# rearrange to order by cluster & filter to only include log2FC > 1 & FDR < 0.05
markers.strict <- all.markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1 & p_val_adj < 0.05)

# compare 
table(all.markers$cluster)
table(markers.strict$cluster)

Get markers for each cluster

# subset
cluster0 <- markers.strict[markers.strict$cluster == 0,]
cluster1 <- markers.strict[markers.strict$cluster == 1,]
cluster2 <- markers.strict[markers.strict$cluster == 2,]
cluster3 <- markers.strict[markers.strict$cluster == 3,]
cluster4 <- markers.strict[markers.strict$cluster == 4,]
cluster5 <- markers.strict[markers.strict$cluster == 5,]
cluster6 <- markers.strict[markers.strict$cluster == 6,]
cluster7 <- markers.strict[markers.strict$cluster == 7,]
cluster8 <- markers.strict[markers.strict$cluster == 8,]
cluster9 <- markers.strict[markers.strict$cluster == 9,]
cluster10 <- markers.strict[markers.strict$cluster == 10,]
cluster11 <- markers.strict[markers.strict$cluster == 11,]
cluster12 <- markers.strict[markers.strict$cluster == 12,]
cluster13 <- markers.strict[markers.strict$cluster == 13,]
cluster14 <- markers.strict[markers.strict$cluster == 14,]
cluster15 <- markers.strict[markers.strict$cluster == 15,]
cluster16 <- markers.strict[markers.strict$cluster == 16,]

ROC

Seurat has several tests for differential expression which can be set with the test.use parameter (see https://satijalab.org/seurat/articles/de_vignette for details). For example, the ROC test returns the ‘classification power’ for any individual marker (ranging from 0 - random, to 1 - perfect).

Best if run in a script as the compute takes ~3 hours to run.

ROC.markers <-
  FindAllMarkers(
    dataObject,
    logfc.threshold = 0.25,
    test.use = "roc",
    only.pos = TRUE
  )

Get markers for each cluster

# subset
ROC.cluster0 <- ROC.markers[ROC.markers$cluster == 0,]
ROC.cluster1 <- ROC.markers[ROC.markers$cluster == 1,]
ROC.cluster2 <- ROC.markers[ROC.markers$cluster == 2,]
ROC.cluster3 <- ROC.markers[ROC.markers$cluster == 3,]
ROC.cluster4 <- ROC.markers[ROC.markers$cluster == 4,]
ROC.cluster5 <- ROC.markers[ROC.markers$cluster == 5,]
ROC.cluster6 <- ROC.markers[ROC.markers$cluster == 6,]
ROC.cluster7 <- ROC.markers[ROC.markers$cluster == 7,]
ROC.cluster8 <- ROC.markers[ROC.markers$cluster == 8,]
ROC.cluster9 <- ROC.markers[ROC.markers$cluster == 9,]
ROC.cluster10 <- ROC.markers[ROC.markers$cluster == 10,]
ROC.cluster11 <- ROC.markers[ROC.markers$cluster == 11,]
ROC.cluster12 <- ROC.markers[ROC.markers$cluster == 12,]
ROC.cluster13 <- ROC.markers[ROC.markers$cluster == 13,]
ROC.cluster14 <- ROC.markers[ROC.markers$cluster == 14,]
ROC.cluster15 <- ROC.markers[ROC.markers$cluster == 15,]
ROC.cluster16 <- ROC.markers[ROC.markers$cluster == 16,]

Top variable genes

# print top variable genes
top100 <- dataObject@assays$SCT@var.features[1:100]
top100
##   [1] "DPP10"         "ROBO2"         "KCNIP4"        "TNR"          
##   [5] "LHFPL3"        "SLC1A3"        "SYT1"          "ADAM28"       
##   [9] "OPCML"         "DTNA"          "CSMD1"         "LRRTM4"       
##  [13] "DSCAM"         "FLT1"          "CEMIP"         "NRG3"         
##  [17] "SLC11A1"       "TNC"           "FAM189A2"      "SLC1A2"       
##  [21] "DOCK8"         "HIF1A-AS3"     "ADAMTS9"       "ADCY2"        
##  [25] "PTPRZ1"        "FGF14"         "NRXN1"         "MMP16"        
##  [29] "RP11-6L16.1"   "RALYL"         "LRMDA"         "GLIS3"        
##  [33] "SORBS1"        "RBFOX1"        "CNTNAP2"       "TLR2"         
##  [37] "RP11-358F13.1" "ARHGAP24"      "GRIK1"         "SNTG1"        
##  [41] "ZSCAN31"       "CD44"          "MT-RNR2"       "SRGN"         
##  [45] "LRP1B"         "GPC5"          "GALNTL6"       "CHI3L1"       
##  [49] "LAMA2"         "APBB1IP"       "CXCL14"        "OLR1"         
##  [53] "RFX4"          "DLGAP1"        "SNHG14"        "PLP1"         
##  [57] "GFAP"          "VCAN"          "PITPNC1"       "ATRNL1"       
##  [61] "CLDN5"         "USP53"         "AQP4"          "RYR3"         
##  [65] "BEST3"         "NXPH1"         "FAM155A"       "ASIC2"        
##  [69] "RP11-909M7.3"  "INHBA"         "STXBP5L"       "KIAA1217"     
##  [73] "DENND3"        "CTNNA2"        "PTPRC"         "KCNQ5"        
##  [77] "MT2A"          "AHCYL1"        "CNTN5"         "SAT1"         
##  [81] "WWC1"          "MGAT4C"        "GRIK2"         "GRID2"        
##  [85] "GPM6A"         "CLU"           "CELF2"         "ABCB1"        
##  [89] "CHST11"        "ST6GAL1"       "SPARCL1"       "CA10"         
##  [93] "IRAG1"         "LRRC4C"        "RP11-13N12.1"  "LINC01010"    
##  [97] "FYB1"          "PDE3B"         "LNCAROD"       "GNA14"
write.table(top100, paste0("../../results/variance/", sampleID, "_top100_variable_features.txt"), 
                           sep = "\t", row.names = FALSE, quote = FALSE)

PCs

# Printing out the most variable genes driving PCs
print(x = dataObject[["pca"]], 
      dims = 1:5, 
      nfeatures = 10)
## PC_ 1 
## Positive:  PLP1, QKI, ST18, SIK3, PIP4K2A, CTNNA3, IL1RAPL1, RNF220, TMTC2, MBP 
## Negative:  DPP10, KCNIP4, ROBO2, NRG3, CSMD1, OPCML, NRXN1, LRRTM4, SYT1, TNR 
## PC_ 2 
## Positive:  KCNIP4, ROBO2, CSMD1, SYT1, OPCML, LRRTM4, RBFOX1, FGF14, RALYL, TNR 
## Negative:  DTNA, DPP10, SLC1A3, FAM189A2, ADCY2, GLIS3, SORBS1, SLC1A2, RFX4, RP11-6L16.1 
## PC_ 3 
## Positive:  ADAM28, SLC11A1, DOCK8, LRMDA, ARHGAP24, TLR2, APBB1IP, DENND3, PTPRC, RP11-358F13.1 
## Negative:  DPP10, DTNA, NRG3, ADCY2, FAM189A2, SORBS1, SLC1A2, PCDH9, RFX4, RYR3 
## PC_ 4 
## Positive:  ROBO2, SYT1, DPP10, RBFOX1, SNHG14, RALYL, FRMPD4, CNTNAP2, KCNQ5, KHDRBS2 
## Negative:  TNR, LHFPL3, DSCAM, PTPRZ1, MMP16, VCAN, BEST3, CA10, MEGF11, LRRC4C 
## PC_ 5 
## Positive:  SLC1A3, SLC1A2, GPC5, PCDH9, PLP1, CABLES1, GNA14, ADGRV1, RYR3, NRXN1 
## Negative:  FLT1, ADAMTS9, CLDN5, CEMIP, ABCB1, ATP10A, USP53, MT2A, RP11-326C3.17, HLA-E

Plot markers

Heatmap

# get the top 10 markers for each cluster 
top10_Markers <- markers.strict %>%
  group_by(cluster) %>%
  dplyr::filter(avg_log2FC > 2) %>%
  slice_head(n = 5) %>%
  ungroup()
# DoHeatmap function for plotting
heat_markers <-
  DoHeatmap(dataObject, features = top10_Markers$gene) + NoLegend()
# downsample to 100 nuclei per cluster. 
heat_markers_downsample <-
  DoHeatmap(subset(dataObject, downsample = 100),
            features = top10_Markers$gene,
            size = 3)

top10_ROC <- ROC.markers %>%
  group_by(cluster) %>%
  dplyr::filter(avg_log2FC > 2 & myAUC > 0.8) %>%
  slice_head(n = 5) %>%
  ungroup()
heat_ROC <-
  DoHeatmap(dataObject, features = top10_ROC$gene) + NoLegend()
heat_ROC_downsample <-
  DoHeatmap(subset(dataObject, downsample = 100),
            features = top10_ROC$gene,
            size = 3)

heat_markers

heat_markers_downsample

heat_ROC

heat_ROC_downsample

Ridge plot

# MALAT1 is highly expressed in brain nuclei 
RidgePlot(dataObject, features = c("XIST", "MALAT1"), ncol = 2)

RidgePlot(dataObject, features = c("GFAP"), ncol = 1)

RidgePlot(dataObject, features = c("VCAN"), ncol = 1)

Feature plot

# UMAP showing the expression of select features
umap_feature <-
  FeaturePlot(dataObject,
              features = c("TYROBP", "MOG", "AQP4", "ENO2", "XIST", "MALAT1"))
umap_feature

Dot plot markers

markers.to.plot <-
  c(
    "SLC1A2",
    "GJA1",
    "CLDN5",
    "TGM2",
    "STAB1",
    "CD74",
    "VCAN",
    "TNR",
    "S100B",
    "ABCA2",
    "NRG1",
    "MEG3",
    "ERBB4",
    "GAD1"
  )
dot <- DotPlot(dataObject, features = markers.to.plot,
    dot.scale = 8) + RotatedAxis()
dot

Violins - Canonical cell-type markers

Markers obtained from dropviz

# Astrocytes
VlnPlot(dataObject,
        features = c("AQP4", "GJA1", "F3"))

# Endothelial
VlnPlot(dataObject,
        features = c("CLDN5", "ADGRF5", "FLT1"))

# Fibroblast-Like_Dcn
VlnPlot(dataObject,
        features = c("DCN", "SLC6A13", "IGF2"))

# Oligodendrocyte
VlnPlot(dataObject,
        features = c("MOG", "OLIG1", "MBP"))

# Polydendrocyte
VlnPlot(dataObject,
        features = c("VCAN", "TNR", "C1QL1"))

# Smooth muscle 
VlnPlot(dataObject,
        features = c("ACTA2", "RGS5"))

# Microglia / Marchophage 
VlnPlot(dataObject,
        features = c("CTSS", "C1QC", "C1QB"))

# Microglia / Marchophage 
VlnPlot(dataObject,
        features = c("TYROBP","P2RY12", "AIF1"))

# Interneuron
VlnPlot(dataObject,
        features = c("VIP", "CRH", "KIT"))

# Interneuron - GAD1 & GAD2
VlnPlot(dataObject,
        features = c("GAD1", "GAD2"))

Cluster Annotation

Cluster 0 - polydendrocyte / oligodenrocyte

CNP - polydendrocyte
SUN2 - microglia / macrophage / mural
FTH1 - oligodendrocyte
APOD - fibroblast like / oligodenrocyte
PLP1 - oligodenrocyte
MBP - oligodenrocyte

# Number of cells per condition
count_per_cluster[,c(1,2)]
##   orig.ident    0
## 1         v4 2488
# UMAP with only cluster 0
DimPlot(object = subset(dataObject, seurat_clusters == "0"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[1])

VlnPlot(dataObject,
        features = cluster0$gene[1:10],
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster0$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster0$gene[1:10]
##  [1] "CNP"       "LINC00844" "APOD"      "SUN2"      "FTH1"      "VWA1"     
##  [7] "LINC01608" "AMER2"     "PLP1"      "APLP1"
ROC.cluster0$gene[1:10]
##  [1] "PLP1"      "CNP"       "GPM6B"     "SUN2"      "APOD"      "FTH1"     
##  [7] "LINC00844" "MBP"       "EDIL3"     "IL1RAPL1"

Cluster 1 - oligodendrocyte, non-specifice

CLASP2 - neuron / oligodendrocyte
DYSF - mural / enodthelial

# Number of cells per condition
count_per_cluster[,c(1,3)]
##   orig.ident    1
## 1         v4 2360
DimPlot(object = subset(dataObject, seurat_clusters == "1"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#56B4E9")

VlnPlot(dataObject,
        features = cluster1$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster1$gene[1],
        cols = color.panel,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster1$gene[1:10]
##  [1] "LINC01608"     "DYSF"          "KIF19"         "CR1"          
##  [5] "LINC02073"     "RP4-537K23.4"  "RP11-50D16.4"  "RP11-446H18.5"
##  [9] "MMP17"         "KLRK1-AS1"
ROC.cluster1$gene[1:10]
##  [1] "CLASP2" NA       NA       NA       NA       NA       NA       NA      
##  [9] NA       NA

Cluster 2 - fibroblast-like, non-specific

FGFR1 - fibroblast-like
SH3RF1 - neuron / polydendrocyte
GADD45B - fibroblast-like ITPKB - astrocyte UHRF2 - neuron GTF2H2 - microglia GLUL - astrocyte

# Number of cells per condition
count_per_cluster[,c(1,4)]
##   orig.ident    2
## 1         v4 2041
DimPlot(object = subset(dataObject, seurat_clusters == "2"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#009E73")

VlnPlot(dataObject,
        features = cluster2$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster2$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster2$gene[1:10]
##  [1] "FGFR1"    "SH3RF1"   "GADD45B"  "ITPKB"    "UHRF2"    "GTF2H2B" 
##  [7] "SH3PXD2B" "GTF2H2"   "GTF2H2C"  "IL6R"
ROC.cluster2$gene[1:10]
##  [1] "PRUNE2"  "AKAP6"   "SIK3"    "TMTC2"   "SPOCK1"  "ELL2"    "FGFR1"  
##  [8] "SH3RF1"  "GTF2H2B" "GLUL"

Cluster 3 - oligodendrocyte / neuron - non-specific

SLC5A11 - oligodendrocyte / polydendrocyte / astrocyte
SAMD12 - neuron endothelial
RASGRF1 - neuron
LRRC63 - microglia / neurogenesis

# Number of cells per condition
count_per_cluster[,c(1,4)]
##   orig.ident    2
## 1         v4 2041
DimPlot(object = subset(dataObject, seurat_clusters == "3"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#F0E442")

VlnPlot(dataObject,
        features = cluster3$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster3$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster3$gene[1:10]
##  [1] "SLC5A11"      "ANKRD18A"     "SAMD12"       "RASGRF1"      "LRRC63"      
##  [6] "RP11-141F7.1" "LINC00609"    "PLEKHG1"      "RP1-223B1.1"  "SEC14L5"
ROC.cluster3$gene[1:10]
##  [1] "SLC5A11"        "CH507-528H12.1" "SAMD12"         "CH507-513H4.1" 
##  [5] "ANKRD18A"       NA               NA               NA              
##  [9] NA               NA

Cluster 4 - MT

# Number of cells per condition
count_per_cluster[,c(1,5)]
##   orig.ident    3
## 1         v4 1944
DimPlot(object = subset(dataObject, seurat_clusters == "4"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#0072B2")

VlnPlot(dataObject,
        features = cluster4$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster4$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster4$gene[1:10]
##  [1] "MT-CO2"    "MT-ND2"    "MT-ND3"    "MT-ND4"    "MT-RNR2"   "MT-CO3"   
##  [7] "MT-ATP6"   "MTRNR2L12" "MT-CYB"    "MTCO2P12"
ROC.cluster4$gene[1:10]
##  [1] "MT-RNR2"        "MT-CO2"         "MT-ND4"         "MT-ND2"        
##  [5] "MT-ND3"         "CH507-528H12.1" "CH507-513H4.1"  "MT-CO3"        
##  [9] "MT-ATP6"        "TERT"

Cluster 5 - oligodendrocyte / fibroblast-like

S100B - oligodendrocyte
TUBA1A - neurogenesis / polydendrocytes
SPP1 - fibroblast-like
CRYAB - oligodendrocyte
SPP1 - fibroblast-like

# Number of cells per condition
count_per_cluster[,c(1,6)]
##   orig.ident    4
## 1         v4 1720
DimPlot(object = subset(dataObject, seurat_clusters == "5"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#D55E00")

VlnPlot(dataObject,
        features = cluster5$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster5$gene[1:10]
##  [1] "S100B"        "TUBA1A"       "MARCKSL1"     "SPP1"         "CRYAB"       
##  [6] "MYLK"         "RASGRF1"      "RP11-141F7.1" "TUBB2B"       "COL18A1"
VlnPlot(dataObject,
        features = ROC.cluster5$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster5$gene[1:10]
##  [1] "S100B"        "TUBA1A"       "MARCKSL1"     "SPP1"         "CRYAB"       
##  [6] "MYLK"         "RASGRF1"      "RP11-141F7.1" "TUBB2B"       "COL18A1"
ROC.cluster5$gene[1:10]
##  [1] "TF"       "TUBA1A"   "PLP1"     "SPP1"     "S100B"    "FTL"     
##  [7] "CNP"      "CRYAB"    "MARCKSL1" "SCD"

Cluster 6 - oligodendrocyte / polydendrocyte

MBP - oligodendrocyte
CNP - polydendrocyte
FTH1 - oligodendrocyte
MAG - polydendrocyte

# Number of cells per condition
count_per_cluster[,c(1,7)]
##   orig.ident   5
## 1         v4 907
DimPlot(object = subset(dataObject, seurat_clusters == "6"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#CC79A7")

VlnPlot(dataObject,
        features = cluster6$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster6$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster6$gene[1:10]
##  [1] "CNP"      "PLP1"     "TF"       "MBP"      "MARCKSL1" "FTH1"    
##  [7] "ATP1B1"   "SCD"      "SELENOP"  "MAG"
ROC.cluster6$gene[1:10]
##  [1] "PLP1"    "TF"      "CNP"     "MBP"     "PCDH9"   "SLC5A11" "FTH1"   
##  [8] "ATP1B1"  "FTL"     "SCD"

Cluster 7 - astrocyte

AQP4 - astrocyte
GFAP - astrocyte
DTNA - astrocyte
CD44 - astrocyte
ADCY2 - neuron / astrocyte

# Number of cells per condition
count_per_cluster[,c(1,8)]
##   orig.ident   6
## 1         v4 774
DimPlot(object = subset(dataObject, seurat_clusters == "7"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#666666")

VlnPlot(dataObject,
        features = cluster7$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster7$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster7$gene[1:10]
##  [1] "GLIS3"       "CD44"        "FAM189A2"    "ADCY2"       "GFAP"       
##  [6] "RFX4"        "RP11-6L16.1" "WWC1"        "AQP4"        "ABLIM1"
ROC.cluster7$gene[1:10]
##  [1] "DPP10"    "DTNA"     "GLIS3"    "SORBS1"   "NRG3"     "ADCY2"   
##  [7] "CD44"     "GFAP"     "ABLIM1"   "FAM189A2"

Cluster 8 - microglia

DOCK8 - microglia / macrophage
ADAM28 - interneuron
ARHGAP24 - polydendrocytes
SLC11A1 - microglia / macrophage

# Number of cells per condition
count_per_cluster[,c(1,9)]
##   orig.ident   7
## 1         v4 536
DimPlot(object = subset(dataObject, seurat_clusters == "8"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#AD7700")

VlnPlot(dataObject,
        features = cluster8$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster8$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster8$gene[1:10]
##  [1] "ADAM28"   "SLC11A1"  "DOCK8"    "LRMDA"    "ARHGAP24" "DENND3"  
##  [7] "APBB1IP"  "PTPRC"    "MEF2C"    "CHST11"
ROC.cluster8$gene[1:10]
##  [1] "LRMDA"    "DOCK8"    "ADAM28"   "SRGAP2B"  "ARHGAP24" "SRGAP2"  
##  [7] "ARHGAP26" "SLC11A1"  "PLXDC2"   "MEF2C"

Cluster 9 - neuron

FOS - polydendrocyte / mural
DDIT3 - nueron / mural
EGR1 - neuron CREB5 - polydendrocyte / oligodendrocyte
FGFR1 - fibroblast-like
ATF3 - microglia

# Number of cells per condition
count_per_cluster[,c(1,10)]
##   orig.ident   8
## 1         v4 472
DimPlot(object = subset(dataObject, seurat_clusters == "9"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#1C91D4")

VlnPlot(dataObject,
        features = cluster9$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster9$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster9$gene[1:10]
##  [1] "DDIT3"          "FOS"            "EGR1"           "GADD45B"       
##  [5] "HSP90B1"        "RP11-181L23.10" "NAMPT"          "FGFR1"         
##  [9] "ATF3"           "CREB5"
ROC.cluster9$gene[1:10]
##  [1] "CREB5"   "GLUL"    "DDIT3"   "USP32"   "CNDP1"   "FOS"     "PIP4K2A"
##  [8] "IQGAP1"  "HSP90B1" "FGFR1"

Cluster 10 - polydendrocyte

TNR - polydendrocyte
VCAN - polydendrocyte
PTPRZ1 - polydendrocyte   MMP16 - polydendrocyte

# Number of cells per condition
count_per_cluster[,c(1,11)]
##   orig.ident   9
## 1         v4 461
DimPlot(object = subset(dataObject, seurat_clusters == "10"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#007756")

VlnPlot(dataObject,
        features = cluster10$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster10$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster10$gene[1:10]
##  [1] "TNR"    "VCAN"   "PTPRZ1" "MMP16"  "CA10"   "LHFPL3" "MEGF11" "FGF14" 
##  [9] "DSCAM"  "LUZP2"
ROC.cluster10$gene[1:10]
##  [1] "TNR"    "LHFPL3" "PTPRZ1" "FGF14"  "DSCAM"  "LRRC4C" "OPCML"  "VCAN"  
##  [9] "MMP16"  "CA10"

Cluster 11 - astrocyte

SLC1A2 - astrocyte
AQP4 - astrocyte
BMPR1B - astrocyte
FAM189A2 - astrocyte
ADGRV1 - oligodendrocyte
RYR3 - neuron

# Number of cells per condition
count_per_cluster[,c(1,12)]
##   orig.ident  10
## 1         v4 383
DimPlot(object = subset(dataObject, seurat_clusters == "11"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#D5C711")

VlnPlot(dataObject,
        features = cluster11$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster11$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster11$gene[1:10]
##  [1] "IRAG1"    "SLC1A2"   "BMPR1B"   "FAM189A2" "AQP4"     "RFX4"    
##  [7] "ADGRV1"   "RYR3"     "SLC14A1"  "GLIS3"
ROC.cluster11$gene[1:10]
##  [1] "SLC1A3" "GPC5"   "AHCYL1" "SLC1A2" "DTNA"   "RYR3"   "ADGRV1" "ADCY2" 
##  [9] "BMPR1B" "SOX5"

Cluster 12 - neuron

RALYL - interneuron NELL2 - neuron MEG3 - neuron LRFN5 - neuron

# Number of cells per condition
count_per_cluster[,c(1,13)]
##   orig.ident  11
## 1         v4 357
DimPlot(object = subset(dataObject, seurat_clusters == "12"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#005685")

VlnPlot(dataObject,
        features = cluster12$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster12$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster12$gene[1:10]
##  [1] "RALYL"        "NELL2"        "RP11-909M7.3" "MEG3"         "SCN2A"       
##  [6] "LRFN5"        "SNAP25"       "GRIA1"        "STXBP5L"      "UNC80"
ROC.cluster12$gene[1:10]
##  [1] "RALYL"        "KCNIP4"       "SYT1"         "RP11-909M7.3" "RBFOX1"      
##  [6] "MEG3"         "DLGAP1"       "SNHG14"       "STXBP5L"      "NELL2"

Cluster 13 - neuron

PAK3 - neuron GRIP1 - interneuron FGF14 - interneuron MYT1L - neuron MEG3 - neuron CNTNAP2 - interneuron

# Number of cells per condition
count_per_cluster[,c(1,14)]
##   orig.ident  12
## 1         v4 256
DimPlot(object = subset(dataObject, seurat_clusters == "13"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#A04700")

VlnPlot(dataObject,
        features = cluster13$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster13$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster13$gene[1:10]
##  [1] "RP11-909M7.3" "PAK3"         "GRIP1"        "FGF14"        "MYT1L"       
##  [6] "MEG3"         "STXBP5L"      "GRIN2B"       "CELF4"        "FGF12"
ROC.cluster13$gene[1:10]
##  [1] "SNHG14"       "CNTNAP2"      "DLGAP1"       "GRIK2"        "RP11-909M7.3"
##  [6] "GRIP1"        "FGF14"        "PLCB1"        "FGF12"        "STXBP5L"

Cluster 14 - neuron

MEG3 - neuron
HS3ST4 - neuron
CELF4 - neuron
GABRG3 - neuron

# Number of cells per condition
count_per_cluster[,c(1,15)]
##   orig.ident  13
## 1         v4 249
DimPlot(object = subset(dataObject, seurat_clusters == "14"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#B14380")

VlnPlot(dataObject,
        features = cluster14$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster14$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster14$gene[1:10]
##  [1] "RP11-909M7.3" "MEG3"         "CTD-2337L2.1" "HS3ST4"       "CELF4"       
##  [6] "ARPP21"       "MYT1L"        "GABRG3"       "SCN2A"        "GABRB2"
ROC.cluster14$gene[1:10]
##  [1] "SNHG14"       "SYT1"         "RBFOX1"       "CSMD1"        "MDGA2"       
##  [6] "RP11-909M7.3" "FAM155A"      "NRXN1"        "MEG3"         "LRP1B"

Cluster 15 - fibroblast-like

EBF1 - mural
NID1 - fibroblast-like
CFH - fibroblast-like
LEF1 - endothelial
RBMS3 - neuron
DCN - fibroblast-like

# Number of cells per condition
count_per_cluster[,c(1,16)]
##   orig.ident  14
## 1         v4 185
DimPlot(object = subset(dataObject, seurat_clusters == "15"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#4D4D4D")

VlnPlot(dataObject,
        features = cluster15$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster15$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster15$gene[1:10]
##  [1] "EBF1"   "NID1"   "CFH"    "LEF1"   "DCN"    "IFITM1" "COL1A2" "IGFBP4"
##  [9] "NR2F2"  "EGFL7"
ROC.cluster15$gene[1:10]
##  [1] "RBMS3"         "PTPRG"         "USP53"         "SVIL"         
##  [5] "ZEB1"          "RP11-9J18.1"   "ARHGAP29"      "RP11-649A16.1"
##  [9] "RBPMS"         "EPS8"

Cluster 16 - neuron

NEFL - neuron
ENC1 - neuron
NRGN - neuron
SLC17A7 - neuron
PHYHIP - interneuron

# Number of cells per condition
count_per_cluster[,c(1,17)]
##   orig.ident  15
## 1         v4 136
DimPlot(object = subset(dataObject, seurat_clusters == "16"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[17])

VlnPlot(dataObject,
        features = cluster16$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster16$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster16$gene[1:10]
##  [1] "NEFL"    "ENC1"    "NRGN"    "SLC17A7" "PHYHIP"  "STMN2"   "RGS4"   
##  [8] "NEFM"    "THY1"    "VSNL1"
ROC.cluster16$gene[1:10]
##  [1] "MT-CO3"  "MAP1B"   "CALM1"   "MT-CO1"  "BCYRN1"  "MT-CO2"  "SYT1"   
##  [8] "NRGN"    "NEFL"    "MT-ATP6"

Assign identities

# Rename all identities
dataObject.annotated <- RenameIdents(object = dataObject, 
                               "0" = "Oligodendrocytes",
                               "1" = "Oligodendrocytes",
                               "2" = "Fibroblast-like",
                               "3" = "Oligodendrocytes",
                               "4" = "MT",
                               "5" = "Oligodendrocytes",
                               "6" = "Oligodendrocytes",
                               "7" = "Astrocytes",
                               "8" = "Microglia",
                               "9" = "Neurons",
                               "10" = "Polydendrocytes",
                               "11" = "Astrocytes",
                               "12" = "Neurons",
                               "13" = "Neurons", 
                               "14" = "Neurons", 
                               "15" = "Fibroblast-like",
                               "16" = "Neurons")
dataObject.annotated$annotated_clusters <- factor(Idents(dataObject.annotated),
                                             levels = c("Astrocytes",
                                                        "Fibroblast-like",
                                                        "Microglia", 
                                                        "Oligodendrocytes",
                                                        "Polydendrocytes", 
                                                        "Neurons", 
                                                        "MT"))
Idents(dataObject.annotated) <- "annotated_clusters"

Unannotated

UMAP

ditto_umap <- dittoDimPlot(object = dataObject.annotated,
             var = "annotated_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = TRUE)
ditto_umap

Dot plot markers

markers.to.plot <-
  c(
    "SLC1A2",
    "GJA1",
    "CLDN5",
    "TGM2",
    "STAB1",
    "CD74",
    "VCAN",
    "TNR",
    "S100B",
    "ABCA2",
    "NRG1",
    "MEG3",
    "ERBB4",
    "GAD1"
  )
dot <- DotPlot(dataObject.annotated, features = markers.to.plot, 
    dot.scale = 8) + RotatedAxis()
dot

Nuclei count per cluster

count_per_cluster <- FetchData(dataObject.annotated,
                               vars = c("ident", "orig.ident")) %>%
  dplyr::count(ident, orig.ident) %>%
  tidyr::spread(ident, n)
count_per_cluster
##   orig.ident Astrocytes Fibroblast-like Microglia Oligodendrocytes
## 1         v4        893            2177       472             8473
##   Polydendrocytes Neurons   MT
## 1             383    1279 1720
count_melt <- reshape2::melt(count_per_cluster)
colnames(count_melt) <- c("ident", "cluster", "number of nuclei")
count_max <- count_melt[which.max(count_melt$`number of nuclei`), ]
count_max_value <- count_max$`number of nuclei`
cellmax <- count_max_value + 500 # so that the figure doesn't cut off the text
count_bar <- ggplot(count_melt, aes(x = factor(cluster), y = `number of nuclei`, fill = `cluster`)) +
  geom_bar(
    stat = "identity",
    colour = "black",
    width = 1,
    position = position_dodge(width = 0.8)
  ) +
  geom_text(
    aes(label = `number of nuclei`),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    angle = 45,
    hjust = -.01
  ) +
  theme_classic() + scale_fill_manual(values = color.panel) +
  ggtitle("Number of nuclei per cluster") +  xlab("cluster") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(limits = c(0, cellmax))
count_bar

## Relative abundance

relative_abundance <- dataObject.annotated@meta.data %>%
  group_by(annotated_clusters, orig.ident) %>%
  dplyr::count() %>%
  group_by(orig.ident) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=orig.ident,y=percent, fill=annotated_clusters)) +
  geom_col() +
  scale_fill_manual(values = color.panel) +
  ggtitle("Percentage of cell type") +
  theme_bw()+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
relative_abundance

Save RDS

saveRDS(dataObject.annotated, paste0("../../rObjects/", sampleID, ".annotated.rds"))