Pipseq chemistry kit version 4

Sample NPID control female NA04-324

Libraries & paths

library(dplyr)
library(Seurat)
library(patchwork)
library(knitr)
library(dittoSeq)
library(reshape2)
Sys.setenv(RSTUDIO_PANDOC="/usr/local/biotools/pandoc/3.1.2/bin")

sampleID <- c("v4")
sample_color.panel <- c("#B4AF46")
color.panel <- dittoColors()

Read in data

See script 01 for pre-processing and QC filtering. script 02 runs FindAllMarkers. Output is Wilcox and ROC.

dataObject <- readRDS(file = paste0("../../rObjects/", sampleID, ".filtered.pass_strict.doublet_info.rds"))
markers.strict <- readRDS(file = paste0("../../rObjects/markers_pass_strict/", 
                          sampleID, "_FindAllMarkers_strict_logFC1_FDR0.05.rds"))
ROC.markers <- readRDS(file = paste0("../../rObjects/markers_pass_strict/", 
                       sampleID, "_ROC_markers.rds"))

Unannotated

UMAP

ditto_umap <- dittoDimPlot(object = dataObject,
             var = "seurat_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = TRUE)
ditto_umap

Nuclei count per cluster

count_per_cluster <- FetchData(dataObject,
                               vars = c("ident", "orig.ident")) %>%
  dplyr::count(ident, orig.ident) %>%
  tidyr::spread(ident, n)
count_per_cluster

##   orig.ident    0    1    2    3    4    5    6   7   8   9  10  11  12  13  14
## 1         v4 1913 1779 1370 1351 1323 1313 1259 744 629 517 484 469 372 342 228
##    15  16  17
## 1 222 202 131

count_melt <- reshape2::melt(count_per_cluster)
colnames(count_melt) <- c("ident", "cluster", "number of nuclei")
count_max <- count_melt[which.max(count_melt$`number of nuclei`), ]
count_max_value <- count_max$`number of nuclei`
cellmax <- count_max_value + 200 # so that the figure doesn't cut off the text
count_bar <- ggplot(count_melt, aes(x = factor(cluster), y = `number of nuclei`, fill = `ident`)) +
  geom_bar(
    stat = "identity",
    colour = "black",
    width = 1,
    position = position_dodge(width = 0.8)
  ) +
  geom_text(
    aes(label = `number of nuclei`),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    angle = 45,
    hjust = -.01
  ) +
  theme_classic() + scale_fill_manual(values = sample_color.panel) +
  ggtitle("Number of nuclei per cluster") +  xlab("cluster") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(limits = c(0, cellmax))
count_bar

Cluster tree

dataObject <- BuildClusterTree(
  object = dataObject,
  dims = 1:30,
  reorder = FALSE,
  reorder.numeric = FALSE
)

tree <- dataObject@tools$BuildClusterTree
tree$tip.label <- paste0("Cluster ", tree$tip.label)
nClusters <- length(tree$tip.label)

tree_graph <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = color.panel[1:nClusters],
                        shape = 16,
                        size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0, 2.5, 0, 0), 'cm'))
tree_graph

Finding differentially expressed features (cluster biomarkers)

Seurat can help you find markers that define clusters via differential expression (DE). By default, it identifies positive and negative markers of a single cluster (specified in ident.1), compared to all other cells. FindAllMarkers() automates this process for all clusters, but you can also test groups of clusters vs. each other, or against all cells.

The default is wilcox which identifies differentially expressed genes between two groups of cells using a Wilcoxon Rank Sum test. ## FindAllMarkers default wilxcon

# find markers for every cluster compared to all remaining cells, 
# report only the positive ones
all.markers <- FindAllMarkers(dataObject, only.pos = TRUE)

# rearrange to order by cluster & filter to only include log2FC > 1 & FDR < 0.05
markers.strict <- all.markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1 & p_val_adj < 0.05)

# compare 
table(all.markers$cluster)
table(markers.strict$cluster)

Get markers for each cluster

# subset
cluster0 <- markers.strict[markers.strict$cluster == 0,]
cluster1 <- markers.strict[markers.strict$cluster == 1,]
cluster2 <- markers.strict[markers.strict$cluster == 2,]
cluster3 <- markers.strict[markers.strict$cluster == 3,]
cluster4 <- markers.strict[markers.strict$cluster == 4,]
cluster5 <- markers.strict[markers.strict$cluster == 5,]
cluster6 <- markers.strict[markers.strict$cluster == 6,]
cluster7 <- markers.strict[markers.strict$cluster == 7,]
cluster8 <- markers.strict[markers.strict$cluster == 8,]
cluster9 <- markers.strict[markers.strict$cluster == 9,]
cluster10 <- markers.strict[markers.strict$cluster == 10,]
cluster11 <- markers.strict[markers.strict$cluster == 11,]
cluster12 <- markers.strict[markers.strict$cluster == 12,]
cluster13 <- markers.strict[markers.strict$cluster == 13,]
cluster14 <- markers.strict[markers.strict$cluster == 14,]
cluster15 <- markers.strict[markers.strict$cluster == 15,]
cluster16 <- markers.strict[markers.strict$cluster == 16,]
cluster17 <- markers.strict[markers.strict$cluster == 17,]

ROC

Seurat has several tests for differential expression which can be set with the test.use parameter (see https://satijalab.org/seurat/articles/de_vignette for details). For example, the ROC test returns the ‘classification power’ for any individual marker (ranging from 0 - random, to 1 - perfect).

Best if run in a script as the compute takes ~3 hours to run.

ROC.markers <-
  FindAllMarkers(
    dataObject,
    logfc.threshold = 0.25,
    test.use = "roc",
    only.pos = TRUE
  )

Get markers for each cluster

# subset
ROC.cluster0 <- ROC.markers[ROC.markers$cluster == 0,]
ROC.cluster1 <- ROC.markers[ROC.markers$cluster == 1,]
ROC.cluster2 <- ROC.markers[ROC.markers$cluster == 2,]
ROC.cluster3 <- ROC.markers[ROC.markers$cluster == 3,]
ROC.cluster4 <- ROC.markers[ROC.markers$cluster == 4,]
ROC.cluster5 <- ROC.markers[ROC.markers$cluster == 5,]
ROC.cluster6 <- ROC.markers[ROC.markers$cluster == 6,]
ROC.cluster7 <- ROC.markers[ROC.markers$cluster == 7,]
ROC.cluster8 <- ROC.markers[ROC.markers$cluster == 8,]
ROC.cluster9 <- ROC.markers[ROC.markers$cluster == 9,]
ROC.cluster10 <- ROC.markers[ROC.markers$cluster == 10,]
ROC.cluster11 <- ROC.markers[ROC.markers$cluster == 11,]
ROC.cluster12 <- ROC.markers[ROC.markers$cluster == 12,]
ROC.cluster13 <- ROC.markers[ROC.markers$cluster == 13,]
ROC.cluster14 <- ROC.markers[ROC.markers$cluster == 14,]
ROC.cluster15 <- ROC.markers[ROC.markers$cluster == 15,]
ROC.cluster16 <- ROC.markers[ROC.markers$cluster == 16,]
ROC.cluster17 <- ROC.markers[ROC.markers$cluster == 17,]

Top variable genes

# print top variable genes
top100 <- dataObject@assays$SCT@var.features[1:100]
top100

##   [1] "DPP10"          "TNR"            "LHFPL3"         "ROBO2"         
##   [5] "KCNIP4"         "DSCAM"          "DTNA"           "OPCML"         
##   [9] "SLC1A3"         "ADAM28"         "CEMIP"          "MMP16"         
##  [13] "NRG3"           "SLC11A1"        "FLT1"           "SLC1A2"        
##  [17] "FGF14"          "ADCY2"          "LRRTM4"         "CSMD1"         
##  [21] "FAM189A2"       "DOCK8"          "TNC"            "SYT1"          
##  [25] "HIF1A-AS3"      "PTPRZ1"         "LRMDA"          "ADAMTS9"       
##  [29] "RP11-6L16.1"    "SORBS1"         "GLIS3"          "SNTG1"         
##  [33] "ARHGAP24"       "RP11-358F13.1"  "NRXN1"          "ZSCAN31"       
##  [37] "TLR2"           "CD44"           "RALYL"          "SRGN"          
##  [41] "GRIK1"          "GPC5"           "CNTNAP2"        "USP53"         
##  [45] "APBB1IP"        "GALNTL6"        "OLR1"           "RFX4"          
##  [49] "LAMA2"          "RYR3"           "PLP1"           "DENND3"        
##  [53] "LRP1B"          "CHI3L1"         "VCAN"           "SAT1"          
##  [57] "ATRNL1"         "PITPNC1"        "CA10"           "PDE3B"         
##  [61] "AQP4"           "WWC1"           "BEST3"          "GFAP"          
##  [65] "RBFOX1"         "NXPH1"          "NRCAM"          "ST6GAL1"       
##  [69] "GRID2"          "CHST11"         "GPM6A"          "ABCB1"         
##  [73] "CXCL14"         "AHCYL1"         "GRIK2"          "PTPRC"         
##  [77] "INHBA"          "SNHG14"         "DLGAP1"         "LRRC4C"        
##  [81] "SPARCL1"        "MT2A"           "CNTN5"          "RP11-13N12.1"  
##  [85] "LINC01010"      "LNCAROD"        "FAM155A"        "FYB1"          
##  [89] "ADGRV1"         "IRAG1"          "PTPRM"          "GNA14"         
##  [93] "CH507-528H12.1" "RP11-909M7.3"   "MEF2C"          "LUZP2"         
##  [97] "CTNNA2"         "ITPR2"          "SLC2A3"         "BRINP3"

write.table(top100, paste0("../../results/pass_strict/variance/", sampleID, "_top100_variable_features.txt"), 
                           sep = "\t", row.names = FALSE, quote = FALSE)

PCs

# Printing out the most variable genes driving PCs
print(x = dataObject[["pca"]], 
      dims = 1:5, 
      nfeatures = 10)

## PC_ 1 
## Positive:  PLP1, ST18, QKI, IL1RAPL1, SIK3, CTNNA3, PIP4K2A, RNF220, TMTC2, MBP 
## Negative:  DPP10, TNR, NRG3, DTNA, LHFPL3, ADCY2, SLC1A3, OPCML, PTPRZ1, DSCAM 
## PC_ 2 
## Positive:  DPP10, DTNA, SLC1A3, FAM189A2, ADCY2, GLIS3, SORBS1, RFX4, RYR3, RP11-6L16.1 
## Negative:  TNR, LHFPL3, OPCML, DSCAM, CSMD1, FGF14, MMP16, KCNIP4, LRRTM4, SNTG1 
## PC_ 3 
## Positive:  ADAM28, SLC11A1, LRMDA, DOCK8, ARHGAP24, DENND3, TLR2, APBB1IP, PTPRC, RP11-358F13.1 
## Negative:  DPP10, DTNA, NRG3, ADCY2, FAM189A2, SORBS1, SLC1A2, PCDH9, LSAMP, TNR 
## PC_ 4 
## Positive:  ROBO2, SYT1, CNTNAP2, SNHG14, RBFOX1, RALYL, RP11-909M7.3, GALNTL6, FRMPD4, CNTN5 
## Negative:  TNR, LHFPL3, PTPRZ1, DSCAM, MMP16, VCAN, CA10, BEST3, MEGF11, RP4-668E10.4 
## PC_ 5 
## Positive:  CEMIP, FLT1, ADAMTS9, USP53, MT2A, DPP10, TNC, ABCB1, CD44, FGFR1 
## Negative:  SLC1A3, SLC1A2, GPC5, PCDH9, CABLES1, PLP1, GNA14, ADGRV1, RYR3, NRXN1

Plot markers

Heatmap

# get the top 10 markers for each cluster 
top10_Markers <- markers.strict %>%
  group_by(cluster) %>%
  dplyr::filter(avg_log2FC > 2) %>%
  slice_head(n = 5) %>%
  ungroup()
# DoHeatmap function for plotting
heat_markers <-
  DoHeatmap(dataObject, features = top10_Markers$gene) + NoLegend()
# downsample to 100 nuclei per cluster. 
heat_markers_downsample <-
  DoHeatmap(subset(dataObject, downsample = 100),
            features = top10_Markers$gene,
            size = 3)

top10_ROC <- ROC.markers %>%
  group_by(cluster) %>%
  dplyr::filter(avg_log2FC > 2 & myAUC > 0.8) %>%
  slice_head(n = 5) %>%
  ungroup()
heat_ROC <-
  DoHeatmap(dataObject, features = top10_ROC$gene) + NoLegend()
heat_ROC_downsample <-
  DoHeatmap(subset(dataObject, downsample = 100),
            features = top10_ROC$gene,
            size = 3)

heat_markers

heat_markers_downsample

heat_ROC

heat_ROC_downsample

Ridge plot

# MALAT1 is highly expressed in brain nuclei 
RidgePlot(dataObject, features = c("XIST", "MALAT1"), ncol = 2)

RidgePlot(dataObject, features = c("GFAP"), ncol = 1)

RidgePlot(dataObject, features = c("VCAN"), ncol = 1)

Feature plot

# UMAP showing the expression of select features
umap_feature <-
  FeaturePlot(dataObject,
              features = c("TYROBP", "MOG", "AQP4", "ENO2", "XIST", "MALAT1"))
umap_feature

Dot plot markers

markers.to.plot <-
  c(
    "SLC1A2",
    "GJA1",
    "CLDN5",
    "TGM2",
    "STAB1",
    "CD74",
    "VCAN",
    "TNR",
    "S100B",
    "ABCA2",
    "NRG1",
    "MEG3",
    "ERBB4",
    "GAD1"
  )
dot <- DotPlot(dataObject, features = markers.to.plot,
    dot.scale = 8) + RotatedAxis()
dot

Violins - Canonical cell-type markers

Markers obtained from dropviz

# Astrocytes
VlnPlot(dataObject,
        features = c("AQP4", "GJA1", "F3"))

# Endothelial
VlnPlot(dataObject,
        features = c("CLDN5", "ADGRF5", "FLT1"))

# Fibroblast-Like_Dcn
VlnPlot(dataObject,
        features = c("DCN", "SLC6A13", "IGF2"))

# Oligodendrocyte
VlnPlot(dataObject,
        features = c("MOG", "OLIG1", "MBP"))

# Polydendrocyte
VlnPlot(dataObject,
        features = c("VCAN", "TNR", "C1QL1"))

# Smooth muscle 
VlnPlot(dataObject,
        features = c("ACTA2", "RGS5"))

# Microglia / Marchophage 
VlnPlot(dataObject,
        features = c("CTSS", "C1QC", "C1QB"))

# Microglia / Marchophage 
VlnPlot(dataObject,
        features = c("TYROBP","P2RY12", "AIF1"))

# Interneuron
VlnPlot(dataObject,
        features = c("VIP", "CRH", "KIT"))

# Interneuron - GAD1 & GAD2
VlnPlot(dataObject,
        features = c("GAD1", "GAD2"))

## Feature plot - Canonical cell-type markers

# neuron 
dataObject[["percent.neurons"]] <- PercentageFeatureSet(dataObject, features = c("SNAP25","SYT1", "GAD1", "GAD2"))

# oligodendrocyte
dataObject[["percent.oligo"]] <- PercentageFeatureSet(dataObject, features = c("PLP1","MBP", "MOG"))

# oligodendrocyte precursor cells
dataObject[["percent.opc"]] <- PercentageFeatureSet(dataObject, features = c("OLIG1","PDGFRA", "VCAN"))

# astrocyte
dataObject[["percent.astro"]] <- PercentageFeatureSet(dataObject, features = c("CLU","GFAP", "AQP4", "CLDN5", "ADGRF5", "FLT1"))

# microglia
dataObject[["percent.micro"]] <- PercentageFeatureSet(dataObject, features = c("HEXB","C1QA", "CTSS", "C1QC", "C1QB", "TYROBP","P2RY12", "AIF1")) 

# endothelial
dataObject[["percent.endo"]] <- PercentageFeatureSet(dataObject, features = c("RGS5", "IGFBP7", "FN1", "CLDN5", "ADGRF5", "FLT1"))

# neuron 
FeaturePlot(dataObject, features = "percent.neurons", label = TRUE)

# oligodendrocyte
FeaturePlot(dataObject, features = "percent.oligo", label = TRUE)

# oligodendrocyte precursor cells
FeaturePlot(dataObject, features = "percent.opc", label = TRUE)

# astrocyte
FeaturePlot(dataObject, features = "percent.astro", label = TRUE)

# microglia
FeaturePlot(dataObject, features = "percent.micro", label = TRUE)

# endothelial
FeaturePlot(dataObject, features = "percent.endo", label = TRUE)

Cluster Annotation

Cluster 0 - oligo

# Number of cells per condition
count_per_cluster[,c(1,2)]

##   orig.ident    0
## 1         v4 1913

# UMAP with only cluster 0
DimPlot(object = subset(dataObject, seurat_clusters == "0"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[1])

VlnPlot(dataObject,
        features = cluster0$gene[1:10],
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster0$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster0$gene[1:10]

##  [1] "CNP"       "LINC00844" "APOD"      "LINC01608" "SUN2"      "FTH1"     
##  [7] "VWA1"      "MBP"       "PLP1"      "AMER2"

ROC.cluster0$gene[1:10]

##  [1] "PLP1"      "CNP"       "MBP"       "LINC00844" "APOD"      "FTH1"     
##  [7] "SUN2"      "EDIL3"     "AMER2"     "LINC01608"

Cluster 1 - noise

# Number of cells per condition
count_per_cluster[,c(1,3)]

##   orig.ident    1
## 1         v4 1779

DimPlot(object = subset(dataObject, seurat_clusters == "1"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#56B4E9")

VlnPlot(dataObject,
        features = cluster1$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

#VlnPlot(dataObject,
 #       features = ROC.cluster1$gene[1],
 #       cols = color.panel,
  #      flip = TRUE,
  #      split.by = "seurat_clusters")

cluster1$gene[1:10]

##  [1] "LINC01608"     "DYSF"          "KIF19"         "RP11-50D16.4" 
##  [5] "RP4-537K23.4"  "CR1"           "LINC02073"     "RP11-143J24.1"
##  [9] "KLRK1-AS1"     "PRAMEF10"

#ROC.cluster1$gene[1:10]

Cluster 2 - noise

# Number of cells per condition
count_per_cluster[,c(1,4)]

##   orig.ident    2
## 1         v4 1370

DimPlot(object = subset(dataObject, seurat_clusters == "2"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#009E73")

VlnPlot(dataObject,
        features = cluster2$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

#VlnPlot(dataObject,
 #       features = ROC.cluster2$gene[1:10],
  #      cols = color.panel,
  #      stack = TRUE,
  #      flip = TRUE,
   #     split.by = "seurat_clusters")

cluster2$gene[1:10]

##  [1] "CCDC200"       "LINC00685"     "CERS4"         "SLCO5A1"      
##  [5] "LINC00463"     "MMP17"         "COL19A1"       "RP11-322M19.4"
##  [9] "BPTFP1"        "PRDM1"

ROC.cluster2$gene[1:10]

##  [1] "TERT" NA     NA     NA     NA     NA     NA     NA     NA     NA

Cluster 3 - oligo

# Number of cells per condition
count_per_cluster[,c(1,4)]

##   orig.ident    2
## 1         v4 1370

DimPlot(object = subset(dataObject, seurat_clusters == "3"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#F0E442")

VlnPlot(dataObject,
        features = cluster3$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster3$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster3$gene[1:10]

##  [1] "MARCKSL1" "SLC5A11"  "SPP1"     "TUBA1A"   "CNP"      "SELENOP" 
##  [7] "S100B"    "STMN1"    "FTL"      "RASGRF1"

ROC.cluster3$gene[1:10]

##  [1] "TF"       "PLP1"     "CNP"      "TUBA1A"   "MBP"      "MARCKSL1"
##  [7] "SPP1"     "FTL"      "SLC5A11"  "FTH1"

Cluster 4 - noise

# Number of cells per condition
count_per_cluster[,c(1,5)]

##   orig.ident    3
## 1         v4 1351

DimPlot(object = subset(dataObject, seurat_clusters == "4"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#0072B2")

VlnPlot(dataObject,
        features = cluster4$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster4$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster4$gene[1:10]

##  [1] "FGFR1"   "UHRF2"   "SH3RF1"  "PLA2G4C" "ACTN2"   "AMPD3"   "IL6R"   
##  [8] "GADD45B" "GLUL"    "NAV2"

ROC.cluster4$gene[1:10]

##  [1] "AKAP6"   "PRUNE2"  "PLA2G4C" "NAV2"    "TMTC2"   "GLUL"    "ZNF536" 
##  [8] "NCOA7"   "SPOCK1"  "SIK3"

Cluster 5 - noise

# Number of cells per condition
count_per_cluster[,c(1,6)]

##   orig.ident    4
## 1         v4 1323

DimPlot(object = subset(dataObject, seurat_clusters == "5"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#D55E00")

VlnPlot(dataObject,
        features = cluster5$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster5$gene[1:10]

##  [1] "GADD45B" "FGFR1"   "FOS"     "EGR1"    "CNDP1"   "PRUNE2"  "DDIT3"  
##  [8] "ELL2"    "GLUL"    "PTPRM"

VlnPlot(dataObject,
        features = ROC.cluster5$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster5$gene[1:10]

##  [1] "GADD45B" "FGFR1"   "FOS"     "EGR1"    "CNDP1"   "PRUNE2"  "DDIT3"  
##  [8] "ELL2"    "GLUL"    "PTPRM"

ROC.cluster5$gene[1:10]

##  [1] "CNDP1"   "PRUNE2"  "SIK3"    "PIP4K2A" "ELL2"    "GLUL"    "FGFR1"  
##  [8] "GADD45B" "PTPRM"   "GUSBP2"

Cluster 6 - noise

# Number of cells per condition
count_per_cluster[,c(1,7)]

##   orig.ident    5
## 1         v4 1313

DimPlot(object = subset(dataObject, seurat_clusters == "6"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#CC79A7")

VlnPlot(dataObject,
        features = cluster6$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster6$gene[1:10]

##  [1] "SLC5A11"      "LINC00609"    "LRRC63"       "RASGRF1"      "RP1-223B1.1" 
##  [6] "RP11-141F7.1" "ANKRD18A"     "SAMD12"       "LDB3"         "AC012593.1"

ROC.cluster6$gene[1:10]

##  [1] "SLC5A11"     "LINC00609"   "CTNNA3"      "SAMD12"      "RP1-223B1.1"
##  [6] "LRRC63"      "ANKRD18A"    "LDB3"        NA            NA

Cluster 7 - oligo

QDPR - oligodendrocyte
CD9 - polydendrocyte
SUN2 - microglia SYT11 - oligodendrocyte
AMD1 - oligodendrocyte
APOD - fibroblast & oligodendrocyte

# Number of cells per condition
count_per_cluster[,c(1,8)]

##   orig.ident    6
## 1         v4 1259

DimPlot(object = subset(dataObject, seurat_clusters == "7"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#666666")

VlnPlot(dataObject,
        features = cluster7$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster7$gene[1:10]

##  [1] "QDPR"  "CD9"   "SUN2"  "SYT11" "AMD1"  "APOD"  "VWA1"  "H3-3B" "SFRP1"
## [10] "PSAP"

ROC.cluster7$gene[1:10]

##  [1] "GPM6B"  "PLP1"   "QDPR"   "CNDP1"  "MALAT1" NA       NA       NA      
##  [9] NA       NA

Cluster 8 - noise

# Number of cells per condition
count_per_cluster[,c(1,9)]

##   orig.ident   7
## 1         v4 744

DimPlot(object = subset(dataObject, seurat_clusters == "8"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#AD7700")

VlnPlot(dataObject,
        features = cluster8$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster8$gene[1:10]

##  [1] "CH507-528H12.1" "CH507-513H4.1"  "SLC5A11"        "ANKRD18A"      
##  [5] "PLA2G4C"        "SEC14L5"        "CLIP2"          "SLCO5A1"       
##  [9] "RP11-159L20.2"  "CCDC144B"

ROC.cluster8$gene[1:10]

##  [1] "CH507-528H12.1" "CH507-513H4.1"  "SLC5A11"        "CCDC88A"       
##  [5] "ANKRD18A"       "TERT"           NA               NA              
##  [9] NA               NA

Cluster 9 - astrocyte

# Number of cells per condition
count_per_cluster[,c(1,10)]

##   orig.ident   8
## 1         v4 629

DimPlot(object = subset(dataObject, seurat_clusters == "9"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#1C91D4")

VlnPlot(dataObject,
        features = cluster9$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster9$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster9$gene[1:10]

##  [1] "GLIS3"       "CD44"        "FAM189A2"    "ADCY2"       "GFAP"       
##  [6] "ABLIM1"      "RFX4"        "WWC1"        "RP11-6L16.1" "NRG3"

ROC.cluster9$gene[1:10]

##  [1] "DPP10"    "DTNA"     "NRG3"     "GLIS3"    "SORBS1"   "CD44"    
##  [7] "ADCY2"    "ABLIM1"   "GFAP"     "FAM189A2"

Cluster 10 - noise

FRY - mural / endothelial
KCNIP4 - neuron
CNTN1 - polydendrocyte
ERBB4 - inerneuron

# Number of cells per condition
count_per_cluster[,c(1,11)]

##   orig.ident   9
## 1         v4 517

DimPlot(object = subset(dataObject, seurat_clusters == "10"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#007756")

VlnPlot(dataObject,
        features = cluster10$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster10$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster10$gene[1:10]

##  [1] "FRY"           "KCNIP4"        "CNTN1"         "ERBB4"        
##  [5] "FCHSD2"        "RP11-791I24.5" "LAMA2"         "CTNND2"       
##  [9] "KCNAB1"        "TMPRSS5"

ROC.cluster10$gene[1:10]

##  [1] "FRY"     "ERBB4"   "KCNIP4"  "FCHSD2"  "PPP2R2B" "CTNND2"  "CNTN1"  
##  [8] "ANK3"    "LAMA2"   "TMPRSS5"

Cluster 11 - microglia

# Number of cells per condition
count_per_cluster[,c(1,12)]

##   orig.ident  10
## 1         v4 484

DimPlot(object = subset(dataObject, seurat_clusters == "11"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#D5C711")

VlnPlot(dataObject,
        features = cluster11$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster11$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster11$gene[1:10]

##  [1] "ADAM28"   "SLC11A1"  "DOCK8"    "LRMDA"    "ARHGAP24" "DENND3"  
##  [7] "APBB1IP"  "PTPRC"    "MEF2C"    "CHST11"

ROC.cluster11$gene[1:10]

##  [1] "LRMDA"    "DOCK8"    "ADAM28"   "SRGAP2B"  "SRGAP2"   "ARHGAP24"
##  [7] "ARHGAP26" "SLC11A1"  "MEF2C"    "PLXDC2"

Cluster 12 - opc

# Number of cells per condition
count_per_cluster[,c(1,13)]

##   orig.ident  11
## 1         v4 469

DimPlot(object = subset(dataObject, seurat_clusters == "12"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#005685")

VlnPlot(dataObject,
        features = cluster12$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster12$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster12$gene[1:10]

##  [1] "TNR"    "MMP16"  "VCAN"   "PTPRZ1" "CA10"   "LHFPL3" "MEGF11" "FGF14" 
##  [9] "DSCAM"  "LUZP2"

ROC.cluster12$gene[1:10]

##  [1] "TNR"    "LHFPL3" "PTPRZ1" "FGF14"  "OPCML"  "DSCAM"  "LRRC4C" "CSMD1" 
##  [9] "MMP16"  "VCAN"

Cluster 13 - astrocyte

# Number of cells per condition
count_per_cluster[,c(1,14)]

##   orig.ident  12
## 1         v4 372

DimPlot(object = subset(dataObject, seurat_clusters == "13"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#A04700")

VlnPlot(dataObject,
        features = cluster13$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster13$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster13$gene[1:10]

##  [1] "IRAG1"    "SLC1A2"   "BMPR1B"   "FAM189A2" "AQP4"     "RFX4"    
##  [7] "ADGRV1"   "RYR3"     "ADCY2"    "GLIS3"

ROC.cluster13$gene[1:10]

##  [1] "GPC5"   "SLC1A3" "DTNA"   "SLC1A2" "AHCYL1" "RYR3"   "ADGRV1" "NRG3"  
##  [9] "CLU"    "ADCY2"

Cluster 14 - noise

# Number of cells per condition
count_per_cluster[,c(1,15)]

##   orig.ident  13
## 1         v4 342

DimPlot(object = subset(dataObject, seurat_clusters == "14"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#B14380")

VlnPlot(dataObject,
        features = cluster14$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

#VlnPlot(dataObject,
#        features = ROC.cluster14$gene[1:10],
#        cols = color.panel,
#        stack = TRUE,
#        flip = TRUE,
#        split.by = "seurat_clusters")

cluster14$gene[1:10]

##  [1] "ZSCAN31"      "SLC5A11"      "RP11-141F7.1" "SAMD12"       "PLEKHG1"     
##  [6] "LINC00609"    "RP1-223B1.1"  "MIR4280HG"    "LRRC63"       "AC012593.1"

ROC.cluster14$gene[1:10]

##  [1] "ZSCAN31"   "SLC5A11"   "SAMD12"    "CTNNA3"    "LINC00609" NA         
##  [7] NA          NA          NA          NA

Cluster 15 - neuron

# Number of cells per condition
count_per_cluster[,c(1,16)]

##   orig.ident  14
## 1         v4 228

DimPlot(object = subset(dataObject, seurat_clusters == "15"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#4D4D4D")

VlnPlot(dataObject,
        features = cluster15$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster15$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster15$gene[1:10]

##  [1] "RP11-909M7.3" "MEG3"         "SYT1"         "RALYL"        "STXBP5L"     
##  [6] "ARPP21"       "KHDRBS2"      "NELL2"        "SNAP25"       "SCN2A"

ROC.cluster15$gene[1:10]

##  [1] "SYT1"         "RBFOX1"       "SNHG14"       "RP11-909M7.3" "MEG3"        
##  [6] "CSMD1"        "NRXN1"        "OPCML"        "FAM155A"      "CELF2"

Cluster 16 - neuron

# Number of cells per condition
count_per_cluster[,c(1,17)]

##   orig.ident  15
## 1         v4 222

DimPlot(object = subset(dataObject, seurat_clusters == "16"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[17])

VlnPlot(dataObject,
        features = cluster16$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster16$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster16$gene[1:10]

##  [1] "RP11-909M7.3" "PAK3"         "MEG3"         "GRIP1"        "MYT1L"       
##  [6] "CELF4"        "GRIN2B"       "KCNC2"        "GRIA1"        "CACNA1B"

ROC.cluster16$gene[1:10]

##  [1] "SNHG14"       "CNTNAP2"      "DLGAP1"       "GRIK2"        "RP11-909M7.3"
##  [6] "FGF14"        "PLCB1"        "NRG3"         "STXBP5L"      "GRIP1"

Cluster 17 - fibroblast-like

EBF1 - mural
NID1 - fibroblast-like / mural
CFH - fibroblast-like
LEF1 - endothelial
DCN - fibroblast-like
COL1A2 - fibroblast-like
IFITM1 - mural / fibroblast-like

# Number of cells per condition
count_per_cluster[,c(1,18)]

##   orig.ident  16
## 1         v4 202

DimPlot(object = subset(dataObject, seurat_clusters == "17"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[18])

VlnPlot(dataObject,
        features = cluster17$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject,
        features = ROC.cluster17$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster17$gene[1:10]

##  [1] "EBF1"   "NID1"   "CFH"    "LEF1"   "DCN"    "COL1A2" "IFITM1" "NR2F2" 
##  [9] "PDLIM1" "TBX18"

ROC.cluster17$gene[1:10]

##  [1] "RBMS3"         "PTPRG"         "USP53"         "RP11-9J18.1"  
##  [5] "ZEB1"          "SVIL"          "ARHGAP29"      "EPS8"         
##  [9] "RBPMS"         "RP11-649A16.1"

Assign identities

# Rename all identities
dataObject.annotated <- RenameIdents(object = dataObject, 
                               "0" = "Oligodendrocytes",
                               "1" = "Noise",
                               "2" = "Noise",
                               "3" = "Oligodendrocytes",
                               "4" = "Noise",
                               "5" = "Noise",
                               "6" = "Noise",
                               "7" = "Oligodendrocytes",
                               "8" = "Noise",
                               "9" = "Astrocytes",
                               "10" = "Noise",
                               "11" = "Microglia",
                               "12" = "OPC",
                               "13" = "Astrocytes", 
                               "14" = "Noise", 
                               "15" = "Neurons",
                               "16" = "Neurons",
                               "17" = "Fibroblast-like")
dataObject.annotated$annotated_clusters <- factor(Idents(dataObject.annotated),
                                             levels = c("Astrocytes",
                                                        "Fibroblast-like",
                                                        "Microglia", 
                                                        "Oligodendrocytes",
                                                        "OPC", 
                                                        "Neurons", 
                                                        "Noise"))
Idents(dataObject.annotated) <- "annotated_clusters"

Unannotated

UMAP

ditto_umap <- dittoDimPlot(object = dataObject.annotated,
             var = "annotated_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = TRUE)
ditto_umap

Dot plot markers

markers.to.plot <-
  c(
    "SLC1A2",
    "GJA1",
    "CLDN5",
    "TGM2",
    "STAB1",
    "CD74",
    "VCAN",
    "TNR",
    "S100B",
    "ABCA2",
    "NRG1",
    "MEG3",
    "ERBB4",
    "GAD1"
  )
dot <- DotPlot(dataObject.annotated, features = markers.to.plot, 
    dot.scale = 8) + RotatedAxis()
dot

Nuclei count per cluster

count_per_cluster <- FetchData(dataObject.annotated,
                               vars = c("ident", "orig.ident")) %>%
  dplyr::count(ident, orig.ident) %>%
  tidyr::spread(ident, n)
count_per_cluster

##   orig.ident Astrocytes Fibroblast-like Microglia Oligodendrocytes OPC Neurons
## 1         v4        859             131       469             4008 372     424
##   Noise
## 1  8385

count_melt <- reshape2::melt(count_per_cluster)
colnames(count_melt) <- c("ident", "cluster", "number of nuclei")
count_max <- count_melt[which.max(count_melt$`number of nuclei`), ]
count_max_value <- count_max$`number of nuclei`
cellmax <- count_max_value + 500 # so that the figure doesn't cut off the text
count_bar <- ggplot(count_melt, aes(x = factor(cluster), y = `number of nuclei`, fill = `cluster`)) +
  geom_bar(
    stat = "identity",
    colour = "black",
    width = 1,
    position = position_dodge(width = 0.8)
  ) +
  geom_text(
    aes(label = `number of nuclei`),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    angle = 45,
    hjust = -.01
  ) +
  theme_classic() + scale_fill_manual(values = color.panel) +
  ggtitle("Number of nuclei per cluster") +  xlab("cluster") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(limits = c(0, cellmax))
count_bar

## Relative abundance

relative_abundance <- dataObject.annotated@meta.data %>%
  group_by(annotated_clusters, orig.ident) %>%
  dplyr::count() %>%
  group_by(orig.ident) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=orig.ident,y=percent, fill=annotated_clusters)) +
  geom_col() +
  scale_fill_manual(values = color.panel) +
  ggtitle("Percentage of cell type") +
  theme_bw()+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
relative_abundance

Save RDS

saveRDS(dataObject.annotated, paste0("../../rObjects/", sampleID, ".annotated.pass_strict.rds"))

v4 annotations

Kimberly Olney, PhD

2023-12-26

Pipseq chemistry kit version 4

Libraries & paths

Read in data

Unannotated

UMAP

Nuclei count per cluster

Cluster tree

Finding differentially expressed features (cluster biomarkers)

ROC

Top variable genes

PCs

Plot markers

Heatmap

Ridge plot

Feature plot

Dot plot markers

Violins - Canonical cell-type markers

Cluster Annotation

Cluster 0 - oligo

Cluster 1 - noise

Cluster 2 - noise

Cluster 3 - oligo

Cluster 4 - noise

Cluster 5 - noise

Cluster 6 - noise

Cluster 7 - oligo

Cluster 8 - noise

Cluster 9 - astrocyte

Cluster 10 - noise

Cluster 11 - microglia

Cluster 12 - opc

Cluster 13 - astrocyte

Cluster 14 - noise

Cluster 15 - neuron

Cluster 16 - neuron

Cluster 17 - fibroblast-like

Assign identities

Unannotated

UMAP

Dot plot markers

Nuclei count per cluster

Save RDS