Pipseq chemistry kit comparison Sample NPID control female NA05-055

Libraries & paths

library(dplyr)
library(Seurat)
library(patchwork)
library(knitr)
library(dittoSeq)
library(reshape2)
Sys.setenv(RSTUDIO_PANDOC="/usr/local/biotools/pandoc/3.1.2/bin")

sampleID <- c("v4_PA_mod0.6x")
sample_color.panel <- c("#009E73")
color.panel <- dittoColors()

Read in object

# read object
dataObject <- readRDS(file = paste0("../../../rObjects/", sampleID, ".filtered.rds"))
markers.strict <- readRDS(file = paste0("../../../rObjects/", 
                          sampleID, "_FindAllMarkers_strict_logFC1_FDR0.05.rds"))

Unannotated

UMAP

ditto_umap <- dittoDimPlot(object = dataObject,
             var = "seurat_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = TRUE)
ditto_umap

Nuclei count per cluster

count_per_cluster <- FetchData(dataObject,
                               vars = c("ident", "orig.ident")) %>%
  dplyr::count(ident, orig.ident) %>%
  tidyr::spread(ident, n)
count_per_cluster

##     orig.ident    0    1    2    3    4    5    6    7    8    9  10  11  12
## 1 v4PA_mod0.6x 2337 2280 1830 1577 1431 1197 1138 1136 1107 1001 923 907 900
##    13  14  15  16  17  18  19  20  21  22  23  24 25 26 27
## 1 666 579 566 510 382 278 215 212 199 162 149 124 96 62 17

count_melt <- reshape2::melt(count_per_cluster)
colnames(count_melt) <- c("ident", "cluster", "number of nuclei")
count_max <- count_melt[which.max(count_melt$`number of nuclei`), ]
count_max_value <- count_max$`number of nuclei`
cellmax <- count_max_value + 200 # so that the figure doesn't cut off the text
count_bar <- ggplot(count_melt, aes(x = factor(cluster), y = `number of nuclei`, fill = `ident`)) +
  geom_bar(
    stat = "identity",
    colour = "black",
    width = 1,
    position = position_dodge(width = 0.8)
  ) +
  geom_text(
    aes(label = `number of nuclei`),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    angle = 45,
    hjust = -.01
  ) +
  theme_classic() + scale_fill_manual(values = sample_color.panel) +
  ggtitle("Number of nuclei per cluster") +  xlab("cluster") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(limits = c(0, cellmax))
count_bar

Cluster tree

dataObject <- BuildClusterTree(
  object = dataObject,
  dims = 1:30,
  reorder = FALSE,
  reorder.numeric = FALSE
)

tree <- dataObject@tools$BuildClusterTree
tree$tip.label <- paste0("Cluster ", tree$tip.label)
nClusters <- length(tree$tip.label)

tree_graph <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = color.panel[1:nClusters],
                        shape = 16,
                        size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0, 2.5, 0, 0), 'cm'))
tree_graph

Violins - Canonical cell-type markers

Markers obtained from dropviz

# Astrocytes
VlnPlot(dataObject,
        features = c("AQP4", "GJA1", "CLU", "GFAP"))

# Endothelial
VlnPlot(dataObject,
        features = c("CLDN5", "ADGRF5", "FLT1"))

# Fibroblast-Like_Dcn
VlnPlot(dataObject,
        features = c("COL1A1", "COL1A2", "DCN"))

# Microglia / Marchophage 
VlnPlot(dataObject,
        features = c("C1QA", "C1QC", "C1QB"))

# Microglia / Marchophage 
VlnPlot(dataObject,
        features = c( "HEXB", "TMEM119", "ITGAM", "TYROBP","P2RY12", "AIF1"))

# Neurons
VlnPlot(dataObject,
        features = c("RBFOX3", "SNAP25","SYT1", "GAD1", "GAD2"))

# Oligodendrocyte
VlnPlot(dataObject,
        features = c("PLP1","MBP", "MOG"))

# OPC
VlnPlot(dataObject,
        features = c("OLIG1","PDGFRA", "VCAN", "TNR"))

# Smooth muscle 
VlnPlot(dataObject,
        features = c("ACTA2","RGS5", "VTN", "MYL5"))

Percent cell type - Canonical cell-type markers

# astrocyte
dataObject[["percent.astrocyte"]] <- PercentageFeatureSet(dataObject, 
                                     features = c("CLU", "GFAP", "AQP4", "GJA1"))
# endothelial
dataObject[["percent.endothelial"]] <- PercentageFeatureSet(dataObject, 
                                       features = c("CLDN5", "ADGRF5", "FLT1"))
# fibroblast 
dataObject[["percent.fibroblast"]] <- PercentageFeatureSet(dataObject, 
                                      features = c("COL1A1", "COL1A2", "DCN"))
# microglia
dataObject[["percent.microglia"]] <- PercentageFeatureSet(dataObject, 
                                     features = c("HEXB", "C1QA", "C1QC", "C1QB", 
                                     "TMEM119", "ITGAM", "TYROBP","P2RY12", "AIF1")) 
# neuron 
dataObject[["percent.neurons"]] <- PercentageFeatureSet(dataObject, 
                                   features = c("RBFOX3", "SNAP25","SYT1", "GAD1", "GAD2"))
# oligodendrocyte
dataObject[["percent.oligodendrocyte"]] <- PercentageFeatureSet(dataObject, 
                                           features = c("PLP1","MBP", "MOG"))
# oligodendrocyte precursor cells
dataObject[["percent.opc"]] <- PercentageFeatureSet(dataObject, 
                               features = c("OLIG1","PDGFRA", "VCAN", "TNR"))
# smooth muscle 
dataObject[["percent.smooth"]] <- PercentageFeatureSet(dataObject, 
                                  features = c("ACTA2","RGS5", "VTN", "MYL5"))

Feature plot percent cell type - Canonical cell-type markers

# astrocyte
FeaturePlot(dataObject, features = "percent.astrocyte", label = TRUE)

# endothelial
FeaturePlot(dataObject, features = "percent.endothelial", label = TRUE)

# fibroblast
FeaturePlot(dataObject, features = "percent.fibroblast", label = TRUE)

# microglia
FeaturePlot(dataObject, features = "percent.microglia", label = TRUE)

# neuron 
FeaturePlot(dataObject, features = "percent.neurons", label = TRUE)

# oligodendrocyte
FeaturePlot(dataObject, features = "percent.oligodendrocyte", label = TRUE)

# oligodendrocyte precursor cells
FeaturePlot(dataObject, features = "percent.opc", label = TRUE)

# smooth
FeaturePlot(dataObject, features = "percent.smooth", label = TRUE)

Markers per cluster

Get markers for each cluster

# unique clusters variable
unique_clusters <- unique(markers.strict$cluster)

# empty list to store individual cluster data frames
cluster_list <- list()

# loop through each cluster and create a data frame
for (i in unique_clusters) {
  cluster_name <- paste0("cluster", i)
  cluster_data <- markers.strict[markers.strict$cluster == i, ]
  assign(cluster_name, cluster_data)
  cluster_list[[cluster_name]] <- cluster_data
}

Feature plot

# UMAP showing the expression of select features
umap_feature <-
  FeaturePlot(dataObject,
              features = c("TYROBP", "MOG", "AQP4", "RBFOX3"))
umap_feature

Cluster Annotation

Cluster 0 - oligodendrocyte

# Number of cells per condition
count_per_cluster[,c(1,2)]

##     orig.ident    0
## 1 v4PA_mod0.6x 2337

# UMAP with only cluster 0
DimPlot(object = subset(dataObject, seurat_clusters == "0"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[1])

VlnPlot(dataObject,
        features = cluster0$gene[1:10],
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster0$gene[1:10]

##  [1] "CNP"     "ST18"    "MOBP"    "TF"      "TMEM144" "PLP1"    "MBP"    
##  [8] "CLDND1"  "CRYAB"   "ENPP2"

Cluster 1 - oligodendrocyte

count_per_cluster[,c(1,3)]

##     orig.ident    1
## 1 v4PA_mod0.6x 2280

DimPlot(object = subset(dataObject, seurat_clusters == "1"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[2])

VlnPlot(dataObject,
        features = cluster1$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster1$gene[1:10]

##  [1] "ST18"         "TMEM144"      "CTNNA3"       "RNF220"       "PLP1"        
##  [6] "MOBP"         "MBP"          "RP11-141F7.1" "TF"           "PIP4K2A"

Cluster 2 - polydendrocyte

count_per_cluster[,c(1,4)]

##     orig.ident    2
## 1 v4PA_mod0.6x 1830

DimPlot(object = subset(dataObject, seurat_clusters == "2"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[3])

VlnPlot(dataObject,
        features = cluster2$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster2$gene[1:10]

##  [1] "VCAN"         "LHFPL3"       "RP4-668E10.4" "PTPRZ1"       "NXPH1"       
##  [6] "SMOC1"        "GRIK1"        "PCDH15"       "MEGF11"       "LUZP2"

Cluster 3 - neuron

count_per_cluster[,c(1,4)]

##     orig.ident    2
## 1 v4PA_mod0.6x 1830

DimPlot(object = subset(dataObject, seurat_clusters == "3"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols =  color.panel[4])

VlnPlot(dataObject,
        features = cluster3$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster3$gene[1:10]

##  [1] "CDH12"        "AC114765.1"   "CBLN2"        "SLC35F3"      "AC067956.1"  
##  [6] "CNTN4"        "RP11-191L9.4" "PDZRN4"       "RP4-809F18.1" "CLSTN2"

Cluster 4 - neuron

count_per_cluster[,c(1,5)]

##     orig.ident    3
## 1 v4PA_mod0.6x 1577

DimPlot(object = subset(dataObject, seurat_clusters == "4"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[5])

VlnPlot(dataObject,
        features = cluster4$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster4$gene[1:10]

##  [1] "CBLN2"       "AC011288.2"  "CDH9"        "EPHA6"       "CTC-552D5.1"
##  [6] "GRM1"        "ST6GALNAC5"  "GALNTL6"     "ADGRL2"      "CNTN5"

Cluster 5 - microglia

count_per_cluster[,c(1,6)]

##     orig.ident    4
## 1 v4PA_mod0.6x 1431

DimPlot(object = subset(dataObject, seurat_clusters == "5"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[6])

VlnPlot(dataObject,
        features = cluster5$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster5$gene[1:10]

##  [1] "DOCK8"    "LNCAROD"  "TBXAS1"   "DLEU1"    "FYB1"     "APBB1IP" 
##  [7] "ADAM28"   "ARHGAP24" "ARHGAP15" "ST6GAL1"

Cluster 6 - astrocyte

count_per_cluster[,c(1,7)]

##     orig.ident    5
## 1 v4PA_mod0.6x 1197

DimPlot(object = subset(dataObject, seurat_clusters == "6"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#CC79A7")

VlnPlot(dataObject,
        features = cluster6$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster6$gene[1:10]

##  [1] "RP11-6L16.1"   "ADGRV1"        "GLIS3"         "GFAP"         
##  [5] "RP11-134O15.3" "HIF3A"         "RFX4"          "LINC00299"    
##  [9] "RP11-627D16.1" "PITPNC1"

Cluster 7 - astrocyte

SLC1A2 - astrocyte & oligodendrocyte
ADGRV1 - ependyma & oligodendrocyte
GPC5 - astrocyte

count_per_cluster[,c(1,8)]

##     orig.ident    6
## 1 v4PA_mod0.6x 1138

DimPlot(object = subset(dataObject, seurat_clusters == "7"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[7])

VlnPlot(dataObject,
        features = cluster7$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster7$gene[1:10]

##  [1] "ADGRV1"    "LINC00499" "OBI1-AS1"  "SLC1A2"    "GPC5"      "HIF3A"    
##  [7] "RYR3"      "LINC00299" "NKAIN3"    "CABLES1"

Cluster 8 - neuron

count_per_cluster[,c(1,9)]

##     orig.ident    7
## 1 v4PA_mod0.6x 1136

DimPlot(object = subset(dataObject, seurat_clusters == "8"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[8])

VlnPlot(dataObject,
        features = cluster8$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster8$gene[1:10]

##  [1] "CTC-340A15.2" "CTC-535M15.2" "CLSTN2"       "CPNE4"        "TAFA1"       
##  [6] "NRG1"         "RP11-320L2.1" "ST6GALNAC5"   "KCNH7"        "KCNB2"

Cluster 9 - astrocyte

CABLES1 - astrocyte
atp1a2 - astrocyte

count_per_cluster[,c(1,10)]

##     orig.ident    8
## 1 v4PA_mod0.6x 1107

DimPlot(object = subset(dataObject, seurat_clusters == "9"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[9])

VlnPlot(dataObject,
        features = cluster9$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster9$gene[1:10]

##  [1] "LINC00499"     "OBI1-AS1"      "RP11-134O15.3" "CABLES1"      
##  [5] "RANBP3L"       "PPP1R9A-AS1"   "BMPR1B"        "GLIS3"        
##  [9] "DNAH7"         "ATP1A2"

Cluster 10 - neuron

count_per_cluster[,c(1,11)]

##     orig.ident    9
## 1 v4PA_mod0.6x 1001

DimPlot(object = subset(dataObject, seurat_clusters == "10"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[10])

VlnPlot(dataObject,
        features = cluster10$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster10$gene[1:10]

##  [1] "DLX6-AS1" "RGS12"    "ADARB2"   "PWRN1"    "SYNPR"    "GALNTL6" 
##  [7] "PLD5"     "BTBD11"   "GRIP1"    "CHRM3"

Cluster 11 - oligodendrocyte

count_per_cluster[,c(1,12)]

##     orig.ident  10
## 1 v4PA_mod0.6x 923

DimPlot(object = subset(dataObject, seurat_clusters == "11"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[11])

VlnPlot(dataObject,
        features = cluster11$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster11$gene[1:10]

##  [1] "RP1-223B1.1"  "SLC5A11"      "RP11-141F7.1" "ST18"         "MOBP"        
##  [6] "LINC00609"    "TMEM144"      "TF"           "RNF220"       "MBP"

Cluster 12 - neuron

count_per_cluster[,c(1,13)]

##     orig.ident  11
## 1 v4PA_mod0.6x 907

DimPlot(object = subset(dataObject, seurat_clusters == "12"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[12])

VlnPlot(dataObject,
        features = cluster12$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster12$gene[1:10]

##  [1] "CBLN2"         "ST6GALNAC5"    "TAFA1"         "RP11-673E1.1" 
##  [5] "CDH12"         "RP11-170M17.1" "RGS6"          "UNC5D"        
##  [9] "OLFM3"         "AC011288.2"

Cluster 13 - neuron

count_per_cluster[,c(1,14)]

##     orig.ident  12
## 1 v4PA_mod0.6x 900

DimPlot(object = subset(dataObject, seurat_clusters == "13"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[13])

VlnPlot(dataObject,
        features = cluster13$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster13$gene[1:10]

##  [1] "HS3ST4"       "PCDH11X"      "CTD-2337L2.1" "KIAA1217"     "EGFEM1P"     
##  [6] "GABRG3"       "MSC-AS1"      "PCSK5"        "FRMD3"        "FRMPD4"

Cluster 14 - neuron

count_per_cluster[,c(1,15)]

##     orig.ident  13
## 1 v4PA_mod0.6x 666

DimPlot(object = subset(dataObject, seurat_clusters == "14"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[14])

VlnPlot(dataObject,
        features = cluster14$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster14$gene[1:10]

##  [1] "EGFEM1P"       "CTD-2337L2.1"  "HS3ST4"        "RP11-640F22.1"
##  [5] "RP11-17M24.3"  "CTC-304I17.6"  "SEMA3E"        "ZNF385B"      
##  [9] "KIAA1217"      "CLSTN2"

Cluster 15 - neuron

count_per_cluster[,c(1,16)]

##     orig.ident  14
## 1 v4PA_mod0.6x 579

DimPlot(object = subset(dataObject, seurat_clusters == "15"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[15])

VlnPlot(dataObject,
        features = cluster15$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster15$gene[1:10]

##  [1] "NXPH1"         "ZNF385D"       "KCNC2"         "RP11-123O10.3"
##  [5] "KIAA1217"      "GRIK1"         "GRIP1"         "SLIT2"        
##  [9] "PAM"           "GABRG3"

Cluster 16 - noise

count_per_cluster[,c(1,17)]

##     orig.ident  15
## 1 v4PA_mod0.6x 566

DimPlot(object = subset(dataObject, seurat_clusters == "16"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[16])

VlnPlot(dataObject,
        features = cluster16$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster16$gene[1:10]

##  [1] "MTCO1P12" "NEFL"     "NRGN"     "NEFM"     NA         NA        
##  [7] NA         NA         NA         NA

Cluster 17 - neuron

count_per_cluster[,c(1,18)]

##     orig.ident  16
## 1 v4PA_mod0.6x 510

DimPlot(object = subset(dataObject, seurat_clusters == "17"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[17])

VlnPlot(dataObject,
        features = cluster17$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster17$gene[1:10]

##  [1] "FGF13"         "GRIK1"         "NXPH1"         "RP11-123O10.3"
##  [5] "PTCHD4"        "FSTL5"         "ALK"           "MYO16"        
##  [9] "GRIP1"         "EYA4"

Cluster 18 - neuron

count_per_cluster[,c(1,1)]

##     orig.ident orig.ident.1
## 1 v4PA_mod0.6x v4PA_mod0.6x

DimPlot(object = subset(dataObject, seurat_clusters == "18"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[19])

VlnPlot(dataObject,
        features = cluster18$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster18$gene[1:10]

##  [1] "COL5A2"        "AJ006998.2"    "ATP6V1C2"      "SCN1A-AS1"    
##  [5] "RP11-335E8.3"  "SYT2"          "RP11-153I24.5" "GRM8"         
##  [9] "BCL11B"        "COL24A1"

Cluster 19 - astrocyte

count_per_cluster[,c(1,19)]

##     orig.ident  17
## 1 v4PA_mod0.6x 382

DimPlot(object = subset(dataObject, seurat_clusters == "19"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[20])

VlnPlot(dataObject,
        features = cluster19$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster19$gene[1:10]

##  [1] "ADGRV1"       "LINC00499"    "GLIS3"        "OBI1-AS1"     "GPC5"        
##  [6] "GJA1"         "SLC1A2"       "RP11-624A4.2" "PPP1R9A-AS1"  "NHSL1"

Cluster 20 - neuron

count_per_cluster[,c(1,20)]

##     orig.ident  18
## 1 v4PA_mod0.6x 278

DimPlot(object = subset(dataObject, seurat_clusters == "20"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[21])

VlnPlot(dataObject,
        features = cluster20$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster20$gene[1:10]

##  [1] "ATP10A"        "RP11-47B24.1"  "SMYD1"         "RP11-632B21.2"
##  [5] "STPG2-AS1"     "LINC00683"     "NR4A2"         "GALNT14"      
##  [9] "SYNPR"         "TRPC5"

Cluster 21 - mural

count_per_cluster[,c(1,21)]

##     orig.ident  19
## 1 v4PA_mod0.6x 215

DimPlot(object = subset(dataObject, seurat_clusters == "21"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[22])

VlnPlot(dataObject,
        features = cluster21$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster21$gene[1:10]

##  [1] "NR2F2-AS1"     "RP11-649A16.1" "EBF1"          "NR2F2"        
##  [5] "ITIH5"         "SLC38A11"      "SLC6A12"       "NOTCH3"       
##  [9] "COL1A2"        "TBX2"

Cluster 22 - neuron

count_per_cluster[,c(1,22)]

##     orig.ident  20
## 1 v4PA_mod0.6x 212

DimPlot(object = subset(dataObject, seurat_clusters == "22"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[23])

VlnPlot(dataObject,
        features = cluster22$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster22$gene[1:10]

##  [1] "NPSR1-AS1"     "HTR2C"         "IFNG-AS1"      "FER1L6-AS2"   
##  [5] "NXPH2"         "RP11-354I13.2" "LINC02218"     "ENPP7P1"      
##  [9] "VWC2L"         "TSHZ2"

Cluster 23 - RP

count_per_cluster[,c(1,23)]

##     orig.ident  21
## 1 v4PA_mod0.6x 199

DimPlot(object = subset(dataObject, seurat_clusters == "23"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[24])

VlnPlot(dataObject,
        features = cluster23$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster23$gene[1:10]

##  [1] "RP11-79E3.2"   "CTD-2195H9.1"  "RP11-404H1.1"  "RP11-897M7.4" 
##  [5] "LINC02364"     "RP11-582C12.1" "RP11-360K13.1" "IL1RAPL2"     
##  [9] "RORB"          "AC005150.1"

Cluster 24 - astrocyte

GPNMB - astrocyte
LMO1 - astrocyte
CSPG4 - poydendrocyte

count_per_cluster[,c(1,24)]

##     orig.ident  22
## 1 v4PA_mod0.6x 162

DimPlot(object = subset(dataObject, seurat_clusters == "24"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[25])

VlnPlot(dataObject,
        features = cluster24$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster24$gene[1:10]

##  [1] "GPNMB"         "LINC02315"     "RP11-739N10.1" "LINC02552"    
##  [5] "LMO1"          "LINC02058"     "RP4-668E10.4"  "MIR3681HG"    
##  [9] "CSPG4"         "LINC01117"

Cluster 25 - neuron

count_per_cluster[,c(1,25)]

##     orig.ident  23
## 1 v4PA_mod0.6x 149

DimPlot(object = subset(dataObject, seurat_clusters == "25"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[26])

VlnPlot(dataObject,
        features = cluster25$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster25$gene[1:10]

##  [1] "LHX6"          "SCUBE3"        "NOG"           "RP11-238K6.2" 
##  [5] "ANK1"          "SP9"           "PLCXD3"        "GPR149"       
##  [9] "RP11-519H15.1" "SCN1A-AS1"

Cluster 26 - endothelial

count_per_cluster[,c(1,26)]

##     orig.ident  24
## 1 v4PA_mod0.6x 124

DimPlot(object = subset(dataObject, seurat_clusters == "26"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[27])

VlnPlot(dataObject,
        features = cluster26$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster26$gene[1:10]

##  [1] "ATP10A"  "MECOM"   "FLT1"    "EGFL7"   "VWF"     "CLDN5"   "CFH"    
##  [8] "CP"      "TGM2"    "SLCO4A1"

Cluster 27 - RP

count_per_cluster[,c(1,27)]

##     orig.ident 25
## 1 v4PA_mod0.6x 96

DimPlot(object = subset(dataObject, seurat_clusters == "27"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[28])

VlnPlot(dataObject,
        features = cluster27$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster27$gene[1:10]

##  [1] "TTC39DP"       "RP11-338E21.1" "RP5-827L5.2"   "RP11-317J9.1" 
##  [5] "NPHS2"         "KCNK5"         "RP11-286N3.1"  "RP4-745K6.1"  
##  [9] "RP11-685M7.3"  "SCAT8"

Assign identities

Individual

dataObject.annotated <- RenameIdents(object = dataObject, 
                               "0" = "oligodendrocyte 1",
                               "1" = "oligodendrocyte 2",
                               "2" = "polydendrocyte",
                               "3" = "neuron 1",
                               "4" = "neuron 2",
                               "5" = "microglia",
                               "6" = "astrocyte 1",
                               "7" = "astrocyte 2",
                               "8" = "neuron 3",
                               "9" = "astrocyte 3",
                               "10" = "neuron 4",
                               "11" = "oligodendrocyte 3",
                               "12" = "neuron 5",
                               "13" = "neuron 6",
                               "14" = "neuron 7",
                               "15" = "neuron 8",
                               "16" = "noise",
                               "17" = "neuron 9",
                               "18" = "neuron 10",
                               "19" = "astrocyte 4",
                               "20" = "neuron 11",
                               "21" = "mural",
                               "22" = "neuron 12",
                               "23" = "RP 1",
                               "24" = "astrocyte 5",
                               "25" = "neuron 13",
                               "26" = "endothelial",
                               "27" = "RP 2")
dataObject.annotated$individual_clusters <- factor(Idents(dataObject.annotated))

UMAP_ind <- dittoDimPlot(object = dataObject.annotated,
             var = "individual_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = TRUE)
UMAP_ind

## png 
##   2

Nuclei count per cluster

count_per_cluster <- FetchData(dataObject.annotated,
                               vars = c("ident", "orig.ident")) %>%
  dplyr::count(ident, orig.ident) %>%
  tidyr::spread(ident, n)
count_per_cluster

##     orig.ident oligodendrocyte 1 oligodendrocyte 2 polydendrocyte neuron 1
## 1 v4PA_mod0.6x              2337              2280           1830     1577
##   neuron 2 microglia astrocyte 1 astrocyte 2 neuron 3 astrocyte 3 neuron 4
## 1     1431      1197        1138        1136     1107        1001      923
##   oligodendrocyte 3 neuron 5 neuron 6 neuron 7 neuron 8 noise neuron 9
## 1               907      900      666      579      566   510      382
##   neuron 10 astrocyte 4 neuron 11 mural neuron 12 RP 1 astrocyte 5 neuron 13
## 1       278         215       212   199       162  149         124        96
##   endothelial RP 2
## 1          62   17

count_melt <- reshape2::melt(count_per_cluster)
colnames(count_melt) <- c("ident", "cluster", "number of nuclei")
count_max <- count_melt[which.max(count_melt$`number of nuclei`), ]
count_max_value <- count_max$`number of nuclei`
cellmax <- count_max_value + 500 # so that the figure doesn't cut off the text
count_bar <- ggplot(count_melt, aes(x = factor(cluster), y = `number of nuclei`, fill = `cluster`)) +
  geom_bar(
    stat = "identity",
    colour = "black",
    width = 1,
    position = position_dodge(width = 0.8)
  ) +
  geom_text(
    aes(label = `number of nuclei`),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    angle = 45,
    hjust = -.01
  ) +
  theme_classic() + scale_fill_manual(values = color.panel) +
  ggtitle("Number of nuclei per cluster") +  xlab("cluster") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(limits = c(0, cellmax))
count_bar

## png 
##   2

DotPlot

markers.to.plot <-
  c(
"CLU", 
"GFAP", 
"AQP4", 
"GJA1",
"CLDN5", 
"ADGRF5", 
"FLT1",
"COL1A1", 
"COL1A2", 
"DCN",
"HEXB", 
"C1QA", 
"C1QC", 
"C1QB", 
"TMEM119", 
"ITGAM", 
"TYROBP",
"P2RY12", 
"AIF1",
"RBFOX3", 
"SNAP25",
"SYT1", 
"GAD1", 
"GAD2",
"PLP1",
"MBP", 
"MOG",
"OLIG1",
"PDGFRA", 
"VCAN", 
"TNR",
"ACTA2",
"RGS5", 
"VTN", 
"MYL5"
  )

dot_ind <- DotPlot(dataObject, 
                   features = markers.to.plot, 
                   cluster.idents = TRUE,
                   dot.scale = 8) + RotatedAxis()
dot_ind

## png 
##   2

Merged

dataObject.annotated <- RenameIdents(object = dataObject.annotated, 
"oligodendrocyte 1" = "oligodendrocyte",
"oligodendrocyte 2" = "oligodendrocyte",
"polydendrocyte" = "polydendrocyte",
"neuron 1" = "neuron",
"neuron 2" = "neuron",
"microglia" = "microglia",
"astrocyte 1" = "astrocyte",
"astrocyte 2" = "astrocyte",
"neuron 3" = "neuron",
"astrocyte 3" = "astrocyte",
"neuron 4" = "neuron",
"oligodendrocyte 3" = "oligodendrocyte",
"neuron 5" = "neuron",
"neuron 6" = "neuron",
"neuron 7" = "neuron",
"neuron 8" = "neuron",
"noise" = "noise",
"neuron 9" = "neuron",
"neuron 10" = "neuron",
"astrocyte 4" = "astrocyte",
"neuron 11" = "neuron",
"mural" = "mural",
"neuron 12" = "neuron",
"RP 1" = "RP",
"astrocyte 5" = "astrocyte",
"neuron 13" = "neuron",
"endothelial" = "endothelial",
"RP 2" = "RP")
dataObject.annotated$annotated_clusters <- factor(Idents(dataObject.annotated))
Idents(dataObject.annotated) <- "annotated_clusters"

UMAP_merge <- dittoDimPlot(object = dataObject.annotated,
             var = "annotated_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = TRUE)
UMAP_merge

## png 
##   2

Nuclei count per cluster

count_per_cluster <- FetchData(dataObject.annotated,
                               vars = c("ident", "orig.ident")) %>%
  dplyr::count(ident, orig.ident) %>%
  tidyr::spread(ident, n)
count_per_cluster

##     orig.ident oligodendrocyte polydendrocyte neuron microglia astrocyte noise
## 1 v4PA_mod0.6x            5524           1830   8879      1197      3614   510
##   mural  RP endothelial
## 1   199 166          62

count_melt <- reshape2::melt(count_per_cluster)
colnames(count_melt) <- c("ident", "cluster", "number of nuclei")
count_max <- count_melt[which.max(count_melt$`number of nuclei`), ]
count_max_value <- count_max$`number of nuclei`
cellmax <- count_max_value + 500 # so that the figure doesn't cut off the text
count_bar <- ggplot(count_melt, aes(x = factor(cluster), y = `number of nuclei`, fill = `cluster`)) +
  geom_bar(
    stat = "identity",
    colour = "black",
    width = 1,
    position = position_dodge(width = 0.8)
  ) +
  geom_text(
    aes(label = `number of nuclei`),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    angle = 45,
    hjust = -.01
  ) +
  theme_classic() + scale_fill_manual(values = color.panel) +
  ggtitle("Number of nuclei per cluster") +  xlab("cluster") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(limits = c(0, cellmax))
count_bar

## png 
##   2

Relative abundance

relative_abundance <- dataObject.annotated@meta.data %>%
  group_by(annotated_clusters, orig.ident) %>%
  dplyr::count() %>%
  group_by(orig.ident) %>%
  dplyr::mutate(percent = 100 * n / sum(n)) %>%
  ungroup()


rel_abun <- ggplot(relative_abundance, aes(x = orig.ident, y = percent, fill = annotated_clusters)) +
  geom_col() +
  geom_text(aes(label = paste0(round(percent), "%")), 
            position = position_stack(vjust = 0.5), size = 3, color = "white") +
  scale_fill_manual(values = color.panel) +
  ggtitle("Percentage of cell type") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
rel_abun

DotPlot

markers.to.plot <-
  c(
"CLU", 
"GFAP", 
"AQP4", 
"GJA1",
"CLDN5", 
"ADGRF5", 
"FLT1",
"COL1A1", 
"COL1A2", 
"DCN",
"HEXB", 
"C1QA", 
"C1QC", 
"C1QB", 
"TMEM119", 
"ITGAM", 
"TYROBP",
"P2RY12", 
"AIF1",
"RBFOX3", 
"SNAP25",
"SYT1", 
"GAD1", 
"GAD2",
"PLP1",
"MBP", 
"MOG",
"OLIG1",
"PDGFRA", 
"VCAN", 
"TNR",
"ACTA2",
"RGS5", 
"VTN", 
"MYL5"
  )
dot_merge <- DotPlot(dataObject.annotated, 
               features = markers.to.plot, 
               cluster.idents = TRUE,
               dot.scale = 8) + RotatedAxis()
dot_merge

## png 
##   2

Save RDS

saveRDS(dataObject.annotated, paste0("../../../rObjects/", sampleID, ".annotated.rds"))

v4 PA_mod0.6x annotation

Kimberly Olney, PhD

02/12/2024

Libraries & paths

Read in object

Unannotated

UMAP

Nuclei count per cluster

Cluster tree

Violins - Canonical cell-type markers

Percent cell type - Canonical cell-type markers

Feature plot percent cell type - Canonical cell-type markers

Markers per cluster

Feature plot

Cluster Annotation

Cluster 0 - oligodendrocyte

Cluster 1 - oligodendrocyte

Cluster 2 - polydendrocyte

Cluster 3 - neuron

Cluster 4 - neuron

Cluster 5 - microglia

Cluster 6 - astrocyte

Cluster 7 - astrocyte

Cluster 8 - neuron

Cluster 9 - astrocyte

Cluster 10 - neuron

Cluster 11 - oligodendrocyte

Cluster 12 - neuron

Cluster 13 - neuron

Cluster 14 - neuron

Cluster 15 - neuron

Cluster 16 - noise

Cluster 17 - neuron

Cluster 18 - neuron

Cluster 19 - astrocyte

Cluster 20 - neuron

Cluster 21 - mural

Cluster 22 - neuron

Cluster 23 - RP

Cluster 24 - astrocyte

Cluster 25 - neuron

Cluster 26 - endothelial

Cluster 27 - RP

Assign identities

Individual

Nuclei count per cluster

DotPlot

Merged

Nuclei count per cluster

Relative abundance

DotPlot

Save RDS