Libraris, paths, colors

Read in object

# read object
dataObject <- readRDS(file = paste0("../../rObjects/", treatment, "_unannotated_integrated.rds"))

DefaultAssay(dataObject) <- "RNA"
Idents(dataObject) <- "seurat_clusters"
dataObject@assays$SCT <- NULL
dataObject <- NormalizeData(dataObject)
dataObject <- FindVariableFeatures(dataObject)
dataObject <- ScaleData(dataObject)
dataObject <- JoinLayers(dataObject)

# inspect
dataObject

## An object of class Seurat 
## 32122 features across 59353 samples within 1 assay 
## Active assay: RNA (32122 features, 2000 variable features)
##  3 layers present: data, counts, scale.data
##  4 dimensional reductions calculated: pca, umap, harmony, umap.harmony

Unannotated

UMAP

ditto_umap <- dittoDimPlot(object = dataObject,
             var = "seurat_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = TRUE)
ditto_umap

Nuclei count per cluster

count_per_cluster <- FetchData(dataObject,
                               vars = c("ident", "orig.ident")) %>%
  dplyr::count(ident, orig.ident) %>%
  tidyr::spread(ident, n)
count_per_cluster

##      orig.ident     0    1    2    3    4    5    6    7    8    9   10   11
## 1 SeuratProject 10650 5900 5676 4874 4241 3667 3661 3287 2572 1845 1782 1574
##     12   13   14   15   16  17  18  19  20  21
## 1 1411 1315 1262 1218 1050 922 724 707 556 459

count_melt <- reshape2::melt(count_per_cluster)
colnames(count_melt) <- c("ident", "cluster", "number of nuclei")
count_max <- count_melt[which.max(count_melt$`number of nuclei`), ]
count_max_value <- count_max$`number of nuclei`
cellmax <- count_max_value + 200 # so that the figure doesn't cut off the text
count_bar <- ggplot(count_melt, aes(x = factor(cluster), y = `number of nuclei`, fill = `ident`)) +
  geom_bar(
    stat = "identity",
    colour = "black",
    width = 1,
    position = position_dodge(width = 0.8)
  ) +
  geom_text(
    aes(label = `number of nuclei`),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    angle = 45,
    hjust = -.01
  ) +
  theme_classic() + scale_fill_manual(values = sample_colors) +
  ggtitle("Number of nuclei per cluster") +  xlab("cluster") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(limits = c(0, cellmax))
count_bar

Cluster tree

dataObject <- BuildClusterTree(
  object = dataObject,
  dims = 1:30,
  reorder = FALSE,
  reorder.numeric = FALSE
)

tree <- dataObject@tools$BuildClusterTree
tree$tip.label <- paste0("Cluster ", tree$tip.label)
nClusters <- length(tree$tip.label)

tree_graph <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = color.panel[1:nClusters],
                        shape = 16,
                        size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0, 2.5, 0, 0), 'cm'))
tree_graph

Violins - Canonical cell-type markers

Markers obtained from dropviz

# Astrocytes
VlnPlot(dataObject,
        features = c("AQP4", "GJA1", "CLU", "GFAP"))

# Endothelial
VlnPlot(dataObject,
        features = c("CLDN5", "ADGRF5", "FLT1"))

# Fibroblast-Like_Dcn
VlnPlot(dataObject,
        features = c("COL1A1", "COL1A2", "DCN"))

# Microglia / Marchophage 
VlnPlot(dataObject,
        features = c("C1QA", "C1QC", "C1QB"))

# Microglia / Marchophage 
VlnPlot(dataObject,
        features = c( "HEXB", "TMEM119", "ITGAM", "TYROBP","P2RY12", "AIF1"))

# Neurons
VlnPlot(dataObject,
        features = c("RBFOX3", "SNAP25","SYT1", "GAD1", "GAD2"))

# Oligodendrocyte
VlnPlot(dataObject,
        features = c("PLP1","MBP", "MOG"))

# OPC
VlnPlot(dataObject,
        features = c("OLIG1","PDGFRA", "VCAN", "TNR"))

# Smooth muscle 
VlnPlot(dataObject,
        features = c("ACTA2","RGS5", "VTN", "MYL5"))

Percent cell type - Canonical cell-type markers

# astrocyte
dataObject[["percent.astrocyte"]] <- PercentageFeatureSet(dataObject, 
                                     features = c("CLU", "GFAP", "AQP4", "GJA1"))
# endothelial
dataObject[["percent.endothelial"]] <- PercentageFeatureSet(dataObject, 
                                       features = c("CLDN5", "ADGRF5", "FLT1"))
# fibroblast 
dataObject[["percent.fibroblast"]] <- PercentageFeatureSet(dataObject, 
                                      features = c("COL1A1", "COL1A2", "DCN"))
# microglia
dataObject[["percent.microglia"]] <- PercentageFeatureSet(dataObject, 
                                     features = c("HEXB", "C1QA", "C1QC", "C1QB", 
                                     "TMEM119", "ITGAM", "TYROBP","P2RY12", "AIF1")) 
# neuron 
dataObject[["percent.neurons"]] <- PercentageFeatureSet(dataObject, 
                                   features = c("RBFOX3", "SNAP25","SYT1", "GAD1", "GAD2"))
# oligodendrocyte
dataObject[["percent.oligodendrocyte"]] <- PercentageFeatureSet(dataObject, 
                                           features = c("PLP1","MBP", "MOG"))
# oligodendrocyte precursor cells
dataObject[["percent.opc"]] <- PercentageFeatureSet(dataObject, 
                               features = c("OLIG1","PDGFRA", "VCAN", "TNR"))
# smooth muscle 
dataObject[["percent.smooth"]] <- PercentageFeatureSet(dataObject, 
                                  features = c("ACTA2","RGS5", "VTN", "MYL5"))

Feature plot percent cell type - Canonical cell-type markers

# astrocyte
FeaturePlot(dataObject, features = "percent.astrocyte", label = TRUE)

# endothelial
FeaturePlot(dataObject, features = "percent.endothelial", label = TRUE)

# fibroblast
FeaturePlot(dataObject, features = "percent.fibroblast", label = TRUE)

# microglia
FeaturePlot(dataObject, features = "percent.microglia", label = TRUE)

# neuron 
FeaturePlot(dataObject, features = "percent.neurons", label = TRUE)

# oligodendrocyte
FeaturePlot(dataObject, features = "percent.oligodendrocyte", label = TRUE)

# oligodendrocyte precursor cells
FeaturePlot(dataObject, features = "percent.opc", label = TRUE)

# smooth
FeaturePlot(dataObject, features = "percent.smooth", label = TRUE)

Markers per cluster

markers <- SeuratWrappers::RunPrestoAll(
  object = dataObject,
  assay = "RNA",
  slot = "counts",
  only.pos = FALSE
)
write.table(markers, 
            paste0("../../results/markers/", treatment, "_markers.tsv"),
            quote = FALSE,
            row.names = FALSE)
saveRDS(markers, paste0("../../rObjects/", treatment, "_markers.rds"))

# rearrange to order by cluster & filter to only include log2FC > 1 & FDR < 0.05
 all.markers.strict <- markers %>%
   group_by(cluster) %>%
   dplyr::filter(avg_log2FC > 1 & p_val_adj < 0.05)

saveRDS(all.markers.strict, paste0("../../rObjects/", treatment,"_markers_log2FC1_q0.01.rds"))

Get markers for each cluster

# unique clusters variable
unique_clusters <- unique(markers$cluster)

# empty list to store individual cluster data frames
cluster_list <- list()

# loop through each cluster and create a data frame
for (i in unique_clusters) {
  cluster_name <- paste0("cluster", i)
  cluster_data <- all.markers.strict[all.markers.strict$cluster == i, ]
  assign(cluster_name, cluster_data)
  cluster_list[[cluster_name]] <- cluster_data
}

Feature plot

# UMAP showing the expression of select features
umap_feature <-
  FeaturePlot(dataObject,
              features = c("TYROBP", "MOG", "AQP4", "RBFOX3"))
umap_feature

Cluster Annotation

Cluster 0 - oligodendrocyte

# Number of cells per condition
count_per_cluster[,c(1,2)]

##      orig.ident     0
## 1 SeuratProject 10650

# UMAP with only cluster 0
DimPlot(object = subset(dataObject, seurat_clusters == "0"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[1])

VlnPlot(dataObject,
        features = cluster0$gene[1:10],
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster0$gene[1:10]

##  [1] "ST18"      "PLP1"      "MOBP"      "CTNNA3"    "RNF220"    "FRMD4B"   
##  [7] "CLMN"      "TMEM144"   "LINC01608" "CLDN11"

Cluster 1 - polydendrocyte

count_per_cluster[,c(1,3)]

##      orig.ident    1
## 1 SeuratProject 5900

DimPlot(object = subset(dataObject, seurat_clusters == "1"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[2])

VlnPlot(dataObject,
        features = cluster1$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster1$gene[1:10]

##  [1] "VCAN"            "MEGF11"          "ENSG00000278254" "PDGFRA"         
##  [5] "LHFPL3"          "PCDH15"          "SMOC1"           "NXPH1"          
##  [9] "GRIK1"           "CA10"

Cluster 2 - neuron

count_per_cluster[,c(1,4)]

##      orig.ident    2
## 1 SeuratProject 5676

DimPlot(object = subset(dataObject, seurat_clusters == "2"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[3])

VlnPlot(dataObject,
        features = cluster2$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster2$gene[1:10]

##  [1] "NRG1"       "ST6GALNAC5" "CPNE4"      "KCNQ5"      "RALYL"     
##  [6] "CLSTN2"     "KCNB2"      "DLGAP2"     "SYT1"       "NELL2"

Cluster 3 - astrocyte

count_per_cluster[,c(1,4)]

##      orig.ident    2
## 1 SeuratProject 5676

DimPlot(object = subset(dataObject, seurat_clusters == "3"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols =  color.panel[4])

VlnPlot(dataObject,
        features = cluster3$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster3$gene[1:10]

##  [1] "ENSG00000286757" "SLC14A1"         "OBI1-AS1"        "ADGRV1"         
##  [5] "NHSL1"           "RANBP3L"         "TPD52L1"         "GLIS3"          
##  [9] "BMPR1B"          "RFX4"

Cluster 4 - oligodendrocyte

count_per_cluster[,c(1,5)]

##      orig.ident    3
## 1 SeuratProject 4874

DimPlot(object = subset(dataObject, seurat_clusters == "4"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[5])

VlnPlot(dataObject,
        features = cluster4$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster4$gene[1:10]

##  [1] "SLC5A11"         "ST18"            "MOBP"            "LINC00609"      
##  [5] "PLP1"            "LRP2"            "TF"              "ENSG00000286749"
##  [9] "LRRC63"          "ENSG00000228793"

Cluster 5 - astrocyte

count_per_cluster[,c(1,6)]

##      orig.ident    4
## 1 SeuratProject 4241

DimPlot(object = subset(dataObject, seurat_clusters == "5"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[6])

VlnPlot(dataObject,
        features = cluster5$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster5$gene[1:10]

##  [1] "OBI1-AS1"        "GLIS3"           "AQP4"            "ADGRV1"         
##  [5] "RFX4"            "ENSG00000287704" "ENSG00000286757" "ATP1A2"         
##  [9] "SLC14A1"         "TPD52L1"

Cluster 6 - neuron

count_per_cluster[,c(1,7)]

##      orig.ident    5
## 1 SeuratProject 3667

DimPlot(object = subset(dataObject, seurat_clusters == "6"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#CC79A7")

VlnPlot(dataObject,
        features = cluster6$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster6$gene[1:10]

##  [1] "HS3ST4"          "ENSG00000285882" "PCDH11X"         "NPTX1"          
##  [5] "ADAMTSL1"        "SEMA3E"          "GABRG3"          "ENSG00000279668"
##  [9] "ROBO2"           "SV2B"

Cluster 7 - neuron

count_per_cluster[,c(1,8)]

##      orig.ident    6
## 1 SeuratProject 3661

DimPlot(object = subset(dataObject, seurat_clusters == "7"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[7])

VlnPlot(dataObject,
        features = cluster7$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster7$gene[1:10]

##  [1] "CBLN2"           "ST6GALNAC5"      "ENSG00000229618" "CUX2"           
##  [5] "LINGO2"          "SYN3"            "KCNQ5"           "CHRM3"          
##  [9] "NDST3"           "CDH12"

Cluster 8 - neuron

count_per_cluster[,c(1,9)]

##      orig.ident    7
## 1 SeuratProject 3287

DimPlot(object = subset(dataObject, seurat_clusters == "8"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[8])

VlnPlot(dataObject,
        features = cluster8$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster8$gene[1:10]

##  [1] "ENSG00000239498" "EPIC1"           "CBLN2"           "ENSG00000255595"
##  [5] "ENSG00000236451" "CDH12"           "ST6GALNAC5"      "SYN3"           
##  [9] "ZNF804B"         "KCNQ5"

Cluster 9 - interneuron

count_per_cluster[,c(1,10)]

##      orig.ident    8
## 1 SeuratProject 2572

DimPlot(object = subset(dataObject, seurat_clusters == "9"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[9])

VlnPlot(dataObject,
        features = cluster9$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster9$gene[1:10]

##  [1] "NXPH1"           "SCN1A-AS1"       "ABTB3"           "ZNF385D"        
##  [5] "ANK1"            "MYO16"           "ENSG00000257083" "PTCHD4"         
##  [9] "KCNC2"           "ZNF804A"

Cluster 10 - interneuron

count_per_cluster[,c(1,11)]

##      orig.ident    9
## 1 SeuratProject 1845

DimPlot(object = subset(dataObject, seurat_clusters == "10"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[10])

VlnPlot(dataObject,
        features = cluster10$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster10$gene[1:10]

##  [1] "DLX6-AS1" "GALNTL6"  "SYNPR"    "RGS12"    "CHRM3"    "ROBO2"   
##  [7] "LINGO2"   "ADARB2"   "ABTB3"    "ZNF385D"

Cluster 11 - RP

count_per_cluster[,c(1,12)]

##      orig.ident   10
## 1 SeuratProject 1782

DimPlot(object = subset(dataObject, seurat_clusters == "11"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[11])

VlnPlot(dataObject,
        features = cluster11$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster11$gene[1:10]

##  [1] "TNC"   "RPS27" "LPAR6" "RPL41" "RPS18" "AQP1"  "APOE"  "CD74"  "RPS12"
## [10] "RPL23"

Cluster 12 - polydendrocyte

count_per_cluster[,c(1,13)]

##      orig.ident   11
## 1 SeuratProject 1574

DimPlot(object = subset(dataObject, seurat_clusters == "12"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[12])

VlnPlot(dataObject,
        features = cluster12$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster12$gene[1:10]

##  [1] "VCAN"            "LHFPL3"          "PCDH15"          "MEGF11"         
##  [5] "SMOC1"           "ENSG00000278254" "PDGFRA"          "CA10"           
##  [9] "KCNMB2"          "LUZP2"

Cluster 13 - interneuron

count_per_cluster[,c(1,14)]

##      orig.ident   12
## 1 SeuratProject 1411

DimPlot(object = subset(dataObject, seurat_clusters == "13"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[13])

VlnPlot(dataObject,
        features = cluster13$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster13$gene[1:10]

##  [1] "FGF13"           "PTCHD4"          "GRIK1"           "NXPH1"          
##  [5] "MYO16"           "UNC13C"          "EYA4"            "GAD2"           
##  [9] "ENSG00000257083" "ALK"

Cluster 14 - microglia

count_per_cluster[,c(1,15)]

##      orig.ident   13
## 1 SeuratProject 1315

DimPlot(object = subset(dataObject, seurat_clusters == "14"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[14])

VlnPlot(dataObject,
        features = cluster14$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster14$gene[1:10]

##  [1] "DOCK8"    "APBB1IP"  "TBXAS1"   "FYB1"     "ARHGAP15" "INPP5D"  
##  [7] "SLCO2B1"  "LNCAROD"  "ARHGAP24" "LRMDA"

Cluster 15 - astrocyte

count_per_cluster[,c(1,16)]

##      orig.ident   14
## 1 SeuratProject 1262

DimPlot(object = subset(dataObject, seurat_clusters == "15"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[15])

VlnPlot(dataObject,
        features = cluster15$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster15$gene[1:10]

##  [1] "ENSG00000287704" "GFAP"            "ENSG00000286757" "ENSG00000259255"
##  [5] "RFX4"            "GLIS3"           "ADGRV1"          "ENTREP1"        
##  [9] "SLC14A1"         "HIF3A"

Cluster 16 - interneuron

count_per_cluster[,c(1,17)]

##      orig.ident   15
## 1 SeuratProject 1218

DimPlot(object = subset(dataObject, seurat_clusters == "16"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[16])

VlnPlot(dataObject,
        features = cluster16$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster16$gene[1:10]

##  [1] "NXPH1"           "GRIK1"           "SYNPR"           "TRHDE"          
##  [5] "GRIN3A"          "PLCH1"           "SHISA6"          "LINC01322"      
##  [9] "ENSG00000257083" "ST6GALNAC5"

Cluster 17 - neuron

count_per_cluster[,c(1,18)]

##      orig.ident   16
## 1 SeuratProject 1050

DimPlot(object = subset(dataObject, seurat_clusters == "17"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[17])

VlnPlot(dataObject,
        features = cluster17$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster17$gene[1:10]

##  [1] "ZNF804B" "SYNPR"   "TRHDE"   "CPNE4"   "CBLN2"   "TRPC5"   "NWD2"   
##  [8] "ZNF804A" "FRAS1"   "GALNT14"

Cluster 18 - endothelial

count_per_cluster[,c(1,1)]

##      orig.ident  orig.ident.1
## 1 SeuratProject SeuratProject

DimPlot(object = subset(dataObject, seurat_clusters == "18"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[19])

VlnPlot(dataObject,
        features = cluster18$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster18$gene[1:10]

##  [1] "ABCB1"  "FLT1"   "ATP10A" "CLDN5"  "VWF"    "ANO2"   "EPAS1"  "MECOM" 
##  [9] "FLI1"   "EGFL7"

Cluster 19 - oligodendrocyte

count_per_cluster[,c(1,19)]

##      orig.ident  17
## 1 SeuratProject 922

DimPlot(object = subset(dataObject, seurat_clusters == "19"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[20])

VlnPlot(dataObject,
        features = cluster19$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster19$gene[1:10]

##  [1] "MIR219A2HG"      "ENSG00000228793" "SLC5A11"         "FOLH1"          
##  [5] "CARNS1"          "ENPP2"           "TF"              "CERCAM"         
##  [9] "MOG"             "LDB3"

Cluster 20 - neuron

count_per_cluster[,c(1,20)]

##      orig.ident  18
## 1 SeuratProject 724

DimPlot(object = subset(dataObject, seurat_clusters == "20"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[21])

VlnPlot(dataObject,
        features = cluster20$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster20$gene[1:10]

##  [1] "NPSR1-AS1"       "HTR2C"           "IFNG-AS1"        "TSHZ2"          
##  [5] "ENSG00000285882" "ENSG00000228566" "ITGA8"           "VWC2L"          
##  [9] "HS3ST4"          "GRM8"

Cluster 21 - fibroblast

count_per_cluster[,c(1,21)]

##      orig.ident  19
## 1 SeuratProject 707

DimPlot(object = subset(dataObject, seurat_clusters == "21"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = color.panel[22])

VlnPlot(dataObject,
        features = cluster21$gene[1:10],
        cols = color.panel,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster21$gene[1:10]

##  [1] "EBF1"            "ENSG00000243620" "NR2F2-AS1"       "RBPMS"          
##  [5] "SVIL"            "ITIH5"           "SLC19A1"         "NID1"           
##  [9] "COBLL1"          "ARHGAP29"

Assign identities

Individual

dataObject.annotated <- RenameIdents(object = dataObject, 
                               "0" = "oligodendrocyte 1",
                               "1" = "polydendrocyte 1",
                               "2" = "neuron 1",
                               "3" = "astrocyte 1",
                               "4" = "oligodendrocyte 2",
                               "5" = "astrocyte 2",
                               "6" = "neuron 2",
                               "7" = "neuron 3",
                               "8" = "neuron 4",
                               "9" = "interneuron 1",
                               "10" = "interneuron 2",
                               "11" = "RP",
                               "12" = "polydendrocyte 2",
                               "13" = "interneuron 3",
                               "14" = "microglia",
                               "15" = "astrocyte 3",
                               "16" = "interneuron 4",
                               "17" = "neuron 5",
                               "18" = "endothelial",
                               "19" = "oligodendrocyte",
                               "20" = "neuron 6",
                               "21" = "fibroblast")
dataObject.annotated$individual_clusters <- factor(Idents(dataObject.annotated))

UMAP_ind <- dittoDimPlot(object = dataObject.annotated,
             var = "individual_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = TRUE)
UMAP_ind

## png 
##   2

Nuclei count per cluster

count_per_cluster <- FetchData(dataObject.annotated,
                               vars = c("ident", "orig.ident")) %>%
  dplyr::count(ident, orig.ident) %>%
  tidyr::spread(ident, n)
count_per_cluster

##      orig.ident oligodendrocyte 1 polydendrocyte 1 neuron 1 astrocyte 1
## 1 SeuratProject             10650             5900     5676        4874
##   oligodendrocyte 2 astrocyte 2 neuron 2 neuron 3 neuron 4 interneuron 1
## 1              4241        3667     3661     3287     2572          1845
##   interneuron 2   RP polydendrocyte 2 interneuron 3 microglia astrocyte 3
## 1          1782 1574             1411          1315      1262        1218
##   interneuron 4 neuron 5 endothelial oligodendrocyte neuron 6 fibroblast
## 1          1050      922         724             707      556        459

count_melt <- reshape2::melt(count_per_cluster)
colnames(count_melt) <- c("ident", "cluster", "number of nuclei")
count_max <- count_melt[which.max(count_melt$`number of nuclei`), ]
count_max_value <- count_max$`number of nuclei`
cellmax <- count_max_value + 500 # so that the figure doesn't cut off the text
count_bar <- ggplot(count_melt, aes(x = factor(cluster), y = `number of nuclei`, fill = `cluster`)) +
  geom_bar(
    stat = "identity",
    colour = "black",
    width = 1,
    position = position_dodge(width = 0.8)
  ) +
  geom_text(
    aes(label = `number of nuclei`),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    angle = 45,
    hjust = -.01
  ) +
  theme_classic() + scale_fill_manual(values = color.panel) +
  ggtitle("Number of nuclei per cluster") +  xlab("cluster") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(limits = c(0, cellmax))
count_bar

## png 
##   2

DotPlot

markers.to.plot <-
  c(
"CLU", 
"GFAP", 
"AQP4", 
"GJA1",
"CLDN5", 
"ADGRF5", 
"FLT1",
"COL1A1", 
"COL1A2", 
"DCN",
"HEXB", 
"C1QA", 
"C1QC", 
"C1QB", 
"TMEM119", 
"ITGAM", 
"TYROBP",
"P2RY12", 
"AIF1",
"RBFOX3", 
"SNAP25",
"SYT1", 
"GAD1", 
"GAD2",
"PLP1",
"MBP", 
"MOG",
"OLIG1",
"PDGFRA", 
"VCAN", 
"TNR",
"ACTA2",
"RGS5", 
"VTN", 
"MYL5"
  )

dot_ind <- DotPlot(dataObject, 
                   features = markers.to.plot, 
                   cluster.idents = TRUE,
                   dot.scale = 8) + RotatedAxis()
dot_ind

## png 
##   2

Merged

dataObject.annotated <- RenameIdents(object = dataObject.annotated, 
"oligodendrocyte 1" =   "oligodendrocyte",
"polydendrocyte 1"  =   "polydendrocyte",
"neuron 1"  =   "neuron",
"astrocyte 1"   =   "astrocyte",
"oligodendrocyte 2" =   "oligodendrocyte",
"astrocyte 2"   =   "astrocyte",
"neuron 2"  =   "neuron",
"neuron 3"  =   "neuron",
"neuron 4"  =   "neuron",
"interneuron 1" =   "interneuron",
"interneuron 2" =   "interneuron",
"RP"    =   "RP",
"polydendrocyte 2"  =   "polydendrocyte",
"interneuron 3" =   "interneuron",
"microglia" =   "microglia",
"astrocyte 3"   =   "astrocyte",
"interneuron 4" =   "interneuron",
"neuron 5"  =   "neuron",
"endothelial"   =   "endothelial",
"oligodendrocyte 3" =   "oligodendrocyte",
"neuron 6"  =   "neuron",
"fibroblast"    =   "fibroblast")
dataObject.annotated$annotated_clusters <- factor(Idents(dataObject.annotated))
Idents(dataObject.annotated) <- "annotated_clusters"

ok <- levels(Idents(dataObject.annotated))
write.table(ok, "ok.txt")
UMAP_merge <- dittoDimPlot(object = dataObject.annotated,
             var = "annotated_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = TRUE)
UMAP_merge

## png 
##   2

Nuclei count per cluster

count_per_cluster <- FetchData(dataObject.annotated,
                               vars = c("ident", "orig.ident")) %>%
  dplyr::count(ident, orig.ident) %>%
  tidyr::spread(ident, n)
count_per_cluster

##      orig.ident oligodendrocyte polydendrocyte neuron astrocyte interneuron
## 1 SeuratProject           15598           7311  16674      9759        5992
##     RP microglia endothelial fibroblast
## 1 1574      1262         724        459

count_melt <- reshape2::melt(count_per_cluster)
colnames(count_melt) <- c("ident", "cluster", "number of nuclei")
count_max <- count_melt[which.max(count_melt$`number of nuclei`), ]
count_max_value <- count_max$`number of nuclei`
cellmax <- count_max_value + 500 # so that the figure doesn't cut off the text
count_bar <- ggplot(count_melt, aes(x = factor(cluster), y = `number of nuclei`, fill = `cluster`)) +
  geom_bar(
    stat = "identity",
    colour = "black",
    width = 1,
    position = position_dodge(width = 0.8)
  ) +
  geom_text(
    aes(label = `number of nuclei`),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    angle = 45,
    hjust = -.01
  ) +
  theme_classic() + scale_fill_manual(values = color.panel) +
  ggtitle("Number of nuclei per cluster") +  xlab("cluster") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(limits = c(0, cellmax))
count_bar

## png 
##   2

Relative abundance

relative_abundance <- dataObject.annotated@meta.data %>%
  group_by(annotated_clusters, orig.ident) %>%
  dplyr::count() %>%
  group_by(orig.ident) %>%
  dplyr::mutate(percent = 100 * n / sum(n)) %>%
  ungroup()


rel_abun <- ggplot(relative_abundance, aes(x = orig.ident, y = percent, fill = annotated_clusters)) +
  geom_col() +
  geom_text(aes(label = paste0(round(percent), "%")), 
            position = position_stack(vjust = 0.5), size = 3, color = "white") +
  scale_fill_manual(values = color.panel) +
  ggtitle("Percentage of cell type") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
rel_abun

pdf(
  paste0(
    "../../results/UMAP/annotated/",
    treatment,
    "_merged_clusters_relative_abundance.pdf"
  ),
  width = 6,
  height = 8
)
rel_abun
dev.off()

## png 
##   2

DotPlot

markers.to.plot <-
  c(
"CLU", 
"GFAP", 
"AQP4", 
"GJA1",
"CLDN5", 
"ADGRF5", 
"FLT1",
"COL1A1", 
"COL1A2", 
"DCN",
"HEXB", 
"C1QA", 
"C1QC", 
"C1QB", 
"TMEM119", 
"ITGAM", 
"TYROBP",
"P2RY12", 
"AIF1",
"RBFOX3", 
"SNAP25",
"SYT1", 
"GAD1", 
"GAD2",
"PLP1",
"MBP", 
"MOG",
"OLIG1",
"PDGFRA", 
"VCAN", 
"TNR",
"ACTA2",
"RGS5", 
"VTN", 
"MYL5"
  )
dot_merge <- DotPlot(dataObject.annotated, 
               features = markers.to.plot, 
               cluster.idents = TRUE,
               dot.scale = 8) + RotatedAxis()
dot_merge

## png 
##   2

Save RDS

saveRDS(dataObject.annotated, paste0("../../rObjects/", treatment, ".annotated.rds"))

annotations

Kimberly Olney, PhD

04/08/2024

Libraris, paths, colors

Read in object

Unannotated

UMAP

Nuclei count per cluster

Cluster tree

Violins - Canonical cell-type markers

Percent cell type - Canonical cell-type markers

Feature plot percent cell type - Canonical cell-type markers

Markers per cluster

Feature plot

Cluster Annotation

Cluster 0 - oligodendrocyte

Cluster 1 - polydendrocyte

Cluster 2 - neuron

Cluster 3 - astrocyte

Cluster 4 - oligodendrocyte

Cluster 5 - astrocyte

Cluster 6 - neuron

Cluster 7 - neuron

Cluster 8 - neuron

Cluster 9 - interneuron

Cluster 10 - interneuron

Cluster 11 - RP

Cluster 12 - polydendrocyte

Cluster 13 - interneuron

Cluster 14 - microglia

Cluster 15 - astrocyte

Cluster 16 - interneuron

Cluster 17 - neuron

Cluster 18 - endothelial

Cluster 19 - oligodendrocyte

Cluster 20 - neuron

Cluster 21 - fibroblast

Assign identities

Individual

Nuclei count per cluster

DotPlot

Merged

Nuclei count per cluster

Relative abundance

DotPlot

Save RDS