Set working directory

knitr::opts_knit$set(root.dir = ".")

Libraris, paths, colors

source(here::here("snRNA/scripts/R", "file_paths_and_colours.R"))
tissue <- "Brain"
treatment <- "pilot"

Read in object

# read object
dataObject.unannotated <- readRDS("../../rObjects/brain_unannotated.rds")

Define resolution and groups

# set levels and idents
Idents(dataObject.unannotated) <- "seurat_clusters"
# normalize if SCTtransform was used
DefaultAssay(dataObject.unannotated) <- "RNA"

Clustering QC before annotation

Unannotated UMAP

dittoDimPlot(object = dataObject.unannotated,
             var = "seurat_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = TRUE)

Cells per cluster

dataObject.unannotated$sample <- factor(dataObject.unannotated$sample, levels = c("beads", "debris"))

n_cells <- FetchData(dataObject.unannotated, 
  vars = c("ident", "sample")) %>%
  dplyr::count(ident,sample) %>%
  tidyr::spread(ident, n)
n_cells
##   sample    0    1    2    3    4    5    6    7   8   9  10  11  12  13  14
## 1  beads 3745 3208 2168 1700 1005 1113 1281 1392 843 879 856 685 715 562 532
## 2 debris 3347 2837 1992 1496 1556 1378 1156 1001 996 912 907 813 671 722 665
##    15  16  17  18  19  20  21
## 1 554 365 438 391 235 144 207
## 2 542 713 438 375 250 267 202

Cluster tree

dataObject.unannotated <- BuildClusterTree(object = dataObject.unannotated,
                                     dims = 1:20,
                                     reorder = FALSE,
                                     reorder.numeric = FALSE)

tree <- dataObject.unannotated@tools$BuildClusterTree
tree$tip.label <- paste0("Cluster ", tree$tip.label)

tree_1 <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
#  ggtree::geom_tippoint(color = colors[1:length(tree$tip.label)], 
  #                      shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))
tree_1

Marker exploration

FindMarkers will find markers between two different identity groups - you have to specify both identity groups. This is useful for comparing the differences between two specific groups.

FindAllMarkers will find markers differentially expressed in each identity group by comparing it to all of the others - you don’t have to manually define anything. Note that markers may bleed over between closely-related groups - they are not forced to be specific to only one group. This is what most people use (and likely what you want).

FindConservedMarkers will find markers that are conserved between two groups - this can be useful if you want to find markers that are conserved between a treated and untreated condition for a specific cell type or group of cells. It means they are differentially expressed compared to other groups, but have similar expression between the two groups you’re actually comparing. https://www.biostars.org/p/409790/ ### Find all markers

# read object
all.markers <- readRDS("../../rObjects/unannotated_all_markers.rds")

# more stringent filtering
all.markers <- all.markers[all.markers$p_val_adj < 0.05,]
markers.strict <- all.markers[
  all.markers$delta_pct > summary(all.markers$delta_pct)[5],]

# compare 
table(all.markers$cluster)
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
## 1052 1056 1068  939 1529 1530 1179 1770 1827 1311 1691 1468 1539 1805 1712 1658 
##   16   17   18   19   20   21 
## 1417 1350 1531 1364 1176 1272
table(markers.strict$cluster)
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
##   15    4   35    7  165  956   63   18 1642   62  267  104  734 1678 1448   17 
##   16   17   18   19   20   21 
##   76   28  335   61   27   68
# subset
cluster0 <- markers.strict[markers.strict$cluster == 0,]
cluster1 <- markers.strict[markers.strict$cluster == 1,]
cluster2 <- markers.strict[markers.strict$cluster == 2,]
cluster3 <- markers.strict[markers.strict$cluster == 3,]
cluster4 <- markers.strict[markers.strict$cluster == 4,]
cluster5 <- markers.strict[markers.strict$cluster == 5,]
cluster6 <- markers.strict[markers.strict$cluster == 6,]
cluster7 <- markers.strict[markers.strict$cluster == 7,]
cluster8 <- markers.strict[markers.strict$cluster == 8,]
cluster9 <- markers.strict[markers.strict$cluster == 9,]
cluster10 <- markers.strict[markers.strict$cluster == 10,]
cluster11 <- markers.strict[markers.strict$cluster == 11,]
cluster12 <- markers.strict[markers.strict$cluster == 12,]
cluster13 <- markers.strict[markers.strict$cluster == 13,]
cluster14 <- markers.strict[markers.strict$cluster == 14,]
cluster15 <- markers.strict[markers.strict$cluster == 15,]
cluster16 <- markers.strict[markers.strict$cluster == 16,]
cluster17 <- markers.strict[markers.strict$cluster == 17,]
cluster18 <- markers.strict[markers.strict$cluster == 18,]
cluster19 <- markers.strict[markers.strict$cluster == 19,]
cluster20 <- markers.strict[markers.strict$cluster == 20,]
cluster21 <- markers.strict[markers.strict$cluster == 21,]

Conserved markers

Identify conserved cell type markers To identify canonical cell type marker genes that are conserved across conditions, we provide the FindConservedMarkers() function. This function performs differential gene expression testing for each dataset/group and combines the p-values using meta-analysis methods from the MetaDE R package. For example, we can calculated the genes that are conserved markers irrespective of stimulation condition in cluster 6 (NK cells).

conserved.markers <- readRDS("../../rObjects/brain_conserved_markers.rds")
conserved.markers.strict <- readRDS("../../rObjects/brain_conserved_markers_strict.rds")

# compare 
table(conserved.markers$cluster)
## 
##    0    1   10   11   12   13   14   15   16   17   18   19    2   20   21    3 
## 1694 1552 3192 2621 3259 4013 3876 2475 1941 1843 2814 1521 1615 1158 1540 1405 
##    4    5    6    7    8    9 
## 1942 3509 1779 2474 4106 1642
table(conserved.markers.strict$cluster)
## 
##    0    1   10   11   12   13   14   15   16   17   18   19    2   20   21    3 
##   45   20  499  189 1120 2814 2275   86  116   70  569   89   92   81  113   37 
##    4    5    6    7    8    9 
##  280 1504  102   67 2722  101
conserved.cluster0 <- conserved.markers.strict[conserved.markers.strict$cluster == 0,]
conserved.cluster1 <- conserved.markers.strict[conserved.markers.strict$cluster == 1,]
conserved.cluster2 <- conserved.markers.strict[conserved.markers.strict$cluster == 2,]
conserved.cluster3 <- conserved.markers.strict[conserved.markers.strict$cluster == 3,]
conserved.cluster4 <- conserved.markers.strict[conserved.markers.strict$cluster == 4,]
conserved.cluster5 <- conserved.markers.strict[conserved.markers.strict$cluster == 5,]
conserved.cluster6 <- conserved.markers.strict[conserved.markers.strict$cluster == 6,]
conserved.cluster7 <- conserved.markers.strict[conserved.markers.strict$cluster == 7,]
conserved.cluster8 <- conserved.markers.strict[conserved.markers.strict$cluster == 8,]
conserved.cluster9 <- conserved.markers.strict[conserved.markers.strict$cluster == 9,]
conserved.cluster10 <- conserved.markers.strict[conserved.markers.strict$cluster == 10,]
conserved.cluster11 <- conserved.markers.strict[conserved.markers.strict$cluster == 11,]
conserved.cluster12 <- conserved.markers.strict[conserved.markers.strict$cluster == 12,]
conserved.cluster13 <- conserved.markers.strict[conserved.markers.strict$cluster == 13,]
conserved.cluster14 <- conserved.markers.strict[conserved.markers.strict$cluster == 14,]
conserved.cluster15 <- conserved.markers.strict[conserved.markers.strict$cluster == 15,]
conserved.cluster16 <- conserved.markers.strict[conserved.markers.strict$cluster == 16,]
conserved.cluster17 <- conserved.markers.strict[conserved.markers.strict$cluster == 17,]
conserved.cluster18 <- conserved.markers.strict[conserved.markers.strict$cluster == 18,]
conserved.cluster19 <- conserved.markers.strict[conserved.markers.strict$cluster == 19,]
conserved.cluster20 <- conserved.markers.strict[conserved.markers.strict$cluster == 20,]
conserved.cluster21 <- conserved.markers.strict[conserved.markers.strict$cluster == 21,]

PCs

# Printing out the most variable genes driving PCs
print(x = dataObject.unannotated[["pca"]], 
      dims = 1:5, 
      nfeatures = 10)
## PC_ 1 
## Positive:  PLP1, MBP, TF, CNP, CTNNA3, ST18, SELENOP, CRYAB, CERCAM, CLDND1 
## Negative:  AQP4, SLC1A2, SLC1A3, GFAP, GJA1, F3, CHI3L1, ANGPTL4, DTNA, ATP1A2 
## PC_ 2 
## Positive:  KCNIP4, MEG3, RP11-909M7.3, RBFOX1, ROBO2, SYT1, CSMD1, SNHG14, OPCML, RALYL 
## Negative:  SLC1A3, AQP4, SLC1A2, GJA1, GLUL, GFAP, PLP1, F3, CHI3L1, ANGPTL4 
## PC_ 3 
## Positive:  VCAN, PTPRZ1, LHFPL3, TNR, PDGFRA, BCAN, GPNMB, PCDH15, GRIK1, CSPG4 
## Negative:  NRGN, ENC1, CHN1, SLC17A7, VSNL1, CALM1, CLU, SNAP25, OLFM1, AQP4 
## PC_ 4 
## Positive:  VCAN, PTPRZ1, GPNMB, BCAN, PDGFRA, TNR, CSPG4, LHFPL3, APOD, COL9A1 
## Negative:  CH507-528H12.1, ERBB4, DPP10, CNTNAP2, ROBO2, GRIP1, CH507-513H4.1, RBFOX1, GRIK1, CXCL14 
## PC_ 5 
## Positive:  CH507-528H12.1, KCNIP4, CH507-513H4.1, PVT1, PCDH9, RALYL, LRP1B, PTPRD, LRRTM4, CSMD1 
## Negative:  CXCL14, SLC6A1, GAD2, KIT, GAD1, ERBB4, RELN, GRIK1, GRIP1, DLX6-AS1

Top variable genes

# print top variable genes
top100 <- dataObject.unannotated@assays$SCT@var.features[1:100]
top100
##   [1] "CXCL14"         "CHI3L1"         "NPY"            "SST"           
##   [5] "SLC1A2"         "AQP4"           "GJA1"           "AQP1"          
##   [9] "COL19A1"        "PLP1"           "PVT1"           "RELN"          
##  [13] "GRIK1"          "SLC1A3"         "VCAN"           "CH507-528H12.1"
##  [17] "GPNMB"          "KIT"            "GFAP"           "ANGPTL4"       
##  [21] "PTPRZ1"         "DPP10"          "RP11-986E7.7"   "SERPINA3"      
##  [25] "F3"             "ERBB4"          "SGCZ"           "FAM189A2"      
##  [29] "BAALC-AS1"      "LHFPL3"         "VIP"            "ROBO2"         
##  [33] "KCNIP4"         "GPC5"           "CNR1"           "CH507-513H4.1" 
##  [37] "TF"             "RP11-212P7.3"   "DTNA"           "GLUL"          
##  [41] "PDGFRA"         "VEGFA"          "MBP"            "ATP1A2"        
##  [45] "ADGRV1"         "PCDH9"          "BCAN"           "AEBP1"         
##  [49] "SLC14A1"        "TNR"            "CLDN5"          "CALB2"         
##  [53] "ZFP36"          "AGT"            "CTD-2126E3.6"   "MT2A"          
##  [57] "CTNNA3"         "NXPH1"          "CRH"            "CST3"          
##  [61] "SPP1"           "GAD2"           "SLC6A1"         "CD44"          
##  [65] "RP1-223B1.1"    "CCK"            "RP11-6L16.1"    "ZNF385D"       
##  [69] "RBFOX1"         "ST18"           "CEP162"         "CNP"           
##  [73] "RFX4"           "RP11-141F7.1"   "CLU"            "CNTN5"         
##  [77] "CNTNAP2"        "ADAMTS9"        "RP4-668E10.4"   "GRIP1"         
##  [81] "CSPG4"          "GLIS3"          "ADARB2"         "C4B"           
##  [85] "CRYAB"          "APOD"           "C4A"            "FGFR3"         
##  [89] "EFEMP1"         "KIAA1217"       "RYR3"           "AC084149.2"    
##  [93] "MT1M"           "OBI1-AS1"       "SELENOP"        "SMOC1"         
##  [97] "OPALIN"         "CCL2"           "GAD1"           "LINC00609"

Cluster Annotation

Cluster 0 - oligodendrocyte / polydendrocyte

OPALIN - oligodendrocyte MAG - polydendrocyte CLDN11 - polydendrocyte / oligodendrocyte ANLN - oligodendrocyte APOD - fibroblast-like / oligodendrocyte

# Number of cells per condition
n_cells[,c(1,2)]
##   sample    0
## 1  beads 3745
## 2 debris 3347
# UMAP with only cluster 0
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "0"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#E69F00")

VlnPlot(dataObject.unannotated,
        features = cluster0$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster0$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster0$gene[1:10]
##  [1] "OPALIN"  "RNASE1"  "TF"      "SCD"     "MAG"     "SPP1"    "MAL"    
##  [8] "SELENOP" "ENPP2"   "CLDN11"
conserved.cluster0$gene[1:10]
##  [1] "ANLN"   "APOD"   "CBR1"   "CD9"    "CERCAM" "CLDN11" "CLDND1" "CNDP1" 
##  [9] "CNP"    "CNTN2"

Cluster 1 - oligodendrocyte

CTNNA3 - mural
CARNS1 - oligodendrocyte
CERCAM - polydendrocyte / oligodendrocyte  MOBP - oligodendrocyte

# Number of cells per condition
n_cells[,c(1,3)]
##   sample    1
## 1  beads 3208
## 2 debris 2837
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "1"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#56B4E9")

VlnPlot(dataObject.unannotated,
        features = cluster1$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster1$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster1$gene[1:10]
##  [1] "CTNNA3"      "RP1-223B1.1" "ST18"        "FRMD4B"      NA           
##  [6] NA            NA            NA            NA            NA
conserved.cluster1$gene[1:10]
##  [1] "CARNS1"    "CERCAM"    "CTNNA3"    "DOCK10"    "DOCK5"     "FRMD4B"   
##  [7] "LINC00609" "MOBP"      "NCKAP5"    "PIP4K2A"

Cluster 2 - oligodendrocyte

COL18A1 - fibroblast-like  S100B - oligodendrocyte
PCSK6 - purkinjeneuron  ABCA2 - oligodendrocyte
ANLN - oligodendrocyte

# Number of cells per condition
n_cells[,c(1,4)]
##   sample    2
## 1  beads 2168
## 2 debris 1992
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "2"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#009E73")

VlnPlot(dataObject.unannotated,
        features = cluster2$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster2$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster2$gene[1:10]
##  [1] "COL18A1"  "S100B"    "SELENOP"  "PCSK6"    "SLC5A11"  "TF"      
##  [7] "SPP1"     "MARCKSL1" "CDK18"    "HAPLN2"
conserved.cluster2$gene[1:10]
##  [1] "ABCA2"   "ANLN"    "CARNS1"  "CDK18"   "CLDND1"  "CNP"     "CNTN2"  
##  [8] "COL18A1" "CRYAB"   "DHCR24"

Cluster 3 - oligodendrocyte / polydendrocyte

CLDND1 - oligodendrocyte
MAG - polydendrocyte
MOBP - oligodendrocyte
CNP - polydendrocyte
ENPP2 - oligodendrocyte / choroid plexus 

# Number of cells per condition
n_cells[,c(1,4)]
##   sample    2
## 1  beads 2168
## 2 debris 1992
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "3"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#F0E442")

VlnPlot(dataObject.unannotated,
        features = cluster3$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster3$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster3$gene[1:10]
##  [1] "OPALIN"        "SCD"           "MAG"           "MAL"          
##  [5] "SELENOP"       "ENPP2"         "RP11-654K19.6" NA             
##  [9] NA              NA
conserved.cluster3$gene[1:10]
##  [1] "CNP"           "TF"            "CERCAM"        "CLDND1"       
##  [5] "SCD"           "SELENOP"       "RP11-654K19.6" "MAG"          
##  [9] "GPRC5B"        "MOBP"

Cluster 4 - astrocyte

SLC1A2 - astrocyte  GJA1 - astrocyte
AQP4 - astrocyte
SLC1A3 - astrocyte
GFAP - astrocyte
AGT - astrocyte

# Number of cells per condition
n_cells[,c(1,5)]
##   sample    3
## 1  beads 1700
## 2 debris 1496
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "4"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#0072B2")

VlnPlot(dataObject.unannotated,
        features = cluster4$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster4$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster4$gene[1:10]
##  [1] "SLC1A2" "AQP4"   "GJA1"   "SLC1A3" "CHI3L1" "F3"     "ATP1A2" "GFAP"  
##  [9] "AGT"    "ADGRV1"
conserved.cluster4$gene[1:10]
##  [1] "ADGRV1"  "AGT"     "AHCYL1"  "AHNAK"   "ALDH1L1" "AMZ2"    "ANGPTL4"
##  [8] "APOE"    "AQP4"    "ASPH"

Cluster 5 - neuron

KCNIP4 - purkinjenueron  NRG1 - neuron  MEG3 - neuron  ACTN1 - mural  ADAMTS6 - polydendrocyte /neuron  ADGRL2 - neuron  ARPP21 - neuron 

# Number of cells per condition
n_cells[,c(1,6)]
##   sample    4
## 1  beads 1005
## 2 debris 1556
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "5"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#D55E00")

VlnPlot(dataObject.unannotated,
        features = cluster5$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster5$gene[1:10]
##  [1] "KCNIP4"       "NRG1"         "MEG3"         "RP11-909M7.3" "ADAMTS6"     
##  [6] "RALYL"        "STXBP5L"      "RBFOX1"       "POLE"         "KCNQ5"
VlnPlot(dataObject.unannotated,
        features = conserved.cluster5$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster5$gene[1:10]
##  [1] "KCNIP4"       "NRG1"         "MEG3"         "RP11-909M7.3" "ADAMTS6"     
##  [6] "RALYL"        "STXBP5L"      "RBFOX1"       "POLE"         "KCNQ5"
conserved.cluster5$gene[1:10]
##  [1] "ACTN1"    "ADAMTS6"  "ADGRL2"   "AGBL4"    "ARPP21"   "CABP1"   
##  [7] "CACNA1B"  "CACNA1C"  "CACNA2D3" "CADPS2"

Cluster 6 - polydendrocyte

VCAN - polydendrocyte
TNR - polydendrocyte
PTPRZ1 - polydendrocyte
LHFPL3 - polydendrocyte
BCAN - astrocyte / polydendrocyte
PDGFRA - polydendrocyte
ABHD2 - endothelial  ADGRG1 - astrocyte  ASCL1 - neurogensis / polydendrocyte

# Number of cells per condition
n_cells[,c(1,7)]
##   sample    5
## 1  beads 1113
## 2 debris 1378
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "6"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#CC79A7")

VlnPlot(dataObject.unannotated,
        features = cluster6$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster6$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster6$gene[1:10]
##  [1] "VCAN"   "TNR"    "PTPRZ1" "LHFPL3" "BCAN"   "PDGFRA" "GPNMB"  "DSCAM" 
##  [9] "PCDH15" "CSPG4"
conserved.cluster6$gene[1:10]
##  [1] "ABHD2"  "ADGRG1" "APOD"   "ASCL1"  "B3GNT7" "BCAN"   "BRINP3" "C1QL1" 
##  [9] "CA10"   "CCDC50"

Cluster 7 - neuron

NEFM - neuron  NEFL - neuron  ENC1 - neuron  TMEM130 - interneuron 

# Number of cells per condition
n_cells[,c(1,8)]
##   sample    6
## 1  beads 1281
## 2 debris 1156
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "7"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#666666")

VlnPlot(dataObject.unannotated,
        features = cluster7$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster7$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster7$gene[1:10]
##  [1] "NEFM"    "NEFL"    "SLC17A7" "THY1"    "NRGN"    "VSNL1"   "STMN2"  
##  [8] "ENC1"    "TMEM130" "CHN1"
conserved.cluster7$gene[1:10]
##  [1] "BASP1"  "CALM3"  "CAMK2A" "CHN1"   "DKK3"   "DNM1"   "ENC1"   "GAP43" 
##  [9] "NEFL"   "NEFM"

Cluster 8 - neuron

ENC1 - neuron  OLFM1 - neuron
CHN1 - neuron  CAMK2A - neuron
ACTN1 - mural
ACTR3B - neuron  ADAM11 - interneuron
ADAM23 - purkinjeneuron  ADCY1 - neuron \

# Number of cells per condition
n_cells[,c(1,9)]
##   sample    7
## 1  beads 1392
## 2 debris 1001
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "8"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#AD7700")

VlnPlot(dataObject.unannotated,
        features = cluster8$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster8$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster8$gene[1:10]
##  [1] "ENC1"   "OLFM1"  "CHN1"   "NPTX1"  "HOPX"   "KCNIP4" "CAMK2A" "HPCAL1"
##  [9] "PHYHIP" "VSNL1"
conserved.cluster8$gene[1:10]
##  [1] "AC011288.2" "ACTN1"      "ACTR3B"     "ADAM11"     "ADAM23"    
##  [6] "ADCY1"      "ADGRB2"     "ADGRL2"     "AGAP2"      "AMPH"

Cluster 9 - astrocyte

AQP1 - choroid plexus / microglia macrophage  GFAP - astrocyte  FAM189A2 - astrocyte
DPP10 - purkinjeneuron  GLIS3 - ependyma  DTNA - astrocyte
GPC5 - astrocyte

# Number of cells per condition
n_cells[,c(1,10)]
##   sample   8
## 1  beads 843
## 2 debris 996
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "9"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#1C91D4")

VlnPlot(dataObject.unannotated,
        features = cluster9$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster9$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster9$gene[1:10]
##  [1] "AQP1"         "GFAP"         "FAM189A2"     "RP11-212P7.3" "DPP10"       
##  [6] "GLIS3"        "DTNA"         "GPC5"         "RP11-986E7.7" "SERPINA3"
conserved.cluster9$gene[1:10]
##  [1] "ABLIM1"   "ADCY2"    "ADGRV1"   "AEBP1"    "ANGPTL4"  "AQP1"    
##  [7] "AQP4-AS1" "BCL6"     "CD44"     "DCLK2"

Cluster 10 - interneuron

ERBB4 - interneuron  GAD1 - interneuron
GAD2 - interneuron
KCNC2 - interneuron
ANK1 - interneuron
ARX - interneuron

# Number of cells per condition
n_cells[,c(1,11)]
##   sample   9
## 1  beads 879
## 2 debris 912
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "10"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#007756")

VlnPlot(dataObject.unannotated,
        features = cluster10$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster10$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster10$gene[1:10]
##  [1] "ERBB4"   "KCNC2"   "ZNF385D" "GAD1"    "DPP10"   "TAC1"    "GAD2"   
##  [8] "NXPH1"   "GRIP1"   "CNTNAP2"
conserved.cluster10$gene[1:10]
##  [1] "ANK1"    "ARX"     "BTBD11"  "CNTNAP2" "CPLX1"   "DPP10"   "ERBB4"  
##  [8] "FGF12"   "GABRB2"  "GAD1"

Cluster 11 - neuron / interneuron

CXCL14 - interneuron / astrocyte  CALB2 - interneuron
CNR1 - interneuron
RELN - neuron / interneuron
RGS12 - neuron / endothelial  GALNTL6 - interneuron
ADARB2 - interneuron
SYNPR - neuron / interneuron

# Number of cells per condition
n_cells[,c(1,12)]
##   sample  10
## 1  beads 856
## 2 debris 907
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "11"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#D5C711")

VlnPlot(dataObject.unannotated,
        features = cluster11$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster11$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster11$gene[1:10]
##  [1] "CXCL14"   "CALB2"    "CNR1"     "DLX6-AS1" "RGS12"    "RELN"    
##  [7] "GALNTL6"  "ADARB2"   "SYNPR"    "ERBB4"
conserved.cluster11$gene[1:10]
##  [1] "ADARB2"   "CALB2"    "CNR1"     "CNTNAP2"  "CXCL14"   "DLX6-AS1"
##  [7] "ERBB4"    "GALNTL6"  "GRIK2"    "NR2F2"

Cluster 12 - neuron / interneuron

HS3ST4 - neuron  ROBO2 - interneuron
ASIC2 - interneuron
PCDH11X - internueron  DIRAS2 - neuron  FRMPD4 - internueron
MDGA2 - neuron 

# Number of cells per condition
n_cells[,c(1,13)]
##   sample  11
## 1  beads 685
## 2 debris 813
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "12"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#005685")

VlnPlot(dataObject.unannotated,
        features = cluster12$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster12$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster12$gene[1:10]
##  [1] "HS3ST4"       "ROBO2"        "ASIC2"        "KIAA1217"     "FRMPD4"      
##  [6] "PCDH11X"      "DIRAS2"       "MDGA2"        "CTD-2337L2.1" "CELF2"
conserved.cluster12$gene[1:10]
##  [1] "ASIC2"        "CELF2"        "CTD-2337L2.1" "DIRAS2"       "FRMPD4"      
##  [6] "HS3ST4"       "KIAA1217"     "MDGA2"        "MEG3"         "PCDH11X"

Cluster 13 - neuron

ENC1 - neuron  OLFM1 - neuron
CHN1 - neuron
NPTX1 - neuron / purkinjenueron  HOPX - bergmann glia / astrocyte  CAMK2A - neuron  CBLN2 - neuron 

# Number of cells per condition
n_cells[,c(1,14)]
##   sample  12
## 1  beads 715
## 2 debris 671
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "13"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#A04700")

VlnPlot(dataObject.unannotated,
        features = cluster13$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster13$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster13$gene[1:10]
##  [1] "ENC1"   "OLFM1"  "HOPX"   "KCNIP4" "CAMK2A" "CHN1"   "HPCAL1" "NPTX1" 
##  [9] "PHYHIP" "VSNL1"
conserved.cluster13$gene[1:10]
##  [1] "CBLN2"  "ENC1"   "ITPKA"  "HOPX"   "TESPA1" "EPHA4"  "CRACDL" "PCDH8" 
##  [9] "GRM1"   "CDH9"

Cluster 14 - neuron / interneuron

NRG1 - interneurons
RALYL - interneurons
RORB - astrocyte  CPNE4 - neuron  NRGN - neuron  TUBB2A - interneuron

# Number of cells per condition
n_cells[,c(1,15)]
##   sample  13
## 1  beads 562
## 2 debris 722
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "14"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#B14380")

VlnPlot(dataObject.unannotated,
        features = cluster14$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster14$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster14$gene[1:10]
##  [1] "RORB"         "CTC-340A15.2" "NRGN"         "BASP1"        "TUBA1B"      
##  [6] "RALYL"        "CPNE4"        "TMSB10"       "NRG1"         "SLC17A7"
conserved.cluster14$gene[1:10]
##  [1] "RORB"         "CPNE4"        "NRGN"         "CTC-340A15.2" "RXFP1"       
##  [6] "PTPRT"        "TUBA1B"       "RALYL"        "NRG1"         "CHGA"

Cluster 15 - neuron

NEFL - neuron
SLC17A7 - neuron  NRGN - neuron  VSNL1 - neuron
THY1 - purkinjeneuron  TUBA1B - nuerogensis
SNAP25 - granular neuron 

# Number of cells per condition
n_cells[,c(1,16)]
##   sample  14
## 1  beads 532
## 2 debris 665
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "15"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#4D4D4D")

VlnPlot(dataObject.unannotated,
        features = cluster15$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster15$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster15$gene[1:10]
##  [1] "NEFL"    "NRGN"    "SLC17A7" "VSNL1"   "THY1"    "CHN1"    "YWHAH"  
##  [8] "ENC1"    "NEFM"    "OLFM1"
conserved.cluster15$gene[1:10]
##  [1] "THY1"    "TUBA1B"  "SNAP25"  "VSNL1"   "NRGN"    "NEFL"    "CALM3"  
##  [8] "SLC17A7" "RTN1"    "CHN1"

Cluster 16 - neuron / interneuron

GRIK1 - purkinjeneuron  CNTNAP2 - interneuron
GRIK2 - interneuron
RBFOX1 - neuron  XKR4 - interneuron
SST - internueron  ROBO2 - internueron
GRIP1 - interneuon  PCDH11X - neuron 

# Number of cells per condition
n_cells[,c(1,17)]
##   sample  15
## 1  beads 554
## 2 debris 542
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "16"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#FFBE2D")

VlnPlot(dataObject.unannotated,
        features = cluster16$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster16$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster16$gene[1:10]
##  [1] "GRIK1"    "SST"      "ROBO2"    "KIAA1217" "RBFOX1"   "GRIP1"   
##  [7] "XKR4"     "PCDH11X"  "CNTNAP2"  "GRIK2"
conserved.cluster16$gene[1:10]
##  [1] "GRIK1"    "RBFOX1"   "CNTNAP2"  "KIAA1217" "GRIK2"    "XKR4"    
##  [7] "ROBO2"    "SNHG14"   "GRIP1"    "SYNPR"

Cluster 17 - neuron / interneuron

KCNIP4 - purkinjeneuron  LRRTM4 - neuron / polydendocyte  CSMD1 - neuron  RALYL - interneuron  FAM155A - interneuron  NRXN1 - neuron  FAM155A - interneuron 

# Number of cells per condition
n_cells[,c(1,18)]
##   sample  16
## 1  beads 365
## 2 debris 713
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "17"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#80C7EF")

VlnPlot(dataObject.unannotated,
        features = cluster17$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster17$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster17$gene[1:10]
##  [1] "CH507-513H4.1" "KCNIP4"        "MTCO1P12"      "LRRTM4"       
##  [5] "CSMD1"         "RALYL"         "FAM155A"       "ROBO2"        
##  [9] "NRXN1"         "STXBP5L"
conserved.cluster17$gene[1:10]
##  [1] "CH507-513H4.1"  "CH507-528H12.1" "FAM155A"        "KCNIP4"        
##  [5] "MTCO1P12"       "LRRTM4"         "CSMD1"          "NRXN1"         
##  [9] "ADGRB3"         "RALYL"

Cluster 18 - interneurons

KIT - interneurons 
SLC6A1 - interneurons
GAD2 - interneurons
GRIP1 - interneurons
PTPRT - neuron 

# Number of cells per condition
n_cells[,c(1,19)]
##   sample  17
## 1  beads 438
## 2 debris 438
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "18"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = colors[19])

VlnPlot(dataObject.unannotated,
        features = cluster18$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster18$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster18$gene[1:10]
##  [1] "KIT"    "SLC6A1" "GAD2"   "GRIP1"  "NRIP3"  "LAMP5"  "FGF13"  "SGCZ"  
##  [9] "GRIN2A" "PTPRT"
conserved.cluster18$gene[1:10]
##  [1] "FGF13"  "GAD2"   "GRIN2A" "KIT"    "NRIP3"  "SLC6A1" "SV2C"   "GRIP1" 
##  [9] "LAMP5"  "PTPRT"

Cluster 19 - microglia marcophage

STAB1 - microglia marcophage  PLXDC2 - microglia marcophage  C1QB - microglia marcophage
CD74 - microglia marcophage
SLC11A1 - microglia marcophage

# Number of cells per condition
n_cells[,c(1,20)]
##   sample  18
## 1  beads 391
## 2 debris 375
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "19"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = colors[20])

VlnPlot(dataObject.unannotated,
        features = cluster19$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster19$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster19$gene[1:10]
##  [1] "C3"       "STAB1"    "FYB1"     "PLXDC2"   "C1QB"     "CD74"    
##  [7] "SLC11A1"  "ARHGAP24" "ST6GAL1"  "CHST11"
conserved.cluster19$gene[1:10]
##  [1] "DENND3"  "C3"      "PLXDC2"  "C1QB"    "SLC11A1" "CD74"    "FYB1"   
##  [8] "SRGAP2"  "STAB1"   "APBB1IP"

Cluster 20 - endothelial

CLDN5 - endothelial  IFI27 - choroid plexus / endothelial
TGM2 - endothelial
ADAMTS9 - astrocyte
FLT1 - endothelial

# Number of cells per condition
n_cells[,c(1,21)]
##   sample  19
## 1  beads 235
## 2 debris 250
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "20"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = colors[21])

VlnPlot(dataObject.unannotated,
        features = cluster20$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster20$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster20$gene[1:10]
##  [1] "CLDN5"         "ADAMTS9"       "IFI27"         "FLT1"         
##  [5] "TGM2"          "MT2A"          "SLCO4A1"       "RP11-326C3.17"
##  [9] "A2M"           "PODXL"
conserved.cluster20$gene[1:10]
##  [1] "CLDN5"   "IFI27"   "MT2A"    "FLT1"    "EGFL7"   "TGM2"    "ADAMTS9"
##  [8] "PODXL"   "SLCO4A1" "KLF2"

Cluster 21 - oligodendrocyte / polydendrocyte

CTNNA3 - mural  PDE4B - oligodendrocyte
ST18 - granular neuron polydendrocyte / oligodendrocyte
PLCL1 - oligodendrocyte
DPYD - polydendrocyte

# Number of cells per condition
n_cells[,c(1,22)]
##   sample  20
## 1  beads 144
## 2 debris 267
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "21"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = colors[22])

VlnPlot(dataObject.unannotated,
        features = cluster21$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster21$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster21$gene[1:10]
##  [1] "CH507-513H4.1"     "CTNNA3"            "PDE4B"            
##  [4] "ST18"              "UNC5C"             "DPYD"             
##  [7] "PLCL1"             "ENSG10010138868.1" "MAN2A1"           
## [10] "NCKAP5"
conserved.cluster21$gene[1:10]
##  [1] "CH507-528H12.1"    "CH507-513H4.1"     "CTNNA3"           
##  [4] "PDE4B"             "ST18"              "NKAIN2"           
##  [7] "UNC5C"             "PLCL1"             "DPYD"             
## [10] "ENSG10010138868.1"

Assign identities

# Rename all identities
dataObject.annotated <- RenameIdents(object = dataObject.unannotated, 
                               "0" = "Oligodendrocytes / Polydendrocyte",
                               "1" = "Oligodendrocytes",
                               "2" = "Oligodendrocytes",
                               "3" = "Oligodendrocytes / Polydendrocyte",
                               "4" = "Astrocytes",
                               "5" = "Neurons",
                               "6" = "Polydendrocyte",
                               "7" = "Neurons",
                               "8" = "Neurons",
                               "9" = "Astrocytes",
                               "10" = "Interneurons",
                               "11" = "Neurons / Interneurons",
                               "12" = "Neurons / Interneurons",
                               "13" = "Neurons", 
                               "14" = "Neurons / Interneurons", 
                               "15" = "Neurons", 
                               "16" = "Neurons / Interneurons",
                               "17" = "Neurons / Interneurons", 
                               "18" = "Interneurons", 
                               "19" = "Microglia", 
                               "20" = "Endothelial", 
                               "21" = "Oligodendrocytes / Polydendrocyte")
dataObject.annotated$annotated_clusters <- factor(Idents(dataObject.annotated),
                                             levels = c("Astrocytes",
                                                        "Endothelial",
                                                        "Microglia", 
                                                        "Oligodendrocytes",
                                                        "Polydendrocyte", 
                                                        "Oligodendrocytes / Polydendrocyte",
                                                        "Interneurons",
                                                        "Neurons",
                                                        "Neurons / Interneurons"))
Idents(dataObject.annotated) <- "annotated_clusters"

UMAP - annotated

u1 <- DimPlot(dataObject.annotated,
              group.by = "annotated_clusters",
              cols = colors,
              raster = FALSE,
              label = FALSE) +
  ggtitle("LBD pilot brain")
u1

u2 <- DimPlot(dataObject.annotated,
              group.by = "annotated_clusters",
              cols = colors,
              dims = c(2,3),
              raster = FALSE,
              label = FALSE) +
  ggtitle("LBD pilot brain")
u2

Cell count

Table

Relative abundance

# sample
b2 <- dataObject.annotated@meta.data %>%
  group_by(annotated_clusters, sample) %>%
  dplyr::count() %>%
  group_by(annotated_clusters) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=annotated_clusters,y=percent, fill=sample)) +
  geom_col() +
  scale_fill_manual(values = sample_colors) +
  ggtitle("Percentage of sample per cluster") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  geom_hline(yintercept = 50, color = "black")
b2

Dot plot markers

markers.to.plot <-
  c(
    "SLC1A2",
    "GJA1",
    "CLDN5",
    "TGM2",
    "STAB1",
    "CD74",
    "VCAN",
    "TNR",
    "S100B",
    "ABCA2",
    "NRG1",
    "MEG3",
    "ERBB4",
    "GAD1"
  )
dot <-DotPlot(dataObject.annotated, features = markers.to.plot, cols = sample_colors,
    dot.scale = 8, split.by = "sample") + RotatedAxis()
dot