Set working directory

knitr::opts_knit$set(root.dir = ".")

Libraris, paths, colors

source(here::here("snRNA/scripts/R", "file_paths_and_colours.R"))
tissue <- "Brain"
treatment <- "pilot"

Read in object

# read object
dataObject.unannotated <- readRDS("../../rObjects/brain_unannotated.rds")

Define resolution and groups

# set levels and idents
Idents(dataObject.unannotated) <- "seurat_clusters"
# normalize if SCTtransform was used
DefaultAssay(dataObject.unannotated) <- "RNA"

Clustering QC before annotation

Unannotated UMAP

dittoDimPlot(object = dataObject.unannotated,
             var = "seurat_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = TRUE)

Cells per cluster

dataObject.unannotated$sample <- factor(dataObject.unannotated$sample, levels = c("beads", "debris"))

n_cells <- FetchData(dataObject.unannotated, 
  vars = c("ident", "sample")) %>%
  dplyr::count(ident,sample) %>%
  tidyr::spread(ident, n)
n_cells

##   sample    0    1    2    3    4    5    6    7   8   9  10  11  12  13  14
## 1  beads 3745 3208 2168 1700 1005 1113 1281 1392 843 879 856 685 715 562 532
## 2 debris 3347 2837 1992 1496 1556 1378 1156 1001 996 912 907 813 671 722 665
##    15  16  17  18  19  20  21
## 1 554 365 438 391 235 144 207
## 2 542 713 438 375 250 267 202

Cluster tree

dataObject.unannotated <- BuildClusterTree(object = dataObject.unannotated,
                                     dims = 1:20,
                                     reorder = FALSE,
                                     reorder.numeric = FALSE)

tree <- dataObject.unannotated@tools$BuildClusterTree
tree$tip.label <- paste0("Cluster ", tree$tip.label)

tree_1 <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
#  ggtree::geom_tippoint(color = colors[1:length(tree$tip.label)], 
  #                      shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))
tree_1

Marker exploration

FindMarkers will find markers between two different identity groups - you have to specify both identity groups. This is useful for comparing the differences between two specific groups.

FindAllMarkers will find markers differentially expressed in each identity group by comparing it to all of the others - you don’t have to manually define anything. Note that markers may bleed over between closely-related groups - they are not forced to be specific to only one group. This is what most people use (and likely what you want).

FindConservedMarkers will find markers that are conserved between two groups - this can be useful if you want to find markers that are conserved between a treated and untreated condition for a specific cell type or group of cells. It means they are differentially expressed compared to other groups, but have similar expression between the two groups you’re actually comparing. https://www.biostars.org/p/409790/ ### Find all markers

# read object
all.markers <- readRDS("../../rObjects/unannotated_all_markers.rds")

# more stringent filtering
all.markers <- all.markers[all.markers$p_val_adj < 0.05,]
markers.strict <- all.markers[
  all.markers$delta_pct > summary(all.markers$delta_pct)[5],]

# compare 
table(all.markers$cluster)

## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
## 1052 1056 1068  939 1529 1530 1179 1770 1827 1311 1691 1468 1539 1805 1712 1658 
##   16   17   18   19   20   21 
## 1417 1350 1531 1364 1176 1272

table(markers.strict$cluster)

## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
##   15    4   35    7  165  956   63   18 1642   62  267  104  734 1678 1448   17 
##   16   17   18   19   20   21 
##   76   28  335   61   27   68

# subset
cluster0 <- markers.strict[markers.strict$cluster == 0,]
cluster1 <- markers.strict[markers.strict$cluster == 1,]
cluster2 <- markers.strict[markers.strict$cluster == 2,]
cluster3 <- markers.strict[markers.strict$cluster == 3,]
cluster4 <- markers.strict[markers.strict$cluster == 4,]
cluster5 <- markers.strict[markers.strict$cluster == 5,]
cluster6 <- markers.strict[markers.strict$cluster == 6,]
cluster7 <- markers.strict[markers.strict$cluster == 7,]
cluster8 <- markers.strict[markers.strict$cluster == 8,]
cluster9 <- markers.strict[markers.strict$cluster == 9,]
cluster10 <- markers.strict[markers.strict$cluster == 10,]
cluster11 <- markers.strict[markers.strict$cluster == 11,]
cluster12 <- markers.strict[markers.strict$cluster == 12,]
cluster13 <- markers.strict[markers.strict$cluster == 13,]
cluster14 <- markers.strict[markers.strict$cluster == 14,]
cluster15 <- markers.strict[markers.strict$cluster == 15,]
cluster16 <- markers.strict[markers.strict$cluster == 16,]
cluster17 <- markers.strict[markers.strict$cluster == 17,]
cluster18 <- markers.strict[markers.strict$cluster == 18,]
cluster19 <- markers.strict[markers.strict$cluster == 19,]
cluster20 <- markers.strict[markers.strict$cluster == 20,]
cluster21 <- markers.strict[markers.strict$cluster == 21,]

Conserved markers

Identify conserved cell type markers To identify canonical cell type marker genes that are conserved across conditions, we provide the FindConservedMarkers() function. This function performs differential gene expression testing for each dataset/group and combines the p-values using meta-analysis methods from the MetaDE R package. For example, we can calculated the genes that are conserved markers irrespective of stimulation condition in cluster 6 (NK cells).

conserved.markers <- readRDS("../../rObjects/brain_conserved_markers.rds")
conserved.markers.strict <- readRDS("../../rObjects/brain_conserved_markers_strict.rds")

# compare 
table(conserved.markers$cluster)

## 
##    0    1   10   11   12   13   14   15   16   17   18   19    2   20   21    3 
## 1694 1552 3192 2621 3259 4013 3876 2475 1941 1843 2814 1521 1615 1158 1540 1405 
##    4    5    6    7    8    9 
## 1942 3509 1779 2474 4106 1642

table(conserved.markers.strict$cluster)

## 
##    0    1   10   11   12   13   14   15   16   17   18   19    2   20   21    3 
##   45   20  499  189 1120 2814 2275   86  116   70  569   89   92   81  113   37 
##    4    5    6    7    8    9 
##  280 1504  102   67 2722  101

conserved.cluster0 <- conserved.markers.strict[conserved.markers.strict$cluster == 0,]
conserved.cluster1 <- conserved.markers.strict[conserved.markers.strict$cluster == 1,]
conserved.cluster2 <- conserved.markers.strict[conserved.markers.strict$cluster == 2,]
conserved.cluster3 <- conserved.markers.strict[conserved.markers.strict$cluster == 3,]
conserved.cluster4 <- conserved.markers.strict[conserved.markers.strict$cluster == 4,]
conserved.cluster5 <- conserved.markers.strict[conserved.markers.strict$cluster == 5,]
conserved.cluster6 <- conserved.markers.strict[conserved.markers.strict$cluster == 6,]
conserved.cluster7 <- conserved.markers.strict[conserved.markers.strict$cluster == 7,]
conserved.cluster8 <- conserved.markers.strict[conserved.markers.strict$cluster == 8,]
conserved.cluster9 <- conserved.markers.strict[conserved.markers.strict$cluster == 9,]
conserved.cluster10 <- conserved.markers.strict[conserved.markers.strict$cluster == 10,]
conserved.cluster11 <- conserved.markers.strict[conserved.markers.strict$cluster == 11,]
conserved.cluster12 <- conserved.markers.strict[conserved.markers.strict$cluster == 12,]
conserved.cluster13 <- conserved.markers.strict[conserved.markers.strict$cluster == 13,]
conserved.cluster14 <- conserved.markers.strict[conserved.markers.strict$cluster == 14,]
conserved.cluster15 <- conserved.markers.strict[conserved.markers.strict$cluster == 15,]
conserved.cluster16 <- conserved.markers.strict[conserved.markers.strict$cluster == 16,]
conserved.cluster17 <- conserved.markers.strict[conserved.markers.strict$cluster == 17,]
conserved.cluster18 <- conserved.markers.strict[conserved.markers.strict$cluster == 18,]
conserved.cluster19 <- conserved.markers.strict[conserved.markers.strict$cluster == 19,]
conserved.cluster20 <- conserved.markers.strict[conserved.markers.strict$cluster == 20,]
conserved.cluster21 <- conserved.markers.strict[conserved.markers.strict$cluster == 21,]

PCs

# Printing out the most variable genes driving PCs
print(x = dataObject.unannotated[["pca"]], 
      dims = 1:5, 
      nfeatures = 10)

## PC_ 1 
## Positive:  PLP1, MBP, TF, CNP, CTNNA3, ST18, SELENOP, CRYAB, CERCAM, CLDND1 
## Negative:  AQP4, SLC1A2, SLC1A3, GFAP, GJA1, F3, CHI3L1, ANGPTL4, DTNA, ATP1A2 
## PC_ 2 
## Positive:  KCNIP4, MEG3, RP11-909M7.3, RBFOX1, ROBO2, SYT1, CSMD1, SNHG14, OPCML, RALYL 
## Negative:  SLC1A3, AQP4, SLC1A2, GJA1, GLUL, GFAP, PLP1, F3, CHI3L1, ANGPTL4 
## PC_ 3 
## Positive:  VCAN, PTPRZ1, LHFPL3, TNR, PDGFRA, BCAN, GPNMB, PCDH15, GRIK1, CSPG4 
## Negative:  NRGN, ENC1, CHN1, SLC17A7, VSNL1, CALM1, CLU, SNAP25, OLFM1, AQP4 
## PC_ 4 
## Positive:  VCAN, PTPRZ1, GPNMB, BCAN, PDGFRA, TNR, CSPG4, LHFPL3, APOD, COL9A1 
## Negative:  CH507-528H12.1, ERBB4, DPP10, CNTNAP2, ROBO2, GRIP1, CH507-513H4.1, RBFOX1, GRIK1, CXCL14 
## PC_ 5 
## Positive:  CH507-528H12.1, KCNIP4, CH507-513H4.1, PVT1, PCDH9, RALYL, LRP1B, PTPRD, LRRTM4, CSMD1 
## Negative:  CXCL14, SLC6A1, GAD2, KIT, GAD1, ERBB4, RELN, GRIK1, GRIP1, DLX6-AS1

Top variable genes

# print top variable genes
top100 <- dataObject.unannotated@assays$SCT@var.features[1:100]
top100

##   [1] "CXCL14"         "CHI3L1"         "NPY"            "SST"           
##   [5] "SLC1A2"         "AQP4"           "GJA1"           "AQP1"          
##   [9] "COL19A1"        "PLP1"           "PVT1"           "RELN"          
##  [13] "GRIK1"          "SLC1A3"         "VCAN"           "CH507-528H12.1"
##  [17] "GPNMB"          "KIT"            "GFAP"           "ANGPTL4"       
##  [21] "PTPRZ1"         "DPP10"          "RP11-986E7.7"   "SERPINA3"      
##  [25] "F3"             "ERBB4"          "SGCZ"           "FAM189A2"      
##  [29] "BAALC-AS1"      "LHFPL3"         "VIP"            "ROBO2"         
##  [33] "KCNIP4"         "GPC5"           "CNR1"           "CH507-513H4.1" 
##  [37] "TF"             "RP11-212P7.3"   "DTNA"           "GLUL"          
##  [41] "PDGFRA"         "VEGFA"          "MBP"            "ATP1A2"        
##  [45] "ADGRV1"         "PCDH9"          "BCAN"           "AEBP1"         
##  [49] "SLC14A1"        "TNR"            "CLDN5"          "CALB2"         
##  [53] "ZFP36"          "AGT"            "CTD-2126E3.6"   "MT2A"          
##  [57] "CTNNA3"         "NXPH1"          "CRH"            "CST3"          
##  [61] "SPP1"           "GAD2"           "SLC6A1"         "CD44"          
##  [65] "RP1-223B1.1"    "CCK"            "RP11-6L16.1"    "ZNF385D"       
##  [69] "RBFOX1"         "ST18"           "CEP162"         "CNP"           
##  [73] "RFX4"           "RP11-141F7.1"   "CLU"            "CNTN5"         
##  [77] "CNTNAP2"        "ADAMTS9"        "RP4-668E10.4"   "GRIP1"         
##  [81] "CSPG4"          "GLIS3"          "ADARB2"         "C4B"           
##  [85] "CRYAB"          "APOD"           "C4A"            "FGFR3"         
##  [89] "EFEMP1"         "KIAA1217"       "RYR3"           "AC084149.2"    
##  [93] "MT1M"           "OBI1-AS1"       "SELENOP"        "SMOC1"         
##  [97] "OPALIN"         "CCL2"           "GAD1"           "LINC00609"

Cluster Annotation

Cluster 0 - oligodendrocyte / polydendrocyte

OPALIN - oligodendrocyte MAG - polydendrocyte CLDN11 - polydendrocyte / oligodendrocyte ANLN - oligodendrocyte APOD - fibroblast-like / oligodendrocyte

# Number of cells per condition
n_cells[,c(1,2)]

##   sample    0
## 1  beads 3745
## 2 debris 3347

# UMAP with only cluster 0
DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "0"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#E69F00")

VlnPlot(dataObject.unannotated,
        features = cluster0$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster0$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster0$gene[1:10]

##  [1] "OPALIN"  "RNASE1"  "TF"      "SCD"     "MAG"     "SPP1"    "MAL"    
##  [8] "SELENOP" "ENPP2"   "CLDN11"

conserved.cluster0$gene[1:10]

##  [1] "ANLN"   "APOD"   "CBR1"   "CD9"    "CERCAM" "CLDN11" "CLDND1" "CNDP1" 
##  [9] "CNP"    "CNTN2"

Cluster 1 - oligodendrocyte

CTNNA3 - mural
CARNS1 - oligodendrocyte
CERCAM - polydendrocyte / oligodendrocyte MOBP - oligodendrocyte

# Number of cells per condition
n_cells[,c(1,3)]

##   sample    1
## 1  beads 3208
## 2 debris 2837

DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "1"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#56B4E9")

VlnPlot(dataObject.unannotated,
        features = cluster1$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster1$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster1$gene[1:10]

##  [1] "CTNNA3"      "RP1-223B1.1" "ST18"        "FRMD4B"      NA           
##  [6] NA            NA            NA            NA            NA

conserved.cluster1$gene[1:10]

##  [1] "CARNS1"    "CERCAM"    "CTNNA3"    "DOCK10"    "DOCK5"     "FRMD4B"   
##  [7] "LINC00609" "MOBP"      "NCKAP5"    "PIP4K2A"

Cluster 2 - oligodendrocyte

COL18A1 - fibroblast-like S100B - oligodendrocyte
PCSK6 - purkinjeneuron ABCA2 - oligodendrocyte
ANLN - oligodendrocyte

# Number of cells per condition
n_cells[,c(1,4)]

##   sample    2
## 1  beads 2168
## 2 debris 1992

DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "2"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#009E73")

VlnPlot(dataObject.unannotated,
        features = cluster2$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster2$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster2$gene[1:10]

##  [1] "COL18A1"  "S100B"    "SELENOP"  "PCSK6"    "SLC5A11"  "TF"      
##  [7] "SPP1"     "MARCKSL1" "CDK18"    "HAPLN2"

conserved.cluster2$gene[1:10]

##  [1] "ABCA2"   "ANLN"    "CARNS1"  "CDK18"   "CLDND1"  "CNP"     "CNTN2"  
##  [8] "COL18A1" "CRYAB"   "DHCR24"

Cluster 3 - oligodendrocyte / polydendrocyte

CLDND1 - oligodendrocyte
MAG - polydendrocyte
MOBP - oligodendrocyte
CNP - polydendrocyte
ENPP2 - oligodendrocyte / choroid plexus

# Number of cells per condition
n_cells[,c(1,4)]

##   sample    2
## 1  beads 2168
## 2 debris 1992

DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "3"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#F0E442")

VlnPlot(dataObject.unannotated,
        features = cluster3$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster3$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster3$gene[1:10]

##  [1] "OPALIN"        "SCD"           "MAG"           "MAL"          
##  [5] "SELENOP"       "ENPP2"         "RP11-654K19.6" NA             
##  [9] NA              NA

conserved.cluster3$gene[1:10]

##  [1] "CNP"           "TF"            "CERCAM"        "CLDND1"       
##  [5] "SCD"           "SELENOP"       "RP11-654K19.6" "MAG"          
##  [9] "GPRC5B"        "MOBP"

Cluster 4 - astrocyte

SLC1A2 - astrocyte GJA1 - astrocyte
AQP4 - astrocyte
SLC1A3 - astrocyte
GFAP - astrocyte
AGT - astrocyte

# Number of cells per condition
n_cells[,c(1,5)]

##   sample    3
## 1  beads 1700
## 2 debris 1496

DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "4"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#0072B2")

VlnPlot(dataObject.unannotated,
        features = cluster4$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster4$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster4$gene[1:10]

##  [1] "SLC1A2" "AQP4"   "GJA1"   "SLC1A3" "CHI3L1" "F3"     "ATP1A2" "GFAP"  
##  [9] "AGT"    "ADGRV1"

conserved.cluster4$gene[1:10]

##  [1] "ADGRV1"  "AGT"     "AHCYL1"  "AHNAK"   "ALDH1L1" "AMZ2"    "ANGPTL4"
##  [8] "APOE"    "AQP4"    "ASPH"

Cluster 5 - neuron

KCNIP4 - purkinjenueron NRG1 - neuron MEG3 - neuron ACTN1 - mural ADAMTS6 - polydendrocyte /neuron ADGRL2 - neuron ARPP21 - neuron

# Number of cells per condition
n_cells[,c(1,6)]

##   sample    4
## 1  beads 1005
## 2 debris 1556

DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "5"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#D55E00")

VlnPlot(dataObject.unannotated,
        features = cluster5$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster5$gene[1:10]

##  [1] "KCNIP4"       "NRG1"         "MEG3"         "RP11-909M7.3" "ADAMTS6"     
##  [6] "RALYL"        "STXBP5L"      "RBFOX1"       "POLE"         "KCNQ5"

VlnPlot(dataObject.unannotated,
        features = conserved.cluster5$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster5$gene[1:10]

##  [1] "KCNIP4"       "NRG1"         "MEG3"         "RP11-909M7.3" "ADAMTS6"     
##  [6] "RALYL"        "STXBP5L"      "RBFOX1"       "POLE"         "KCNQ5"

conserved.cluster5$gene[1:10]

##  [1] "ACTN1"    "ADAMTS6"  "ADGRL2"   "AGBL4"    "ARPP21"   "CABP1"   
##  [7] "CACNA1B"  "CACNA1C"  "CACNA2D3" "CADPS2"

Cluster 6 - polydendrocyte

VCAN - polydendrocyte
TNR - polydendrocyte
PTPRZ1 - polydendrocyte
LHFPL3 - polydendrocyte
BCAN - astrocyte / polydendrocyte
PDGFRA - polydendrocyte
ABHD2 - endothelial ADGRG1 - astrocyte ASCL1 - neurogensis / polydendrocyte

# Number of cells per condition
n_cells[,c(1,7)]

##   sample    5
## 1  beads 1113
## 2 debris 1378

DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "6"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#CC79A7")

VlnPlot(dataObject.unannotated,
        features = cluster6$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster6$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster6$gene[1:10]

##  [1] "VCAN"   "TNR"    "PTPRZ1" "LHFPL3" "BCAN"   "PDGFRA" "GPNMB"  "DSCAM" 
##  [9] "PCDH15" "CSPG4"

conserved.cluster6$gene[1:10]

##  [1] "ABHD2"  "ADGRG1" "APOD"   "ASCL1"  "B3GNT7" "BCAN"   "BRINP3" "C1QL1" 
##  [9] "CA10"   "CCDC50"

Cluster 7 - neuron

NEFM - neuron NEFL - neuron ENC1 - neuron TMEM130 - interneuron

# Number of cells per condition
n_cells[,c(1,8)]

##   sample    6
## 1  beads 1281
## 2 debris 1156

DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "7"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#666666")

VlnPlot(dataObject.unannotated,
        features = cluster7$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster7$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster7$gene[1:10]

##  [1] "NEFM"    "NEFL"    "SLC17A7" "THY1"    "NRGN"    "VSNL1"   "STMN2"  
##  [8] "ENC1"    "TMEM130" "CHN1"

conserved.cluster7$gene[1:10]

##  [1] "BASP1"  "CALM3"  "CAMK2A" "CHN1"   "DKK3"   "DNM1"   "ENC1"   "GAP43" 
##  [9] "NEFL"   "NEFM"

Cluster 8 - neuron

ENC1 - neuron OLFM1 - neuron
CHN1 - neuron CAMK2A - neuron
ACTN1 - mural
ACTR3B - neuron ADAM11 - interneuron
ADAM23 - purkinjeneuron ADCY1 - neuron \

# Number of cells per condition
n_cells[,c(1,9)]

##   sample    7
## 1  beads 1392
## 2 debris 1001

DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "8"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#AD7700")

VlnPlot(dataObject.unannotated,
        features = cluster8$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster8$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster8$gene[1:10]

##  [1] "ENC1"   "OLFM1"  "CHN1"   "NPTX1"  "HOPX"   "KCNIP4" "CAMK2A" "HPCAL1"
##  [9] "PHYHIP" "VSNL1"

conserved.cluster8$gene[1:10]

##  [1] "AC011288.2" "ACTN1"      "ACTR3B"     "ADAM11"     "ADAM23"    
##  [6] "ADCY1"      "ADGRB2"     "ADGRL2"     "AGAP2"      "AMPH"

Cluster 9 - astrocyte

AQP1 - choroid plexus / microglia macrophage GFAP - astrocyte FAM189A2 - astrocyte
DPP10 - purkinjeneuron GLIS3 - ependyma DTNA - astrocyte
GPC5 - astrocyte

# Number of cells per condition
n_cells[,c(1,10)]

##   sample   8
## 1  beads 843
## 2 debris 996

DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "9"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#1C91D4")

VlnPlot(dataObject.unannotated,
        features = cluster9$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster9$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster9$gene[1:10]

##  [1] "AQP1"         "GFAP"         "FAM189A2"     "RP11-212P7.3" "DPP10"       
##  [6] "GLIS3"        "DTNA"         "GPC5"         "RP11-986E7.7" "SERPINA3"

conserved.cluster9$gene[1:10]

##  [1] "ABLIM1"   "ADCY2"    "ADGRV1"   "AEBP1"    "ANGPTL4"  "AQP1"    
##  [7] "AQP4-AS1" "BCL6"     "CD44"     "DCLK2"

Cluster 10 - interneuron

ERBB4 - interneuron GAD1 - interneuron
GAD2 - interneuron
KCNC2 - interneuron
ANK1 - interneuron
ARX - interneuron

# Number of cells per condition
n_cells[,c(1,11)]

##   sample   9
## 1  beads 879
## 2 debris 912

DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "10"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#007756")

VlnPlot(dataObject.unannotated,
        features = cluster10$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster10$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster10$gene[1:10]

##  [1] "ERBB4"   "KCNC2"   "ZNF385D" "GAD1"    "DPP10"   "TAC1"    "GAD2"   
##  [8] "NXPH1"   "GRIP1"   "CNTNAP2"

conserved.cluster10$gene[1:10]

##  [1] "ANK1"    "ARX"     "BTBD11"  "CNTNAP2" "CPLX1"   "DPP10"   "ERBB4"  
##  [8] "FGF12"   "GABRB2"  "GAD1"

Cluster 11 - neuron / interneuron

CXCL14 - interneuron / astrocyte CALB2 - interneuron
CNR1 - interneuron
RELN - neuron / interneuron
RGS12 - neuron / endothelial GALNTL6 - interneuron
ADARB2 - interneuron
SYNPR - neuron / interneuron

# Number of cells per condition
n_cells[,c(1,12)]

##   sample  10
## 1  beads 856
## 2 debris 907

DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "11"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#D5C711")

VlnPlot(dataObject.unannotated,
        features = cluster11$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster11$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster11$gene[1:10]

##  [1] "CXCL14"   "CALB2"    "CNR1"     "DLX6-AS1" "RGS12"    "RELN"    
##  [7] "GALNTL6"  "ADARB2"   "SYNPR"    "ERBB4"

conserved.cluster11$gene[1:10]

##  [1] "ADARB2"   "CALB2"    "CNR1"     "CNTNAP2"  "CXCL14"   "DLX6-AS1"
##  [7] "ERBB4"    "GALNTL6"  "GRIK2"    "NR2F2"

Cluster 12 - neuron / interneuron

HS3ST4 - neuron ROBO2 - interneuron
ASIC2 - interneuron
PCDH11X - internueron DIRAS2 - neuron FRMPD4 - internueron
MDGA2 - neuron

# Number of cells per condition
n_cells[,c(1,13)]

##   sample  11
## 1  beads 685
## 2 debris 813

DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "12"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#005685")

VlnPlot(dataObject.unannotated,
        features = cluster12$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster12$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster12$gene[1:10]

##  [1] "HS3ST4"       "ROBO2"        "ASIC2"        "KIAA1217"     "FRMPD4"      
##  [6] "PCDH11X"      "DIRAS2"       "MDGA2"        "CTD-2337L2.1" "CELF2"

conserved.cluster12$gene[1:10]

##  [1] "ASIC2"        "CELF2"        "CTD-2337L2.1" "DIRAS2"       "FRMPD4"      
##  [6] "HS3ST4"       "KIAA1217"     "MDGA2"        "MEG3"         "PCDH11X"

Cluster 13 - neuron

ENC1 - neuron OLFM1 - neuron
CHN1 - neuron
NPTX1 - neuron / purkinjenueron HOPX - bergmann glia / astrocyte CAMK2A - neuron CBLN2 - neuron

# Number of cells per condition
n_cells[,c(1,14)]

##   sample  12
## 1  beads 715
## 2 debris 671

DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "13"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#A04700")

VlnPlot(dataObject.unannotated,
        features = cluster13$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster13$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster13$gene[1:10]

##  [1] "ENC1"   "OLFM1"  "HOPX"   "KCNIP4" "CAMK2A" "CHN1"   "HPCAL1" "NPTX1" 
##  [9] "PHYHIP" "VSNL1"

conserved.cluster13$gene[1:10]

##  [1] "CBLN2"  "ENC1"   "ITPKA"  "HOPX"   "TESPA1" "EPHA4"  "CRACDL" "PCDH8" 
##  [9] "GRM1"   "CDH9"

Cluster 14 - neuron / interneuron

NRG1 - interneurons
RALYL - interneurons
RORB - astrocyte CPNE4 - neuron NRGN - neuron TUBB2A - interneuron

# Number of cells per condition
n_cells[,c(1,15)]

##   sample  13
## 1  beads 562
## 2 debris 722

DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "14"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#B14380")

VlnPlot(dataObject.unannotated,
        features = cluster14$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster14$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster14$gene[1:10]

##  [1] "RORB"         "CTC-340A15.2" "NRGN"         "BASP1"        "TUBA1B"      
##  [6] "RALYL"        "CPNE4"        "TMSB10"       "NRG1"         "SLC17A7"

conserved.cluster14$gene[1:10]

##  [1] "RORB"         "CPNE4"        "NRGN"         "CTC-340A15.2" "RXFP1"       
##  [6] "PTPRT"        "TUBA1B"       "RALYL"        "NRG1"         "CHGA"

Cluster 15 - neuron

NEFL - neuron
SLC17A7 - neuron NRGN - neuron VSNL1 - neuron
THY1 - purkinjeneuron TUBA1B - nuerogensis
SNAP25 - granular neuron

# Number of cells per condition
n_cells[,c(1,16)]

##   sample  14
## 1  beads 532
## 2 debris 665

DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "15"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#4D4D4D")

VlnPlot(dataObject.unannotated,
        features = cluster15$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster15$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster15$gene[1:10]

##  [1] "NEFL"    "NRGN"    "SLC17A7" "VSNL1"   "THY1"    "CHN1"    "YWHAH"  
##  [8] "ENC1"    "NEFM"    "OLFM1"

conserved.cluster15$gene[1:10]

##  [1] "THY1"    "TUBA1B"  "SNAP25"  "VSNL1"   "NRGN"    "NEFL"    "CALM3"  
##  [8] "SLC17A7" "RTN1"    "CHN1"

Cluster 16 - neuron / interneuron

GRIK1 - purkinjeneuron CNTNAP2 - interneuron
GRIK2 - interneuron
RBFOX1 - neuron XKR4 - interneuron
SST - internueron ROBO2 - internueron
GRIP1 - interneuon PCDH11X - neuron

# Number of cells per condition
n_cells[,c(1,17)]

##   sample  15
## 1  beads 554
## 2 debris 542

DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "16"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#FFBE2D")

VlnPlot(dataObject.unannotated,
        features = cluster16$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster16$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster16$gene[1:10]

##  [1] "GRIK1"    "SST"      "ROBO2"    "KIAA1217" "RBFOX1"   "GRIP1"   
##  [7] "XKR4"     "PCDH11X"  "CNTNAP2"  "GRIK2"

conserved.cluster16$gene[1:10]

##  [1] "GRIK1"    "RBFOX1"   "CNTNAP2"  "KIAA1217" "GRIK2"    "XKR4"    
##  [7] "ROBO2"    "SNHG14"   "GRIP1"    "SYNPR"

Cluster 17 - neuron / interneuron

KCNIP4 - purkinjeneuron LRRTM4 - neuron / polydendocyte CSMD1 - neuron RALYL - interneuron FAM155A - interneuron NRXN1 - neuron FAM155A - interneuron

# Number of cells per condition
n_cells[,c(1,18)]

##   sample  16
## 1  beads 365
## 2 debris 713

DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "17"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "#80C7EF")

VlnPlot(dataObject.unannotated,
        features = cluster17$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster17$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster17$gene[1:10]

##  [1] "CH507-513H4.1" "KCNIP4"        "MTCO1P12"      "LRRTM4"       
##  [5] "CSMD1"         "RALYL"         "FAM155A"       "ROBO2"        
##  [9] "NRXN1"         "STXBP5L"

conserved.cluster17$gene[1:10]

##  [1] "CH507-513H4.1"  "CH507-528H12.1" "FAM155A"        "KCNIP4"        
##  [5] "MTCO1P12"       "LRRTM4"         "CSMD1"          "NRXN1"         
##  [9] "ADGRB3"         "RALYL"

Cluster 18 - interneurons

KIT - interneurons
SLC6A1 - interneurons
GAD2 - interneurons
GRIP1 - interneurons
PTPRT - neuron

# Number of cells per condition
n_cells[,c(1,19)]

##   sample  17
## 1  beads 438
## 2 debris 438

DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "18"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = colors[19])

VlnPlot(dataObject.unannotated,
        features = cluster18$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster18$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster18$gene[1:10]

##  [1] "KIT"    "SLC6A1" "GAD2"   "GRIP1"  "NRIP3"  "LAMP5"  "FGF13"  "SGCZ"  
##  [9] "GRIN2A" "PTPRT"

conserved.cluster18$gene[1:10]

##  [1] "FGF13"  "GAD2"   "GRIN2A" "KIT"    "NRIP3"  "SLC6A1" "SV2C"   "GRIP1" 
##  [9] "LAMP5"  "PTPRT"

Cluster 19 - microglia marcophage

STAB1 - microglia marcophage PLXDC2 - microglia marcophage C1QB - microglia marcophage
CD74 - microglia marcophage
SLC11A1 - microglia marcophage

# Number of cells per condition
n_cells[,c(1,20)]

##   sample  18
## 1  beads 391
## 2 debris 375

DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "19"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = colors[20])

VlnPlot(dataObject.unannotated,
        features = cluster19$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster19$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster19$gene[1:10]

##  [1] "C3"       "STAB1"    "FYB1"     "PLXDC2"   "C1QB"     "CD74"    
##  [7] "SLC11A1"  "ARHGAP24" "ST6GAL1"  "CHST11"

conserved.cluster19$gene[1:10]

##  [1] "DENND3"  "C3"      "PLXDC2"  "C1QB"    "SLC11A1" "CD74"    "FYB1"   
##  [8] "SRGAP2"  "STAB1"   "APBB1IP"

Cluster 20 - endothelial

CLDN5 - endothelial IFI27 - choroid plexus / endothelial
TGM2 - endothelial
ADAMTS9 - astrocyte
FLT1 - endothelial

# Number of cells per condition
n_cells[,c(1,21)]

##   sample  19
## 1  beads 235
## 2 debris 250

DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "20"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = colors[21])

VlnPlot(dataObject.unannotated,
        features = cluster20$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster20$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster20$gene[1:10]

##  [1] "CLDN5"         "ADAMTS9"       "IFI27"         "FLT1"         
##  [5] "TGM2"          "MT2A"          "SLCO4A1"       "RP11-326C3.17"
##  [9] "A2M"           "PODXL"

conserved.cluster20$gene[1:10]

##  [1] "CLDN5"   "IFI27"   "MT2A"    "FLT1"    "EGFL7"   "TGM2"    "ADAMTS9"
##  [8] "PODXL"   "SLCO4A1" "KLF2"

Cluster 21 - oligodendrocyte / polydendrocyte

CTNNA3 - mural PDE4B - oligodendrocyte
ST18 - granular neuron polydendrocyte / oligodendrocyte
PLCL1 - oligodendrocyte
DPYD - polydendrocyte

# Number of cells per condition
n_cells[,c(1,22)]

##   sample  20
## 1  beads 144
## 2 debris 267

DimPlot(object = subset(dataObject.unannotated, seurat_clusters == "21"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = colors[22])

VlnPlot(dataObject.unannotated,
        features = cluster21$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

VlnPlot(dataObject.unannotated,
        features = conserved.cluster21$gene[1:10],
        cols = colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

cluster21$gene[1:10]

##  [1] "CH507-513H4.1"     "CTNNA3"            "PDE4B"            
##  [4] "ST18"              "UNC5C"             "DPYD"             
##  [7] "PLCL1"             "ENSG10010138868.1" "MAN2A1"           
## [10] "NCKAP5"

conserved.cluster21$gene[1:10]

##  [1] "CH507-528H12.1"    "CH507-513H4.1"     "CTNNA3"           
##  [4] "PDE4B"             "ST18"              "NKAIN2"           
##  [7] "UNC5C"             "PLCL1"             "DPYD"             
## [10] "ENSG10010138868.1"

Assign identities

# Rename all identities
dataObject.annotated <- RenameIdents(object = dataObject.unannotated, 
                               "0" = "Oligodendrocytes / Polydendrocyte",
                               "1" = "Oligodendrocytes",
                               "2" = "Oligodendrocytes",
                               "3" = "Oligodendrocytes / Polydendrocyte",
                               "4" = "Astrocytes",
                               "5" = "Neurons",
                               "6" = "Polydendrocyte",
                               "7" = "Neurons",
                               "8" = "Neurons",
                               "9" = "Astrocytes",
                               "10" = "Interneurons",
                               "11" = "Neurons / Interneurons",
                               "12" = "Neurons / Interneurons",
                               "13" = "Neurons", 
                               "14" = "Neurons / Interneurons", 
                               "15" = "Neurons", 
                               "16" = "Neurons / Interneurons",
                               "17" = "Neurons / Interneurons", 
                               "18" = "Interneurons", 
                               "19" = "Microglia", 
                               "20" = "Endothelial", 
                               "21" = "Oligodendrocytes / Polydendrocyte")
dataObject.annotated$annotated_clusters <- factor(Idents(dataObject.annotated),
                                             levels = c("Astrocytes",
                                                        "Endothelial",
                                                        "Microglia", 
                                                        "Oligodendrocytes",
                                                        "Polydendrocyte", 
                                                        "Oligodendrocytes / Polydendrocyte",
                                                        "Interneurons",
                                                        "Neurons",
                                                        "Neurons / Interneurons"))
Idents(dataObject.annotated) <- "annotated_clusters"

UMAP - annotated

u1 <- DimPlot(dataObject.annotated,
              group.by = "annotated_clusters",
              cols = colors,
              raster = FALSE,
              label = FALSE) +
  ggtitle("LBD pilot brain")
u1

u2 <- DimPlot(dataObject.annotated,
              group.by = "annotated_clusters",
              cols = colors,
              dims = c(2,3),
              raster = FALSE,
              label = FALSE) +
  ggtitle("LBD pilot brain")
u2

Cell count

Table

Relative abundance

# sample
b2 <- dataObject.annotated@meta.data %>%
  group_by(annotated_clusters, sample) %>%
  dplyr::count() %>%
  group_by(annotated_clusters) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=annotated_clusters,y=percent, fill=sample)) +
  geom_col() +
  scale_fill_manual(values = sample_colors) +
  ggtitle("Percentage of sample per cluster") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  geom_hline(yintercept = 50, color = "black")
b2

Dot plot markers

markers.to.plot <-
  c(
    "SLC1A2",
    "GJA1",
    "CLDN5",
    "TGM2",
    "STAB1",
    "CD74",
    "VCAN",
    "TNR",
    "S100B",
    "ABCA2",
    "NRG1",
    "MEG3",
    "ERBB4",
    "GAD1"
  )
dot <-DotPlot(dataObject.annotated, features = markers.to.plot, cols = sample_colors,
    dot.scale = 8, split.by = "sample") + RotatedAxis()
dot

LBD CWOW Brain Single Nucleus Find Markers

Kimberly Olney

06/08/2023