Sample NPID control female NA04-324
library(dplyr)
library(Seurat)
library(patchwork)
library(knitr)
library(dittoSeq)
library(reshape2)
Sys.setenv(RSTUDIO_PANDOC="/usr/local/biotools/pandoc/3.1.2/bin")
sampleID <- c("v4")
sample_color.panel <- c("#B4AF46")
color.panel <- dittoColors()
See script 01 for pre-processing and QC filtering. script 02 runs FindAllMarkers. Output is Wilcox and ROC.
dataObject <- readRDS(file = paste0("../../rObjects/", sampleID, ".filtered.pass_strict.doublet_info.rds"))
markers.strict <- readRDS(file = paste0("../../rObjects/markers_pass_strict/",
sampleID, "_FindAllMarkers_strict_logFC1_FDR0.05.rds"))
ROC.markers <- readRDS(file = paste0("../../rObjects/markers_pass_strict/",
sampleID, "_ROC_markers.rds"))
ditto_umap <- dittoDimPlot(object = dataObject,
var = "seurat_clusters",
reduction.use = "umap",
do.label = TRUE,
labels.highlight = TRUE)
ditto_umap
count_per_cluster <- FetchData(dataObject,
vars = c("ident", "orig.ident")) %>%
dplyr::count(ident, orig.ident) %>%
tidyr::spread(ident, n)
count_per_cluster
## orig.ident 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## 1 v4 1913 1779 1370 1351 1323 1313 1259 744 629 517 484 469 372 342 228
## 15 16 17
## 1 222 202 131
count_melt <- reshape2::melt(count_per_cluster)
colnames(count_melt) <- c("ident", "cluster", "number of nuclei")
count_max <- count_melt[which.max(count_melt$`number of nuclei`), ]
count_max_value <- count_max$`number of nuclei`
cellmax <- count_max_value + 200 # so that the figure doesn't cut off the text
count_bar <- ggplot(count_melt, aes(x = factor(cluster), y = `number of nuclei`, fill = `ident`)) +
geom_bar(
stat = "identity",
colour = "black",
width = 1,
position = position_dodge(width = 0.8)
) +
geom_text(
aes(label = `number of nuclei`),
position = position_dodge(width = 0.9),
vjust = -0.25,
angle = 45,
hjust = -.01
) +
theme_classic() + scale_fill_manual(values = sample_color.panel) +
ggtitle("Number of nuclei per cluster") + xlab("cluster") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_y_continuous(limits = c(0, cellmax))
count_bar
dataObject <- BuildClusterTree(
object = dataObject,
dims = 1:30,
reorder = FALSE,
reorder.numeric = FALSE
)
tree <- dataObject@tools$BuildClusterTree
tree$tip.label <- paste0("Cluster ", tree$tip.label)
nClusters <- length(tree$tip.label)
tree_graph <- ggtree::ggtree(tree, aes(x, y)) +
scale_y_reverse() +
ggtree::geom_tree() +
ggtree::theme_tree() +
ggtree::geom_tiplab(offset = 1) +
ggtree::geom_tippoint(color = color.panel[1:nClusters],
shape = 16,
size = 5) +
coord_cartesian(clip = 'off') +
theme(plot.margin = unit(c(0, 2.5, 0, 0), 'cm'))
tree_graph
Seurat can help you find markers that define clusters via differential expression (DE). By default, it identifies positive and negative markers of a single cluster (specified in ident.1), compared to all other cells. FindAllMarkers() automates this process for all clusters, but you can also test groups of clusters vs. each other, or against all cells.
The default is wilcox which identifies differentially expressed genes between two groups of cells using a Wilcoxon Rank Sum test. ## FindAllMarkers default wilxcon
# find markers for every cluster compared to all remaining cells,
# report only the positive ones
all.markers <- FindAllMarkers(dataObject, only.pos = TRUE)
# rearrange to order by cluster & filter to only include log2FC > 1 & FDR < 0.05
markers.strict <- all.markers %>%
group_by(cluster) %>%
dplyr::filter(avg_log2FC > 1 & p_val_adj < 0.05)
# compare
table(all.markers$cluster)
table(markers.strict$cluster)
Get markers for each cluster
# subset
cluster0 <- markers.strict[markers.strict$cluster == 0,]
cluster1 <- markers.strict[markers.strict$cluster == 1,]
cluster2 <- markers.strict[markers.strict$cluster == 2,]
cluster3 <- markers.strict[markers.strict$cluster == 3,]
cluster4 <- markers.strict[markers.strict$cluster == 4,]
cluster5 <- markers.strict[markers.strict$cluster == 5,]
cluster6 <- markers.strict[markers.strict$cluster == 6,]
cluster7 <- markers.strict[markers.strict$cluster == 7,]
cluster8 <- markers.strict[markers.strict$cluster == 8,]
cluster9 <- markers.strict[markers.strict$cluster == 9,]
cluster10 <- markers.strict[markers.strict$cluster == 10,]
cluster11 <- markers.strict[markers.strict$cluster == 11,]
cluster12 <- markers.strict[markers.strict$cluster == 12,]
cluster13 <- markers.strict[markers.strict$cluster == 13,]
cluster14 <- markers.strict[markers.strict$cluster == 14,]
cluster15 <- markers.strict[markers.strict$cluster == 15,]
cluster16 <- markers.strict[markers.strict$cluster == 16,]
cluster17 <- markers.strict[markers.strict$cluster == 17,]
Seurat has several tests for differential expression which can be set with the test.use parameter (see https://satijalab.org/seurat/articles/de_vignette for details). For example, the ROC test returns the ‘classification power’ for any individual marker (ranging from 0 - random, to 1 - perfect).
Best if run in a script as the compute takes ~3 hours to run.
ROC.markers <-
FindAllMarkers(
dataObject,
logfc.threshold = 0.25,
test.use = "roc",
only.pos = TRUE
)
Get markers for each cluster
# subset
ROC.cluster0 <- ROC.markers[ROC.markers$cluster == 0,]
ROC.cluster1 <- ROC.markers[ROC.markers$cluster == 1,]
ROC.cluster2 <- ROC.markers[ROC.markers$cluster == 2,]
ROC.cluster3 <- ROC.markers[ROC.markers$cluster == 3,]
ROC.cluster4 <- ROC.markers[ROC.markers$cluster == 4,]
ROC.cluster5 <- ROC.markers[ROC.markers$cluster == 5,]
ROC.cluster6 <- ROC.markers[ROC.markers$cluster == 6,]
ROC.cluster7 <- ROC.markers[ROC.markers$cluster == 7,]
ROC.cluster8 <- ROC.markers[ROC.markers$cluster == 8,]
ROC.cluster9 <- ROC.markers[ROC.markers$cluster == 9,]
ROC.cluster10 <- ROC.markers[ROC.markers$cluster == 10,]
ROC.cluster11 <- ROC.markers[ROC.markers$cluster == 11,]
ROC.cluster12 <- ROC.markers[ROC.markers$cluster == 12,]
ROC.cluster13 <- ROC.markers[ROC.markers$cluster == 13,]
ROC.cluster14 <- ROC.markers[ROC.markers$cluster == 14,]
ROC.cluster15 <- ROC.markers[ROC.markers$cluster == 15,]
ROC.cluster16 <- ROC.markers[ROC.markers$cluster == 16,]
ROC.cluster17 <- ROC.markers[ROC.markers$cluster == 17,]
# print top variable genes
top100 <- dataObject@assays$SCT@var.features[1:100]
top100
## [1] "DPP10" "TNR" "LHFPL3" "ROBO2"
## [5] "KCNIP4" "DSCAM" "DTNA" "OPCML"
## [9] "SLC1A3" "ADAM28" "CEMIP" "MMP16"
## [13] "NRG3" "SLC11A1" "FLT1" "SLC1A2"
## [17] "FGF14" "ADCY2" "LRRTM4" "CSMD1"
## [21] "FAM189A2" "DOCK8" "TNC" "SYT1"
## [25] "HIF1A-AS3" "PTPRZ1" "LRMDA" "ADAMTS9"
## [29] "RP11-6L16.1" "SORBS1" "GLIS3" "SNTG1"
## [33] "ARHGAP24" "RP11-358F13.1" "NRXN1" "ZSCAN31"
## [37] "TLR2" "CD44" "RALYL" "SRGN"
## [41] "GRIK1" "GPC5" "CNTNAP2" "USP53"
## [45] "APBB1IP" "GALNTL6" "OLR1" "RFX4"
## [49] "LAMA2" "RYR3" "PLP1" "DENND3"
## [53] "LRP1B" "CHI3L1" "VCAN" "SAT1"
## [57] "ATRNL1" "PITPNC1" "CA10" "PDE3B"
## [61] "AQP4" "WWC1" "BEST3" "GFAP"
## [65] "RBFOX1" "NXPH1" "NRCAM" "ST6GAL1"
## [69] "GRID2" "CHST11" "GPM6A" "ABCB1"
## [73] "CXCL14" "AHCYL1" "GRIK2" "PTPRC"
## [77] "INHBA" "SNHG14" "DLGAP1" "LRRC4C"
## [81] "SPARCL1" "MT2A" "CNTN5" "RP11-13N12.1"
## [85] "LINC01010" "LNCAROD" "FAM155A" "FYB1"
## [89] "ADGRV1" "IRAG1" "PTPRM" "GNA14"
## [93] "CH507-528H12.1" "RP11-909M7.3" "MEF2C" "LUZP2"
## [97] "CTNNA2" "ITPR2" "SLC2A3" "BRINP3"
write.table(top100, paste0("../../results/pass_strict/variance/", sampleID, "_top100_variable_features.txt"),
sep = "\t", row.names = FALSE, quote = FALSE)
# Printing out the most variable genes driving PCs
print(x = dataObject[["pca"]],
dims = 1:5,
nfeatures = 10)
## PC_ 1
## Positive: PLP1, ST18, QKI, IL1RAPL1, SIK3, CTNNA3, PIP4K2A, RNF220, TMTC2, MBP
## Negative: DPP10, TNR, NRG3, DTNA, LHFPL3, ADCY2, SLC1A3, OPCML, PTPRZ1, DSCAM
## PC_ 2
## Positive: DPP10, DTNA, SLC1A3, FAM189A2, ADCY2, GLIS3, SORBS1, RFX4, RYR3, RP11-6L16.1
## Negative: TNR, LHFPL3, OPCML, DSCAM, CSMD1, FGF14, MMP16, KCNIP4, LRRTM4, SNTG1
## PC_ 3
## Positive: ADAM28, SLC11A1, LRMDA, DOCK8, ARHGAP24, DENND3, TLR2, APBB1IP, PTPRC, RP11-358F13.1
## Negative: DPP10, DTNA, NRG3, ADCY2, FAM189A2, SORBS1, SLC1A2, PCDH9, LSAMP, TNR
## PC_ 4
## Positive: ROBO2, SYT1, CNTNAP2, SNHG14, RBFOX1, RALYL, RP11-909M7.3, GALNTL6, FRMPD4, CNTN5
## Negative: TNR, LHFPL3, PTPRZ1, DSCAM, MMP16, VCAN, CA10, BEST3, MEGF11, RP4-668E10.4
## PC_ 5
## Positive: CEMIP, FLT1, ADAMTS9, USP53, MT2A, DPP10, TNC, ABCB1, CD44, FGFR1
## Negative: SLC1A3, SLC1A2, GPC5, PCDH9, CABLES1, PLP1, GNA14, ADGRV1, RYR3, NRXN1
# get the top 10 markers for each cluster
top10_Markers <- markers.strict %>%
group_by(cluster) %>%
dplyr::filter(avg_log2FC > 2) %>%
slice_head(n = 5) %>%
ungroup()
# DoHeatmap function for plotting
heat_markers <-
DoHeatmap(dataObject, features = top10_Markers$gene) + NoLegend()
# downsample to 100 nuclei per cluster.
heat_markers_downsample <-
DoHeatmap(subset(dataObject, downsample = 100),
features = top10_Markers$gene,
size = 3)
top10_ROC <- ROC.markers %>%
group_by(cluster) %>%
dplyr::filter(avg_log2FC > 2 & myAUC > 0.8) %>%
slice_head(n = 5) %>%
ungroup()
heat_ROC <-
DoHeatmap(dataObject, features = top10_ROC$gene) + NoLegend()
heat_ROC_downsample <-
DoHeatmap(subset(dataObject, downsample = 100),
features = top10_ROC$gene,
size = 3)
heat_markers
heat_markers_downsample
heat_ROC
heat_ROC_downsample
# MALAT1 is highly expressed in brain nuclei
RidgePlot(dataObject, features = c("XIST", "MALAT1"), ncol = 2)
RidgePlot(dataObject, features = c("GFAP"), ncol = 1)
RidgePlot(dataObject, features = c("VCAN"), ncol = 1)
# UMAP showing the expression of select features
umap_feature <-
FeaturePlot(dataObject,
features = c("TYROBP", "MOG", "AQP4", "ENO2", "XIST", "MALAT1"))
umap_feature
markers.to.plot <-
c(
"SLC1A2",
"GJA1",
"CLDN5",
"TGM2",
"STAB1",
"CD74",
"VCAN",
"TNR",
"S100B",
"ABCA2",
"NRG1",
"MEG3",
"ERBB4",
"GAD1"
)
dot <- DotPlot(dataObject, features = markers.to.plot,
dot.scale = 8) + RotatedAxis()
dot
Markers obtained from dropviz
# Astrocytes
VlnPlot(dataObject,
features = c("AQP4", "GJA1", "F3"))
# Endothelial
VlnPlot(dataObject,
features = c("CLDN5", "ADGRF5", "FLT1"))
# Fibroblast-Like_Dcn
VlnPlot(dataObject,
features = c("DCN", "SLC6A13", "IGF2"))
# Oligodendrocyte
VlnPlot(dataObject,
features = c("MOG", "OLIG1", "MBP"))
# Polydendrocyte
VlnPlot(dataObject,
features = c("VCAN", "TNR", "C1QL1"))
# Smooth muscle
VlnPlot(dataObject,
features = c("ACTA2", "RGS5"))
# Microglia / Marchophage
VlnPlot(dataObject,
features = c("CTSS", "C1QC", "C1QB"))
# Microglia / Marchophage
VlnPlot(dataObject,
features = c("TYROBP","P2RY12", "AIF1"))
# Interneuron
VlnPlot(dataObject,
features = c("VIP", "CRH", "KIT"))
# Interneuron - GAD1 & GAD2
VlnPlot(dataObject,
features = c("GAD1", "GAD2"))
## Feature plot - Canonical cell-type markers
# neuron
dataObject[["percent.neurons"]] <- PercentageFeatureSet(dataObject, features = c("SNAP25","SYT1", "GAD1", "GAD2"))
# oligodendrocyte
dataObject[["percent.oligo"]] <- PercentageFeatureSet(dataObject, features = c("PLP1","MBP", "MOG"))
# oligodendrocyte precursor cells
dataObject[["percent.opc"]] <- PercentageFeatureSet(dataObject, features = c("OLIG1","PDGFRA", "VCAN"))
# astrocyte
dataObject[["percent.astro"]] <- PercentageFeatureSet(dataObject, features = c("CLU","GFAP", "AQP4", "CLDN5", "ADGRF5", "FLT1"))
# microglia
dataObject[["percent.micro"]] <- PercentageFeatureSet(dataObject, features = c("HEXB","C1QA", "CTSS", "C1QC", "C1QB", "TYROBP","P2RY12", "AIF1"))
# endothelial
dataObject[["percent.endo"]] <- PercentageFeatureSet(dataObject, features = c("RGS5", "IGFBP7", "FN1", "CLDN5", "ADGRF5", "FLT1"))
# neuron
FeaturePlot(dataObject, features = "percent.neurons", label = TRUE)
# oligodendrocyte
FeaturePlot(dataObject, features = "percent.oligo", label = TRUE)
# oligodendrocyte precursor cells
FeaturePlot(dataObject, features = "percent.opc", label = TRUE)
# astrocyte
FeaturePlot(dataObject, features = "percent.astro", label = TRUE)
# microglia
FeaturePlot(dataObject, features = "percent.micro", label = TRUE)
# endothelial
FeaturePlot(dataObject, features = "percent.endo", label = TRUE)
# Number of cells per condition
count_per_cluster[,c(1,2)]
## orig.ident 0
## 1 v4 1913
# UMAP with only cluster 0
DimPlot(object = subset(dataObject, seurat_clusters == "0"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
cols = color.panel[1])
VlnPlot(dataObject,
features = cluster0$gene[1:10],
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
VlnPlot(dataObject,
features = ROC.cluster0$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
cluster0$gene[1:10]
## [1] "CNP" "LINC00844" "APOD" "LINC01608" "SUN2" "FTH1"
## [7] "VWA1" "MBP" "PLP1" "AMER2"
ROC.cluster0$gene[1:10]
## [1] "PLP1" "CNP" "MBP" "LINC00844" "APOD" "FTH1"
## [7] "SUN2" "EDIL3" "AMER2" "LINC01608"
# Number of cells per condition
count_per_cluster[,c(1,3)]
## orig.ident 1
## 1 v4 1779
DimPlot(object = subset(dataObject, seurat_clusters == "1"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
cols = "#56B4E9")
VlnPlot(dataObject,
features = cluster1$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
#VlnPlot(dataObject,
# features = ROC.cluster1$gene[1],
# cols = color.panel,
# flip = TRUE,
# split.by = "seurat_clusters")
cluster1$gene[1:10]
## [1] "LINC01608" "DYSF" "KIF19" "RP11-50D16.4"
## [5] "RP4-537K23.4" "CR1" "LINC02073" "RP11-143J24.1"
## [9] "KLRK1-AS1" "PRAMEF10"
#ROC.cluster1$gene[1:10]
# Number of cells per condition
count_per_cluster[,c(1,4)]
## orig.ident 2
## 1 v4 1370
DimPlot(object = subset(dataObject, seurat_clusters == "2"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
cols = "#009E73")
VlnPlot(dataObject,
features = cluster2$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
#VlnPlot(dataObject,
# features = ROC.cluster2$gene[1:10],
# cols = color.panel,
# stack = TRUE,
# flip = TRUE,
# split.by = "seurat_clusters")
cluster2$gene[1:10]
## [1] "CCDC200" "LINC00685" "CERS4" "SLCO5A1"
## [5] "LINC00463" "MMP17" "COL19A1" "RP11-322M19.4"
## [9] "BPTFP1" "PRDM1"
ROC.cluster2$gene[1:10]
## [1] "TERT" NA NA NA NA NA NA NA NA NA
# Number of cells per condition
count_per_cluster[,c(1,4)]
## orig.ident 2
## 1 v4 1370
DimPlot(object = subset(dataObject, seurat_clusters == "3"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
cols = "#F0E442")
VlnPlot(dataObject,
features = cluster3$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
VlnPlot(dataObject,
features = ROC.cluster3$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
cluster3$gene[1:10]
## [1] "MARCKSL1" "SLC5A11" "SPP1" "TUBA1A" "CNP" "SELENOP"
## [7] "S100B" "STMN1" "FTL" "RASGRF1"
ROC.cluster3$gene[1:10]
## [1] "TF" "PLP1" "CNP" "TUBA1A" "MBP" "MARCKSL1"
## [7] "SPP1" "FTL" "SLC5A11" "FTH1"
# Number of cells per condition
count_per_cluster[,c(1,5)]
## orig.ident 3
## 1 v4 1351
DimPlot(object = subset(dataObject, seurat_clusters == "4"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
cols = "#0072B2")
VlnPlot(dataObject,
features = cluster4$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
VlnPlot(dataObject,
features = ROC.cluster4$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
cluster4$gene[1:10]
## [1] "FGFR1" "UHRF2" "SH3RF1" "PLA2G4C" "ACTN2" "AMPD3" "IL6R"
## [8] "GADD45B" "GLUL" "NAV2"
ROC.cluster4$gene[1:10]
## [1] "AKAP6" "PRUNE2" "PLA2G4C" "NAV2" "TMTC2" "GLUL" "ZNF536"
## [8] "NCOA7" "SPOCK1" "SIK3"
# Number of cells per condition
count_per_cluster[,c(1,6)]
## orig.ident 4
## 1 v4 1323
DimPlot(object = subset(dataObject, seurat_clusters == "5"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
cols = "#D55E00")
VlnPlot(dataObject,
features = cluster5$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
cluster5$gene[1:10]
## [1] "GADD45B" "FGFR1" "FOS" "EGR1" "CNDP1" "PRUNE2" "DDIT3"
## [8] "ELL2" "GLUL" "PTPRM"
VlnPlot(dataObject,
features = ROC.cluster5$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
cluster5$gene[1:10]
## [1] "GADD45B" "FGFR1" "FOS" "EGR1" "CNDP1" "PRUNE2" "DDIT3"
## [8] "ELL2" "GLUL" "PTPRM"
ROC.cluster5$gene[1:10]
## [1] "CNDP1" "PRUNE2" "SIK3" "PIP4K2A" "ELL2" "GLUL" "FGFR1"
## [8] "GADD45B" "PTPRM" "GUSBP2"
# Number of cells per condition
count_per_cluster[,c(1,7)]
## orig.ident 5
## 1 v4 1313
DimPlot(object = subset(dataObject, seurat_clusters == "6"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
cols = "#CC79A7")
VlnPlot(dataObject,
features = cluster6$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
cluster6$gene[1:10]
## [1] "SLC5A11" "LINC00609" "LRRC63" "RASGRF1" "RP1-223B1.1"
## [6] "RP11-141F7.1" "ANKRD18A" "SAMD12" "LDB3" "AC012593.1"
ROC.cluster6$gene[1:10]
## [1] "SLC5A11" "LINC00609" "CTNNA3" "SAMD12" "RP1-223B1.1"
## [6] "LRRC63" "ANKRD18A" "LDB3" NA NA
QDPR - oligodendrocyte
CD9 - polydendrocyte
SUN2 - microglia SYT11 - oligodendrocyte
AMD1 - oligodendrocyte
APOD - fibroblast & oligodendrocyte
# Number of cells per condition
count_per_cluster[,c(1,8)]
## orig.ident 6
## 1 v4 1259
DimPlot(object = subset(dataObject, seurat_clusters == "7"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
cols = "#666666")
VlnPlot(dataObject,
features = cluster7$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
cluster7$gene[1:10]
## [1] "QDPR" "CD9" "SUN2" "SYT11" "AMD1" "APOD" "VWA1" "H3-3B" "SFRP1"
## [10] "PSAP"
ROC.cluster7$gene[1:10]
## [1] "GPM6B" "PLP1" "QDPR" "CNDP1" "MALAT1" NA NA NA
## [9] NA NA
# Number of cells per condition
count_per_cluster[,c(1,9)]
## orig.ident 7
## 1 v4 744
DimPlot(object = subset(dataObject, seurat_clusters == "8"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
cols = "#AD7700")
VlnPlot(dataObject,
features = cluster8$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
cluster8$gene[1:10]
## [1] "CH507-528H12.1" "CH507-513H4.1" "SLC5A11" "ANKRD18A"
## [5] "PLA2G4C" "SEC14L5" "CLIP2" "SLCO5A1"
## [9] "RP11-159L20.2" "CCDC144B"
ROC.cluster8$gene[1:10]
## [1] "CH507-528H12.1" "CH507-513H4.1" "SLC5A11" "CCDC88A"
## [5] "ANKRD18A" "TERT" NA NA
## [9] NA NA
# Number of cells per condition
count_per_cluster[,c(1,10)]
## orig.ident 8
## 1 v4 629
DimPlot(object = subset(dataObject, seurat_clusters == "9"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
cols = "#1C91D4")
VlnPlot(dataObject,
features = cluster9$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
VlnPlot(dataObject,
features = ROC.cluster9$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
cluster9$gene[1:10]
## [1] "GLIS3" "CD44" "FAM189A2" "ADCY2" "GFAP"
## [6] "ABLIM1" "RFX4" "WWC1" "RP11-6L16.1" "NRG3"
ROC.cluster9$gene[1:10]
## [1] "DPP10" "DTNA" "NRG3" "GLIS3" "SORBS1" "CD44"
## [7] "ADCY2" "ABLIM1" "GFAP" "FAM189A2"
FRY - mural / endothelial
KCNIP4 - neuron
CNTN1 - polydendrocyte
ERBB4 - inerneuron
# Number of cells per condition
count_per_cluster[,c(1,11)]
## orig.ident 9
## 1 v4 517
DimPlot(object = subset(dataObject, seurat_clusters == "10"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
cols = "#007756")
VlnPlot(dataObject,
features = cluster10$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
VlnPlot(dataObject,
features = ROC.cluster10$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
cluster10$gene[1:10]
## [1] "FRY" "KCNIP4" "CNTN1" "ERBB4"
## [5] "FCHSD2" "RP11-791I24.5" "LAMA2" "CTNND2"
## [9] "KCNAB1" "TMPRSS5"
ROC.cluster10$gene[1:10]
## [1] "FRY" "ERBB4" "KCNIP4" "FCHSD2" "PPP2R2B" "CTNND2" "CNTN1"
## [8] "ANK3" "LAMA2" "TMPRSS5"
# Number of cells per condition
count_per_cluster[,c(1,12)]
## orig.ident 10
## 1 v4 484
DimPlot(object = subset(dataObject, seurat_clusters == "11"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
cols = "#D5C711")
VlnPlot(dataObject,
features = cluster11$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
VlnPlot(dataObject,
features = ROC.cluster11$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
cluster11$gene[1:10]
## [1] "ADAM28" "SLC11A1" "DOCK8" "LRMDA" "ARHGAP24" "DENND3"
## [7] "APBB1IP" "PTPRC" "MEF2C" "CHST11"
ROC.cluster11$gene[1:10]
## [1] "LRMDA" "DOCK8" "ADAM28" "SRGAP2B" "SRGAP2" "ARHGAP24"
## [7] "ARHGAP26" "SLC11A1" "MEF2C" "PLXDC2"
# Number of cells per condition
count_per_cluster[,c(1,13)]
## orig.ident 11
## 1 v4 469
DimPlot(object = subset(dataObject, seurat_clusters == "12"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
cols = "#005685")
VlnPlot(dataObject,
features = cluster12$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
VlnPlot(dataObject,
features = ROC.cluster12$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
cluster12$gene[1:10]
## [1] "TNR" "MMP16" "VCAN" "PTPRZ1" "CA10" "LHFPL3" "MEGF11" "FGF14"
## [9] "DSCAM" "LUZP2"
ROC.cluster12$gene[1:10]
## [1] "TNR" "LHFPL3" "PTPRZ1" "FGF14" "OPCML" "DSCAM" "LRRC4C" "CSMD1"
## [9] "MMP16" "VCAN"
# Number of cells per condition
count_per_cluster[,c(1,14)]
## orig.ident 12
## 1 v4 372
DimPlot(object = subset(dataObject, seurat_clusters == "13"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
cols = "#A04700")
VlnPlot(dataObject,
features = cluster13$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
VlnPlot(dataObject,
features = ROC.cluster13$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
cluster13$gene[1:10]
## [1] "IRAG1" "SLC1A2" "BMPR1B" "FAM189A2" "AQP4" "RFX4"
## [7] "ADGRV1" "RYR3" "ADCY2" "GLIS3"
ROC.cluster13$gene[1:10]
## [1] "GPC5" "SLC1A3" "DTNA" "SLC1A2" "AHCYL1" "RYR3" "ADGRV1" "NRG3"
## [9] "CLU" "ADCY2"
# Number of cells per condition
count_per_cluster[,c(1,15)]
## orig.ident 13
## 1 v4 342
DimPlot(object = subset(dataObject, seurat_clusters == "14"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
cols = "#B14380")
VlnPlot(dataObject,
features = cluster14$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
#VlnPlot(dataObject,
# features = ROC.cluster14$gene[1:10],
# cols = color.panel,
# stack = TRUE,
# flip = TRUE,
# split.by = "seurat_clusters")
cluster14$gene[1:10]
## [1] "ZSCAN31" "SLC5A11" "RP11-141F7.1" "SAMD12" "PLEKHG1"
## [6] "LINC00609" "RP1-223B1.1" "MIR4280HG" "LRRC63" "AC012593.1"
ROC.cluster14$gene[1:10]
## [1] "ZSCAN31" "SLC5A11" "SAMD12" "CTNNA3" "LINC00609" NA
## [7] NA NA NA NA
# Number of cells per condition
count_per_cluster[,c(1,16)]
## orig.ident 14
## 1 v4 228
DimPlot(object = subset(dataObject, seurat_clusters == "15"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
cols = "#4D4D4D")
VlnPlot(dataObject,
features = cluster15$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
VlnPlot(dataObject,
features = ROC.cluster15$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
cluster15$gene[1:10]
## [1] "RP11-909M7.3" "MEG3" "SYT1" "RALYL" "STXBP5L"
## [6] "ARPP21" "KHDRBS2" "NELL2" "SNAP25" "SCN2A"
ROC.cluster15$gene[1:10]
## [1] "SYT1" "RBFOX1" "SNHG14" "RP11-909M7.3" "MEG3"
## [6] "CSMD1" "NRXN1" "OPCML" "FAM155A" "CELF2"
# Number of cells per condition
count_per_cluster[,c(1,17)]
## orig.ident 15
## 1 v4 222
DimPlot(object = subset(dataObject, seurat_clusters == "16"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
cols = color.panel[17])
VlnPlot(dataObject,
features = cluster16$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
VlnPlot(dataObject,
features = ROC.cluster16$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
cluster16$gene[1:10]
## [1] "RP11-909M7.3" "PAK3" "MEG3" "GRIP1" "MYT1L"
## [6] "CELF4" "GRIN2B" "KCNC2" "GRIA1" "CACNA1B"
ROC.cluster16$gene[1:10]
## [1] "SNHG14" "CNTNAP2" "DLGAP1" "GRIK2" "RP11-909M7.3"
## [6] "FGF14" "PLCB1" "NRG3" "STXBP5L" "GRIP1"
EBF1 - mural
NID1 - fibroblast-like / mural Â
CFH - fibroblast-like Â
LEF1 - endothelial
DCN - fibroblast-like
COL1A2 - fibroblast-like
IFITM1 - mural / fibroblast-like
# Number of cells per condition
count_per_cluster[,c(1,18)]
## orig.ident 16
## 1 v4 202
DimPlot(object = subset(dataObject, seurat_clusters == "17"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
cols = color.panel[18])
VlnPlot(dataObject,
features = cluster17$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
VlnPlot(dataObject,
features = ROC.cluster17$gene[1:10],
cols = color.panel,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")
cluster17$gene[1:10]
## [1] "EBF1" "NID1" "CFH" "LEF1" "DCN" "COL1A2" "IFITM1" "NR2F2"
## [9] "PDLIM1" "TBX18"
ROC.cluster17$gene[1:10]
## [1] "RBMS3" "PTPRG" "USP53" "RP11-9J18.1"
## [5] "ZEB1" "SVIL" "ARHGAP29" "EPS8"
## [9] "RBPMS" "RP11-649A16.1"
# Rename all identities
dataObject.annotated <- RenameIdents(object = dataObject,
"0" = "Oligodendrocytes",
"1" = "Noise",
"2" = "Noise",
"3" = "Oligodendrocytes",
"4" = "Noise",
"5" = "Noise",
"6" = "Noise",
"7" = "Oligodendrocytes",
"8" = "Noise",
"9" = "Astrocytes",
"10" = "Noise",
"11" = "Microglia",
"12" = "OPC",
"13" = "Astrocytes",
"14" = "Noise",
"15" = "Neurons",
"16" = "Neurons",
"17" = "Fibroblast-like")
dataObject.annotated$annotated_clusters <- factor(Idents(dataObject.annotated),
levels = c("Astrocytes",
"Fibroblast-like",
"Microglia",
"Oligodendrocytes",
"OPC",
"Neurons",
"Noise"))
Idents(dataObject.annotated) <- "annotated_clusters"
ditto_umap <- dittoDimPlot(object = dataObject.annotated,
var = "annotated_clusters",
reduction.use = "umap",
do.label = TRUE,
labels.highlight = TRUE)
ditto_umap
markers.to.plot <-
c(
"SLC1A2",
"GJA1",
"CLDN5",
"TGM2",
"STAB1",
"CD74",
"VCAN",
"TNR",
"S100B",
"ABCA2",
"NRG1",
"MEG3",
"ERBB4",
"GAD1"
)
dot <- DotPlot(dataObject.annotated, features = markers.to.plot,
dot.scale = 8) + RotatedAxis()
dot
count_per_cluster <- FetchData(dataObject.annotated,
vars = c("ident", "orig.ident")) %>%
dplyr::count(ident, orig.ident) %>%
tidyr::spread(ident, n)
count_per_cluster
## orig.ident Astrocytes Fibroblast-like Microglia Oligodendrocytes OPC Neurons
## 1 v4 859 131 469 4008 372 424
## Noise
## 1 8385
count_melt <- reshape2::melt(count_per_cluster)
colnames(count_melt) <- c("ident", "cluster", "number of nuclei")
count_max <- count_melt[which.max(count_melt$`number of nuclei`), ]
count_max_value <- count_max$`number of nuclei`
cellmax <- count_max_value + 500 # so that the figure doesn't cut off the text
count_bar <- ggplot(count_melt, aes(x = factor(cluster), y = `number of nuclei`, fill = `cluster`)) +
geom_bar(
stat = "identity",
colour = "black",
width = 1,
position = position_dodge(width = 0.8)
) +
geom_text(
aes(label = `number of nuclei`),
position = position_dodge(width = 0.9),
vjust = -0.25,
angle = 45,
hjust = -.01
) +
theme_classic() + scale_fill_manual(values = color.panel) +
ggtitle("Number of nuclei per cluster") + xlab("cluster") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_y_continuous(limits = c(0, cellmax))
count_bar
## Relative abundance
relative_abundance <- dataObject.annotated@meta.data %>%
group_by(annotated_clusters, orig.ident) %>%
dplyr::count() %>%
group_by(orig.ident) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=orig.ident,y=percent, fill=annotated_clusters)) +
geom_col() +
scale_fill_manual(values = color.panel) +
ggtitle("Percentage of cell type") +
theme_bw()+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
relative_abundance
saveRDS(dataObject.annotated, paste0("../../rObjects/", sampleID, ".annotated.pass_strict.rds"))