Read in the processed object from which doublets have already been removed. Annotate the clusters, remove the noise clusters, and reprocess. Check clustering resolutions and save the object.
# read object
# The path is built from `treatment`, which must be defined before this point.
dataObject <- readRDS(file = paste0("../rObjects/", treatment, "_unannotated_doublets_removed.rds"))
# Make RNA the active assay for the processing steps below.
DefaultAssay(dataObject) <- "RNA"
# Normalize, pick variable features and scale, then join the layers,
# in exactly this order.
dataObject <- NormalizeData(dataObject)
dataObject <- FindVariableFeatures(dataObject)
dataObject <- ScaleData(dataObject)
dataObject <- JoinLayers(dataObject)
# inspect
dataObject
## An object of class Seurat
## 16144 features across 3376 samples within 2 assays
## Active assay: RNA (8072 features, 2000 variable features)
## 3 layers present: counts, data, scale.data
## 1 other assay present: SCT
## 2 dimensional reductions calculated: pca, umap
# UMAP coloured by the current `seurat_clusters` assignment, with highlighted
# cluster labels drawn on the plot.
ditto_umap <- dittoDimPlot(
  object = dataObject,
  var = "seurat_clusters",
  reduction.use = "umap",
  do.label = TRUE,
  labels.highlight = TRUE
)
ditto_umap
### Cluster tree
# Build the tree on dims 1:30 without reordering the identities.
dataObject <- BuildClusterTree(
  dataObject,
  dims = 1:30,
  reorder = FALSE,
  reorder.numeric = FALSE
)

# Pull the stored tree out of the object and prefix every tip label.
tree <- dataObject@tools$BuildClusterTree
tree$tip.label <- paste("Cluster", tree$tip.label)
nClusters <- length(tree$tip.label)

# One coloured point per tip, taken from the first nClusters panel colours.
# Clipping is switched off and the right margin widened so tip labels are not
# cut at the panel edge.
tree_graph <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(
    color = color.panel[1:nClusters],
    shape = 16,
    size = 5
  ) +
  coord_cartesian(clip = "off") +
  theme(plot.margin = unit(c(0, 2.5, 0, 0), "cm"))
tree_graph
# Count cells per (ident, orig.ident) pair and spread the idents into columns,
# giving one row per orig.ident.
count_per_cluster <- dataObject %>%
  FetchData(vars = c("ident", "orig.ident")) %>%
  dplyr::count(ident, orig.ident) %>%
  tidyr::spread(key = ident, value = n)
count_per_cluster
## orig.ident 0 1 2 3 4 5 6 7
## 1 SeuratProject 811 578 558 514 481 233 135 66
# Long format: one row per (orig.ident, cluster) with its nuclei count.
count_melt <- reshape2::melt(count_per_cluster)
names(count_melt) <- c("ident", "cluster", "number of nuclei")

# Largest count, used to set the y-axis ceiling.
count_max <- count_melt[which.max(count_melt[["number of nuclei"]]), ]
count_max_value <- count_max[["number of nuclei"]]
cellmax <- 200 + count_max_value # so that the figure doesn't cut off the text

# Bar chart of nuclei per cluster, filled by orig.ident, with the count printed
# at a 45 degree angle above each bar.
count_bar <- ggplot(
  count_melt,
  aes(x = factor(cluster), y = `number of nuclei`, fill = `ident`)
) +
  geom_col(
    colour = "black",
    width = 1,
    position = position_dodge(width = 0.8)
  ) +
  geom_text(
    aes(label = `number of nuclei`),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    angle = 45,
    hjust = -.01
  ) +
  theme_classic() +
  scale_fill_manual(values = sample_colors) +
  labs(title = "Number of nuclei per cluster", x = "cluster") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(limits = c(0, cellmax))
count_bar
# Violin plots of canonical marker genes, one panel set per cell type.

# Astrocytes
VlnPlot(object = dataObject, features = c("AQP4", "CLU", "GFAP"))

# Oligodendrocyte
VlnPlot(object = dataObject, features = c("PLP1", "MBP", "MOG"))

# OPC
VlnPlot(object = dataObject, features = c("PDGFRA", "VCAN", "TNR"))

# Neurons
VlnPlot(object = dataObject, features = c("RBFOX3", "GAD1", "GAD2"))
# Marker panel for the dot plot. The order of this vector is the order in
# which the genes are passed to DotPlot, so it is kept as-is.
markers.to.plot <- c(
  "CLU", "GFAP", "AQP4", "GJA1",
  "CLDN5", "ADGRF5", "FLT1",
  "COL1A1", "COL1A2", "DCN",
  "HEXB", "C1QA", "C1QC", "C1QB", "TMEM119", "ITGAM", "TYROBP", "P2RY12",
  "AIF1",
  "RBFOX3", "SNAP25", "SYT1", "GAD1", "GAD2",
  "PLP1", "MBP", "MOG", "OLIG1",
  "PDGFRA", "VCAN", "TNR",
  "ACTA2", "RGS5", "VTN", "MYL5"
)

# Dot plot of the panel across identities, with identities clustered and the
# x-axis labels rotated.
dot_ind <- DotPlot(
  object = dataObject,
  features = markers.to.plot,
  cluster.idents = TRUE,
  dot.scale = 8
) +
  RotatedAxis()
dot_ind
# Marker detection for all clusters via RunPrestoAll, using the "counts" slot
# of the RNA assay and keeping both positive and negative markers.
# NOTE(review): the test runs on raw counts rather than the normalized "data"
# slot - confirm this is intended.
markers <- SeuratWrappers::RunPrestoAll(
  object = dataObject,
  assay = "RNA",
  slot = "counts",
  only.pos = FALSE
)

# The file has a .tsv extension, so write it tab-delimited. write.table()
# defaults to a single space, which tab-based readers cannot parse.
write.table(
  markers,
  paste0("../results/markers/", treatment, "_markers.tsv"),
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)
saveRDS(markers, paste0("../rObjects/", treatment, "_wilcox_markers.rds"))

# Group by cluster and keep only markers with avg_log2FC > 1 and adjusted
# p-value < 0.05. group_by() does not reorder rows; the result stays grouped.
# NOTE(review): the file name below says "q0.01" but the cut-off applied is
# 0.05. The name is left unchanged so existing readers of the file still find it.
all.markers.strict <- markers %>%
  group_by(cluster) %>%
  dplyr::filter(avg_log2FC > 1 & p_val_adj < 0.05)
saveRDS(all.markers.strict, paste0("../rObjects/", treatment,"_wilcox_markers_log2FC1_q0.01.rds"))
Get markers for each cluster
# clusters that still have at least one marker after the strict filter
unique_clusters <- unique(all.markers.strict$cluster)

# named list collecting one marker table per cluster
cluster_list <- list()

# For each cluster: subset its markers, store the table in cluster_list and
# also expose it as a variable named cluster<id> (e.g. cluster0).
for (i in unique_clusters) {
  cluster_name <- sprintf("cluster%s", i)
  cluster_data <- all.markers.strict[all.markers.strict[["cluster"]] == i, ]
  cluster_list[[cluster_name]] <- cluster_data
  assign(cluster_name, cluster_data)
}
# Number of cells per condition
# The count column is selected by cluster name rather than by position.
count_per_cluster[, c("orig.ident", "0")]
## orig.ident 0
## 1 SeuratProject 811
# UMAP with only cluster 0
DimPlot(
  object = subset(dataObject, seurat_clusters == "0"),
  reduction = "umap",
  label = TRUE,
  label.box = TRUE,
  label.size = 3,
  repel = TRUE,
  cols = color.panel[1]
)
# Stacked violin plots of up to ten strict markers of cluster 0.
# head() avoids NA-padded feature names when fewer than ten markers passed
# the filter. cols = color.panel matches the plots of the other clusters.
VlnPlot(
  dataObject,
  features = head(cluster0$gene, 10),
  cols = color.panel,
  stack = TRUE,
  flip = TRUE,
  split.by = "seurat_clusters"
)
head(cluster0$gene, 10)
## [1] "MBP" "DLC1" "ENSSSCG00000061137"
## [4] "PRR5L" "PEX5L" "EVA1C"
## [7] "PDE8A" "SLC24A2" "FMNL2"
## [10] "ANO4"
# Cell count for cluster 1, selected by cluster name rather than by position.
count_per_cluster[, c("orig.ident", "1")]
## orig.ident 1
## 1 SeuratProject 578
# UMAP with only cluster 1
DimPlot(
  object = subset(dataObject, seurat_clusters == "1"),
  reduction = "umap",
  label = TRUE,
  label.box = TRUE,
  label.size = 3,
  repel = TRUE,
  cols = color.panel[2]
)
# Stacked violin plots of up to ten strict markers of cluster 1.
# head() avoids NA-padded feature names when fewer than ten markers exist.
VlnPlot(
  dataObject,
  features = head(cluster1$gene, 10),
  cols = color.panel,
  stack = TRUE,
  flip = TRUE,
  split.by = "seurat_clusters"
)
head(cluster1$gene, 10)
## [1] "ENSSSCG00000058036" "IFNAR2" "ARHGAP24"
## [4] "PRKCB" "ENSSSCG00000007520" "CALCR"
## [7] "TMCC3" "RNF128" "ANK1"
## [10] "DPP10"
# Cell count for cluster 2, selected by cluster name rather than by position.
count_per_cluster[, c("orig.ident", "2")]
## orig.ident 2
## 1 SeuratProject 558
# UMAP with only cluster 2
DimPlot(
  object = subset(dataObject, seurat_clusters == "2"),
  reduction = "umap",
  label = TRUE,
  label.box = TRUE,
  label.size = 3,
  repel = TRUE,
  cols = color.panel[3]
)
# Stacked violin plots of up to ten strict markers of cluster 2.
# head() avoids NA-padded feature names when fewer than ten markers exist.
VlnPlot(
  dataObject,
  features = head(cluster2$gene, 10),
  cols = color.panel,
  stack = TRUE,
  flip = TRUE,
  split.by = "seurat_clusters"
)
head(cluster2$gene, 10)
## [1] "NKAIN2" "ENSSSCG00000010212" "DOCK10"
## [4] "SH3GL3" "NFASC" "SIK3"
## [7] "ERBB4" "YPEL2" "AUTS2"
## [10] "OPALIN"
# Cell count for cluster 3. The previous positional index c(1, 4) showed
# cluster 2 again; selecting by name shows the right column.
count_per_cluster[, c("orig.ident", "3")]
## orig.ident 3
## 1 SeuratProject 514
# UMAP with only cluster 3
DimPlot(
  object = subset(dataObject, seurat_clusters == "3"),
  reduction = "umap",
  label = TRUE,
  label.box = TRUE,
  label.size = 3,
  repel = TRUE,
  cols = color.panel[4]
)
# Stacked violin plots of up to ten strict markers of cluster 3.
# This cluster has fewer than ten strict markers; head() returns only the
# existing ones instead of padding the feature list with NA.
VlnPlot(
  dataObject,
  features = head(cluster3$gene, 10),
  cols = color.panel,
  stack = TRUE,
  flip = TRUE,
  split.by = "seurat_clusters"
)
head(cluster3$gene, 10)
## [1] "ENSSSCG00000033805" "ENSSSCG00000058255" "ITGA2B"
## [4] "ENSSSCG00000055221" "EDIL3"
# Cell count for cluster 4. The previous positional index c(1, 5) showed
# cluster 3; selecting by name shows the right column.
count_per_cluster[, c("orig.ident", "4")]
## orig.ident 4
## 1 SeuratProject 481
# UMAP with only cluster 4
DimPlot(
  object = subset(dataObject, seurat_clusters == "4"),
  reduction = "umap",
  label = TRUE,
  label.box = TRUE,
  label.size = 3,
  repel = TRUE,
  cols = color.panel[5]
)
# Stacked violin plots of up to ten strict markers of cluster 4.
# head() avoids NA-padded feature names when fewer than ten markers exist.
VlnPlot(
  dataObject,
  features = head(cluster4$gene, 10),
  cols = color.panel,
  stack = TRUE,
  flip = TRUE,
  split.by = "seurat_clusters"
)
head(cluster4$gene, 10)
## [1] "SLC1A2" "PITPNC1" "ETNPPL"
## [4] "FRMD4A" "CTNND2" "ENSSSCG00000059499"
## [7] "ENSSSCG00000058517" "TRPM3" "PLCB4"
## [10] "GLIS3"
# Cell count for cluster 5. The previous positional index c(1, 6) showed
# cluster 4; selecting by name shows the right column.
count_per_cluster[, c("orig.ident", "5")]
## orig.ident 5
## 1 SeuratProject 233
# UMAP with only cluster 5
DimPlot(
  object = subset(dataObject, seurat_clusters == "5"),
  reduction = "umap",
  label = TRUE,
  label.box = TRUE,
  label.size = 3,
  repel = TRUE,
  cols = color.panel[6]
)
# Stacked violin plots of up to ten strict markers of cluster 5.
# head() avoids NA-padded feature names when fewer than ten markers exist.
VlnPlot(
  dataObject,
  features = head(cluster5$gene, 10),
  cols = color.panel,
  stack = TRUE,
  flip = TRUE,
  split.by = "seurat_clusters"
)
head(cluster5$gene, 10)
## [1] "XYLT1" "TNR" "OPCML"
## [4] "MMP16" "ITGA9" "LHFPL3"
## [7] "KIF26B" "SEMA5A" "ENSSSCG00000033643"
## [10] "ATCAY"
# Cell count for cluster 6. The previous positional index c(1, 7) showed
# cluster 5; selecting by name shows the right column.
count_per_cluster[, c("orig.ident", "6")]
## orig.ident 6
## 1 SeuratProject 135
# UMAP with only cluster 6
DimPlot(
  object = subset(dataObject, seurat_clusters == "6"),
  reduction = "umap",
  label = TRUE,
  label.box = TRUE,
  label.size = 3,
  repel = TRUE,
  cols = color.panel[7]
)
# Stacked violin plots of up to ten strict markers of cluster 6.
# head() avoids NA-padded feature names when fewer than ten markers exist.
VlnPlot(
  dataObject,
  features = head(cluster6$gene, 10),
  cols = color.panel,
  stack = TRUE,
  flip = TRUE,
  split.by = "seurat_clusters"
)
head(cluster6$gene, 10)
## [1] "ENSSSCG00000058036" "PRKCB" "ENSSSCG00000007520"
## [4] "CALCR" "ANK1" "IL1RAPL2"
## [7] "DPP10" "SLC8A1" "TMCC3"
## [10] "RCSD1"
# Cell count for cluster 7. The previous positional index c(1, 8) showed
# cluster 6; selecting by name shows the right column.
count_per_cluster[, c("orig.ident", "7")]
## orig.ident 7
## 1 SeuratProject 66
# UMAP with only cluster 7
DimPlot(
  object = subset(dataObject, seurat_clusters == "7"),
  reduction = "umap",
  label = TRUE,
  label.box = TRUE,
  label.size = 3,
  repel = TRUE,
  cols = color.panel[8]
)
# Stacked violin plots of up to ten strict markers of cluster 7.
# head() avoids NA-padded feature names when fewer than ten markers exist.
VlnPlot(
  dataObject,
  features = head(cluster7$gene, 10),
  cols = color.panel,
  stack = TRUE,
  flip = TRUE,
  split.by = "seurat_clusters"
)
# Print the genes plotted above. This previously printed cluster6$gene, so
# the output recorded here was a copy of the cluster 6 list.
# NOTE: re-run to capture the cluster 7 marker list.
head(cluster7$gene, 10)
# Give every cluster a cell-type label; clusters 1 and 3 are labelled "noise".
dataObject.annotated <- RenameIdents(
  object = dataObject,
  "0" = "oligodendrocyte",
  "1" = "noise",
  "2" = "oligodendrocyte",
  "3" = "noise",
  "4" = "astrocyte",
  "5" = "polydendrocyte",
  "6" = "neuron",
  "7" = "polydendrocyte"
)

# Keep the new labels as a metadata column.
dataObject.annotated$individual_clusters <- factor(
  Idents(object = dataObject.annotated)
)

# UMAP coloured by the assigned labels.
UMAP_ind <- dittoDimPlot(
  object = dataObject.annotated,
  var = "individual_clusters",
  reduction.use = "umap",
  do.label = TRUE,
  labels.highlight = TRUE
)
UMAP_ind
## png
## 2
# filter: keep only cells whose label is not "noise"
dataObject.no_noise <- subset(
  dataObject.annotated,
  subset = individual_clusters != "noise"
)

# UMAP of the remaining cells, coloured by label.
UMAP_ind <- dittoDimPlot(
  object = dataObject.no_noise,
  var = "individual_clusters",
  reduction.use = "umap",
  do.label = TRUE,
  labels.highlight = TRUE
)
UMAP_ind
# Clean up
# Remove the intermediate objects of the first annotation pass. Only names that
# currently exist are removed, so objects that were never created in this
# session do not raise "object not found" warnings. The misspelled names
# `count_per_clsuter` and `makers` are dropped; the correctly spelled
# `count_per_cluster` and `markers` are in the list.
cleanup_names <- c(
  "all.markers.strict", "cluster_data", "cluster_list",
  paste0("cluster", 0:7),
  "count_bar", "count_max", "count_melt", "count_per_cluster",
  "ditto_umap", "markers", "tree_graph", "tree", "umap_feature", "UMAP_ind",
  "umap0.2", "umap0.4", "cellmax", "nClusters", "cluster_name", "s2",
  "dataObject.annotated"
)
rm(list = intersect(cleanup_names, ls()))
rm(cleanup_names)
# continue with the noise-filtered object as the new dataObject
dataObject <- dataObject.no_noise
# transform
# Re-run SCTransform on the noise-filtered object (verbose output suppressed).
dataObject <- SCTransform(dataObject, verbose = FALSE)
# run PCA; the result is referenced below as reduction "pca"
dataObject <- RunPCA(object = dataObject)
# NOTE(review): no "sample" metadata column is created anywhere in the visible
# code - confirm it exists; otherwise this presumably labels every cell with
# the literal string "sample".
Idents(dataObject) <- "sample"
# Determine the K-nearest neighbor graph
# Uses the SCT assay and the first 15 PCA dimensions.
dataObject <- FindNeighbors(object = dataObject,
assay = "SCT",
reduction = "pca",
dims = 1:15)
# Run UMAP
# Same 15 PCA dimensions; the embedding is computed with 3 components.
dataObject <- RunUMAP(dataObject,
dims = 1:15,
reduction = "pca",
n.components = 3)
# Determine the clusters for various resolutions
# Resolutions 0.1 to 1.0 in steps of 0.1; the per-resolution results are read
# later from columns named SCT_snn_res.<resolution>.
dataObject <- FindClusters(object = dataObject,
algorithm = 1, # 1= Louvain
resolution = seq(0.1,1,by=0.1))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2284
## Number of edges: 95795
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9622
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2284
## Number of edges: 95795
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9275
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2284
## Number of edges: 95795
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8929
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2284
## Number of edges: 95795
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8582
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2284
## Number of edges: 95795
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8239
## Number of communities: 5
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2284
## Number of edges: 95795
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7982
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2284
## Number of edges: 95795
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7751
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2284
## Number of edges: 95795
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7522
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2284
## Number of edges: 95795
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7295
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2284
## Number of edges: 95795
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7106
## Number of communities: 7
## Elapsed time: 0 seconds
# UMAP of the re-clustered object, coloured by `seurat_clusters`, with
# highlighted cluster labels.
ditto_umap <- dittoDimPlot(
  object = dataObject,
  var = "seurat_clusters",
  reduction.use = "umap",
  do.label = TRUE,
  labels.highlight = TRUE
)
ditto_umap
# resolution 0.2
umap0.2 <- DimPlot(
  object = dataObject,
  group.by = "SCT_snn_res.0.2",
  label = TRUE
)
umap0.2

# resolution 0.4
umap0.4 <- DimPlot(
  object = dataObject,
  group.by = "SCT_snn_res.0.4",
  label = TRUE
)
umap0.4
# Adopt the resolution-0.2 clustering as the working cluster assignment and
# make it the active identity.
dataObject$seurat_clusters <- dataObject$SCT_snn_res.0.2
Idents(dataObject) <- dataObject$seurat_clusters
# Switch back to the RNA assay and repeat normalization, variable-feature
# selection, scaling and layer joining, in that order.
DefaultAssay(dataObject) <- "RNA"
dataObject <- NormalizeData(dataObject)
dataObject <- FindVariableFeatures(dataObject)
dataObject <- ScaleData(dataObject)
dataObject <- JoinLayers(dataObject)
# Violin plots of canonical marker genes on the re-clustered object.

# Astrocytes
VlnPlot(object = dataObject, features = c("AQP4", "CLU", "GFAP"))

# Oligodendrocyte
VlnPlot(object = dataObject, features = c("PLP1", "MBP", "MOG"))

# OPC
VlnPlot(object = dataObject, features = c("PDGFRA", "VCAN", "TNR"))

# Neurons
VlnPlot(object = dataObject, features = c("RBFOX3", "GAD1", "GAD2"))
# Count cells per (ident, orig.ident) pair and spread the idents into columns,
# giving one row per orig.ident.
count_per_cluster <- dataObject %>%
  FetchData(vars = c("ident", "orig.ident")) %>%
  dplyr::count(ident, orig.ident) %>%
  tidyr::spread(key = ident, value = n)
count_per_cluster
## orig.ident 0 1 2 3
## 1 SeuratProject 1370 480 298 136
# Long format: one row per (orig.ident, cluster) with its nuclei count.
count_melt <- reshape2::melt(count_per_cluster)
names(count_melt) <- c("ident", "cluster", "number of nuclei")

# Largest count, used to set the y-axis ceiling.
count_max <- count_melt[which.max(count_melt[["number of nuclei"]]), ]
count_max_value <- count_max[["number of nuclei"]]
cellmax <- 500 + count_max_value # so that the figure doesn't cut off the text

# Bar chart of nuclei per cluster, filled by cluster, with the count printed
# at a 45 degree angle above each bar.
count_bar <- ggplot(
  count_melt,
  aes(x = factor(cluster), y = `number of nuclei`, fill = `cluster`)
) +
  geom_col(
    colour = "black",
    width = 1,
    position = position_dodge(width = 0.8)
  ) +
  geom_text(
    aes(label = `number of nuclei`),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    angle = 45,
    hjust = -.01
  ) +
  theme_classic() +
  scale_fill_manual(values = color.panel) +
  labs(title = "Number of nuclei per cluster", x = "cluster") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(limits = c(0, cellmax))
count_bar

# drop the plotting intermediates
rm(count_max, count_melt)
# Marker detection for all clusters of the noise-filtered object via
# RunPrestoAll, using the "counts" slot of the RNA assay and keeping both
# positive and negative markers.
# NOTE(review): the test runs on raw counts rather than the normalized "data"
# slot - confirm this is intended.
markers <- SeuratWrappers::RunPrestoAll(
  object = dataObject,
  assay = "RNA",
  slot = "counts",
  only.pos = FALSE
)

# The file has a .tsv extension, so write it tab-delimited. write.table()
# defaults to a single space, which tab-based readers cannot parse.
write.table(
  markers,
  paste0("../results/markers/", treatment, "_markers_noise_clusters_removed.tsv"),
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)
saveRDS(markers, paste0("../rObjects/", treatment, "_wilcox_markers_noise_clusters_removed.rds"))

# Group by cluster and keep only markers with avg_log2FC > 1 and adjusted
# p-value < 0.05. group_by() does not reorder rows; the result stays grouped.
# NOTE(review): the file name below says "q0.01" but the cut-off applied is
# 0.05. The name is left unchanged so existing readers of the file still find it.
all.markers.strict <- markers %>%
  group_by(cluster) %>%
  dplyr::filter(avg_log2FC > 1 & p_val_adj < 0.05)
saveRDS(all.markers.strict, paste0("../rObjects/", treatment,"_wilcox_markers_noise_clusters_removed_log2FC1_q0.01.rds"))
Get markers for each cluster
# clusters that still have at least one marker after the strict filter
unique_clusters <- unique(all.markers.strict$cluster)

# named list collecting one marker table per cluster
cluster_list <- list()

# For each cluster: subset its markers, store the table in cluster_list and
# also expose it as a variable named cluster<id> (e.g. cluster0).
for (i in unique_clusters) {
  cluster_name <- sprintf("cluster%s", i)
  cluster_data <- all.markers.strict[all.markers.strict[["cluster"]] == i, ]
  cluster_list[[cluster_name]] <- cluster_data
  assign(cluster_name, cluster_data)
}
# Number of cells per condition
# The count column is selected by cluster name rather than by position.
count_per_cluster[, c("orig.ident", "0")]
## orig.ident 0
## 1 SeuratProject 1370
# UMAP with only cluster 0
DimPlot(
  object = subset(dataObject, seurat_clusters == "0"),
  reduction = "umap",
  label = TRUE,
  label.box = TRUE,
  label.size = 3,
  repel = TRUE,
  cols = color.panel[1]
)
# Stacked violin plots of up to ten strict markers of cluster 0.
# head() avoids NA-padded feature names when fewer than ten markers passed
# the filter. cols = color.panel matches the plots of the other clusters.
VlnPlot(
  dataObject,
  features = head(cluster0$gene, 10),
  cols = color.panel,
  stack = TRUE,
  flip = TRUE,
  split.by = "seurat_clusters"
)
head(cluster0$gene, 10)
## [1] "NKAIN2" "RNF220" "C10orf90"
## [4] "PCDH9" "MBP" "OPALIN"
## [7] "PLEKHH1" "ENSSSCG00000010212" "SIK3"
## [10] "ZFYVE16"
# Cell count for cluster 1, selected by cluster name rather than by position.
count_per_cluster[, c("orig.ident", "1")]
## orig.ident 1
## 1 SeuratProject 480
# UMAP with only cluster 1
DimPlot(
  object = subset(dataObject, seurat_clusters == "1"),
  reduction = "umap",
  label = TRUE,
  label.box = TRUE,
  label.size = 3,
  repel = TRUE,
  cols = color.panel[2]
)
# Stacked violin plots of up to ten strict markers of cluster 1.
# head() avoids NA-padded feature names when fewer than ten markers exist.
VlnPlot(
  dataObject,
  features = head(cluster1$gene, 10),
  cols = color.panel,
  stack = TRUE,
  flip = TRUE,
  split.by = "seurat_clusters"
)
head(cluster1$gene, 10)
## [1] "SLC1A2" "PITPNC1" "FRMD4A"
## [4] "ETNPPL" "CTNND2" "ENSSSCG00000058517"
## [7] "ENSSSCG00000059499" "TRPM3" "PLCB4"
## [10] "GLIS3"
# Cell count for cluster 2, selected by cluster name rather than by position.
count_per_cluster[, c("orig.ident", "2")]
## orig.ident 2
## 1 SeuratProject 298
# UMAP with only cluster 2
DimPlot(
  object = subset(dataObject, seurat_clusters == "2"),
  reduction = "umap",
  label = TRUE,
  label.box = TRUE,
  label.size = 3,
  repel = TRUE,
  cols = color.panel[3]
)
# Stacked violin plots of up to ten strict markers of cluster 2.
# head() avoids NA-padded feature names when fewer than ten markers exist.
VlnPlot(
  dataObject,
  features = head(cluster2$gene, 10),
  cols = color.panel,
  stack = TRUE,
  flip = TRUE,
  split.by = "seurat_clusters"
)
head(cluster2$gene, 10)
## [1] "TNR" "OPCML" "XYLT1" "MMP16" "ITGA9" "LHFPL3" "KIF26B" "LRRTM4"
## [9] "SEMA5A" "ATCAY"
# Cell count for cluster 3. The previous positional index c(1, 4) showed
# cluster 2 again; selecting by name shows the right column.
count_per_cluster[, c("orig.ident", "3")]
## orig.ident 3
## 1 SeuratProject 136
# UMAP with only cluster 3
DimPlot(
  object = subset(dataObject, seurat_clusters == "3"),
  reduction = "umap",
  label = TRUE,
  label.box = TRUE,
  label.size = 3,
  repel = TRUE,
  cols = color.panel[4]
)
# Stacked violin plots of up to ten strict markers of cluster 3.
# head() avoids NA-padded feature names when fewer than ten markers exist.
VlnPlot(
  dataObject,
  features = head(cluster3$gene, 10),
  cols = color.panel,
  stack = TRUE,
  flip = TRUE,
  split.by = "seurat_clusters"
)
head(cluster3$gene, 10)
## [1] "ENSSSCG00000058036" "CALCR" "PRKCB"
## [4] "ENSSSCG00000007520" "ANK1" "IFNAR2"
## [7] "IL1RAPL2" "ARHGAP24" "DPP10"
## [10] "TMCC3"
# Give each of the four remaining clusters its cell-type label.
dataObject.annotated <- RenameIdents(
  object = dataObject,
  "0" = "oligodendrocyte",
  "1" = "astrocyte",
  "2" = "polydendrocyte",
  "3" = "neuron"
)

# Keep the new labels as a metadata column.
dataObject.annotated$annotated_clusters <- factor(
  Idents(object = dataObject.annotated)
)

# UMAP coloured by the assigned labels.
UMAP_ind <- dittoDimPlot(
  object = dataObject.annotated,
  var = "annotated_clusters",
  reduction.use = "umap",
  do.label = TRUE,
  labels.highlight = TRUE
)
UMAP_ind
## png
## 2
# Save the annotated, noise-filtered object under ../rObjects/.
saveRDS(
  object = dataObject.annotated,
  file = paste0("../rObjects/", treatment, "_unannotated_noise_clusters_removed.rds")
)