# libraries
library(Seurat) # for scrnaseq analyisis
## Attaching SeuratObject
library(here) # for reproducible paths
## here() starts at C:/Users/nbestard/OneDrive/Luise_HCA
library(ggplot2)# for adding some custom to the seurat plots
library(patchwork) # for adding customs to grid-composed Seurat plots
library(ggsci) # colors
# Set up the color palette
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3<-pal_lancet("lanonc", alpha = 0.7)(9)
mypal4<-pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)
mycoloursP<- c(mypal, mypal2, mypal3, mypal4)
In previous scripts the clusters at a resolution 0.8 are already computed. Here we display the cleaned dataset clustering with other resolutions.
srt_all <- readRDS(here("processed", "srt_filter_bad_clusters_02.RDS"))
Idents(srt_all) <- srt_all$clusters_0.8
DimPlot(srt_all, label = TRUE, cols = mycoloursP)
split the clustering by the original tissues
umap_clusters <- DimPlot(srt_all, label=TRUE, split.by = "Tissue", ncol = 2, cols = mycoloursP)
umap_clusters
# Visualisation This clustering can be visualized with a UMAP
I Plot here bellow, as reminder, the visualization of the same UMAP with the rough annotation, done before the considered bad quality samples were deleted
Idents(srt_all) <- srt_all$rough_annot
DimPlot(srt_all, label = TRUE, repel = TRUE, cols = mycoloursP) + ggtitle("Rough annotation")
I start with a resolution of 0.5, a bit low taking into account the number of cells, as this is more recommended for smaller datasets, but for cell level clustering this resolution will be more than enough.
# I choose to keep the first 25 dimensions, more details in srt_dim_reduction.R
## Clustering ----
srt_all <- FindNeighbors(srt_all, dims = 1:25)
## Computing nearest neighbor graph
## Computing SNN
srt_all <- FindClusters(srt_all, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 45528
## Number of edges: 1830108
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9409
## Number of communities: 21
## Elapsed time: 7 seconds
# Save the active ident into the metadata
srt_all$clusters_0.5 <- srt_all@active.ident
DimPlot(srt_all, label = TRUE, cols = mycoloursP)
With less resolution the Exitatory neurons are splitted like before the cluster qc again. Other celltypes are not very splitted.
Test some other resolutions for clustering
# I choose to keep the first 25 dimensions, more details in srt_dim_reduction.R
## Clustering ----
srt_all <- FindClusters(srt_all, resolution = 0.9)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 45528
## Number of edges: 1830108
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9151
## Number of communities: 28
## Elapsed time: 8 seconds
# Save the active ident into the metadata
srt_all$clusters_0.9 <- srt_all@active.ident
Idents(srt_all) <- srt_all$clusters_0.9
DimPlot(srt_all, label = TRUE, cols = mycoloursP) + ggtitle("Clusters 0.9")
The differences are only n OPC and Oligo clusters, we are not interested here about.
# I choose to keep the first 25 dimensions, more details in srt_dim_reduction.R
## Clustering ----
srt_all <- FindClusters(srt_all, resolution = 1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 45528
## Number of edges: 1830108
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9095
## Number of communities: 30
## Elapsed time: 7 seconds
# Save the active ident into the metadata
srt_all$clusters_1 <- srt_all@active.ident
Idents(srt_all) <- srt_all$clusters_1
DimPlot(srt_all, label = TRUE, cols = mycoloursP) + ggtitle("Clusters 1")
Microglia is splitted in differentiate clusters.
# I choose to keep the first 25 dimensions, more details in srt_dim_reduction.R
## Clustering ----
srt_all <- FindClusters(srt_all, resolution = 1.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 45528
## Number of edges: 1830108
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9045
## Number of communities: 32
## Elapsed time: 8 seconds
# Save the active ident into the metadata
srt_all$clusters_1.1 <- srt_all@active.ident
Idents(srt_all) <- srt_all$clusters_1.1
DimPlot(srt_all, label = TRUE, cols = mycoloursP) + ggtitle("Clusters 1.1")
RELN+ divides differently, the microglia is still in 2 but splitted differently
# I choose to keep the first 25 dimensions, more details in srt_dim_reduction.R
## Clustering ----
srt_all <- FindClusters(srt_all, resolution = 1.2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 45528
## Number of edges: 1830108
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9010
## Number of communities: 32
## Elapsed time: 8 seconds
# Save the active ident into the metadata
srt_all$clusters_1.2 <- srt_all@active.ident
Idents(srt_all) <- srt_all$clusters_1.2
DimPlot(srt_all, label = TRUE, cols = mycoloursP) + ggtitle("Clusters 1.2")
The RELN+ division seems stable, microglia divides again as in 1.1. The big astrocyte cluster splits in two, but it is not a stable division
# I choose to keep the first 25 dimensions, more details in srt_dim_reduction.R
## Clustering ----
srt_all <- FindClusters(srt_all, resolution = 1.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 45528
## Number of edges: 1830108
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8963
## Number of communities: 33
## Elapsed time: 8 seconds
# Save the active ident into the metadata
srt_all$clusters_1.3 <- srt_all@active.ident
Idents(srt_all) <- srt_all$clusters_1.3
DimPlot(srt_all, label = TRUE, cols = mycoloursP) + ggtitle("Clusters 1.3")
The division in the astrocyte cluster disappears again, it was not a stable splitting. Microglia and RELN+ seem to keep stable
# I choose to keep the first 25 dimensions, more details in srt_dim_reduction.R
## Clustering ----
srt_all <- FindClusters(srt_all, resolution = 1.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 45528
## Number of edges: 1830108
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8924
## Number of communities: 33
## Elapsed time: 8 seconds
# Save the active ident into the metadata
srt_all$clusters_1.4 <- srt_all@active.ident
Idents(srt_all) <- srt_all$clusters_1.4
DimPlot(srt_all, label = TRUE, cols = mycoloursP) + ggtitle("Clusters 1.4")
And here we have the astrocytes again like 1.2. The stromal cells split further.
# I choose to keep the first 25 dimensions, more details in srt_dim_reduction.R
## Clustering ----
srt_all <- FindClusters(srt_all, resolution = 1.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 45528
## Number of edges: 1830108
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8887
## Number of communities: 34
## Elapsed time: 8 seconds
# Save the active ident into the metadata
srt_all$clusters_1.4 <- srt_all@active.ident
# save all these new clustering resolutions
saveRDS(srt_all, here("processed", "srt_extra_resolution.RDS"))
Idents(srt_all) <- srt_all$clusters_1.4
DimPlot(srt_all, label = TRUE, cols = mycoloursP) + ggtitle("Clusters 1.5")
like above, only the Oligodendrocytes are different.
The oligo and OPC clusters are totally ignored when deciding the resolution, as they will be separated. For the cluster quality control the resolution 0.8 is used. For the annotation it would be interesting keeping the subclustering from the microglia cells that appear after 1.0 resolution and the RELN+ that appears from the 1.1. However just at 1.1. the microglia splitting looks different from all the other resolutions, so better picking a higher one.
I am more reluctant about the big Astrocyte cluster splitting at 1.2 and 1.4.
Therefore, resolution 1.3 is probably what we are going for in the annotaion.
This will be done in the file annoation_01.Rmd
sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19041)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United Kingdom.1252
## [2] LC_CTYPE=English_United Kingdom.1252
## [3] LC_MONETARY=English_United Kingdom.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United Kingdom.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggsci_2.9 patchwork_1.1.1 ggplot2_3.3.3 here_1.0.1
## [5] SeuratObject_4.0.0 Seurat_4.0.0
##
## loaded via a namespace (and not attached):
## [1] Rtsne_0.15 colorspace_2.0-0 deldir_0.2-10
## [4] ellipsis_0.3.1 ggridges_0.5.3 rprojroot_2.0.2
## [7] spatstat.data_2.0-0 farver_2.0.3 leiden_0.3.7
## [10] listenv_0.8.0 ggrepel_0.9.1 fansi_0.4.2
## [13] codetools_0.2-18 splines_4.0.4 knitr_1.31
## [16] polyclip_1.10-0 jsonlite_1.7.2 ica_1.0-2
## [19] cluster_2.1.1 png_0.1-7 uwot_0.1.10
## [22] shiny_1.6.0 sctransform_0.3.2 compiler_4.0.4
## [25] httr_1.4.2 assertthat_0.2.1 Matrix_1.3-2
## [28] fastmap_1.1.0 lazyeval_0.2.2 later_1.1.0.1
## [31] htmltools_0.5.1.1 tools_4.0.4 igraph_1.2.6
## [34] gtable_0.3.0 glue_1.4.2 RANN_2.6.1
## [37] reshape2_1.4.4 dplyr_1.0.4 Rcpp_1.0.6
## [40] spatstat_1.64-1 scattermore_0.7 jquerylib_0.1.3
## [43] vctrs_0.3.6 nlme_3.1-152 lmtest_0.9-38
## [46] xfun_0.21 stringr_1.4.0 globals_0.14.0
## [49] mime_0.10 miniUI_0.1.1.1 lifecycle_1.0.0
## [52] irlba_2.3.3 goftest_1.2-2 future_1.21.0
## [55] MASS_7.3-53.1 zoo_1.8-8 scales_1.1.1
## [58] promises_1.2.0.1 spatstat.utils_2.0-0 parallel_4.0.4
## [61] RColorBrewer_1.1-2 yaml_2.2.1 reticulate_1.18
## [64] pbapply_1.4-3 gridExtra_2.3 sass_0.3.1
## [67] rpart_4.1-15 stringi_1.5.3 highr_0.8
## [70] rlang_0.4.10 pkgconfig_2.0.3 matrixStats_0.58.0
## [73] evaluate_0.14 lattice_0.20-41 ROCR_1.0-11
## [76] purrr_0.3.4 tensor_1.5 htmlwidgets_1.5.3
## [79] labeling_0.4.2 cowplot_1.1.1 tidyselect_1.1.0
## [82] parallelly_1.23.0 RcppAnnoy_0.0.18 plyr_1.8.6
## [85] magrittr_2.0.1 R6_2.5.0 generics_0.1.0
## [88] DBI_1.1.1 pillar_1.5.0 withr_2.4.1
## [91] mgcv_1.8-34 fitdistrplus_1.1-3 survival_3.2-7
## [94] abind_1.4-5 tibble_3.1.0 future.apply_1.7.0
## [97] crayon_1.4.1 KernSmooth_2.23-18 utf8_1.1.4
## [100] plotly_4.9.3 rmarkdown_2.7 grid_4.0.4
## [103] data.table_1.14.0 digest_0.6.27 xtable_1.8-4
## [106] tidyr_1.1.2 httpuv_1.5.5 munsell_0.5.0
## [109] viridisLite_0.3.0 bslib_0.2.4