Setup

# libraries
library(Seurat) # for scrnaseq analyisis

## Attaching SeuratObject

library(here)   # for reproducible paths

## here() starts at C:/Users/nbestard/OneDrive/Luise_HCA

library(ggplot2)# for adding some custom to the seurat plots
library(patchwork) # for adding customs to grid-composed Seurat plots
library(ggsci)  # colors

# Set up the color palette
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3<-pal_lancet("lanonc", alpha = 0.7)(9)
mypal4<-pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)

mycoloursP<- c(mypal, mypal2, mypal3, mypal4)

In previous scripts the clusters at a resolution 0.8 are already computed. Here we display the cleaned dataset clustering with other resolutions.

The chosen 0.8 resolution

  srt_all <- readRDS(here("processed", "srt_filter_bad_clusters_02.RDS"))
  Idents(srt_all) <- srt_all$clusters_0.8

DimPlot(srt_all, label = TRUE, cols = mycoloursP)

Tissue

split the clustering by the original tissues

umap_clusters <- DimPlot(srt_all, label=TRUE, split.by = "Tissue", ncol = 2, cols = mycoloursP)
umap_clusters

# Visualisation This clustering can be visualized with a UMAP

I Plot here bellow, as reminder, the visualization of the same UMAP with the rough annotation, done before the considered bad quality samples were deleted

Idents(srt_all) <- srt_all$rough_annot
DimPlot(srt_all, label = TRUE, repel = TRUE, cols = mycoloursP) + ggtitle("Rough annotation")

Clustering with less resolution

I start with a resolution of 0.5, a bit low taking into account the number of cells, as this is more recommended for smaller datasets, but for cell level clustering this resolution will be more than enough.

# I choose to keep the first 25 dimensions, more details in srt_dim_reduction.R
## Clustering ----
    srt_all <- FindNeighbors(srt_all, dims = 1:25)

## Computing nearest neighbor graph

## Computing SNN

    srt_all <- FindClusters(srt_all, resolution = 0.5)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 45528
## Number of edges: 1830108
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9409
## Number of communities: 21
## Elapsed time: 7 seconds

    # Save the active ident into the metadata
    srt_all$clusters_0.5 <- srt_all@active.ident

DimPlot(srt_all, label = TRUE, cols = mycoloursP)

With less resolution the Exitatory neurons are splitted like before the cluster qc again. Other celltypes are not very splitted.

Cluster with greater resolutions, for more detail clustering

Test some other resolutions for clustering

0.9

# I choose to keep the first 25 dimensions, more details in srt_dim_reduction.R
## Clustering ----
  srt_all <- FindClusters(srt_all, resolution = 0.9)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 45528
## Number of edges: 1830108
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9151
## Number of communities: 28
## Elapsed time: 8 seconds

  # Save the active ident into the metadata
 srt_all$clusters_0.9 <- srt_all@active.ident

Idents(srt_all) <- srt_all$clusters_0.9
DimPlot(srt_all, label = TRUE, cols = mycoloursP) + ggtitle("Clusters 0.9")

The differences are only n OPC and Oligo clusters, we are not interested here about.

1

# I choose to keep the first 25 dimensions, more details in srt_dim_reduction.R
## Clustering ----
  srt_all <- FindClusters(srt_all, resolution = 1)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 45528
## Number of edges: 1830108
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9095
## Number of communities: 30
## Elapsed time: 7 seconds

  # Save the active ident into the metadata
 srt_all$clusters_1 <- srt_all@active.ident

  Idents(srt_all) <- srt_all$clusters_1
DimPlot(srt_all, label = TRUE, cols = mycoloursP) + ggtitle("Clusters 1")

Microglia is splitted in differentiate clusters.

1.1

# I choose to keep the first 25 dimensions, more details in srt_dim_reduction.R
## Clustering ----
  srt_all <- FindClusters(srt_all, resolution = 1.1)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 45528
## Number of edges: 1830108
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9045
## Number of communities: 32
## Elapsed time: 8 seconds

  # Save the active ident into the metadata
  srt_all$clusters_1.1 <- srt_all@active.ident

  Idents(srt_all) <- srt_all$clusters_1.1
DimPlot(srt_all, label = TRUE, cols = mycoloursP) + ggtitle("Clusters 1.1")

RELN+ divides differently, the microglia is still in 2 but splitted differently

1.2

# I choose to keep the first 25 dimensions, more details in srt_dim_reduction.R
## Clustering ----
  srt_all <- FindClusters(srt_all, resolution = 1.2)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 45528
## Number of edges: 1830108
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9010
## Number of communities: 32
## Elapsed time: 8 seconds

  # Save the active ident into the metadata
  srt_all$clusters_1.2 <- srt_all@active.ident

  Idents(srt_all) <- srt_all$clusters_1.2
DimPlot(srt_all, label = TRUE, cols = mycoloursP) + ggtitle("Clusters 1.2")

The RELN+ division seems stable, microglia divides again as in 1.1. The big astrocyte cluster splits in two, but it is not a stable division

1.3

# I choose to keep the first 25 dimensions, more details in srt_dim_reduction.R
## Clustering ----
  srt_all <- FindClusters(srt_all, resolution = 1.3)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 45528
## Number of edges: 1830108
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8963
## Number of communities: 33
## Elapsed time: 8 seconds

  # Save the active ident into the metadata
  srt_all$clusters_1.3 <- srt_all@active.ident

  Idents(srt_all) <- srt_all$clusters_1.3
DimPlot(srt_all, label = TRUE, cols = mycoloursP) + ggtitle("Clusters 1.3")

The division in the astrocyte cluster disappears again, it was not a stable splitting. Microglia and RELN+ seem to keep stable

1.4

# I choose to keep the first 25 dimensions, more details in srt_dim_reduction.R
## Clustering ----
  srt_all <- FindClusters(srt_all, resolution = 1.4)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 45528
## Number of edges: 1830108
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8924
## Number of communities: 33
## Elapsed time: 8 seconds

  # Save the active ident into the metadata
  srt_all$clusters_1.4 <- srt_all@active.ident

  Idents(srt_all) <- srt_all$clusters_1.4
DimPlot(srt_all, label = TRUE, cols = mycoloursP) + ggtitle("Clusters 1.4")

And here we have the astrocytes again like 1.2. The stromal cells split further.

1.5

# I choose to keep the first 25 dimensions, more details in srt_dim_reduction.R
## Clustering ----
  srt_all <- FindClusters(srt_all, resolution = 1.5)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 45528
## Number of edges: 1830108
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8887
## Number of communities: 34
## Elapsed time: 8 seconds

  # Save the active ident into the metadata
  srt_all$clusters_1.4 <- srt_all@active.ident
  
# save all these new clustering resolutions
saveRDS(srt_all, here("processed", "srt_extra_resolution.RDS"))

  Idents(srt_all) <- srt_all$clusters_1.4
DimPlot(srt_all, label = TRUE, cols = mycoloursP) + ggtitle("Clusters 1.5")

like above, only the Oligodendrocytes are different.

Conclusions

The oligo and OPC clusters are totally ignored when deciding the resolution, as they will be separated. For the cluster quality control the resolution 0.8 is used. For the annotation it would be interesting keeping the subclustering from the microglia cells that appear after 1.0 resolution and the RELN+ that appears from the 1.1. However just at 1.1. the microglia splitting looks different from all the other resolutions, so better picking a higher one.

I am more reluctant about the big Astrocyte cluster splitting at 1.2 and 1.4.

Therefore, resolution 1.3 is probably what we are going for in the annotaion.

This will be done in the file annoation_01.Rmd

Session information

Click to expand

sessionInfo()

## R version 4.0.4 (2021-02-15)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19041)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United Kingdom.1252 
## [2] LC_CTYPE=English_United Kingdom.1252   
## [3] LC_MONETARY=English_United Kingdom.1252
## [4] LC_NUMERIC=C                           
## [5] LC_TIME=English_United Kingdom.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggsci_2.9          patchwork_1.1.1    ggplot2_3.3.3      here_1.0.1        
## [5] SeuratObject_4.0.0 Seurat_4.0.0      
## 
## loaded via a namespace (and not attached):
##   [1] Rtsne_0.15           colorspace_2.0-0     deldir_0.2-10       
##   [4] ellipsis_0.3.1       ggridges_0.5.3       rprojroot_2.0.2     
##   [7] spatstat.data_2.0-0  farver_2.0.3         leiden_0.3.7        
##  [10] listenv_0.8.0        ggrepel_0.9.1        fansi_0.4.2         
##  [13] codetools_0.2-18     splines_4.0.4        knitr_1.31          
##  [16] polyclip_1.10-0      jsonlite_1.7.2       ica_1.0-2           
##  [19] cluster_2.1.1        png_0.1-7            uwot_0.1.10         
##  [22] shiny_1.6.0          sctransform_0.3.2    compiler_4.0.4      
##  [25] httr_1.4.2           assertthat_0.2.1     Matrix_1.3-2        
##  [28] fastmap_1.1.0        lazyeval_0.2.2       later_1.1.0.1       
##  [31] htmltools_0.5.1.1    tools_4.0.4          igraph_1.2.6        
##  [34] gtable_0.3.0         glue_1.4.2           RANN_2.6.1          
##  [37] reshape2_1.4.4       dplyr_1.0.4          Rcpp_1.0.6          
##  [40] spatstat_1.64-1      scattermore_0.7      jquerylib_0.1.3     
##  [43] vctrs_0.3.6          nlme_3.1-152         lmtest_0.9-38       
##  [46] xfun_0.21            stringr_1.4.0        globals_0.14.0      
##  [49] mime_0.10            miniUI_0.1.1.1       lifecycle_1.0.0     
##  [52] irlba_2.3.3          goftest_1.2-2        future_1.21.0       
##  [55] MASS_7.3-53.1        zoo_1.8-8            scales_1.1.1        
##  [58] promises_1.2.0.1     spatstat.utils_2.0-0 parallel_4.0.4      
##  [61] RColorBrewer_1.1-2   yaml_2.2.1           reticulate_1.18     
##  [64] pbapply_1.4-3        gridExtra_2.3        sass_0.3.1          
##  [67] rpart_4.1-15         stringi_1.5.3        highr_0.8           
##  [70] rlang_0.4.10         pkgconfig_2.0.3      matrixStats_0.58.0  
##  [73] evaluate_0.14        lattice_0.20-41      ROCR_1.0-11         
##  [76] purrr_0.3.4          tensor_1.5           htmlwidgets_1.5.3   
##  [79] labeling_0.4.2       cowplot_1.1.1        tidyselect_1.1.0    
##  [82] parallelly_1.23.0    RcppAnnoy_0.0.18     plyr_1.8.6          
##  [85] magrittr_2.0.1       R6_2.5.0             generics_0.1.0      
##  [88] DBI_1.1.1            pillar_1.5.0         withr_2.4.1         
##  [91] mgcv_1.8-34          fitdistrplus_1.1-3   survival_3.2-7      
##  [94] abind_1.4-5          tibble_3.1.0         future.apply_1.7.0  
##  [97] crayon_1.4.1         KernSmooth_2.23-18   utf8_1.1.4          
## [100] plotly_4.9.3         rmarkdown_2.7        grid_4.0.4          
## [103] data.table_1.14.0    digest_0.6.27        xtable_1.8-4        
## [106] tidyr_1.1.2          httpuv_1.5.5         munsell_0.5.0       
## [109] viridisLite_0.3.0    bslib_0.2.4

Other clustering

NadineBestard

12/11/2020