Introduction

The purpose of this script is to annotate single-cell RNA-seq clusters following filtering, normalization, and clustering of the data with Seurat. This script has been adapted from the Seurat (https://satijalab.org/seurat/) documentation.

Software

library(Seurat)
library(tidyverse)
library(patchwork)
library(pheatmap)
library(RColorBrewer)
library(data.table)
library(knitr)
library(presto)

Data

Input: Pre-processed Seurat object.

setwd("/scratch/alpine/edlarsen@colostate.edu/project_scrna_01/240828_scRNAseq/Cluster_Annotation")
ln_seurat <- readRDS(file = "../Normalization_and_Clustering/LN_NormalizedAndClustered.RData")

# differentially expressed features
## identifies all positive and negative markers of a single cluster compared to all other cells.
ln_seurat <- FindClusters(ln_seurat, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 29438
## Number of edges: 895159
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9696
## Number of communities: 9
## Elapsed time: 7 seconds
ln_seurat <- RunUMAP(ln_seurat, dims = 1:10)
ln.markers <- FindAllMarkers(ln_seurat, only.pos=TRUE)

# umap
DimPlot(ln_seurat,
        reduction = "umap",
                   label = TRUE,
                   label.size = 6) + 
  plot_annotation(title = "Canine Lymph Node, Resolution: 0.1", theme = theme(plot.title = element_text(hjust = 0.5, size = 20)))

Cell-Cycle Scoring

See if cluster differences are associated with differences in cell proliferation.

Assign Cell-Cycle Scores

Each cell will be assigned a score based on its expression of G2/M and S phase markers. The CellCycleStoring() function stores S and G2/M scores in object metadata, along with the predicted classification of each cell in either G2/M, S, or G1 phase.

# segregate G2/M phase and S phase markers from Seurat's built-in list of cell cycle markers
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes

# assign each cell a score based on expression of G2/M and S phase markers
ln_seurat_cycle <- CellCycleScoring(ln_seurat,
                                s.features = s.genes, 
                                g2m.features = g2m.genes, 
                                set.ident = TRUE)

Visualize cycling cells on UMAP

ln_seurat_cycle <- RunPCA(ln_seurat_cycle, features = c(s.genes, g2m.genes))
DimPlot(ln_seurat_cycle,
        group.by = "Phase",
        reduction = "umap",
        pt.size=0.5) + 
  ggtitle("Cell Cycle Scoring: Canine ln")

Module Scoring for Conde et al. human immune cell atlas

Module scoring the average expression levels of each cluster on a single-cell level, subtracted by the aggregated expression of control feature sets. All analyzed features are binned based on averaged expression, and the control features are randomly selected from each bin. Source: https://cells.ucsc.edu/?ds=pan-immune+global. The top 51 markers were exported from UCSC Cell Browser for each annotated cell type.

# make gene lists
human_ln_atlas_top51 <- read.csv("CondeEtAl_HumanImmuneCellAtlas_Global.csv")
colnames(human_ln_atlas_top51)
##  [1] "Trm_gut_CD8"                  "TnaiveCM_CD4"                
##  [3] "Tregs"                        "Pro_B"                       
##  [5] "Naive_B"                      "Pre_B"                       
##  [7] "Plasma_Cell"                  "Plasmablasts"                
##  [9] "Memory_B"                     "Erythroid"                   
## [11] "Plasmacytoid_DC"              "Megakaryocytes"              
## [13] "Mast_cells"                   "ILC3"                        
## [15] "Cycling_T_NK"                 "DC_1"                        
## [17] "DC_2"                         "Classical_Monocyte"          
## [19] "Cycling"                      "Progenitor"                  
## [21] "CD8_TEMRA"                    "Migratory_DC"                
## [23] "Trm_Tgd"                      "NK_CD56_Bright_CD16_Negative"
## [25] "NK_CD16_Positive"             "Tgd_CRTAM_Positive"          
## [27] "Tmem_CD8"                     "MAIT"                        
## [29] "T_naive_CM_CD4_activated"     "Trm_Th1_Th17"                
## [31] "T_CD4_CD8"                    "T_naive_CM_CD8"              
## [33] "TEM_CD4"                      "Tfh"
plot_list <- list() # initiate empty list for individual plots

for (column in colnames(human_ln_atlas_top51)){
  genes <- list(human_ln_atlas_top51[[column]])
  name <- paste(column, "Score", sep="_")
  ln_seurat <- AddModuleScore(ln_seurat, features = genes, name = name)
  newname <- paste(name, "1", sep="")
  modscoreplot <- FeaturePlot(ln_seurat, features = newname) + scale_color_gradientn(colours = brewer.pal(name = "RdPu", n=11))
  plot_list[[name]] <- modscoreplot # add plot to list
}

# split plot_list into groups of 6 and handle any remaining plots
plot_groups <- split(plot_list, ceiling(seq_along(plot_list) / 6))

# combined plot with dynamic number of columns/rows
combine_plots <- function(group, ncol = 3){
  n_plots <- length(group)
  nrow <- ceiling(n_plots / ncol)
  wrap_plots(group, ncol = ncol, nrow = nrow)
}
combined_plots <- lapply(plot_groups, function(group) {
  combine_plots(group, ncol = 3)
})

# display each combined plot
for (i in seq_along(combined_plots)) {
  print(combined_plots[[i]])
}

Module scoring for Ammons canine circulating leukocyte atlas

Source: https://cells.ucsc.edu/?ds=canine-leukocyte-atlas+healthy

# make gene lists
ammonsk9_ln_atlas_top51 <- read.csv("Ammons_CanineCirculatingLeukocyteAtlas.csv")
colnames(ammonsk9_ln_atlas_top51)
##  [1] "Immature_B_cell"        "Naive_B_cell"           "CD8_gd_T_cell"         
##  [4] "Cycling_T_cell"         "CD34_Unclassified"      "Unclassified_DC"       
##  [7] "Pre_DC"                 "PMN_MDSC"               "Plasmacytoid_DC"       
## [10] "Plasma_cell"            "NKT_cell"               "DN_T_cell"             
## [13] "CD8_Naive"              "CD4_Naive"              "CD4_TCM"               
## [16] "CD4_Treg"               "CD4_IFN_signature"      "CD4_TEM"               
## [19] "CD4_TEM_Th1_like"       "CD4_TEM_Th17_like"      "CD4_TEM_Th2_like"      
## [22] "CD8_Memory"             "CD8_Effector"           "NK_cell"               
## [25] "gd_T_cell"              "Myeloid_c_DC2"          "Monocyte_CD4_neg"      
## [28] "Myeloid_c_DC2.1"        "Monocyte_IFN_signature" "Monocyte_CD4_pos"      
## [31] "M_MDSC"                 "Basophil"               "Neutrophil"            
## [34] "Eosinophil"             "Class_switched_B_cell"  "Activated_B_cell"
plot_list <- list() # initiate empty list for individual plots

for (column in colnames(ammonsk9_ln_atlas_top51)){
  genes <- list(ammonsk9_ln_atlas_top51[[column]])
  name <- paste(column, "Score", sep="_")
  ln_seurat <- AddModuleScore(ln_seurat, features = genes, name = name)
  newname <- paste(name, "1", sep="")
  modscoreplot <- FeaturePlot(ln_seurat, features = newname) + scale_color_gradientn(colours = brewer.pal(name = "RdPu", n=11))
  plot_list[[name]] <- modscoreplot # add plot to list
}

# split plot_list into groups of 6 and handle any remaining plots
plot_groups <- split(plot_list, ceiling(seq_along(plot_list) / 6))

# combined plot with dynamic number of columns/rows
combine_plots <- function(group, ncol = 3){
  n_plots <- length(group)
  nrow <- ceiling(n_plots / ncol)
  wrap_plots(group, ncol = ncol, nrow = nrow)
}
combined_plots <- lapply(plot_groups, function(group) {
  combine_plots(group, ncol = 3)
})

# display each combined plot
for (i in seq_along(combined_plots)) {
  print(combined_plots[[i]])
}

Assigning cell type identity to clusters

To be done once confident cluster assignment has been achieved.

new.ln.cluster.ids <- c("LN0_Tcell", "LN1_BCell", "LN2_Tcell", "LN3_Tcell", "LN4_CD8_NK", "LN5_BCell", "LN6_Cycling", "LN7_Myeloid", "LN8_PlasmaCell")
names(new.ln.cluster.ids) <- levels(ln_seurat)
ln_seurat <- RenameIdents(ln_seurat, new.ln.cluster.ids)
DimPlot(ln_seurat, reduction = "umap", label = TRUE, label.size = 3, label.box = TRUE, pt.size = 0.5) + ggtitle("Canine Lymph Node, Resolution: 0.1")

saveRDS(ln_seurat, file="LN_Annotated.RData")

Citations

sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Etc/UTC
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] presto_1.0.0       Rcpp_1.0.13-1      knitr_1.49         data.table_1.16.4 
##  [5] RColorBrewer_1.1-3 pheatmap_1.0.12    patchwork_1.3.0    lubridate_1.9.4   
##  [9] forcats_1.0.0      stringr_1.5.1      dplyr_1.1.4        purrr_1.0.2       
## [13] readr_2.1.5        tidyr_1.3.1        tibble_3.2.1       ggplot2_3.5.1     
## [17] tidyverse_2.0.0    Seurat_5.2.0       SeuratObject_5.0.2 sp_2.1-4          
## 
## loaded via a namespace (and not attached):
##   [1] rstudioapi_0.17.1      jsonlite_1.8.9         magrittr_2.0.3        
##   [4] spatstat.utils_3.1-1   farver_2.1.2           rmarkdown_2.29        
##   [7] vctrs_0.6.5            ROCR_1.0-11            spatstat.explore_3.3-3
##  [10] htmltools_0.5.8.1      sass_0.4.9             sctransform_0.4.1     
##  [13] parallelly_1.40.1      KernSmooth_2.23-24     bslib_0.8.0           
##  [16] htmlwidgets_1.6.4      ica_1.0-3              plyr_1.8.9            
##  [19] plotly_4.10.4          zoo_1.8-12             cachem_1.1.0          
##  [22] igraph_2.1.2           mime_0.12              lifecycle_1.0.4       
##  [25] pkgconfig_2.0.3        Matrix_1.7-0           R6_2.5.1              
##  [28] fastmap_1.2.0          fitdistrplus_1.2-2     future_1.34.0         
##  [31] shiny_1.10.0           digest_0.6.37          colorspace_2.1-1      
##  [34] tensor_1.5             RSpectra_0.16-2        irlba_2.3.5.1         
##  [37] labeling_0.4.3         progressr_0.15.1       spatstat.sparse_3.1-0 
##  [40] timechange_0.3.0       httr_1.4.7             polyclip_1.10-7       
##  [43] abind_1.4-8            compiler_4.4.1         withr_3.0.2           
##  [46] fastDummies_1.7.5      MASS_7.3-60.2          tools_4.4.1           
##  [49] lmtest_0.9-40          httpuv_1.6.15          future.apply_1.11.3   
##  [52] goftest_1.2-3          glue_1.8.0             nlme_3.1-164          
##  [55] promises_1.3.2         grid_4.4.1             Rtsne_0.17            
##  [58] cluster_2.1.6          reshape2_1.4.4         generics_0.1.3        
##  [61] gtable_0.3.6           spatstat.data_3.1-4    tzdb_0.4.0            
##  [64] hms_1.1.3              spatstat.geom_3.3-4    RcppAnnoy_0.0.22      
##  [67] ggrepel_0.9.6          RANN_2.6.2             pillar_1.10.1         
##  [70] spam_2.11-1            RcppHNSW_0.6.0         limma_3.62.2          
##  [73] later_1.4.1            splines_4.4.1          lattice_0.22-6        
##  [76] survival_3.6-4         deldir_2.0-4           tidyselect_1.2.1      
##  [79] miniUI_0.1.1.1         pbapply_1.7-2          gridExtra_2.3         
##  [82] scattermore_1.2        xfun_0.49              statmod_1.5.0         
##  [85] matrixStats_1.5.0      stringi_1.8.4          lazyeval_0.2.2        
##  [88] yaml_2.3.10            evaluate_1.0.3         codetools_0.2-20      
##  [91] cli_3.6.3              uwot_0.2.2             xtable_1.8-4          
##  [94] reticulate_1.40.0      munsell_0.5.1          jquerylib_0.1.4       
##  [97] globals_0.16.3         spatstat.random_3.3-2  png_0.1-8             
## [100] spatstat.univar_3.1-1  parallel_4.4.1         dotCall64_1.2         
## [103] listenv_0.9.1          viridisLite_0.4.2      scales_1.3.0          
## [106] ggridges_0.5.6         rlang_1.1.5            cowplot_1.1.3
citation()
## To cite R in publications use:
## 
##   R Core Team (2024). _R: A Language and Environment for Statistical
##   Computing_. R Foundation for Statistical Computing, Vienna, Austria.
##   <https://www.R-project.org/>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {R: A Language and Environment for Statistical Computing},
##     author = {{R Core Team}},
##     organization = {R Foundation for Statistical Computing},
##     address = {Vienna, Austria},
##     year = {2024},
##     url = {https://www.R-project.org/},
##   }
## 
## We have invested a lot of time and effort in creating R, please cite it
## when using it for data analysis. See also 'citation("pkgname")' for
## citing R packages.