Introduction

The purpose of this script is to annotate single-cell RNA-seq clusters following filtering, normalization, and clustering of the data with Seurat. This script has been adapted from the Seurat documentation (https://satijalab.org/seurat/articles/pbmc3k_tutorial#assigning-cell-type-identity-to-clusters).

Software

library(Seurat)
library(tidyverse)
library(patchwork)
library(pheatmap)
library(RColorBrewer)
library(SingleR)
library(celldex) # To install: BiocManager::install("celldex")
library(data.table)
library(knitr)
library(clusterProfiler)
library(presto)

Data

Input: Pre-processed Seurat object.

# Work from the cluster-annotation directory so the relative path below
# resolves against it.
# NOTE(review): setwd() ties the script to one machine's directory layout --
# confirm this is intended before sharing or re-running elsewhere.
setwd("/scratch/alpine/edlarsen@colostate.edu/project_scrna_01/240828_scRNAseq/Cluster_Annotation")

# Load the pre-processed Seurat object. The file carries an .RData extension
# but is read with readRDS(), so it is expected to hold a single serialized
# object rather than a saved workspace.
thym_seurat <- readRDS("../Normalization_and_Clustering/THYM_NormalizedAndClustered.RData")

Differentially Expressed Features

By default, FindAllMarkers() identifies all positive and negative markers of each cluster compared to all other cells. Here, only.pos = TRUE restricts the results to positive markers.

# Cluster the cells at resolution 0.2 and store the result back on the object.
thym_seurat <- FindClusters(object = thym_seurat, resolution = 0.2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 22654
## Number of edges: 677273
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9488
## Number of communities: 11
## Elapsed time: 4 seconds
# Find markers for every cluster versus all remaining cells, keeping only
# positive markers (only.pos = TRUE).
# The RunUMAP(dims = 1:10) call that used to precede this line was removed:
# marker detection does not use the embedding, and that embedding was
# overwritten by the tuned RunUMAP() call further down before ever being used.
thym.markers <- FindAllMarkers(thym_seurat, only.pos = TRUE)

# Export
#write.csv(thym.markers, file="THYMUS_DEfeaturesByUMAPcluster.csv")

# UMAP embedding on dimensions 1-10, with a larger neighbourhood and a larger
# minimum distance than the Seurat defaults.
thym_seurat <- RunUMAP(
  object = thym_seurat,
  dims = 1:10,
  n.neighbors = 50, # default is 30
  min.dist = 0.5    # default is 0.3
)

# Plot the labelled clusters on the UMAP embedding. The title records the
# clustering resolution and UMAP parameters used above; keep it in sync by
# hand if those values change.
DimPlot(
  object = thym_seurat,
  reduction = "umap",
  label = TRUE,
  label.size = 6
) +
  plot_annotation(
    title = "Canine Thymus, Resolution: 0.2, \nn.neighbors = 50, min.dist = 0.5",
    theme = theme(plot.title = element_text(hjust = 0.5, size = 20))
  )

Top cluster markers

# Select up to five top markers per cluster for the dot plot.
#
# 1. Drop markers whose row name contains "ENSCAFG" (presumably unannotated
#    Ensembl gene IDs without a gene symbol -- confirm).
# 2. Sort the remaining markers by ascending p-value.
# 3. Keep the first five rows of each cluster. slice_head() replaces the
#    superseded do(head(., n = 5)) idiom and yields the same rows.
# 4. Drop repeated genes so each gene appears once, attributed to the first
#    cluster (in cluster order) that lists it.
key.genes <- thym.markers[!grepl("ENSCAFG", row.names(thym.markers)), ]
key.genes.sortedByPval <- key.genes[order(key.genes$p_val), ]
features <- key.genes.sortedByPval %>%
  group_by(cluster) %>%
  slice_head(n = 5) %>%
  ungroup()
features <- as.data.frame(features[!duplicated(features$gene), ]) # prioritize unique clusters

# Dot plot of the selected top markers per cluster, taken from the RNA assay.
# The gene order is reversed for display, and an extra point layer draws a
# thin black outline around every dot.
DotPlot(
  object = thym_seurat,
  assay = "RNA",
  features = rev(features$gene),
  scale = TRUE
) +
  geom_point(
    aes(size = pct.exp),
    shape = 21,
    colour = "black",
    stroke = 0.5
  ) +
  labs(y = "Cluster") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Feature plots

Notes:

* CD34 expression is expected only in the earliest stages of thymocyte development.
* ALPL and KIT are markers of hematopoietic stem cells.
* In humans, CD44 is expressed on DN1 and DN2 thymocytes, then downregulated in DN3 thymocytes before being expressed again in SP thymocytes.
* CD25 (IL2RA) is expressed in DN2 and DN3 thymocytes, but not in DN1 or DN4 thymocytes.
* CCR9 is expressed by most DP thymocytes but not by DN thymocytes, and it is subsequently downregulated during the transition from the DP stage to the SP stages of thymocyte development.
* In cynomolgus monkeys, CD1 is expressed in DN and DP thymocytes, then downregulated as cells progress to the SP stages.

Markers for DN T cells, naive CD4+ and CD8+ T cells, and CD34+ unclassified cells were derived from a single-cell atlas of circulating canine leukocytes (https://github.com/dyammons/Canine_Leukocyte_scRNA).

# Precursor and early-thymocyte markers on the UMAP embedding.
# "RdPu" is a sequential ColorBrewer palette with at most 9 colours; asking
# for 11 only raised a warning and still returned 9, so request 9 directly.
FeaturePlot(
  thym_seurat,
  reduction = "umap",
  features = c(
    "CD34", "KIT", "ALPL", "CD44", "NOTCH1", "IL2RA",
    "CCR9", "DNTT", "RAG1", "RAG2", "CD1C"
  )
) +
  plot_annotation(
    title = "Canine Thymus Precursor & Early Thymocyte Feature Expression",
    theme = theme(plot.title = element_text(hjust = 0.5, size = 20))
  ) &
  scale_color_gradientn(colours = brewer.pal(n = 9, name = "RdPu"))

# Mature thymocyte and T-cell markers on the UMAP embedding.
# "RdPu" is a sequential ColorBrewer palette with at most 9 colours; asking
# for 11 only raised a warning and still returned 9, so request 9 directly.
FeaturePlot(
  thym_seurat,
  reduction = "umap",
  features = c(
    "CD3E", "CD5", "CD4", "CD8A", "FOXP3", "IL2RA",
    "IL7R", "GATA3", "TBX21", "RORC", "CCR5", "CD7"
  )
) +
  plot_annotation(
    title = "Canine Thymus Mature Thymocyte and T-cell Feature Expression",
    theme = theme(plot.title = element_text(hjust = 0.5, size = 20))
  ) &
  scale_color_gradientn(colours = brewer.pal(n = 9, name = "RdPu"))

# Markers for CD34+ unclassified cells (taken from the canine circulating
# leukocyte atlas, per the plot title) on the UMAP embedding.
# NOTE(review): "CLEC3B" was listed twice in the original feature vector; the
# repeat has been dropped. Confirm that no other gene was intended there.
# "RdPu" is a sequential ColorBrewer palette with at most 9 colours; asking
# for 11 only raised a warning and still returned 9, so request 9 directly.
FeaturePlot(
  thym_seurat,
  reduction = "umap",
  features = c(
    "TFPI", "ZNF521", "CD34", "NDST3", "GUCY1A1",
    "HPGD", "CLEC3B", "KIT", "CD109", "DNTT"
  )
) +
  plot_annotation(
    title = "Canine Thymus: Expression of Markers for CD34+ Unclassified Cells from Canine Circulating Leukocyte Atlas",
    theme = theme(plot.title = element_text(hjust = 0.5, size = 20))
  ) &
  scale_color_gradientn(colours = brewer.pal(n = 9, name = "RdPu"))

# Markers for DN T cells (taken from the canine circulating leukocyte atlas,
# per the plot title) on the UMAP embedding.
# "RdPu" is a sequential ColorBrewer palette with at most 9 colours; asking
# for 11 only raised a warning and still returned 9, so request 9 directly.
FeaturePlot(
  thym_seurat,
  reduction = "umap",
  features = c(
    "KIAA0825", "TMEM132D", "KANK1", "NMB", "CTLA4",
    "SYNJ2", "BICDL1", "SLF1", "ID3", "KIAA1549"
  )
) +
  plot_annotation(
    title = "Canine Thymus: Expression of Markers for DN T-cells from Canine Circulating Leukocyte Atlas",
    theme = theme(plot.title = element_text(hjust = 0.5, size = 20))
  ) &
  scale_color_gradientn(colours = brewer.pal(n = 9, name = "RdPu"))

# Markers for naive CD8 T cells (taken from the canine circulating leukocyte
# atlas, per the plot title) on the UMAP embedding.
# "RdPu" is a sequential ColorBrewer palette with at most 9 colours; asking
# for 11 only raised a warning and still returned 9, so request 9 directly.
FeaturePlot(
  thym_seurat,
  reduction = "umap",
  features = c("CD8A", "ITGA1", "PTGDR", "IL2RB", "ADGRG1", "NBEA")
) +
  plot_annotation(
    title = "Canine Thymus: Expression of Markers for Naive CD8 T-cells \nfrom Canine Circulating Leukocyte Atlas",
    theme = theme(plot.title = element_text(hjust = 0.5, size = 20))
  ) &
  scale_color_gradientn(colours = brewer.pal(n = 9, name = "RdPu"))

# Markers for naive CD4 T cells (taken from the canine circulating leukocyte
# atlas, per the plot title) on the UMAP embedding.
# "RdPu" is a sequential ColorBrewer palette with at most 9 colours; asking
# for 11 only raised a warning and still returned 9, so request 9 directly.
FeaturePlot(
  thym_seurat,
  reduction = "umap",
  features = c(
    "CD4", "LEF1", "CSTA", "RGS10", "ZNF536",
    "CCR7", "COL6A5", "LTB", "TNFSF8"
  )
) +
  plot_annotation(
    title = "Canine Thymus: Expression of Markers for Naive CD4 T-cells \nfrom Canine Circulating Leukocyte Atlas",
    theme = theme(plot.title = element_text(hjust = 0.5, size = 20))
  ) &
  scale_color_gradientn(colours = brewer.pal(n = 9, name = "RdPu"))

# Features plotted under the "Non-T-Cell" title on the UMAP embedding.
# "RdPu" is a sequential ColorBrewer palette with at most 9 colours; asking
# for 11 only raised a warning and still returned 9, so request 9 directly.
FeaturePlot(
  thym_seurat,
  reduction = "umap",
  features = c("PAX5", "LYZ", "MMP8", "S100A12")
) +
  plot_annotation(
    title = "Canine Thymus Non-T-Cell Feature Expression",
    theme = theme(plot.title = element_text(hjust = 0.5, size = 20))
  ) &
  scale_color_gradientn(colours = brewer.pal(n = 9, name = "RdPu"))

Citations

# Record the R version, platform, locale, and attached/loaded package versions
# used for this run, for reproducibility.
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Etc/UTC
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] presto_1.0.0                Rcpp_1.0.13-1              
##  [3] clusterProfiler_4.14.4      knitr_1.49                 
##  [5] data.table_1.16.4           celldex_1.16.0             
##  [7] SingleR_2.8.0               SummarizedExperiment_1.36.0
##  [9] Biobase_2.66.0              GenomicRanges_1.58.0       
## [11] GenomeInfoDb_1.42.1         IRanges_2.40.1             
## [13] S4Vectors_0.44.0            BiocGenerics_0.52.0        
## [15] MatrixGenerics_1.18.1       matrixStats_1.5.0          
## [17] RColorBrewer_1.1-3          pheatmap_1.0.12            
## [19] patchwork_1.3.0             lubridate_1.9.4            
## [21] forcats_1.0.0               stringr_1.5.1              
## [23] dplyr_1.1.4                 purrr_1.0.2                
## [25] readr_2.1.5                 tidyr_1.3.1                
## [27] tibble_3.2.1                ggplot2_3.5.1              
## [29] tidyverse_2.0.0             Seurat_5.2.0               
## [31] SeuratObject_5.0.2          sp_2.1-4                   
## 
## loaded via a namespace (and not attached):
##   [1] fs_1.6.5                  spatstat.sparse_3.1-0    
##   [3] enrichplot_1.26.6         httr_1.4.7               
##   [5] tools_4.4.1               sctransform_0.4.1        
##   [7] alabaster.base_1.6.1      R6_2.5.1                 
##   [9] HDF5Array_1.34.0          lazyeval_0.2.2           
##  [11] uwot_0.2.2                rhdf5filters_1.18.0      
##  [13] withr_3.0.2               gridExtra_2.3            
##  [15] progressr_0.15.1          cli_3.6.3                
##  [17] spatstat.explore_3.3-3    fastDummies_1.7.5        
##  [19] labeling_0.4.3            alabaster.se_1.6.0       
##  [21] sass_0.4.9                spatstat.data_3.1-4      
##  [23] ggridges_0.5.6            pbapply_1.7-2            
##  [25] yulab.utils_0.1.9         gson_0.1.0               
##  [27] DOSE_4.0.0                R.utils_2.12.3           
##  [29] parallelly_1.40.1         limma_3.62.2             
##  [31] rstudioapi_0.17.1         RSQLite_2.3.9            
##  [33] gridGraphics_0.5-1        generics_0.1.3           
##  [35] ica_1.0-3                 spatstat.random_3.3-2    
##  [37] GO.db_3.20.0              Matrix_1.7-0             
##  [39] abind_1.4-8               R.methodsS3_1.8.2        
##  [41] lifecycle_1.0.4           yaml_2.3.10              
##  [43] qvalue_2.38.0             rhdf5_2.50.2             
##  [45] SparseArray_1.6.1         BiocFileCache_2.14.0     
##  [47] Rtsne_0.17                grid_4.4.1               
##  [49] blob_1.2.4                promises_1.3.2           
##  [51] ExperimentHub_2.14.0      crayon_1.5.3             
##  [53] ggtangle_0.0.6            miniUI_0.1.1.1           
##  [55] lattice_0.22-6            beachmat_2.22.0          
##  [57] cowplot_1.1.3             KEGGREST_1.46.0          
##  [59] pillar_1.10.1             fgsea_1.32.2             
##  [61] future.apply_1.11.3       codetools_0.2-20         
##  [63] fastmatch_1.1-6           glue_1.8.0               
##  [65] ggfun_0.1.8               spatstat.univar_3.1-1    
##  [67] treeio_1.30.0             vctrs_0.6.5              
##  [69] png_0.1-8                 gypsum_1.2.0             
##  [71] spam_2.11-1               gtable_0.3.6             
##  [73] cachem_1.1.0              xfun_0.49                
##  [75] S4Arrays_1.6.0            mime_0.12                
##  [77] survival_3.6-4            statmod_1.5.0            
##  [79] fitdistrplus_1.2-2        ROCR_1.0-11              
##  [81] nlme_3.1-164              ggtree_3.14.0            
##  [83] bit64_4.6.0-1             alabaster.ranges_1.6.0   
##  [85] filelock_1.0.3            RcppAnnoy_0.0.22         
##  [87] bslib_0.8.0               irlba_2.3.5.1            
##  [89] KernSmooth_2.23-24        colorspace_2.1-1         
##  [91] DBI_1.2.3                 tidyselect_1.2.1         
##  [93] bit_4.5.0.1               compiler_4.4.1           
##  [95] curl_6.1.0                httr2_1.1.0              
##  [97] BiocNeighbors_2.0.1       DelayedArray_0.32.0      
##  [99] plotly_4.10.4             scales_1.3.0             
## [101] lmtest_0.9-40             rappdirs_0.3.3           
## [103] digest_0.6.37             goftest_1.2-3            
## [105] spatstat.utils_3.1-1      alabaster.matrix_1.6.1   
## [107] rmarkdown_2.29            XVector_0.46.0           
## [109] htmltools_0.5.8.1         pkgconfig_2.0.3          
## [111] sparseMatrixStats_1.18.0  dbplyr_2.5.0             
## [113] fastmap_1.2.0             rlang_1.1.5              
## [115] htmlwidgets_1.6.4         UCSC.utils_1.2.0         
## [117] shiny_1.10.0              DelayedMatrixStats_1.28.1
## [119] farver_2.1.2              jquerylib_0.1.4          
## [121] zoo_1.8-12                jsonlite_1.8.9           
## [123] BiocParallel_1.40.0       GOSemSim_2.32.0          
## [125] R.oo_1.27.0               BiocSingular_1.22.0      
## [127] magrittr_2.0.3            ggplotify_0.1.2          
## [129] GenomeInfoDbData_1.2.13   dotCall64_1.2            
## [131] Rhdf5lib_1.28.0           munsell_0.5.1            
## [133] ape_5.8-1                 reticulate_1.40.0        
## [135] stringi_1.8.4             alabaster.schemas_1.6.0  
## [137] zlibbioc_1.52.0           MASS_7.3-60.2            
## [139] AnnotationHub_3.14.0      plyr_1.8.9               
## [141] parallel_4.4.1            listenv_0.9.1            
## [143] ggrepel_0.9.6             deldir_2.0-4             
## [145] Biostrings_2.74.1         splines_4.4.1            
## [147] tensor_1.5                hms_1.1.3                
## [149] igraph_2.1.2              spatstat.geom_3.3-4      
## [151] RcppHNSW_0.6.0            reshape2_1.4.4           
## [153] ScaledMatrix_1.14.0       BiocVersion_3.20.0       
## [155] evaluate_1.0.3            BiocManager_1.30.25      
## [157] tzdb_0.4.0                httpuv_1.6.15            
## [159] RANN_2.6.2                polyclip_1.10-7          
## [161] future_1.34.0             scattermore_1.2          
## [163] rsvd_1.0.5                xtable_1.8-4             
## [165] tidytree_0.4.6            RSpectra_0.16-2          
## [167] later_1.4.1               viridisLite_0.4.2        
## [169] aplot_0.2.4               memoise_2.0.1            
## [171] AnnotationDbi_1.68.0      cluster_2.1.6            
## [173] timechange_0.3.0          globals_0.16.3
# Print the recommended citation for R itself (package citations are available
# via citation("pkgname")).
citation()
## To cite R in publications use:
## 
##   R Core Team (2024). _R: A Language and Environment for Statistical
##   Computing_. R Foundation for Statistical Computing, Vienna, Austria.
##   <https://www.R-project.org/>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {R: A Language and Environment for Statistical Computing},
##     author = {{R Core Team}},
##     organization = {R Foundation for Statistical Computing},
##     address = {Vienna, Austria},
##     year = {2024},
##     url = {https://www.R-project.org/},
##   }
## 
## We have invested a lot of time and effort in creating R, please cite it
## when using it for data analysis. See also 'citation("pkgname")' for
## citing R packages.