Introduction

The purpose of this script is to annotate single-cell RNA-seq clusters following filtering, normalization, and clustering of the data with Seurat. This script has been adapted from the Seurat (https://satijalab.org/seurat/) documentation.

Software

library(Seurat)
library(tidyverse)
library(patchwork)
library(pheatmap)
library(RColorBrewer)
library(data.table)
library(knitr)
library(clusterProfiler)
library(presto)

Data

Input: Pre-processed Seurat object.

setwd("/scratch/alpine/edlarsen@colostate.edu/project_scrna_01/240828_scRNAseq/Cluster_Annotation")
ln_seurat <- readRDS(file = "../Normalization_and_Clustering/LN_NormalizedAndClustered.RData")

Differentially Expressed Features

By default, identifies all positive and negative markers of a single cluster compared to all other cells.

ln_seurat <- FindClusters(ln_seurat, resolution = 0.1)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 29438
## Number of edges: 895159
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9696
## Number of communities: 9
## Elapsed time: 6 seconds

ln_seurat <- RunUMAP(ln_seurat, dims = 1:10)
ln.markers <- FindAllMarkers(ln_seurat, only.pos=TRUE)

# export
write.csv(ln.markers, file="LN_DEfeaturesByUMAPcluster_res0.1.csv")

# umap
DimPlot(ln_seurat,
        reduction = "umap",
                   label = TRUE,
                   label.size = 6) + 
  plot_annotation(title = "Canine Lymph Node, Resolution: 0.1", theme = theme(plot.title = element_text(hjust = 0.5, size = 20)))

Top cluster markers

key.genes <- ln.markers[!grepl("ENSCAFG", row.names(ln.markers)),]
key.genes.sortedByPval <- key.genes[order(key.genes$p_val),]
features <- key.genes.sortedByPval %>% group_by(cluster) %>% do(head(., n=5))
features <- as.data.frame(features[!duplicated(features$gene),]) # prioritize unique clusters

DotPlot(ln_seurat,
        assay = "RNA",
        features = rev(features$gene),
        scale = TRUE) + 
  geom_point(aes(size=pct.exp), shape = 21, colour="black", stroke=0.5) +
  labs(y = "Cluster") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Feature plots

Some of these markers were derived from a single-cell atlas of circulating canine leukocytes (https://github.com/dyammons/Canine_Leukocyte_scRNA).

FeaturePlot(ln_seurat, 
            reduction = "umap",
            ncol = 3,
            features = c("CD3E", "CD5", "CD4", "FOXP3", "IL2RA", "GATA3", "TBX21", "RORC")) +
  plot_annotation(title = "Canine Lymph Node: Expression of Canonical CD4 T-cell Markers", theme = theme(plot.title = element_text(hjust = 0.5, size = 20))) &
  scale_color_gradientn(colours = brewer.pal(name = "RdPu", n=11))

FeaturePlot(ln_seurat, 
            reduction = "umap",
            features = c("GNLY", "NKG7", "NCAM1", "KLRK1", "CD8A", "CD3E", "CD5"),
            ncol = 3) +
  plot_annotation(title = "Canine Lymph Node: Expression of Canonical CD8 & NK Cell Markers", theme = theme(plot.title = element_text(hjust = 0.5, size = 20))) &
  scale_color_gradientn(colours = brewer.pal(name = "RdPu", n=11))

FeaturePlot(ln_seurat, 
            reduction = "umap",
            features = c("CCR7", "SELL", "HOPX"),
            ncol = 3) +
  plot_annotation(title = "Canine Lymph Node: Expression of Naive T-cell Markers", theme = theme(plot.title = element_text(hjust = 0.5, size = 20))) &
  scale_color_gradientn(colours = brewer.pal(name = "RdPu", n=11))

FeaturePlot(ln_seurat, 
            reduction = "umap",
            features = c("ICOS", "IL2RA", "LTB", "CTLA4", "LGALS3", "MAF"),
            ncol = 3) +
  plot_annotation(title = "Canine Lymph Node: Expression of Activated T-cell Markers", theme = theme(plot.title = element_text(hjust = 0.5, size = 20))) &
  scale_color_gradientn(colours = brewer.pal(name = "RdPu", n=11))

FeaturePlot(ln_seurat, 
            reduction = "umap",
            features = c("MS4A1", "CD19", "PAX5", "IRF4")) +
  plot_annotation(title = "Canine Lymph Node: Expression of Canonical B-cell Markers", theme = theme(plot.title = element_text(hjust = 0.5, size = 20))) &
  scale_color_gradientn(colours = brewer.pal(name = "RdPu", n=11))

FeaturePlot(ln_seurat, 
            reduction = "umap",
            ncol = 3,
            features = c("LYZ", "FCER1A", "ITGAM", "CD163", "CD68")) +
  plot_annotation(title = "Canine Lymph Node: Expression of Canonical Monocyte/Macrophage/DC Markers", theme = theme(plot.title = element_text(hjust = 0.5, size = 20))) &
  scale_color_gradientn(colours = brewer.pal(name = "RdPu", n=11))

FeaturePlot(ln_seurat, 
            reduction = "umap",
            features = c("FCER1A", "MMP9", "CCR5", "CXCR2", "EPX", "MMP8", "S100A12", "SERPINA1"),
            ncol = 3) +
  plot_annotation(title = "Canine Lymph Node: Expression of Canonical Granulocyte Markers", theme = theme(plot.title = element_text(hjust = 0.5, size = 20))) &
  scale_color_gradientn(colours = brewer.pal(name = "RdPu", n=11))

Note: Granulocytes are difficult to capture with single-cell RNA-seq experiments given their relatively low RNA content and relatively high levels of RNAses and other inhibitory compounds, resulting in fewer detectable transcripts.

Citations

sessionInfo()

## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Etc/UTC
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] presto_1.0.0           Rcpp_1.0.13-1          clusterProfiler_4.14.4
##  [4] knitr_1.49             data.table_1.16.4      RColorBrewer_1.1-3    
##  [7] pheatmap_1.0.12        patchwork_1.3.0        lubridate_1.9.4       
## [10] forcats_1.0.0          stringr_1.5.1          dplyr_1.1.4           
## [13] purrr_1.0.2            readr_2.1.5            tidyr_1.3.1           
## [16] tibble_3.2.1           ggplot2_3.5.1          tidyverse_2.0.0       
## [19] Seurat_5.2.1           SeuratObject_5.0.2     sp_2.1-4              
## 
## loaded via a namespace (and not attached):
##   [1] RcppAnnoy_0.0.22        splines_4.4.1           later_1.4.1            
##   [4] ggplotify_0.1.2         R.oo_1.27.0             polyclip_1.10-7        
##   [7] fastDummies_1.7.5       lifecycle_1.0.4         globals_0.16.3         
##  [10] lattice_0.22-6          MASS_7.3-60.2           magrittr_2.0.3         
##  [13] limma_3.62.2            plotly_4.10.4           sass_0.4.9             
##  [16] rmarkdown_2.29          jquerylib_0.1.4         yaml_2.3.10            
##  [19] ggtangle_0.0.6          httpuv_1.6.15           sctransform_0.4.1      
##  [22] spam_2.11-1             spatstat.sparse_3.1-0   reticulate_1.40.0      
##  [25] cowplot_1.1.3           pbapply_1.7-2           DBI_1.2.3              
##  [28] abind_1.4-8             zlibbioc_1.52.0         Rtsne_0.17             
##  [31] R.utils_2.12.3          BiocGenerics_0.52.0     yulab.utils_0.1.9      
##  [34] GenomeInfoDbData_1.2.13 enrichplot_1.26.6       IRanges_2.40.1         
##  [37] S4Vectors_0.44.0        ggrepel_0.9.6           irlba_2.3.5.1          
##  [40] listenv_0.9.1           spatstat.utils_3.1-1    tidytree_0.4.6         
##  [43] goftest_1.2-3           RSpectra_0.16-2         spatstat.random_3.3-2  
##  [46] fitdistrplus_1.2-2      parallelly_1.40.1       codetools_0.2-20       
##  [49] DOSE_4.0.0              tidyselect_1.2.1        aplot_0.2.4            
##  [52] UCSC.utils_1.2.0        farver_2.1.2            matrixStats_1.5.0      
##  [55] stats4_4.4.1            spatstat.explore_3.3-3  jsonlite_1.8.9         
##  [58] progressr_0.15.1        ggridges_0.5.6          survival_3.6-4         
##  [61] tools_4.4.1             treeio_1.30.0           ica_1.0-3              
##  [64] glue_1.8.0              gridExtra_2.3           xfun_0.49              
##  [67] qvalue_2.38.0           GenomeInfoDb_1.42.1     withr_3.0.2            
##  [70] fastmap_1.2.0           digest_0.6.37           gridGraphics_0.5-1     
##  [73] timechange_0.3.0        R6_2.5.1                mime_0.12              
##  [76] colorspace_2.1-1        scattermore_1.2         GO.db_3.20.0           
##  [79] tensor_1.5              spatstat.data_3.1-4     RSQLite_2.3.9          
##  [82] R.methodsS3_1.8.2       generics_0.1.3          httr_1.4.7             
##  [85] htmlwidgets_1.6.4       uwot_0.2.2              pkgconfig_2.0.3        
##  [88] gtable_0.3.6            blob_1.2.4              lmtest_0.9-40          
##  [91] XVector_0.46.0          htmltools_0.5.8.1       dotCall64_1.2          
##  [94] fgsea_1.32.2            scales_1.3.0            Biobase_2.66.0         
##  [97] png_0.1-8               spatstat.univar_3.1-1   ggfun_0.1.8            
## [100] rstudioapi_0.17.1       tzdb_0.4.0              reshape2_1.4.4         
## [103] nlme_3.1-164            cachem_1.1.0            zoo_1.8-12             
## [106] KernSmooth_2.23-24      parallel_4.4.1          miniUI_0.1.1.1         
## [109] AnnotationDbi_1.68.0    pillar_1.10.1           grid_4.4.1             
## [112] vctrs_0.6.5             RANN_2.6.2              promises_1.3.2         
## [115] xtable_1.8-4            cluster_2.1.6           evaluate_1.0.3         
## [118] cli_3.6.3               compiler_4.4.1          rlang_1.1.5            
## [121] crayon_1.5.3            future.apply_1.11.3     labeling_0.4.3         
## [124] plyr_1.8.9              fs_1.6.5                stringi_1.8.4          
## [127] viridisLite_0.4.2       deldir_2.0-4            BiocParallel_1.40.0    
## [130] munsell_0.5.1           Biostrings_2.74.1       lazyeval_0.2.2         
## [133] spatstat.geom_3.3-4     GOSemSim_2.32.0         Matrix_1.7-0           
## [136] RcppHNSW_0.6.0          hms_1.1.3               bit64_4.6.0-1          
## [139] future_1.34.0           statmod_1.5.0           KEGGREST_1.46.0        
## [142] shiny_1.10.0            ROCR_1.0-11             igraph_2.1.2           
## [145] memoise_2.0.1           bslib_0.8.0             ggtree_3.14.0          
## [148] fastmatch_1.1-6         bit_4.5.0.1             gson_0.1.0             
## [151] ape_5.8-1

citation()

## To cite R in publications use:
## 
##   R Core Team (2024). _R: A Language and Environment for Statistical
##   Computing_. R Foundation for Statistical Computing, Vienna, Austria.
##   <https://www.R-project.org/>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {R: A Language and Environment for Statistical Computing},
##     author = {{R Core Team}},
##     organization = {R Foundation for Statistical Computing},
##     address = {Vienna, Austria},
##     year = {2024},
##     url = {https://www.R-project.org/},
##   }
## 
## We have invested a lot of time and effort in creating R, please cite it
## when using it for data analysis. See also 'citation("pkgname")' for
## citing R packages.

Lymph Node Single-Cell Cluster Annotation Part I: Expression of Key Markers

Eileen Owens

2025-01-31