Introduction

The purpose of this script is to annotate single-cell RNA-seq clusters following filtering, normalization, and clustering of the data with Seurat. This script has been adapted from the Seurat (https://satijalab.org/seurat/) documentation.

Software

library(Seurat)
library(tidyverse)
library(patchwork)
library(pheatmap)
library(RColorBrewer)
library(data.table)
library(knitr)
library(clusterProfiler)
library(presto)

Data

Input: Pre-processed Seurat object.

setwd("/scratch/alpine/edlarsen@colostate.edu/project_scrna_01/240828_scRNAseq/Cluster_Annotation")
ln_seurat <- readRDS(file = "../Normalization_and_Clustering/LN_NormalizedAndClustered.RData")

# differentially expressed features
## identifies all positive and negative markers of a single cluster compared to all other cells.
ln_seurat <- FindClusters(ln_seurat, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 29438
## Number of edges: 895159
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9696
## Number of communities: 9
## Elapsed time: 7 seconds
ln_seurat <- RunUMAP(ln_seurat, dims = 1:10)
ln.markers <- FindAllMarkers(ln_seurat, only.pos=TRUE)

# umap
DimPlot(ln_seurat,
        reduction = "umap",
                   label = TRUE,
                   label.size = 6) + 
  plot_annotation(title = "Canine Lymph Node, Resolution: 0.1", theme = theme(plot.title = element_text(hjust = 0.5, size = 20)))

Gene Set Enrichment Analysis

Comparison with gene signatures from a human immune cell atlas (https://cells.ucsc.edu/?ds=pan-immune+global), canine circulating leukocyte atlas (https://cells.ucsc.edu/?ds=canine-leukocyte-atlas+healthy), and gene sets of genes upregulated at various stages of T-cell development and differentiation from the Molecular Signatures Database (https://www.gsea-msigdb.org/gsea/msigdb).

# significant results only
sig.ln.markers <- subset(ln.markers, p_val_adj < 0.05)

# loop through each cluster and create a ranked gene list for each
for (clust in unique(sig.ln.markers$cluster)){
  name <- paste("cluster", clust, "rankedGenes", sep="")
  rankedGenes <- sig.ln.markers %>%
    dplyr::filter(cluster == clust) %>%
    dplyr::select(gene, avg_log2FC) %>%
    arrange(desc(avg_log2FC))
  rankedGeneList <- rankedGenes$avg_log2FC
  names(rankedGeneList) <- rankedGenes$gene
  assign(name, rankedGeneList)
}

# import gmt file(s)
## multiple gmt files
setwd("/scratch/alpine/edlarsen@colostate.edu/project_scrna_01/240828_scRNAseq/Cluster_Annotation/T_cell_GMTs")
gmtFiles <- list.files(pattern = "\\.gmt", full.names = TRUE) # Get list of all .gmt files in this directory
gmtTables <- lapply(gmtFiles, read.gmt) # Apply the read.gmt function to the list of .gmt files and save as a variable
gmt <- do.call(rbind, gmtTables) # Rbind files
setwd("/scratch/alpine/edlarsen@colostate.edu/project_scrna_01/240828_scRNAseq/Cluster_Annotation")

# loop through each cluster's ranked gene list and run GSEA for gene lists in the gmt file(s) on each
for (geneList in ls(pattern = "cluster")){
  name <- paste(geneList, "GSEA", sep="_")
  input <- get(geneList)
  gse <- GSEA(input,
              exponent = 1,
              pvalueCutoff = 1,
              pAdjustMethod = "BH",
              TERM2GENE = gmt,
              verbose = TRUE,
              by = "fgsea")
  assign(name, gse)
}

# Remove any objects with no significantly enriched gene sets from the global environment
for (GSEAresult in ls(pattern = "_GSEA")){
  obj <- get(GSEAresult)
  res <- obj@result
  if (nrow(res) == 0) {
    rm(list = ls(pattern = GSEAresult))
  }
}
# Loop through each GSEA object, export results as csv, and plot results as an enrichment dot plot
plot_list <- list() # initiate empty list for individual plots

for (GSEAresult in ls(pattern = "_GSEA")){
  name <- paste("ln", GSEAresult, sep="_")
  obj <- get(GSEAresult)
  write.csv(obj, file=paste(name, "csv", sep="."))
  
  eplot <- obj %>%
    dotplot(showCategory = 10, x = "NES") +
    scale_color_viridis_c(name = "Adjusted\nP-value",
                          option = "H") +
    scale_y_discrete(labels = ~ str_wrap(gsub('_', ' ', .x), 40)) +
    geom_vline(xintercept = 0, linetype = 2) +
    labs(title = name, y = "Gene Set") +
    theme(plot.title = element_text(hjust = 0.5))
  
  plot_list[[name]] <- eplot # add plot to list
}

# combine plots into single layout
combined_plot <- wrap_plots(plot_list, ncol = 3)
print(combined_plot)

Citations

sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Etc/UTC
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] presto_1.0.0           Rcpp_1.0.13-1          clusterProfiler_4.14.4
##  [4] knitr_1.49             data.table_1.16.4      RColorBrewer_1.1-3    
##  [7] pheatmap_1.0.12        patchwork_1.3.0        lubridate_1.9.4       
## [10] forcats_1.0.0          stringr_1.5.1          dplyr_1.1.4           
## [13] purrr_1.0.2            readr_2.1.5            tidyr_1.3.1           
## [16] tibble_3.2.1           ggplot2_3.5.1          tidyverse_2.0.0       
## [19] Seurat_5.2.1           SeuratObject_5.0.2     sp_2.1-4              
## 
## loaded via a namespace (and not attached):
##   [1] RcppAnnoy_0.0.22        splines_4.4.1           later_1.4.1            
##   [4] ggplotify_0.1.2         R.oo_1.27.0             polyclip_1.10-7        
##   [7] fastDummies_1.7.5       lifecycle_1.0.4         globals_0.16.3         
##  [10] lattice_0.22-6          MASS_7.3-60.2           magrittr_2.0.3         
##  [13] limma_3.62.2            plotly_4.10.4           sass_0.4.9             
##  [16] rmarkdown_2.29          jquerylib_0.1.4         yaml_2.3.10            
##  [19] ggtangle_0.0.6          httpuv_1.6.15           sctransform_0.4.1      
##  [22] spam_2.11-1             spatstat.sparse_3.1-0   reticulate_1.40.0      
##  [25] cowplot_1.1.3           pbapply_1.7-2           DBI_1.2.3              
##  [28] abind_1.4-8             zlibbioc_1.52.0         Rtsne_0.17             
##  [31] R.utils_2.12.3          BiocGenerics_0.52.0     yulab.utils_0.1.9      
##  [34] GenomeInfoDbData_1.2.13 enrichplot_1.26.6       IRanges_2.40.1         
##  [37] S4Vectors_0.44.0        ggrepel_0.9.6           irlba_2.3.5.1          
##  [40] listenv_0.9.1           spatstat.utils_3.1-1    tidytree_0.4.6         
##  [43] goftest_1.2-3           RSpectra_0.16-2         spatstat.random_3.3-2  
##  [46] fitdistrplus_1.2-2      parallelly_1.40.1       codetools_0.2-20       
##  [49] DOSE_4.0.0              tidyselect_1.2.1        aplot_0.2.4            
##  [52] UCSC.utils_1.2.0        farver_2.1.2            matrixStats_1.5.0      
##  [55] stats4_4.4.1            spatstat.explore_3.3-3  jsonlite_1.8.9         
##  [58] progressr_0.15.1        ggridges_0.5.6          survival_3.6-4         
##  [61] tools_4.4.1             treeio_1.30.0           ica_1.0-3              
##  [64] glue_1.8.0              gridExtra_2.3           xfun_0.49              
##  [67] qvalue_2.38.0           GenomeInfoDb_1.42.1     withr_3.0.2            
##  [70] fastmap_1.2.0           digest_0.6.37           gridGraphics_0.5-1     
##  [73] timechange_0.3.0        R6_2.5.1                mime_0.12              
##  [76] colorspace_2.1-1        scattermore_1.2         GO.db_3.20.0           
##  [79] tensor_1.5              spatstat.data_3.1-4     RSQLite_2.3.9          
##  [82] R.methodsS3_1.8.2       generics_0.1.3          httr_1.4.7             
##  [85] htmlwidgets_1.6.4       uwot_0.2.2              pkgconfig_2.0.3        
##  [88] gtable_0.3.6            blob_1.2.4              lmtest_0.9-40          
##  [91] XVector_0.46.0          htmltools_0.5.8.1       dotCall64_1.2          
##  [94] fgsea_1.32.2            scales_1.3.0            Biobase_2.66.0         
##  [97] png_0.1-8               spatstat.univar_3.1-1   ggfun_0.1.8            
## [100] rstudioapi_0.17.1       tzdb_0.4.0              reshape2_1.4.4         
## [103] nlme_3.1-164            cachem_1.1.0            zoo_1.8-12             
## [106] KernSmooth_2.23-24      parallel_4.4.1          miniUI_0.1.1.1         
## [109] AnnotationDbi_1.68.0    pillar_1.10.1           grid_4.4.1             
## [112] vctrs_0.6.5             RANN_2.6.2              promises_1.3.2         
## [115] xtable_1.8-4            cluster_2.1.6           evaluate_1.0.3         
## [118] cli_3.6.3               compiler_4.4.1          rlang_1.1.5            
## [121] crayon_1.5.3            future.apply_1.11.3     labeling_0.4.3         
## [124] plyr_1.8.9              fs_1.6.5                stringi_1.8.4          
## [127] viridisLite_0.4.2       deldir_2.0-4            BiocParallel_1.40.0    
## [130] munsell_0.5.1           Biostrings_2.74.1       lazyeval_0.2.2         
## [133] spatstat.geom_3.3-4     GOSemSim_2.32.0         Matrix_1.7-0           
## [136] RcppHNSW_0.6.0          hms_1.1.3               bit64_4.6.0-1          
## [139] future_1.34.0           statmod_1.5.0           KEGGREST_1.46.0        
## [142] shiny_1.10.0            ROCR_1.0-11             igraph_2.1.2           
## [145] memoise_2.0.1           bslib_0.8.0             ggtree_3.14.0          
## [148] fastmatch_1.1-6         bit_4.5.0.1             gson_0.1.0             
## [151] ape_5.8-1
citation()
## To cite R in publications use:
## 
##   R Core Team (2024). _R: A Language and Environment for Statistical
##   Computing_. R Foundation for Statistical Computing, Vienna, Austria.
##   <https://www.R-project.org/>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {R: A Language and Environment for Statistical Computing},
##     author = {{R Core Team}},
##     organization = {R Foundation for Statistical Computing},
##     address = {Vienna, Austria},
##     year = {2024},
##     url = {https://www.R-project.org/},
##   }
## 
## We have invested a lot of time and effort in creating R, please cite it
## when using it for data analysis. See also 'citation("pkgname")' for
## citing R packages.