Introduction

This script performs GSEA using gmt files of genes upregulated by each cluster of a human thymus single-cell atlas (source: https://www.science.org/doi/10.1126/science.aay3224) and various stages of thymocyte development from the Molecular Signatures Database (https://www.gsea-msigdb.org/gsea/msigdb).

Note about some of the cell types in the Park et al. human thymus single-cell atlas:

Software

library(Seurat)
library(tidyverse)
library(patchwork)
library(pheatmap)
library(RColorBrewer)
library(SingleR)
library(celldex) # To install: BiocManager::install("celldex")
library(data.table)
library(knitr)
library(clusterProfiler)

Data

Input: Pre-processed Seurat object of integrated thymocyte and T-cell subsets from canine thymus and lymph node.

setwd("C:/Users/edlarsen/Documents/240828_scRNAseq/T_Cells")
all_Tcells <- readRDS(file = "integrated_Tcells.Rdata")
all_Tcells <- FindNeighbors(all_Tcells, dims = 1:10)
all_Tcells <- FindClusters(all_Tcells, resolution = 0.2)
all_Tcells <- RunUMAP(all_Tcells, dims = 1:10)

p1 <- DimPlot(all_Tcells, 
        reduction = "umap",
        label = TRUE,
        label.size = 3,
        label.box = TRUE) + plot_annotation(title = "Merged Canine T-cells from Lymph Node and Thymus")

p2 <- DimPlot(all_Tcells, 
        reduction = "umap",
        group.by = "OriginalClusters",
        label = TRUE,
        label.size = 3,
        label.box = TRUE) + plot_annotation(title = "Merged Canine T-cells from Lymph Node and Thymus,\nOriginal Cluster IDs")

p1+p2

Gene Set Enrichment Analysis

# significant results only
t.cell.markers <- FindAllMarkers(all_Tcells, only.pos=TRUE)
sig.markers <- subset(t.cell.markers, p_val_adj < 0.05)

# loop through each cluster and create a ranked gene list for each
for (clust in unique(sig.markers$cluster)){
  name <- paste("cluster", clust, "rankedGenes", sep="")
  rankedGenes <- sig.markers %>%
    dplyr::filter(cluster == clust) %>%
    dplyr::select(gene, avg_log2FC) %>%
    arrange(desc(avg_log2FC))
  rankedGeneList <- rankedGenes$avg_log2FC
  names(rankedGeneList) <- rankedGenes$gene
  assign(name, rankedGeneList)
}

# import gmt file(s)
## multiple gmt files
setwd("C:/Users/edlarsen/Documents/240828_scRNAseq/Cluster_Annotation/T_cell_GMTs")
gmtFiles <- list.files(pattern = "\\.gmt", full.names = TRUE) # Get list of all .gmt files in this directory
gmtTables <- lapply(gmtFiles, read.gmt) # Apply the read.gmt function to the list of .gmt files and save as a variable
gmt <- do.call(rbind, gmtTables) # Rbind files
setwd("C:/Users/edlarsen/Documents/240828_scRNAseq/T_Cells")

# loop through each cluster's ranked gene list and run GSEA for gene lists in the gmt file(s) on each
for (geneList in ls(pattern = "cluster")){
  name <- paste(geneList, "GSEA", sep="_")
  input <- get(geneList)
  gse <- GSEA(input,
              exponent = 1,
              pvalueCutoff = 1,
              pAdjustMethod = "BH",
              TERM2GENE = gmt,
              verbose = TRUE,
              by = "fgsea")
  assign(name, gse)
}

# Remove any objects with no significantly enriched gene sets from the global environment
for (GSEAresult in ls(pattern = "_GSEA")){
  obj <- get(GSEAresult)
  res <- obj@result
  if (nrow(res) == 0) {
    rm(list = ls(pattern = GSEAresult))
  }
}
# Loop through each GSEA object, export results as csv, and plot results as an enrichment dot plot
plot_list <- list() # initiate empty list for individual plots

for (GSEAresult in ls(pattern = "_GSEA")){
  name <- paste(GSEAresult)
  obj <- get(GSEAresult)
  write.csv(obj, file=paste(name, "TCellGeneSets", "csv", sep="."))
  
  eplot <- obj %>%
    dotplot(showCategory = 10, x = "NES") +
    scale_color_viridis_c(name = "Adjusted\nP-value",
                          option = "H") +
    scale_y_discrete(labels = ~ str_wrap(gsub('_', ' ', .x), 40)) +
    geom_vline(xintercept = 0, linetype = 2) +
    labs(title = name, y = "Gene Set") +
    theme(plot.title = element_text(hjust = 0.5))
  
  plot_list[[name]] <- eplot # add plot to list
}

# combine plots into single layout
combined_plot <- wrap_plots(plot_list, ncol = 3)
print(combined_plot)

Assigning cell type identity to clusters

To be done once confident cluster assignment has been achieved.

new.cluster.ids <- c("Naive_T_2", "DP_Thymocytes_1", "Activated_T", "DP_Thymocytes_2", "SP_Thymocytes", "Naive_T_1", "Proliferating_DP_Thymocytes", "CD8_NK", "ETP_DN_ISP_Thymocytes", "Late_SP_Thymocytes_1", "Late_SP_Thymocytes_2")
names(new.cluster.ids) <- levels(all_Tcells)
all_Tcells <- RenameIdents(all_Tcells, new.cluster.ids)

DimPlot(all_Tcells, reduction = "umap", label = TRUE, label.size = 3, label.box = TRUE, pt.size = 0.5) + ggtitle("Canine Merged Lymph Node and Thymus T Cells, Resolution: 0.2")

# save as new column in metadata in addition to reassigning idents
all_Tcells$IntegratedClusterIDs <- Idents(all_Tcells)
# Export
saveRDS(all_Tcells, file = "IntegThymAndLN_Annotated.Rdata")

Citations

sessionInfo()
## R version 4.4.0 (2024-04-24 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: America/Denver
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] clusterProfiler_4.12.6      knitr_1.49                 
##  [3] data.table_1.16.4           celldex_1.14.0             
##  [5] SingleR_2.6.0               SummarizedExperiment_1.34.0
##  [7] Biobase_2.64.0              GenomicRanges_1.56.2       
##  [9] GenomeInfoDb_1.40.1         IRanges_2.38.1             
## [11] S4Vectors_0.42.1            BiocGenerics_0.50.0        
## [13] MatrixGenerics_1.16.0       matrixStats_1.5.0          
## [15] RColorBrewer_1.1-3          pheatmap_1.0.12            
## [17] patchwork_1.3.0             lubridate_1.9.4            
## [19] forcats_1.0.0               stringr_1.5.1              
## [21] dplyr_1.1.4                 purrr_1.0.2                
## [23] readr_2.1.5                 tidyr_1.3.1                
## [25] tibble_3.2.1                ggplot2_3.5.1              
## [27] tidyverse_2.0.0             Seurat_5.2.1               
## [29] SeuratObject_5.0.2          sp_2.1-4                   
## 
## loaded via a namespace (and not attached):
##   [1] fs_1.6.5                  spatstat.sparse_3.1-0    
##   [3] enrichplot_1.24.4         httr_1.4.7               
##   [5] tools_4.4.0               sctransform_0.4.1        
##   [7] alabaster.base_1.4.2      R6_2.5.1                 
##   [9] HDF5Array_1.32.1          lazyeval_0.2.2           
##  [11] uwot_0.2.2                rhdf5filters_1.16.0      
##  [13] withr_3.0.2               gridExtra_2.3            
##  [15] progressr_0.15.1          cli_3.6.2                
##  [17] spatstat.explore_3.3-4    fastDummies_1.7.5        
##  [19] scatterpie_0.2.4          labeling_0.4.3           
##  [21] alabaster.se_1.4.1        sass_0.4.9               
##  [23] spatstat.data_3.1-4       ggridges_0.5.6           
##  [25] pbapply_1.7-2             yulab.utils_0.2.0        
##  [27] gson_0.1.0                DOSE_3.30.5              
##  [29] R.utils_2.12.3            parallelly_1.42.0        
##  [31] limma_3.60.6              rstudioapi_0.17.1        
##  [33] RSQLite_2.3.9             gridGraphics_0.5-1       
##  [35] generics_0.1.3            ica_1.0-3                
##  [37] spatstat.random_3.3-2     GO.db_3.19.1             
##  [39] Matrix_1.7-0              abind_1.4-8              
##  [41] R.methodsS3_1.8.2         lifecycle_1.0.4          
##  [43] yaml_2.3.10               qvalue_2.36.0            
##  [45] rhdf5_2.48.0              SparseArray_1.4.8        
##  [47] BiocFileCache_2.12.0      Rtsne_0.17               
##  [49] grid_4.4.0                blob_1.2.4               
##  [51] promises_1.3.2            ExperimentHub_2.12.0     
##  [53] crayon_1.5.3              miniUI_0.1.1.1           
##  [55] lattice_0.22-6            beachmat_2.20.0          
##  [57] cowplot_1.1.3             KEGGREST_1.44.1          
##  [59] pillar_1.10.1             fgsea_1.30.0             
##  [61] future.apply_1.11.3       codetools_0.2-20         
##  [63] fastmatch_1.1-6           glue_1.8.0               
##  [65] ggfun_0.1.8               spatstat.univar_3.1-1    
##  [67] treeio_1.28.0             vctrs_0.6.5              
##  [69] png_0.1-8                 gypsum_1.0.1             
##  [71] spam_2.11-1               gtable_0.3.6             
##  [73] cachem_1.1.0              xfun_0.49                
##  [75] S4Arrays_1.4.1            mime_0.12                
##  [77] tidygraph_1.3.1           survival_3.5-8           
##  [79] statmod_1.5.0             fitdistrplus_1.2-2       
##  [81] ROCR_1.0-11               nlme_3.1-164             
##  [83] ggtree_3.12.0             bit64_4.6.0-1            
##  [85] alabaster.ranges_1.4.2    filelock_1.0.3           
##  [87] RcppAnnoy_0.0.22          bslib_0.8.0              
##  [89] irlba_2.3.5.1             KernSmooth_2.23-22       
##  [91] colorspace_2.1-1          DBI_1.2.3                
##  [93] tidyselect_1.2.1          bit_4.5.0.1              
##  [95] compiler_4.4.0            curl_6.2.0               
##  [97] httr2_1.1.0               DelayedArray_0.30.1      
##  [99] plotly_4.10.4             shadowtext_0.1.4         
## [101] scales_1.3.0              lmtest_0.9-40            
## [103] rappdirs_0.3.3            digest_0.6.35            
## [105] goftest_1.2-3             presto_1.0.0             
## [107] spatstat.utils_3.1-2      alabaster.matrix_1.4.2   
## [109] rmarkdown_2.29            XVector_0.44.0           
## [111] htmltools_0.5.8.1         pkgconfig_2.0.3          
## [113] sparseMatrixStats_1.16.0  dbplyr_2.5.0             
## [115] fastmap_1.2.0             rlang_1.1.3              
## [117] htmlwidgets_1.6.4         UCSC.utils_1.0.0         
## [119] shiny_1.10.0              DelayedMatrixStats_1.26.0
## [121] farver_2.1.2              jquerylib_0.1.4          
## [123] zoo_1.8-12                jsonlite_1.8.9           
## [125] BiocParallel_1.38.0       GOSemSim_2.30.2          
## [127] R.oo_1.27.0               BiocSingular_1.20.0      
## [129] magrittr_2.0.3            ggplotify_0.1.2          
## [131] GenomeInfoDbData_1.2.12   dotCall64_1.2            
## [133] Rhdf5lib_1.26.0           munsell_0.5.1            
## [135] Rcpp_1.0.14               ape_5.8-1                
## [137] viridis_0.6.5             reticulate_1.40.0        
## [139] stringi_1.8.4             alabaster.schemas_1.4.0  
## [141] ggraph_2.2.1              zlibbioc_1.50.0          
## [143] MASS_7.3-60.2             AnnotationHub_3.12.0     
## [145] plyr_1.8.9                parallel_4.4.0           
## [147] listenv_0.9.1             ggrepel_0.9.6            
## [149] deldir_2.0-4              graphlayouts_1.2.2       
## [151] Biostrings_2.72.1         splines_4.4.0            
## [153] tensor_1.5                hms_1.1.3                
## [155] igraph_2.1.4              spatstat.geom_3.3-5      
## [157] RcppHNSW_0.6.0            reshape2_1.4.4           
## [159] ScaledMatrix_1.12.0       BiocVersion_3.19.1       
## [161] evaluate_1.0.3            BiocManager_1.30.25      
## [163] tweenr_2.0.3              tzdb_0.4.0               
## [165] httpuv_1.6.15             RANN_2.6.2               
## [167] polyclip_1.10-7           future_1.34.0            
## [169] scattermore_1.2           ggforce_0.4.2            
## [171] rsvd_1.0.5                xtable_1.8-4             
## [173] tidytree_0.4.6            RSpectra_0.16-2          
## [175] later_1.4.1               viridisLite_0.4.2        
## [177] snow_0.4-4                aplot_0.2.4              
## [179] memoise_2.0.1             AnnotationDbi_1.66.0     
## [181] cluster_2.1.6             timechange_0.3.0         
## [183] globals_0.16.3
citation()
## To cite R in publications use:
## 
##   R Core Team (2024). _R: A Language and Environment for Statistical
##   Computing_. R Foundation for Statistical Computing, Vienna, Austria.
##   <https://www.R-project.org/>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {R: A Language and Environment for Statistical Computing},
##     author = {{R Core Team}},
##     organization = {R Foundation for Statistical Computing},
##     address = {Vienna, Austria},
##     year = {2024},
##     url = {https://www.R-project.org/},
##   }
## 
## We have invested a lot of time and effort in creating R, please cite it
## when using it for data analysis. See also 'citation("pkgname")' for
## citing R packages.