This script performs GSEA using gmt files of genes upregulated by each cluster of a human thymus single-cell atlas (source: https://www.science.org/doi/10.1126/science.aay3224) and various stages of thymocyte development from the Molecular Signatures Database (https://www.gsea-msigdb.org/gsea/msigdb).
Note about some of the cell types in the Park et al. human thymus single-cell atlas:
library(Seurat)
library(tidyverse)
library(patchwork)
library(pheatmap)
library(RColorBrewer)
library(SingleR)
library(celldex) # To install: BiocManager::install("celldex")
library(data.table)
library(knitr)
library(clusterProfiler)
Input: Pre-processed Seurat object of integrated thymocyte and T-cell subsets from canine thymus and lymph node.
setwd("C:/Users/edlarsen/Documents/240828_scRNAseq/T_Cells")
all_Tcells <- readRDS(file = "integrated_Tcells.Rdata")
all_Tcells <- FindNeighbors(all_Tcells, dims = 1:10)
all_Tcells <- FindClusters(all_Tcells, resolution = 0.2)
all_Tcells <- RunUMAP(all_Tcells, dims = 1:10)
p1 <- DimPlot(all_Tcells,
reduction = "umap",
label = TRUE,
label.size = 3,
label.box = TRUE) + plot_annotation(title = "Merged Canine T-cells from Lymph Node and Thymus")
p2 <- DimPlot(all_Tcells,
reduction = "umap",
group.by = "OriginalClusters",
label = TRUE,
label.size = 3,
label.box = TRUE) + plot_annotation(title = "Merged Canine T-cells from Lymph Node and Thymus,\nOriginal Cluster IDs")
p1+p2
# significant results only
t.cell.markers <- FindAllMarkers(all_Tcells, only.pos=TRUE)
sig.markers <- subset(t.cell.markers, p_val_adj < 0.05)
# loop through each cluster and create a ranked gene list for each
for (clust in unique(sig.markers$cluster)){
name <- paste("cluster", clust, "rankedGenes", sep="")
rankedGenes <- sig.markers %>%
dplyr::filter(cluster == clust) %>%
dplyr::select(gene, avg_log2FC) %>%
arrange(desc(avg_log2FC))
rankedGeneList <- rankedGenes$avg_log2FC
names(rankedGeneList) <- rankedGenes$gene
assign(name, rankedGeneList)
}
# import gmt file(s)
## multiple gmt files
setwd("C:/Users/edlarsen/Documents/240828_scRNAseq/Cluster_Annotation/T_cell_GMTs")
gmtFiles <- list.files(pattern = "\\.gmt", full.names = TRUE) # Get list of all .gmt files in this directory
gmtTables <- lapply(gmtFiles, read.gmt) # Apply the read.gmt function to the list of .gmt files and save as a variable
gmt <- do.call(rbind, gmtTables) # Rbind files
setwd("C:/Users/edlarsen/Documents/240828_scRNAseq/T_Cells")
# loop through each cluster's ranked gene list and run GSEA for gene lists in the gmt file(s) on each
for (geneList in ls(pattern = "cluster")){
name <- paste(geneList, "GSEA", sep="_")
input <- get(geneList)
gse <- GSEA(input,
exponent = 1,
pvalueCutoff = 1,
pAdjustMethod = "BH",
TERM2GENE = gmt,
verbose = TRUE,
by = "fgsea")
assign(name, gse)
}
# Remove any objects with no significantly enriched gene sets from the global environment
for (GSEAresult in ls(pattern = "_GSEA")){
obj <- get(GSEAresult)
res <- obj@result
if (nrow(res) == 0) {
rm(list = ls(pattern = GSEAresult))
}
}
# Loop through each GSEA object, export results as csv, and plot results as an enrichment dot plot
plot_list <- list() # initiate empty list for individual plots
for (GSEAresult in ls(pattern = "_GSEA")){
name <- paste(GSEAresult)
obj <- get(GSEAresult)
write.csv(obj, file=paste(name, "TCellGeneSets", "csv", sep="."))
eplot <- obj %>%
dotplot(showCategory = 10, x = "NES") +
scale_color_viridis_c(name = "Adjusted\nP-value",
option = "H") +
scale_y_discrete(labels = ~ str_wrap(gsub('_', ' ', .x), 40)) +
geom_vline(xintercept = 0, linetype = 2) +
labs(title = name, y = "Gene Set") +
theme(plot.title = element_text(hjust = 0.5))
plot_list[[name]] <- eplot # add plot to list
}
# combine plots into single layout
combined_plot <- wrap_plots(plot_list, ncol = 3)
print(combined_plot)
To be done once confident cluster assignment has been achieved.
new.cluster.ids <- c("Naive_T_2", "DP_Thymocytes_1", "Activated_T", "DP_Thymocytes_2", "SP_Thymocytes", "Naive_T_1", "Proliferating_DP_Thymocytes", "CD8_NK", "ETP_DN_ISP_Thymocytes", "Late_SP_Thymocytes_1", "Late_SP_Thymocytes_2")
names(new.cluster.ids) <- levels(all_Tcells)
all_Tcells <- RenameIdents(all_Tcells, new.cluster.ids)
DimPlot(all_Tcells, reduction = "umap", label = TRUE, label.size = 3, label.box = TRUE, pt.size = 0.5) + ggtitle("Canine Merged Lymph Node and Thymus T Cells, Resolution: 0.2")
# save as new column in metadata in addition to reassigning idents
all_Tcells$IntegratedClusterIDs <- Idents(all_Tcells)
# Export
saveRDS(all_Tcells, file = "IntegThymAndLN_Annotated.Rdata")
sessionInfo()
## R version 4.4.0 (2024-04-24 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## time zone: America/Denver
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] clusterProfiler_4.12.6 knitr_1.49
## [3] data.table_1.16.4 celldex_1.14.0
## [5] SingleR_2.6.0 SummarizedExperiment_1.34.0
## [7] Biobase_2.64.0 GenomicRanges_1.56.2
## [9] GenomeInfoDb_1.40.1 IRanges_2.38.1
## [11] S4Vectors_0.42.1 BiocGenerics_0.50.0
## [13] MatrixGenerics_1.16.0 matrixStats_1.5.0
## [15] RColorBrewer_1.1-3 pheatmap_1.0.12
## [17] patchwork_1.3.0 lubridate_1.9.4
## [19] forcats_1.0.0 stringr_1.5.1
## [21] dplyr_1.1.4 purrr_1.0.2
## [23] readr_2.1.5 tidyr_1.3.1
## [25] tibble_3.2.1 ggplot2_3.5.1
## [27] tidyverse_2.0.0 Seurat_5.2.1
## [29] SeuratObject_5.0.2 sp_2.1-4
##
## loaded via a namespace (and not attached):
## [1] fs_1.6.5 spatstat.sparse_3.1-0
## [3] enrichplot_1.24.4 httr_1.4.7
## [5] tools_4.4.0 sctransform_0.4.1
## [7] alabaster.base_1.4.2 R6_2.5.1
## [9] HDF5Array_1.32.1 lazyeval_0.2.2
## [11] uwot_0.2.2 rhdf5filters_1.16.0
## [13] withr_3.0.2 gridExtra_2.3
## [15] progressr_0.15.1 cli_3.6.2
## [17] spatstat.explore_3.3-4 fastDummies_1.7.5
## [19] scatterpie_0.2.4 labeling_0.4.3
## [21] alabaster.se_1.4.1 sass_0.4.9
## [23] spatstat.data_3.1-4 ggridges_0.5.6
## [25] pbapply_1.7-2 yulab.utils_0.2.0
## [27] gson_0.1.0 DOSE_3.30.5
## [29] R.utils_2.12.3 parallelly_1.42.0
## [31] limma_3.60.6 rstudioapi_0.17.1
## [33] RSQLite_2.3.9 gridGraphics_0.5-1
## [35] generics_0.1.3 ica_1.0-3
## [37] spatstat.random_3.3-2 GO.db_3.19.1
## [39] Matrix_1.7-0 abind_1.4-8
## [41] R.methodsS3_1.8.2 lifecycle_1.0.4
## [43] yaml_2.3.10 qvalue_2.36.0
## [45] rhdf5_2.48.0 SparseArray_1.4.8
## [47] BiocFileCache_2.12.0 Rtsne_0.17
## [49] grid_4.4.0 blob_1.2.4
## [51] promises_1.3.2 ExperimentHub_2.12.0
## [53] crayon_1.5.3 miniUI_0.1.1.1
## [55] lattice_0.22-6 beachmat_2.20.0
## [57] cowplot_1.1.3 KEGGREST_1.44.1
## [59] pillar_1.10.1 fgsea_1.30.0
## [61] future.apply_1.11.3 codetools_0.2-20
## [63] fastmatch_1.1-6 glue_1.8.0
## [65] ggfun_0.1.8 spatstat.univar_3.1-1
## [67] treeio_1.28.0 vctrs_0.6.5
## [69] png_0.1-8 gypsum_1.0.1
## [71] spam_2.11-1 gtable_0.3.6
## [73] cachem_1.1.0 xfun_0.49
## [75] S4Arrays_1.4.1 mime_0.12
## [77] tidygraph_1.3.1 survival_3.5-8
## [79] statmod_1.5.0 fitdistrplus_1.2-2
## [81] ROCR_1.0-11 nlme_3.1-164
## [83] ggtree_3.12.0 bit64_4.6.0-1
## [85] alabaster.ranges_1.4.2 filelock_1.0.3
## [87] RcppAnnoy_0.0.22 bslib_0.8.0
## [89] irlba_2.3.5.1 KernSmooth_2.23-22
## [91] colorspace_2.1-1 DBI_1.2.3
## [93] tidyselect_1.2.1 bit_4.5.0.1
## [95] compiler_4.4.0 curl_6.2.0
## [97] httr2_1.1.0 DelayedArray_0.30.1
## [99] plotly_4.10.4 shadowtext_0.1.4
## [101] scales_1.3.0 lmtest_0.9-40
## [103] rappdirs_0.3.3 digest_0.6.35
## [105] goftest_1.2-3 presto_1.0.0
## [107] spatstat.utils_3.1-2 alabaster.matrix_1.4.2
## [109] rmarkdown_2.29 XVector_0.44.0
## [111] htmltools_0.5.8.1 pkgconfig_2.0.3
## [113] sparseMatrixStats_1.16.0 dbplyr_2.5.0
## [115] fastmap_1.2.0 rlang_1.1.3
## [117] htmlwidgets_1.6.4 UCSC.utils_1.0.0
## [119] shiny_1.10.0 DelayedMatrixStats_1.26.0
## [121] farver_2.1.2 jquerylib_0.1.4
## [123] zoo_1.8-12 jsonlite_1.8.9
## [125] BiocParallel_1.38.0 GOSemSim_2.30.2
## [127] R.oo_1.27.0 BiocSingular_1.20.0
## [129] magrittr_2.0.3 ggplotify_0.1.2
## [131] GenomeInfoDbData_1.2.12 dotCall64_1.2
## [133] Rhdf5lib_1.26.0 munsell_0.5.1
## [135] Rcpp_1.0.14 ape_5.8-1
## [137] viridis_0.6.5 reticulate_1.40.0
## [139] stringi_1.8.4 alabaster.schemas_1.4.0
## [141] ggraph_2.2.1 zlibbioc_1.50.0
## [143] MASS_7.3-60.2 AnnotationHub_3.12.0
## [145] plyr_1.8.9 parallel_4.4.0
## [147] listenv_0.9.1 ggrepel_0.9.6
## [149] deldir_2.0-4 graphlayouts_1.2.2
## [151] Biostrings_2.72.1 splines_4.4.0
## [153] tensor_1.5 hms_1.1.3
## [155] igraph_2.1.4 spatstat.geom_3.3-5
## [157] RcppHNSW_0.6.0 reshape2_1.4.4
## [159] ScaledMatrix_1.12.0 BiocVersion_3.19.1
## [161] evaluate_1.0.3 BiocManager_1.30.25
## [163] tweenr_2.0.3 tzdb_0.4.0
## [165] httpuv_1.6.15 RANN_2.6.2
## [167] polyclip_1.10-7 future_1.34.0
## [169] scattermore_1.2 ggforce_0.4.2
## [171] rsvd_1.0.5 xtable_1.8-4
## [173] tidytree_0.4.6 RSpectra_0.16-2
## [175] later_1.4.1 viridisLite_0.4.2
## [177] snow_0.4-4 aplot_0.2.4
## [179] memoise_2.0.1 AnnotationDbi_1.66.0
## [181] cluster_2.1.6 timechange_0.3.0
## [183] globals_0.16.3
citation()
## To cite R in publications use:
##
## R Core Team (2024). _R: A Language and Environment for Statistical
## Computing_. R Foundation for Statistical Computing, Vienna, Austria.
## <https://www.R-project.org/>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {R: A Language and Environment for Statistical Computing},
## author = {{R Core Team}},
## organization = {R Foundation for Statistical Computing},
## address = {Vienna, Austria},
## year = {2024},
## url = {https://www.R-project.org/},
## }
##
## We have invested a lot of time and effort in creating R, please cite it
## when using it for data analysis. See also 'citation("pkgname")' for
## citing R packages.