The purpose of this script is to annotate single-cell RNA-seq clusters following filtering, normalization, and clustering of the data with Seurat. This script has been adapted from the Seurat (https://satijalab.org/seurat/) documentation.
library(Seurat)
library(tidyverse)
library(patchwork)
library(pheatmap)
library(RColorBrewer)
library(data.table)
library(knitr)
library(clusterProfiler)
library(presto)
Input: Pre-processed Seurat object.
setwd("/scratch/alpine/edlarsen@colostate.edu/project_scrna_01/240828_scRNAseq/Cluster_Annotation")
ln_seurat <- readRDS(file = "../Normalization_and_Clustering/LN_NormalizedAndClustered.RData")
# differentially expressed features
## identifies all positive and negative markers of a single cluster compared to all other cells.
ln_seurat <- FindClusters(ln_seurat, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 29438
## Number of edges: 895159
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9696
## Number of communities: 9
## Elapsed time: 7 seconds
ln_seurat <- RunUMAP(ln_seurat, dims = 1:10)
ln.markers <- FindAllMarkers(ln_seurat, only.pos=TRUE)
# umap
DimPlot(ln_seurat,
reduction = "umap",
label = TRUE,
label.size = 6) +
plot_annotation(title = "Canine Lymph Node, Resolution: 0.1", theme = theme(plot.title = element_text(hjust = 0.5, size = 20)))
Comparison with gene signatures from a human immune cell atlas (https://cells.ucsc.edu/?ds=pan-immune+global), canine circulating leukocyte atlas (https://cells.ucsc.edu/?ds=canine-leukocyte-atlas+healthy), and gene sets of genes upregulated at various stages of T-cell development and differentiation from the Molecular Signatures Database (https://www.gsea-msigdb.org/gsea/msigdb).
# significant results only
sig.ln.markers <- subset(ln.markers, p_val_adj < 0.05)
# loop through each cluster and create a ranked gene list for each
for (clust in unique(sig.ln.markers$cluster)){
name <- paste("cluster", clust, "rankedGenes", sep="")
rankedGenes <- sig.ln.markers %>%
dplyr::filter(cluster == clust) %>%
dplyr::select(gene, avg_log2FC) %>%
arrange(desc(avg_log2FC))
rankedGeneList <- rankedGenes$avg_log2FC
names(rankedGeneList) <- rankedGenes$gene
assign(name, rankedGeneList)
}
# import gmt file(s)
## multiple gmt files
setwd("/scratch/alpine/edlarsen@colostate.edu/project_scrna_01/240828_scRNAseq/Cluster_Annotation/T_cell_GMTs")
gmtFiles <- list.files(pattern = "\\.gmt", full.names = TRUE) # Get list of all .gmt files in this directory
gmtTables <- lapply(gmtFiles, read.gmt) # Apply the read.gmt function to the list of .gmt files and save as a variable
gmt <- do.call(rbind, gmtTables) # Rbind files
setwd("/scratch/alpine/edlarsen@colostate.edu/project_scrna_01/240828_scRNAseq/Cluster_Annotation")
# loop through each cluster's ranked gene list and run GSEA for gene lists in the gmt file(s) on each
for (geneList in ls(pattern = "cluster")){
name <- paste(geneList, "GSEA", sep="_")
input <- get(geneList)
gse <- GSEA(input,
exponent = 1,
pvalueCutoff = 1,
pAdjustMethod = "BH",
TERM2GENE = gmt,
verbose = TRUE,
by = "fgsea")
assign(name, gse)
}
# Remove any objects with no significantly enriched gene sets from the global environment
for (GSEAresult in ls(pattern = "_GSEA")){
obj <- get(GSEAresult)
res <- obj@result
if (nrow(res) == 0) {
rm(list = ls(pattern = GSEAresult))
}
}
# Loop through each GSEA object, export results as csv, and plot results as an enrichment dot plot
plot_list <- list() # initiate empty list for individual plots
for (GSEAresult in ls(pattern = "_GSEA")){
name <- paste("ln", GSEAresult, sep="_")
obj <- get(GSEAresult)
write.csv(obj, file=paste(name, "csv", sep="."))
eplot <- obj %>%
dotplot(showCategory = 10, x = "NES") +
scale_color_viridis_c(name = "Adjusted\nP-value",
option = "H") +
scale_y_discrete(labels = ~ str_wrap(gsub('_', ' ', .x), 40)) +
geom_vline(xintercept = 0, linetype = 2) +
labs(title = name, y = "Gene Set") +
theme(plot.title = element_text(hjust = 0.5))
plot_list[[name]] <- eplot # add plot to list
}
# combine plots into single layout
combined_plot <- wrap_plots(plot_list, ncol = 3)
print(combined_plot)
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Etc/UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] presto_1.0.0 Rcpp_1.0.13-1 clusterProfiler_4.14.4
## [4] knitr_1.49 data.table_1.16.4 RColorBrewer_1.1-3
## [7] pheatmap_1.0.12 patchwork_1.3.0 lubridate_1.9.4
## [10] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [13] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
## [16] tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0
## [19] Seurat_5.2.1 SeuratObject_5.0.2 sp_2.1-4
##
## loaded via a namespace (and not attached):
## [1] RcppAnnoy_0.0.22 splines_4.4.1 later_1.4.1
## [4] ggplotify_0.1.2 R.oo_1.27.0 polyclip_1.10-7
## [7] fastDummies_1.7.5 lifecycle_1.0.4 globals_0.16.3
## [10] lattice_0.22-6 MASS_7.3-60.2 magrittr_2.0.3
## [13] limma_3.62.2 plotly_4.10.4 sass_0.4.9
## [16] rmarkdown_2.29 jquerylib_0.1.4 yaml_2.3.10
## [19] ggtangle_0.0.6 httpuv_1.6.15 sctransform_0.4.1
## [22] spam_2.11-1 spatstat.sparse_3.1-0 reticulate_1.40.0
## [25] cowplot_1.1.3 pbapply_1.7-2 DBI_1.2.3
## [28] abind_1.4-8 zlibbioc_1.52.0 Rtsne_0.17
## [31] R.utils_2.12.3 BiocGenerics_0.52.0 yulab.utils_0.1.9
## [34] GenomeInfoDbData_1.2.13 enrichplot_1.26.6 IRanges_2.40.1
## [37] S4Vectors_0.44.0 ggrepel_0.9.6 irlba_2.3.5.1
## [40] listenv_0.9.1 spatstat.utils_3.1-1 tidytree_0.4.6
## [43] goftest_1.2-3 RSpectra_0.16-2 spatstat.random_3.3-2
## [46] fitdistrplus_1.2-2 parallelly_1.40.1 codetools_0.2-20
## [49] DOSE_4.0.0 tidyselect_1.2.1 aplot_0.2.4
## [52] UCSC.utils_1.2.0 farver_2.1.2 matrixStats_1.5.0
## [55] stats4_4.4.1 spatstat.explore_3.3-3 jsonlite_1.8.9
## [58] progressr_0.15.1 ggridges_0.5.6 survival_3.6-4
## [61] tools_4.4.1 treeio_1.30.0 ica_1.0-3
## [64] glue_1.8.0 gridExtra_2.3 xfun_0.49
## [67] qvalue_2.38.0 GenomeInfoDb_1.42.1 withr_3.0.2
## [70] fastmap_1.2.0 digest_0.6.37 gridGraphics_0.5-1
## [73] timechange_0.3.0 R6_2.5.1 mime_0.12
## [76] colorspace_2.1-1 scattermore_1.2 GO.db_3.20.0
## [79] tensor_1.5 spatstat.data_3.1-4 RSQLite_2.3.9
## [82] R.methodsS3_1.8.2 generics_0.1.3 httr_1.4.7
## [85] htmlwidgets_1.6.4 uwot_0.2.2 pkgconfig_2.0.3
## [88] gtable_0.3.6 blob_1.2.4 lmtest_0.9-40
## [91] XVector_0.46.0 htmltools_0.5.8.1 dotCall64_1.2
## [94] fgsea_1.32.2 scales_1.3.0 Biobase_2.66.0
## [97] png_0.1-8 spatstat.univar_3.1-1 ggfun_0.1.8
## [100] rstudioapi_0.17.1 tzdb_0.4.0 reshape2_1.4.4
## [103] nlme_3.1-164 cachem_1.1.0 zoo_1.8-12
## [106] KernSmooth_2.23-24 parallel_4.4.1 miniUI_0.1.1.1
## [109] AnnotationDbi_1.68.0 pillar_1.10.1 grid_4.4.1
## [112] vctrs_0.6.5 RANN_2.6.2 promises_1.3.2
## [115] xtable_1.8-4 cluster_2.1.6 evaluate_1.0.3
## [118] cli_3.6.3 compiler_4.4.1 rlang_1.1.5
## [121] crayon_1.5.3 future.apply_1.11.3 labeling_0.4.3
## [124] plyr_1.8.9 fs_1.6.5 stringi_1.8.4
## [127] viridisLite_0.4.2 deldir_2.0-4 BiocParallel_1.40.0
## [130] munsell_0.5.1 Biostrings_2.74.1 lazyeval_0.2.2
## [133] spatstat.geom_3.3-4 GOSemSim_2.32.0 Matrix_1.7-0
## [136] RcppHNSW_0.6.0 hms_1.1.3 bit64_4.6.0-1
## [139] future_1.34.0 statmod_1.5.0 KEGGREST_1.46.0
## [142] shiny_1.10.0 ROCR_1.0-11 igraph_2.1.2
## [145] memoise_2.0.1 bslib_0.8.0 ggtree_3.14.0
## [148] fastmatch_1.1-6 bit_4.5.0.1 gson_0.1.0
## [151] ape_5.8-1
citation()
## To cite R in publications use:
##
## R Core Team (2024). _R: A Language and Environment for Statistical
## Computing_. R Foundation for Statistical Computing, Vienna, Austria.
## <https://www.R-project.org/>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {R: A Language and Environment for Statistical Computing},
## author = {{R Core Team}},
## organization = {R Foundation for Statistical Computing},
## address = {Vienna, Austria},
## year = {2024},
## url = {https://www.R-project.org/},
## }
##
## We have invested a lot of time and effort in creating R, please cite it
## when using it for data analysis. See also 'citation("pkgname")' for
## citing R packages.