The purpose of this script is to annotate single-cell RNA-seq clusters following filtering, normalization, and clustering of the data with Seurat. This script has been adapted from the Seurat (https://satijalab.org/seurat/) documentation.
library(Seurat)
library(tidyverse)
library(patchwork)
library(pheatmap)
library(RColorBrewer)
library(data.table)
library(knitr)
library(clusterProfiler)
library(presto)
Input: Pre-processed Seurat object.
# Switch into the cluster-annotation directory so the relative paths used in
# this script resolve against it.
setwd(dir = "/scratch/alpine/edlarsen@colostate.edu/project_scrna_01/240828_scRNAseq/Cluster_Annotation")
# Load the pre-processed Seurat object. The file carries an .RData extension
# but is a single serialized object, hence readRDS() rather than load().
ln_seurat <- readRDS("../Normalization_and_Clustering/LN_NormalizedAndClustered.RData")
By default, `FindAllMarkers()` identifies both positive and negative markers of each cluster compared to all other cells; here `only.pos = TRUE` restricts the results to positive markers.
# Cluster the cells at resolution 0.1; the cluster assignments are stored back
# on ln_seurat.
ln_seurat <- FindClusters(object = ln_seurat, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 29438
## Number of edges: 895159
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9696
## Number of communities: 9
## Elapsed time: 6 seconds
# Compute the UMAP embedding from dimensions 1-10 of the default reduction.
ln_seurat <- RunUMAP(object = ln_seurat, dims = seq_len(10))
# Marker genes for every cluster; only.pos = TRUE keeps positive markers only.
ln.markers <- FindAllMarkers(object = ln_seurat, only.pos = TRUE)
# Export the full marker table (row names included) for the 0.1 resolution.
write.csv(x = ln.markers, file = "LN_DEfeaturesByUMAPcluster_res0.1.csv")
# umap
# UMAP coloured by cluster, with cluster labels drawn on the plot and a
# centred title added through patchwork.
DimPlot(
  object = ln_seurat,
  reduction = "umap",
  label = TRUE,
  label.size = 6
) +
  plot_annotation(
    title = "Canine Lymph Node, Resolution: 0.1",
    theme = theme(plot.title = element_text(size = 20, hjust = 0.5))
  )
# Drop markers whose row name contains "ENSCAFG" (presumably genes that only
# have an Ensembl identifier and no symbol -- confirm against the annotation).
key.genes <- ln.markers[!grepl("ENSCAFG", row.names(ln.markers)), ]
# Rank the remaining markers by ascending p-value (ties keep their input order).
key.genes.sortedByPval <- key.genes[order(key.genes$p_val), ]
# Take the five best-ranked markers per cluster. slice_head() replaces the
# superseded do(head(., n = 5)); because slice_head() keeps rows in their
# incoming (p-value) order, arrange(cluster) restores the cluster-by-cluster
# layout that do() produced. arrange() is stable, so the within-cluster
# p-value ranking is preserved.
features <- key.genes.sortedByPval %>%
  dplyr::group_by(cluster) %>%
  dplyr::slice_head(n = 5) %>%
  dplyr::ungroup() %>%
  dplyr::arrange(cluster) %>%
  # A gene that is a top marker in several clusters is kept only for the first
  # cluster it appears in, so each gene is plotted once.
  dplyr::filter(!duplicated(gene)) %>%
  as.data.frame()
# Dot plot of the selected marker genes (in reversed order) per cluster, using
# scaled expression from the RNA assay.
DotPlot(
  object = ln_seurat,
  assay = "RNA",
  features = rev(features$gene),
  scale = TRUE
) +
  # Overlay a black-outlined point on each dot, sized by percent expressed.
  geom_point(
    mapping = aes(size = pct.exp),
    shape = 21,
    colour = "black",
    stroke = 0.5
  ) +
  labs(y = "Cluster") +
  # Rotate the gene labels so they stay legible along the x axis.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
Some of these markers were derived from a single-cell atlas of circulating canine leukocytes (https://github.com/dyammons/Canine_Leukocyte_scRNA).
# UMAP feature plots for canonical CD4 T-cell markers, three panels per row.
# `+` binds tighter than `&`, so the title is attached first and the colour
# scale is then applied to every panel.
FeaturePlot(
  ln_seurat,
  reduction = "umap",
  ncol = 3,
  features = c("CD3E", "CD5", "CD4", "FOXP3", "IL2RA", "GATA3", "TBX21", "RORC")
) +
  plot_annotation(
    title = "Canine Lymph Node: Expression of Canonical CD4 T-cell Markers",
    theme = theme(plot.title = element_text(hjust = 0.5, size = 20))
  ) &
  # RdPu is a sequential ColorBrewer palette with at most 9 colours; requesting
  # 11 only triggered a warning and fell back to these same 9.
  scale_color_gradientn(colours = brewer.pal(n = 9, name = "RdPu"))
# UMAP feature plots for canonical CD8 T-cell and NK-cell markers, three
# panels per row. The title is attached with `+` before `&` applies the colour
# scale to every panel.
FeaturePlot(
  ln_seurat,
  reduction = "umap",
  features = c("GNLY", "NKG7", "NCAM1", "KLRK1", "CD8A", "CD3E", "CD5"),
  ncol = 3
) +
  plot_annotation(
    title = "Canine Lymph Node: Expression of Canonical CD8 & NK Cell Markers",
    theme = theme(plot.title = element_text(hjust = 0.5, size = 20))
  ) &
  # RdPu provides at most 9 colours; n = 11 only produced a warning and
  # returned the same 9-colour palette.
  scale_color_gradientn(colours = brewer.pal(n = 9, name = "RdPu"))
# UMAP feature plots for naive T-cell markers, laid out in a single row of
# three. The title is attached with `+` before `&` applies the colour scale to
# every panel.
FeaturePlot(
  ln_seurat,
  reduction = "umap",
  features = c("CCR7", "SELL", "HOPX"),
  ncol = 3
) +
  plot_annotation(
    title = "Canine Lymph Node: Expression of Naive T-cell Markers",
    theme = theme(plot.title = element_text(hjust = 0.5, size = 20))
  ) &
  # RdPu provides at most 9 colours; n = 11 only produced a warning and
  # returned the same 9-colour palette.
  scale_color_gradientn(colours = brewer.pal(n = 9, name = "RdPu"))
# UMAP feature plots for activated T-cell markers, three panels per row. The
# title is attached with `+` before `&` applies the colour scale to every
# panel.
FeaturePlot(
  ln_seurat,
  reduction = "umap",
  features = c("ICOS", "IL2RA", "LTB", "CTLA4", "LGALS3", "MAF"),
  ncol = 3
) +
  plot_annotation(
    title = "Canine Lymph Node: Expression of Activated T-cell Markers",
    theme = theme(plot.title = element_text(hjust = 0.5, size = 20))
  ) &
  # RdPu provides at most 9 colours; n = 11 only produced a warning and
  # returned the same 9-colour palette.
  scale_color_gradientn(colours = brewer.pal(n = 9, name = "RdPu"))
# UMAP feature plots for canonical B-cell markers. No ncol is given, so
# FeaturePlot() chooses the panel layout. The title is attached with `+`
# before `&` applies the colour scale to every panel.
FeaturePlot(
  ln_seurat,
  reduction = "umap",
  features = c("MS4A1", "CD19", "PAX5", "IRF4")
) +
  plot_annotation(
    title = "Canine Lymph Node: Expression of Canonical B-cell Markers",
    theme = theme(plot.title = element_text(hjust = 0.5, size = 20))
  ) &
  # RdPu provides at most 9 colours; n = 11 only produced a warning and
  # returned the same 9-colour palette.
  scale_color_gradientn(colours = brewer.pal(n = 9, name = "RdPu"))
# UMAP feature plots for canonical monocyte / macrophage / dendritic-cell
# markers, three panels per row. The title is attached with `+` before `&`
# applies the colour scale to every panel.
FeaturePlot(
  ln_seurat,
  reduction = "umap",
  ncol = 3,
  features = c("LYZ", "FCER1A", "ITGAM", "CD163", "CD68")
) +
  plot_annotation(
    title = "Canine Lymph Node: Expression of Canonical Monocyte/Macrophage/DC Markers",
    theme = theme(plot.title = element_text(hjust = 0.5, size = 20))
  ) &
  # RdPu provides at most 9 colours; n = 11 only produced a warning and
  # returned the same 9-colour palette.
  scale_color_gradientn(colours = brewer.pal(n = 9, name = "RdPu"))
# UMAP feature plots for canonical granulocyte markers, three panels per row.
# The title is attached with `+` before `&` applies the colour scale to every
# panel.
FeaturePlot(
  ln_seurat,
  reduction = "umap",
  features = c("FCER1A", "MMP9", "CCR5", "CXCR2", "EPX", "MMP8", "S100A12", "SERPINA1"),
  ncol = 3
) +
  plot_annotation(
    title = "Canine Lymph Node: Expression of Canonical Granulocyte Markers",
    theme = theme(plot.title = element_text(hjust = 0.5, size = 20))
  ) &
  # RdPu provides at most 9 colours; n = 11 only produced a warning and
  # returned the same 9-colour palette.
  scale_color_gradientn(colours = brewer.pal(n = 9, name = "RdPu"))
Note: Granulocytes are difficult to capture with single-cell RNA-seq
experiments given their relatively low RNA content and relatively high
levels of RNases and other inhibitory compounds, resulting in fewer
detectable transcripts.
# Record the R version, platform, and attached/loaded package versions used
# for this run, for reproducibility.
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Etc/UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] presto_1.0.0 Rcpp_1.0.13-1 clusterProfiler_4.14.4
## [4] knitr_1.49 data.table_1.16.4 RColorBrewer_1.1-3
## [7] pheatmap_1.0.12 patchwork_1.3.0 lubridate_1.9.4
## [10] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [13] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
## [16] tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0
## [19] Seurat_5.2.1 SeuratObject_5.0.2 sp_2.1-4
##
## loaded via a namespace (and not attached):
## [1] RcppAnnoy_0.0.22 splines_4.4.1 later_1.4.1
## [4] ggplotify_0.1.2 R.oo_1.27.0 polyclip_1.10-7
## [7] fastDummies_1.7.5 lifecycle_1.0.4 globals_0.16.3
## [10] lattice_0.22-6 MASS_7.3-60.2 magrittr_2.0.3
## [13] limma_3.62.2 plotly_4.10.4 sass_0.4.9
## [16] rmarkdown_2.29 jquerylib_0.1.4 yaml_2.3.10
## [19] ggtangle_0.0.6 httpuv_1.6.15 sctransform_0.4.1
## [22] spam_2.11-1 spatstat.sparse_3.1-0 reticulate_1.40.0
## [25] cowplot_1.1.3 pbapply_1.7-2 DBI_1.2.3
## [28] abind_1.4-8 zlibbioc_1.52.0 Rtsne_0.17
## [31] R.utils_2.12.3 BiocGenerics_0.52.0 yulab.utils_0.1.9
## [34] GenomeInfoDbData_1.2.13 enrichplot_1.26.6 IRanges_2.40.1
## [37] S4Vectors_0.44.0 ggrepel_0.9.6 irlba_2.3.5.1
## [40] listenv_0.9.1 spatstat.utils_3.1-1 tidytree_0.4.6
## [43] goftest_1.2-3 RSpectra_0.16-2 spatstat.random_3.3-2
## [46] fitdistrplus_1.2-2 parallelly_1.40.1 codetools_0.2-20
## [49] DOSE_4.0.0 tidyselect_1.2.1 aplot_0.2.4
## [52] UCSC.utils_1.2.0 farver_2.1.2 matrixStats_1.5.0
## [55] stats4_4.4.1 spatstat.explore_3.3-3 jsonlite_1.8.9
## [58] progressr_0.15.1 ggridges_0.5.6 survival_3.6-4
## [61] tools_4.4.1 treeio_1.30.0 ica_1.0-3
## [64] glue_1.8.0 gridExtra_2.3 xfun_0.49
## [67] qvalue_2.38.0 GenomeInfoDb_1.42.1 withr_3.0.2
## [70] fastmap_1.2.0 digest_0.6.37 gridGraphics_0.5-1
## [73] timechange_0.3.0 R6_2.5.1 mime_0.12
## [76] colorspace_2.1-1 scattermore_1.2 GO.db_3.20.0
## [79] tensor_1.5 spatstat.data_3.1-4 RSQLite_2.3.9
## [82] R.methodsS3_1.8.2 generics_0.1.3 httr_1.4.7
## [85] htmlwidgets_1.6.4 uwot_0.2.2 pkgconfig_2.0.3
## [88] gtable_0.3.6 blob_1.2.4 lmtest_0.9-40
## [91] XVector_0.46.0 htmltools_0.5.8.1 dotCall64_1.2
## [94] fgsea_1.32.2 scales_1.3.0 Biobase_2.66.0
## [97] png_0.1-8 spatstat.univar_3.1-1 ggfun_0.1.8
## [100] rstudioapi_0.17.1 tzdb_0.4.0 reshape2_1.4.4
## [103] nlme_3.1-164 cachem_1.1.0 zoo_1.8-12
## [106] KernSmooth_2.23-24 parallel_4.4.1 miniUI_0.1.1.1
## [109] AnnotationDbi_1.68.0 pillar_1.10.1 grid_4.4.1
## [112] vctrs_0.6.5 RANN_2.6.2 promises_1.3.2
## [115] xtable_1.8-4 cluster_2.1.6 evaluate_1.0.3
## [118] cli_3.6.3 compiler_4.4.1 rlang_1.1.5
## [121] crayon_1.5.3 future.apply_1.11.3 labeling_0.4.3
## [124] plyr_1.8.9 fs_1.6.5 stringi_1.8.4
## [127] viridisLite_0.4.2 deldir_2.0-4 BiocParallel_1.40.0
## [130] munsell_0.5.1 Biostrings_2.74.1 lazyeval_0.2.2
## [133] spatstat.geom_3.3-4 GOSemSim_2.32.0 Matrix_1.7-0
## [136] RcppHNSW_0.6.0 hms_1.1.3 bit64_4.6.0-1
## [139] future_1.34.0 statmod_1.5.0 KEGGREST_1.46.0
## [142] shiny_1.10.0 ROCR_1.0-11 igraph_2.1.2
## [145] memoise_2.0.1 bslib_0.8.0 ggtree_3.14.0
## [148] fastmatch_1.1-6 bit_4.5.0.1 gson_0.1.0
## [151] ape_5.8-1
# Print the recommended citation for R itself.
citation()
## To cite R in publications use:
##
## R Core Team (2024). _R: A Language and Environment for Statistical
## Computing_. R Foundation for Statistical Computing, Vienna, Austria.
## <https://www.R-project.org/>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {R: A Language and Environment for Statistical Computing},
## author = {{R Core Team}},
## organization = {R Foundation for Statistical Computing},
## address = {Vienna, Austria},
## year = {2024},
## url = {https://www.R-project.org/},
## }
##
## We have invested a lot of time and effort in creating R, please cite it
## when using it for data analysis. See also 'citation("pkgname")' for
## citing R packages.