The purpose of this script is to annotate single-cell RNA-seq clusters following filtering, normalization, and clustering of the data with Seurat. This script has been adapted from the Seurat (https://satijalab.org/seurat/) documentation.
library(Seurat)
library(tidyverse)
library(patchwork)
library(pheatmap)
library(RColorBrewer)
library(data.table)
library(knitr)
library(presto)
Input: Pre-processed Seurat object.
setwd("/scratch/alpine/edlarsen@colostate.edu/project_scrna_01/240828_scRNAseq/Cluster_Annotation")
ln_seurat <- readRDS(file = "../Normalization_and_Clustering/LN_NormalizedAndClustered.RData")
# differentially expressed features
## identifies all positive and negative markers of a single cluster compared to all other cells.
ln_seurat <- FindClusters(ln_seurat, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 29438
## Number of edges: 895159
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9696
## Number of communities: 9
## Elapsed time: 7 seconds
ln_seurat <- RunUMAP(ln_seurat, dims = 1:10)
ln.markers <- FindAllMarkers(ln_seurat, only.pos=TRUE)
# umap
DimPlot(ln_seurat,
reduction = "umap",
label = TRUE,
label.size = 6) +
plot_annotation(title = "Canine Lymph Node, Resolution: 0.1", theme = theme(plot.title = element_text(hjust = 0.5, size = 20)))
See if cluster differences are associated with differences in cell proliferation.
Each cell will be assigned a score based on its expression of G2/M and S phase markers. The CellCycleStoring() function stores S and G2/M scores in object metadata, along with the predicted classification of each cell in either G2/M, S, or G1 phase.
# segregate G2/M phase and S phase markers from Seurat's built-in list of cell cycle markers
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
# assign each cell a score based on expression of G2/M and S phase markers
ln_seurat_cycle <- CellCycleScoring(ln_seurat,
s.features = s.genes,
g2m.features = g2m.genes,
set.ident = TRUE)
ln_seurat_cycle <- RunPCA(ln_seurat_cycle, features = c(s.genes, g2m.genes))
DimPlot(ln_seurat_cycle,
group.by = "Phase",
reduction = "umap",
pt.size=0.5) +
ggtitle("Cell Cycle Scoring: Canine ln")
Module scoring the average expression levels of each cluster on a single-cell level, subtracted by the aggregated expression of control feature sets. All analyzed features are binned based on averaged expression, and the control features are randomly selected from each bin. Source: https://cells.ucsc.edu/?ds=pan-immune+global. The top 51 markers were exported from UCSC Cell Browser for each annotated cell type.
# make gene lists
human_ln_atlas_top51 <- read.csv("CondeEtAl_HumanImmuneCellAtlas_Global.csv")
colnames(human_ln_atlas_top51)
## [1] "Trm_gut_CD8" "TnaiveCM_CD4"
## [3] "Tregs" "Pro_B"
## [5] "Naive_B" "Pre_B"
## [7] "Plasma_Cell" "Plasmablasts"
## [9] "Memory_B" "Erythroid"
## [11] "Plasmacytoid_DC" "Megakaryocytes"
## [13] "Mast_cells" "ILC3"
## [15] "Cycling_T_NK" "DC_1"
## [17] "DC_2" "Classical_Monocyte"
## [19] "Cycling" "Progenitor"
## [21] "CD8_TEMRA" "Migratory_DC"
## [23] "Trm_Tgd" "NK_CD56_Bright_CD16_Negative"
## [25] "NK_CD16_Positive" "Tgd_CRTAM_Positive"
## [27] "Tmem_CD8" "MAIT"
## [29] "T_naive_CM_CD4_activated" "Trm_Th1_Th17"
## [31] "T_CD4_CD8" "T_naive_CM_CD8"
## [33] "TEM_CD4" "Tfh"
plot_list <- list() # initiate empty list for individual plots
for (column in colnames(human_ln_atlas_top51)){
genes <- list(human_ln_atlas_top51[[column]])
name <- paste(column, "Score", sep="_")
ln_seurat <- AddModuleScore(ln_seurat, features = genes, name = name)
newname <- paste(name, "1", sep="")
modscoreplot <- FeaturePlot(ln_seurat, features = newname) + scale_color_gradientn(colours = brewer.pal(name = "RdPu", n=11))
plot_list[[name]] <- modscoreplot # add plot to list
}
# split plot_list into groups of 6 and handle any remaining plots
plot_groups <- split(plot_list, ceiling(seq_along(plot_list) / 6))
# combined plot with dynamic number of columns/rows
combine_plots <- function(group, ncol = 3){
n_plots <- length(group)
nrow <- ceiling(n_plots / ncol)
wrap_plots(group, ncol = ncol, nrow = nrow)
}
combined_plots <- lapply(plot_groups, function(group) {
combine_plots(group, ncol = 3)
})
# display each combined plot
for (i in seq_along(combined_plots)) {
print(combined_plots[[i]])
}