The purpose of this script is to perform a pseudotime trajectory analysis on single-cell RNA-seq data from normal canine T-cell populations derived from the thymus and lymph node of healthy dogs.
This script was adapted from the Monocle 3 documentation (https://cole-trapnell-lab.github.io/monocle3/docs/trajectories/) and “Single Cell RNA seq analysis - SEurat and Monocle3 pipeline” by Mahima Bose (https://rpubs.com/mahima_bose/Seurat_and_Monocle3_p).
library(Seurat)
library(knitr)
library(tidyverse)
library(patchwork)
library(monocle3)
library(SeuratWrappers)
library(pheatmap)
library(stringr)
library(RColorBrewer)
Input: A Seurat object of T-cell subsets from normal canine lymph node and thymus.
setwd("C:/Users/edlarsen/Documents/240828_scRNAseq/Pseudotime")
seu <- readRDS(file = "C:/Users/edlarsen/Documents/240828_scRNAseq/T_Cells/IntegThymAndLN_Annotated.RData")
As cells move between biologic states, they undergo transcriptional reconfiguration, with some genes silenced and others newly activated. Trying to purify cells in these transient states to characterize them can be challenging. Monocle 3 utilizes an algorithm that learns the sequence of gene expression changes each cell must go through as it passes through dynamic biologic processes. Once it learns the overall trajectory of gene expression changes, it places each cell at its proper position in the trajectory. Monocle 3’s differential expression analysis toolkit can then find genes regulated over the course of the trajectory.
If there are multiple outcomes for a process, a branched trajectory will be made, which correspond to cellular “decisions.” The differential expression analysis toolkit is therefore also useful for identifying genes affected by and involved in making these decisions.
cds <- as.cell_data_set(seu, assay = "RNA")
cds <- estimate_size_factors(cds)
fData(cds)$gene_short_name <- rownames(fData(cds))
## To view cell metadata:
# head(colData(cds))
## To view count data:
#head(counts(cds))
Performs unsupervised clustering of cells using Leiden community detection.
cds <- cluster_cells(cds, reduction_method = "UMAP")
plot_cells(cds, group_label_size = 5, show_trajectory_graph = FALSE) + plot_annotation("Monocle 3 clusters by Leiden community detection")
plot_cells(cds, color_cells_by = "IntegratedClusterIDs", group_label_size = 4, show_trajectory_graph = FALSE) + plot_annotation(title = "Original Seurat clusters") + theme(legend.position = "right")
# create new column in colData(cds) an initialize it with values of partition(cds)
colData(cds)$assigned_cell_type <- as.character(clusters(cds))
# remap each cluster to cell type
colData(cds)$assigned_cell_type <- dplyr::recode(colData(cds)$assigned_cell_type,
"1" = "DP_Thymocytes_2",
"2" = "DP_Thymocytes_1",
"3" = "Naive_to_Activated_T",
"4" = "Naive_T_2",
"5" = "SP_Thym_and_Naive_T",
"6" = "Naive_T_1",
"7" = "Activated_T",
"8" = "SP_Thym",
"9" = "Proliferating_DP_Thym",
"10" = "CD8_NK",
"11" = "ETP_DN_ISP_Thym",
"12" = "Late_SP_Thym")
plot_cells(cds,
color_cells_by = "assigned_cell_type",
group_label_size = 4,
show_trajectory_graph = FALSE) + theme(legend.position = "right")
cds <- reduce_dimension(cds)
plot_cells(cds,
label_groups_by_cluster = FALSE,
group_label_size = 4,
color_cells_by = "assigned_cell_type") + theme(legend.position = "right")
marker_genes1 <- c("CD4",
"GATA3",
"ZBTB7B",
"CD8A",
"RUNX3")
marker_genes2 <- c("CD34",
"KIT",
"RAG1",
"CCR9",
"MKI67")
marker_genes3 <- c("IL2RA",
"LTB",
"S1PR1",
"SELL",
"KLRB1")
plot_cells(cds,
genes=marker_genes1,
label_cell_groups=FALSE,
show_trajectory_graph=FALSE)
plot_cells(cds,
genes=marker_genes2,
label_cell_groups=FALSE,
show_trajectory_graph=FALSE)
plot_cells(cds,
genes=marker_genes3,
label_cell_groups=FALSE,
show_trajectory_graph=FALSE)
cds <- cluster_cells(cds, reduction_method = "UMAP")
cds <- learn_graph(cds, use_partition = F)
plot_cells(cds,
color_cells_by = "assigned_cell_type",
label_groups_by_cluster = T,
label_branch_points = T,
label_roots = T,
label_leaves = F,
group_label_size = 5) + ggtitle("Trajectory analysis: Cell type annotations")
plot_cells(cds,
label_branch_points = T,
label_roots = T,
label_leaves = F,
group_label_size = 7) + ggtitle("Trajectory analysis: Monocle3 cluster numbers")
To place cells in order, Monocle 3 must be told where the “beginning” of the biologic process is, done by choosing regions on the graph to mark as “roots” of the trajectory.
cds <- order_cells(cds, reduction_method = "UMAP", root_cells = colnames(cds[, clusters(cds) == 10])) # use Monocle3 cluster numbers from figure above
plot_cells(cds,
color_cells_by = "pseudotime",
label_branch_points = T,
graph_label_size=3,
label_roots = F,
label_leaves = F)
cds$monocle3_pseudotime <- pseudotime(cds)
data.pseudo <- as.data.frame(colData(cds))
ggplot(data.pseudo,
aes(monocle3_pseudotime,
reorder(assigned_cell_type, monocle3_pseudotime),
fill = assigned_cell_type)) + geom_boxplot()