Introduction

The purpose of this script is to perform a pseudotime trajectory analysis on single-cell RNA-seq data from normal canine T-cell populations derived from the thymus and lymph node of healthy dogs.

Acknowledgements

This script was adapted from the Monocle 3 documentation (https://cole-trapnell-lab.github.io/monocle3/docs/trajectories/) and “Single Cell RNA seq analysis - SEurat and Monocle3 pipeline” by Mahima Bose (https://rpubs.com/mahima_bose/Seurat_and_Monocle3_p).

Software

library(Seurat)
library(knitr)
library(tidyverse)
library(patchwork)
library(monocle3)
library(SeuratWrappers)
library(pheatmap)
library(stringr)
library(RColorBrewer)

Data

Input: A Seurat object of T-cell subsets from normal canine lymph node and thymus.

setwd("C:/Users/edlarsen/Documents/240828_scRNAseq/Pseudotime")
seu <- readRDS(file = "C:/Users/edlarsen/Documents/240828_scRNAseq/T_Cells/IntegThymAndLN_Annotated.RData")

Trajectory analysis with Monocle3

As cells move between biologic states, they undergo transcriptional reconfiguration, with some genes silenced and others newly activated. Trying to purify cells in these transient states to characterize them can be challenging. Monocle 3 utilizes an algorithm that learns the sequence of gene expression changes each cell must go through as it passes through dynamic biologic processes. Once it learns the overall trajectory of gene expression changes, it places each cell at its proper position in the trajectory. Monocle 3’s differential expression analysis toolkit can then find genes regulated over the course of the trajectory.

If there are multiple outcomes for a process, a branched trajectory will be made, which correspond to cellular “decisions.” The differential expression analysis toolkit is therefore also useful for identifying genes affected by and involved in making these decisions.

Convert Seurat object to celldata set object:

cds <- as.cell_data_set(seu, assay = "RNA")
cds <- estimate_size_factors(cds)
fData(cds)$gene_short_name <- rownames(fData(cds))

## To view cell metadata:
# head(colData(cds))
## To view count data:
#head(counts(cds))

Cluster data with Monocle 3

Performs unsupervised clustering of cells using Leiden community detection.

cds <- cluster_cells(cds, reduction_method = "UMAP")

plot_cells(cds, group_label_size = 5, show_trajectory_graph = FALSE) + plot_annotation("Monocle 3 clusters by Leiden community detection")

plot_cells(cds, color_cells_by = "IntegratedClusterIDs", group_label_size = 4, show_trajectory_graph = FALSE) + plot_annotation(title = "Original Seurat clusters") + theme(legend.position = "right")

Add cluster names and plot clusters before trajectory

# create new column in colData(cds) an initialize it with values of partition(cds)
colData(cds)$assigned_cell_type <- as.character(clusters(cds))

# remap each cluster to cell type
colData(cds)$assigned_cell_type <- dplyr::recode(colData(cds)$assigned_cell_type,
                                                 "1" = "DP_Thymocytes_2",
                                                 "2" = "DP_Thymocytes_1",
                                                 "3" = "Naive_to_Activated_T",
                                                 "4" = "Naive_T_2",
                                                 "5" = "SP_Thym_and_Naive_T",
                                                 "6" = "Naive_T_1",
                                                 "7" = "Activated_T",
                                                 "8" = "SP_Thym",
                                                 "9" = "Proliferating_DP_Thym",
                                                 "10" = "CD8_NK",
                                                 "11" = "ETP_DN_ISP_Thym",
                                                 "12" = "Late_SP_Thym")

plot_cells(cds, 
           color_cells_by = "assigned_cell_type", 
           group_label_size = 4, 
           show_trajectory_graph = FALSE) + theme(legend.position = "right")

Reduce dimensionality

cds <- reduce_dimension(cds)
plot_cells(cds,
           label_groups_by_cluster = FALSE,
           group_label_size = 4,
           color_cells_by = "assigned_cell_type") + theme(legend.position = "right")

marker_genes1 <- c("CD4",
                   "GATA3",
                   "ZBTB7B",
                   "CD8A",
                   "RUNX3")

marker_genes2 <- c("CD34",
                   "KIT",
                   "RAG1",
                   "CCR9",
                   "MKI67")
  
marker_genes3 <- c("IL2RA",
                   "LTB",
                   "S1PR1",
                   "SELL",
                   "KLRB1")

plot_cells(cds,
           genes=marker_genes1,
           label_cell_groups=FALSE,
           show_trajectory_graph=FALSE)

plot_cells(cds,
           genes=marker_genes2,
           label_cell_groups=FALSE,
           show_trajectory_graph=FALSE)

plot_cells(cds,
           genes=marker_genes3,
           label_cell_groups=FALSE,
           show_trajectory_graph=FALSE)

Learn trajectory

cds <- cluster_cells(cds, reduction_method = "UMAP")
cds <- learn_graph(cds, use_partition = F)

plot_cells(cds,
           color_cells_by = "assigned_cell_type",
           label_groups_by_cluster = T,
           label_branch_points = T, 
           label_roots = T, 
           label_leaves = F,
           group_label_size = 5) + ggtitle("Trajectory analysis: Cell type annotations")

plot_cells(cds,
           label_branch_points = T, 
           label_roots = T, 
           label_leaves = F,
           group_label_size = 7) + ggtitle("Trajectory analysis: Monocle3 cluster numbers")

Order cells in pseudotime

To place cells in order, Monocle 3 must be told where the “beginning” of the biologic process is, done by choosing regions on the graph to mark as “roots” of the trajectory.

cds <- order_cells(cds, reduction_method = "UMAP", root_cells = colnames(cds[, clusters(cds) == 10])) # use Monocle3 cluster numbers from figure above
plot_cells(cds,
           color_cells_by = "pseudotime",
           label_branch_points = T,
           graph_label_size=3,
           label_roots = F,
           label_leaves = F)

Cells ordered by pseudotime

cds$monocle3_pseudotime <- pseudotime(cds)
data.pseudo <- as.data.frame(colData(cds))
ggplot(data.pseudo,
       aes(monocle3_pseudotime,
           reorder(assigned_cell_type, monocle3_pseudotime),
           fill = assigned_cell_type)) + geom_boxplot()

Citations

sessionInfo()
## R version 4.4.0 (2024-04-24 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: America/Denver
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] RColorBrewer_1.1-3          pheatmap_1.0.12            
##  [3] SeuratWrappers_0.4.0        monocle3_1.3.7             
##  [5] SingleCellExperiment_1.26.0 SummarizedExperiment_1.34.0
##  [7] GenomicRanges_1.56.2        GenomeInfoDb_1.40.1        
##  [9] IRanges_2.38.1              S4Vectors_0.42.1           
## [11] MatrixGenerics_1.16.0       matrixStats_1.5.0          
## [13] Biobase_2.64.0              BiocGenerics_0.50.0        
## [15] patchwork_1.3.0             lubridate_1.9.4            
## [17] forcats_1.0.0               stringr_1.5.1              
## [19] dplyr_1.1.4                 purrr_1.0.2                
## [21] readr_2.1.5                 tidyr_1.3.1                
## [23] tibble_3.2.1                ggplot2_3.5.1              
## [25] tidyverse_2.0.0             knitr_1.49                 
## [27] Seurat_5.2.1                SeuratObject_5.0.2         
## [29] sp_2.1-4                   
## 
## loaded via a namespace (and not attached):
##   [1] RcppAnnoy_0.0.22        splines_4.4.0           later_1.4.1            
##   [4] R.oo_1.27.0             polyclip_1.10-7         fastDummies_1.7.5      
##   [7] lifecycle_1.0.4         Rdpack_2.6.2            globals_0.16.3         
##  [10] lattice_0.22-6          MASS_7.3-60.2           magrittr_2.0.3         
##  [13] plotly_4.10.4           sass_0.4.9              rmarkdown_2.29         
##  [16] jquerylib_0.1.4         yaml_2.3.10             remotes_2.5.0          
##  [19] httpuv_1.6.15           sctransform_0.4.1       spam_2.11-1            
##  [22] spatstat.sparse_3.1-0   reticulate_1.40.0       cowplot_1.1.3          
##  [25] pbapply_1.7-2           minqa_1.2.8             abind_1.4-8            
##  [28] zlibbioc_1.50.0         Rtsne_0.17              R.utils_2.12.3         
##  [31] GenomeInfoDbData_1.2.12 ggrepel_0.9.6           irlba_2.3.5.1          
##  [34] listenv_0.9.1           spatstat.utils_3.1-2    goftest_1.2-3          
##  [37] RSpectra_0.16-2         spatstat.random_3.3-2   fitdistrplus_1.2-2     
##  [40] parallelly_1.42.0       codetools_0.2-20        DelayedArray_0.30.1    
##  [43] tidyselect_1.2.1        UCSC.utils_1.0.0        farver_2.1.2           
##  [46] viridis_0.6.5           lme4_1.1-36             spatstat.explore_3.3-4 
##  [49] jsonlite_1.8.9          progressr_0.15.1        ggridges_0.5.6         
##  [52] survival_3.5-8          tools_4.4.0             ica_1.0-3              
##  [55] Rcpp_1.0.14             glue_1.8.0              gridExtra_2.3          
##  [58] SparseArray_1.4.8       xfun_0.49               withr_3.0.2            
##  [61] BiocManager_1.30.25     fastmap_1.2.0           boot_1.3-30            
##  [64] digest_0.6.35           rsvd_1.0.5              timechange_0.3.0       
##  [67] R6_2.5.1                mime_0.12               colorspace_2.1-1       
##  [70] scattermore_1.2         tensor_1.5              spatstat.data_3.1-4    
##  [73] R.methodsS3_1.8.2       generics_0.1.3          data.table_1.16.4      
##  [76] httr_1.4.7              htmlwidgets_1.6.4       S4Arrays_1.4.1         
##  [79] uwot_0.2.2              pkgconfig_2.0.3         gtable_0.3.6           
##  [82] lmtest_0.9-40           XVector_0.44.0          htmltools_0.5.8.1      
##  [85] dotCall64_1.2           scales_1.3.0            png_0.1-8              
##  [88] spatstat.univar_3.1-1   reformulas_0.4.0        rstudioapi_0.17.1      
##  [91] tzdb_0.4.0              reshape2_1.4.4          nlme_3.1-164           
##  [94] nloptr_2.1.1            proxy_0.4-27            cachem_1.1.0           
##  [97] zoo_1.8-12              KernSmooth_2.23-22      parallel_4.4.0         
## [100] miniUI_0.1.1.1          pillar_1.10.1           grid_4.4.0             
## [103] vctrs_0.6.5             RANN_2.6.2              promises_1.3.2         
## [106] xtable_1.8-4            cluster_2.1.6           evaluate_1.0.3         
## [109] cli_3.6.2               compiler_4.4.0          rlang_1.1.3            
## [112] crayon_1.5.3            leidenbase_0.1.31       future.apply_1.11.3    
## [115] labeling_0.4.3          plyr_1.8.9              stringi_1.8.4          
## [118] viridisLite_0.4.2       deldir_2.0-4            assertthat_0.2.1       
## [121] munsell_0.5.1           lazyeval_0.2.2          spatstat.geom_3.3-5    
## [124] Matrix_1.7-0            RcppHNSW_0.6.0          hms_1.1.3              
## [127] future_1.34.0           shiny_1.10.0            rbibutils_2.3          
## [130] ROCR_1.0-11             igraph_2.1.4            bslib_0.8.0
citation()
## To cite R in publications use:
## 
##   R Core Team (2024). _R: A Language and Environment for Statistical
##   Computing_. R Foundation for Statistical Computing, Vienna, Austria.
##   <https://www.R-project.org/>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {R: A Language and Environment for Statistical Computing},
##     author = {{R Core Team}},
##     organization = {R Foundation for Statistical Computing},
##     address = {Vienna, Austria},
##     year = {2024},
##     url = {https://www.R-project.org/},
##   }
## 
## We have invested a lot of time and effort in creating R, please cite it
## when using it for data analysis. See also 'citation("pkgname")' for
## citing R packages.