Introduction

The purpose of this script is to subset T-cell clusters from a canine thymus Seurat object and canine lymph node Seurat object, and integrate those T-cell clusters together into one Seurat object for downstream analysis.

Software packages

library(Seurat)
library(tidyverse)
library(patchwork)
library(pheatmap)
library(RColorBrewer)
library(data.table)
library(knitr)
library(presto)
library(clustree)

Data import and pre-processing

Filtered, normalized, clustered, and annotated Seurat objects:

setwd("C:/Users/edlarsen/Documents/240828_scRNAseq/T_Cells")

ln_seurat <- readRDS(file = "../Cluster_Annotation/LN_Annotated.RData")
thym_seurat <- readRDS(file = "../Cluster_Annotation/THYM_Annotated.RData")

# Check original cluster IDs
kable(table(Idents(ln_seurat)))
Var1 Freq
LN0_Tcell 9328
LN1_BCell 7209
LN2_Tcell 4648
LN3_Tcell 2720
LN4_CD8_NK 1711
LN5_BCell 1434
LN6_Cycling 1295
LN7_Myeloid 731
LN8_PlasmaCell 362
DimPlot(ln_seurat,
        reduction = "umap",
                   label = TRUE,
                   label.size = 3,
                   label.box = TRUE) + 
  plot_annotation(title = "Canine Lymph Node, Resolution: 0.1", theme = theme(plot.title = element_text(hjust = 0.5, size = 20)))

kable(table(Idents(thym_seurat)))
Var1 Freq
T0_DP 7891
T1_DP 4259
T2_EarlySP 2929
T3_DN 1941
T4_Cycling 1771
T5_LateSP1 1665
T6_LateSP2 1067
T7_Bcell 492
T8_Cycling 381
T9_MonoMacDC 146
T10_Granulocytes 112
DimPlot(thym_seurat,
        reduction = "umap",
                   label = TRUE,
                  label.box = TRUE,
                   label.size = 3) + 
  plot_annotation(title = "Canine Thymus, Resolution: 0.2", theme = theme(plot.title = element_text(hjust = 0.5, size = 20)))

Assign dataset ID and preserve original cluster labels in metadata:
ln_seurat$dataset = "LymphNodeDataset"
thym_seurat$dataset = "ThymDataset"

ln_seurat$OriginalClusters <- Idents(ln_seurat)
thym_seurat$OriginalClusters <- Idents(thym_seurat)
Subset T-cell clusters from each object:
ln_Tcells <- subset(x = ln_seurat, idents = c("LN0_Tcell", "LN2_Tcell", "LN3_Tcell", "LN4_CD8_NK"))
thym_Tcells <- subset(x = thym_seurat, idents = c("T0_DP", "T1_DP", "T2_EarlySP", "T3_DN", "T4_Cycling", "T5_LateSP1", "T6_LateSP2", "T8_Cycling"))
Merge T-cell clusters into one object:
all_Tcells <- merge(x = ln_Tcells, y = thym_Tcells, project = "MergedTCells")
kable(table(Idents(all_Tcells)))
Var1 Freq
LN0_Tcell 9328
LN2_Tcell 4648
LN3_Tcell 2720
LN4_CD8_NK 1711
T0_DP 7891
T1_DP 4259
T2_EarlySP 2929
T3_DN 1941
T4_Cycling 1771
T5_LateSP1 1665
T6_LateSP2 1067
T8_Cycling 381

Data normalization and scaling:

# normalize data
all_Tcells <- NormalizeData(all_Tcells,
                            normalization.method = "LogNormalize",
                            scale.factor = 10000)

# find variable features
all_Tcells <- FindVariableFeatures(all_Tcells)
# Make table of 10 most highly variable genes
top10_Tcells <- head(VariableFeatures(all_Tcells), 10)
plot1 <- VariableFeaturePlot(all_Tcells)
plot2 <- LabelPoints(
  plot = plot1,
  points = top10_Tcells,
  repel = TRUE
) + 
  labs(title = "Top 10 Most Variable Features:\n Merged Canine T-cells from Lymph Node and Thymus")
plot2

# scale data
all_Tcells <- ScaleData(all_Tcells)

Linear dimensional reduction (PCA)

all_Tcells <- RunPCA(all_Tcells)
DimPlot(all_Tcells, reduction = "pca") + ggtitle("PCA: Merged Canine T-cells from Lymph Node and Thymus")

DimHeatmap(all_Tcells, dims = 1:15, cells = 500, balanced = TRUE)

Elbow plot

To decide how many principal components to include, we rank principal components based on the percentage of variance explained by an “elbow” in the plot.

ElbowPlot(all_Tcells, ndims=40) + ggtitle("Elbow plot of principal components: \nMerged Canine T-cells from Lymph Node and Thymus")

The location of the elbow in this plots suggests that the majority of the true signal is captured in the first 10 PCs. Based on this, the first 10 PCs will be used to generate cell clusters.

Integration

Canonical correlation analysis (CCA) is a form of PCA, except it only identifies the greatest sources of variation in the data if it is shared or conserved across the conditions/groups, using the 3000 most variable genes from each sample.

all_Tcells <- IntegrateLayers(object = all_Tcells,
                              method = CCAIntegration,
                              orig.reduction = "pca",
                              new.reduction = "integrated.cca",
                              verbose = FALSE)
all_Tcells[["RNA"]] <- JoinLayers(all_Tcells[["RNA"]])
saveRDS(all_Tcells, "integrated_Tcells.RData")

Clustering combined T-cell populations

all_Tcells <- FindNeighbors(all_Tcells, dims = 1:10) # replace with PCs calculated above
all_Tcells <- FindClusters(all_Tcells, resolution = c(0.1,0.15,0.2,0.3,0.4,0.45,0.5,0.6,0.7,0.8,0.9,1.0,1.1)) # use a value above/below 1.0 if you want to obtain a larger/smaller number of clusters (communities); setting this parameter between 0.4-1.2 typically returns good results for single-cell datasets of ~3K cells; optimal resolution often increases for larger datasets.

# Check for any columns matching the "RNA_snn_res." prefix for resolutions not in the list above, which may be leftover from prior analyses, and remove these prior to running clustree (will throw an error otherwise)
resolutions <- c(0.1,0.15,0.2,0.3,0.4,0.45,0.5,0.6,0.7,0.8,0.9,1.0,1.1) # should match FindClusters code above
cols_to_keep <- paste0("RNA_snn_res.", resolutions)
rna_snn_cols <- grep("^RNA_snn_res\\.", names(all_Tcells[[]]), value = TRUE)
cols_to_remove <- setdiff(rna_snn_cols, cols_to_keep)
## Remove
all_Tcells[[cols_to_remove]] <- NULL

clustree

clustreeT <- clustree(all_Tcells, prefix = "RNA_snn_res.")
clustreeT + ggtitle("Merged Canine T-cells from Lymph Node and Thymus") + theme(plot.title = element_text(hjust = 0.5))

Non-linear Dimensional Reduction (UMAP)

all_Tcells <- RunUMAP(all_Tcells, dims = 1:10) # replace with the PCs determined from elbow plot above
umapT <- DimPlot(all_Tcells,
                 reduction = "umap",
                 ncol = 2,
                 group.by=glue::glue("RNA_snn_res.{c(0.1,0.15,0.2,0.3)}"),
                 label = TRUE,
                 label.size = 6)
umapT + plot_annotation(title = "Merged Canine T-cells from Lymph Node and Thymus", theme = theme(plot.title = element_text(hjust = 0.5, size = 20)))

umapT <- DimPlot(all_Tcells,
                 reduction = "umap",
                 ncol = 2,
                 group.by=glue::glue("RNA_snn_res.{c(0.4,0.45,0.5,0.6)}"),
                 label = TRUE,
                 label.size = 6)
umapT + plot_annotation(title = "Merged Canine T-cells from Lymph Node and Thymus", theme = theme(plot.title = element_text(hjust = 0.5, size = 20)))

Comparison to original cluster IDs

DimPlot(all_Tcells, 
        reduction = "umap",
        group.by = "OriginalClusters",
        label = TRUE,
        label.size = 3,
        label.box = TRUE)

Citations

sessionInfo()
## R version 4.4.0 (2024-04-24 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: America/Denver
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] clustree_0.5.1     ggraph_2.2.1       presto_1.0.0       Rcpp_1.0.13       
##  [5] knitr_1.49         data.table_1.16.4  RColorBrewer_1.1-3 pheatmap_1.0.12   
##  [9] patchwork_1.3.0    lubridate_1.9.4    forcats_1.0.0      stringr_1.5.1     
## [13] dplyr_1.1.4        purrr_1.0.2        readr_2.1.5        tidyr_1.3.1       
## [17] tibble_3.2.1       ggplot2_3.5.1      tidyverse_2.0.0    Seurat_5.1.0      
## [21] SeuratObject_5.0.2 sp_2.1-4          
## 
## loaded via a namespace (and not attached):
##   [1] rstudioapi_0.17.1      jsonlite_1.8.9         magrittr_2.0.3        
##   [4] spatstat.utils_3.1-1   farver_2.1.2           rmarkdown_2.29        
##   [7] vctrs_0.6.5            ROCR_1.0-11            memoise_2.0.1         
##  [10] spatstat.explore_3.3-3 htmltools_0.5.8.1      sass_0.4.9            
##  [13] sctransform_0.4.1      parallelly_1.40.1      KernSmooth_2.23-22    
##  [16] bslib_0.8.0            htmlwidgets_1.6.4      ica_1.0-3             
##  [19] plyr_1.8.9             plotly_4.10.4          zoo_1.8-12            
##  [22] cachem_1.1.0           igraph_2.1.2           mime_0.12             
##  [25] lifecycle_1.0.4        pkgconfig_2.0.3        Matrix_1.7-0          
##  [28] R6_2.5.1               fastmap_1.2.0          fitdistrplus_1.2-2    
##  [31] future_1.34.0          shiny_1.10.0           digest_0.6.35         
##  [34] colorspace_2.1-1       tensor_1.5             RSpectra_0.16-2       
##  [37] irlba_2.3.5.1          labeling_0.4.3         progressr_0.15.1      
##  [40] spatstat.sparse_3.1-0  timechange_0.3.0       httr_1.4.7            
##  [43] polyclip_1.10-7        abind_1.4-8            compiler_4.4.0        
##  [46] withr_3.0.2            backports_1.5.0        viridis_0.6.5         
##  [49] fastDummies_1.7.4      ggforce_0.4.2          MASS_7.3-60.2         
##  [52] tools_4.4.0            lmtest_0.9-40          httpuv_1.6.15         
##  [55] future.apply_1.11.3    goftest_1.2-3          glue_1.7.0            
##  [58] nlme_3.1-164           promises_1.3.2         grid_4.4.0            
##  [61] checkmate_2.3.2        Rtsne_0.17             cluster_2.1.6         
##  [64] reshape2_1.4.4         generics_0.1.3         gtable_0.3.6          
##  [67] spatstat.data_3.1-4    tzdb_0.4.0             hms_1.1.3             
##  [70] tidygraph_1.3.1        spatstat.geom_3.3-4    RcppAnnoy_0.0.22      
##  [73] ggrepel_0.9.6          RANN_2.6.2             pillar_1.10.1         
##  [76] spam_2.11-0            RcppHNSW_0.6.0         later_1.3.2           
##  [79] splines_4.4.0          tweenr_2.0.3           lattice_0.22-6        
##  [82] survival_3.5-8         deldir_2.0-4           tidyselect_1.2.1      
##  [85] miniUI_0.1.1.1         pbapply_1.7-2          gridExtra_2.3         
##  [88] scattermore_1.2        xfun_0.49              graphlayouts_1.2.1    
##  [91] matrixStats_1.4.1      stringi_1.8.4          lazyeval_0.2.2        
##  [94] yaml_2.3.10            evaluate_1.0.3         codetools_0.2-20      
##  [97] cli_3.6.2              uwot_0.2.2             xtable_1.8-4          
## [100] reticulate_1.40.0      munsell_0.5.1          jquerylib_0.1.4       
## [103] globals_0.16.3         spatstat.random_3.3-2  png_0.1-8             
## [106] spatstat.univar_3.1-1  parallel_4.4.0         dotCall64_1.2         
## [109] listenv_0.9.1          viridisLite_0.4.2      scales_1.3.0          
## [112] ggridges_0.5.6         leiden_0.4.3.1         rlang_1.1.3           
## [115] cowplot_1.1.3
citation()
## To cite R in publications use:
## 
##   R Core Team (2024). _R: A Language and Environment for Statistical
##   Computing_. R Foundation for Statistical Computing, Vienna, Austria.
##   <https://www.R-project.org/>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {R: A Language and Environment for Statistical Computing},
##     author = {{R Core Team}},
##     organization = {R Foundation for Statistical Computing},
##     address = {Vienna, Austria},
##     year = {2024},
##     url = {https://www.R-project.org/},
##   }
## 
## We have invested a lot of time and effort in creating R, please cite it
## when using it for data analysis. See also 'citation("pkgname")' for
## citing R packages.