Introduction

The purpose of this script is to subset T-cell clusters from a canine thymus Seurat object and canine lymph node Seurat object, and integrate those T-cell clusters together into one Seurat object for downstream analysis.

Software packages

library(Seurat)
library(tidyverse)
library(patchwork)
library(pheatmap)
library(RColorBrewer)
library(data.table)
library(knitr)
library(presto)
library(clustree)

Data import and pre-processing

Filtered, normalized, clustered, and annotated Seurat objects:

setwd("C:/Users/edlarsen/Documents/240828_scRNAseq/T_Cells")

ln_seurat <- readRDS(file = "../Cluster_Annotation/LN_Annotated.RData")
thym_seurat <- readRDS(file = "../Cluster_Annotation/THYM_Annotated.RData")

# Check original cluster IDs
kable(table(Idents(ln_seurat)))

Var1	Freq
LN0_Tcell	9328
LN1_BCell	7209
LN2_Tcell	4648
LN3_Tcell	2720
LN4_CD8_NK	1711
LN5_BCell	1434
LN6_Cycling	1295
LN7_Myeloid	731
LN8_PlasmaCell	362

DimPlot(ln_seurat,
        reduction = "umap",
                   label = TRUE,
                   label.size = 3,
                   label.box = TRUE) + 
  plot_annotation(title = "Canine Lymph Node, Resolution: 0.1", theme = theme(plot.title = element_text(hjust = 0.5, size = 20)))

kable(table(Idents(thym_seurat)))

Var1	Freq
T0_DP	7891
T1_DP	4259
T2_EarlySP	2929
T3_DN	1941
T4_Cycling	1771
T5_LateSP1	1665
T6_LateSP2	1067
T7_Bcell	492
T8_Cycling	381
T9_MonoMacDC	146
T10_Granulocytes	112

DimPlot(thym_seurat,
        reduction = "umap",
                   label = TRUE,
                  label.box = TRUE,
                   label.size = 3) + 
  plot_annotation(title = "Canine Thymus, Resolution: 0.2", theme = theme(plot.title = element_text(hjust = 0.5, size = 20)))

Assign dataset ID and preserve original cluster labels in metadata:

ln_seurat$dataset = "LymphNodeDataset"
thym_seurat$dataset = "ThymDataset"

ln_seurat$OriginalClusters <- Idents(ln_seurat)
thym_seurat$OriginalClusters <- Idents(thym_seurat)

Subset T-cell clusters from each object:

ln_Tcells <- subset(x = ln_seurat, idents = c("LN0_Tcell", "LN2_Tcell", "LN3_Tcell", "LN4_CD8_NK"))
thym_Tcells <- subset(x = thym_seurat, idents = c("T0_DP", "T1_DP", "T2_EarlySP", "T3_DN", "T4_Cycling", "T5_LateSP1", "T6_LateSP2", "T8_Cycling"))

Merge T-cell clusters into one object:

all_Tcells <- merge(x = ln_Tcells, y = thym_Tcells, project = "MergedTCells")
kable(table(Idents(all_Tcells)))

Var1	Freq
LN0_Tcell	9328
LN2_Tcell	4648
LN3_Tcell	2720
LN4_CD8_NK	1711
T0_DP	7891
T1_DP	4259
T2_EarlySP	2929
T3_DN	1941
T4_Cycling	1771
T5_LateSP1	1665
T6_LateSP2	1067
T8_Cycling	381

Data normalization and scaling:

# normalize data
all_Tcells <- NormalizeData(all_Tcells,
                            normalization.method = "LogNormalize",
                            scale.factor = 10000)

# find variable features
all_Tcells <- FindVariableFeatures(all_Tcells)
# Make table of 10 most highly variable genes
top10_Tcells <- head(VariableFeatures(all_Tcells), 10)
plot1 <- VariableFeaturePlot(all_Tcells)
plot2 <- LabelPoints(
  plot = plot1,
  points = top10_Tcells,
  repel = TRUE
) + 
  labs(title = "Top 10 Most Variable Features:\n Merged Canine T-cells from Lymph Node and Thymus")
plot2

# scale data
all_Tcells <- ScaleData(all_Tcells)

Linear dimensional reduction (PCA)

all_Tcells <- RunPCA(all_Tcells)
DimPlot(all_Tcells, reduction = "pca") + ggtitle("PCA: Merged Canine T-cells from Lymph Node and Thymus")

DimHeatmap(all_Tcells, dims = 1:15, cells = 500, balanced = TRUE)

Elbow plot

To decide how many principal components to include, we rank principal components based on the percentage of variance explained by an “elbow” in the plot.

ElbowPlot(all_Tcells, ndims=40) + ggtitle("Elbow plot of principal components: \nMerged Canine T-cells from Lymph Node and Thymus")

The location of the elbow in this plots suggests that the majority of the true signal is captured in the first 10 PCs. Based on this, the first 10 PCs will be used to generate cell clusters.

Integration

Canonical correlation analysis (CCA) is a form of PCA, except it only identifies the greatest sources of variation in the data if it is shared or conserved across the conditions/groups, using the 3000 most variable genes from each sample.

all_Tcells <- IntegrateLayers(object = all_Tcells,
                              method = CCAIntegration,
                              orig.reduction = "pca",
                              new.reduction = "integrated.cca",
                              verbose = FALSE)
all_Tcells[["RNA"]] <- JoinLayers(all_Tcells[["RNA"]])

saveRDS(all_Tcells, "integrated_Tcells.RData")

Clustering combined T-cell populations

all_Tcells <- FindNeighbors(all_Tcells, dims = 1:10) # replace with PCs calculated above
all_Tcells <- FindClusters(all_Tcells, resolution = c(0.1,0.15,0.2,0.3,0.4,0.45,0.5,0.6,0.7,0.8,0.9,1.0,1.1)) # use a value above/below 1.0 if you want to obtain a larger/smaller number of clusters (communities); setting this parameter between 0.4-1.2 typically returns good results for single-cell datasets of ~3K cells; optimal resolution often increases for larger datasets.

# Check for any columns matching the "RNA_snn_res." prefix for resolutions not in the list above, which may be leftover from prior analyses, and remove these prior to running clustree (will throw an error otherwise)
resolutions <- c(0.1,0.15,0.2,0.3,0.4,0.45,0.5,0.6,0.7,0.8,0.9,1.0,1.1) # should match FindClusters code above
cols_to_keep <- paste0("RNA_snn_res.", resolutions)
rna_snn_cols <- grep("^RNA_snn_res\\.", names(all_Tcells[[]]), value = TRUE)
cols_to_remove <- setdiff(rna_snn_cols, cols_to_keep)
## Remove
all_Tcells[[cols_to_remove]] <- NULL

clustree

clustreeT <- clustree(all_Tcells, prefix = "RNA_snn_res.")
clustreeT + ggtitle("Merged Canine T-cells from Lymph Node and Thymus") + theme(plot.title = element_text(hjust = 0.5))

Non-linear Dimensional Reduction (UMAP)

all_Tcells <- RunUMAP(all_Tcells, dims = 1:10) # replace with the PCs determined from elbow plot above
umapT <- DimPlot(all_Tcells,
                 reduction = "umap",
                 ncol = 2,
                 group.by=glue::glue("RNA_snn_res.{c(0.1,0.15,0.2,0.3)}"),
                 label = TRUE,
                 label.size = 6)
umapT + plot_annotation(title = "Merged Canine T-cells from Lymph Node and Thymus", theme = theme(plot.title = element_text(hjust = 0.5, size = 20)))

umapT <- DimPlot(all_Tcells,
                 reduction = "umap",
                 ncol = 2,
                 group.by=glue::glue("RNA_snn_res.{c(0.4,0.45,0.5,0.6)}"),
                 label = TRUE,
                 label.size = 6)
umapT + plot_annotation(title = "Merged Canine T-cells from Lymph Node and Thymus", theme = theme(plot.title = element_text(hjust = 0.5, size = 20)))

Comparison to original cluster IDs

DimPlot(all_Tcells, 
        reduction = "umap",
        group.by = "OriginalClusters",
        label = TRUE,
        label.size = 3,
        label.box = TRUE)

Citations

sessionInfo()

## R version 4.4.0 (2024-04-24 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: America/Denver
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] clustree_0.5.1     ggraph_2.2.1       presto_1.0.0       Rcpp_1.0.13       
##  [5] knitr_1.49         data.table_1.16.4  RColorBrewer_1.1-3 pheatmap_1.0.12   
##  [9] patchwork_1.3.0    lubridate_1.9.4    forcats_1.0.0      stringr_1.5.1     
## [13] dplyr_1.1.4        purrr_1.0.2        readr_2.1.5        tidyr_1.3.1       
## [17] tibble_3.2.1       ggplot2_3.5.1      tidyverse_2.0.0    Seurat_5.1.0      
## [21] SeuratObject_5.0.2 sp_2.1-4          
## 
## loaded via a namespace (and not attached):
##   [1] rstudioapi_0.17.1      jsonlite_1.8.9         magrittr_2.0.3        
##   [4] spatstat.utils_3.1-1   farver_2.1.2           rmarkdown_2.29        
##   [7] vctrs_0.6.5            ROCR_1.0-11            memoise_2.0.1         
##  [10] spatstat.explore_3.3-3 htmltools_0.5.8.1      sass_0.4.9            
##  [13] sctransform_0.4.1      parallelly_1.40.1      KernSmooth_2.23-22    
##  [16] bslib_0.8.0            htmlwidgets_1.6.4      ica_1.0-3             
##  [19] plyr_1.8.9             plotly_4.10.4          zoo_1.8-12            
##  [22] cachem_1.1.0           igraph_2.1.2           mime_0.12             
##  [25] lifecycle_1.0.4        pkgconfig_2.0.3        Matrix_1.7-0          
##  [28] R6_2.5.1               fastmap_1.2.0          fitdistrplus_1.2-2    
##  [31] future_1.34.0          shiny_1.10.0           digest_0.6.35         
##  [34] colorspace_2.1-1       tensor_1.5             RSpectra_0.16-2       
##  [37] irlba_2.3.5.1          labeling_0.4.3         progressr_0.15.1      
##  [40] spatstat.sparse_3.1-0  timechange_0.3.0       httr_1.4.7            
##  [43] polyclip_1.10-7        abind_1.4-8            compiler_4.4.0        
##  [46] withr_3.0.2            backports_1.5.0        viridis_0.6.5         
##  [49] fastDummies_1.7.4      ggforce_0.4.2          MASS_7.3-60.2         
##  [52] tools_4.4.0            lmtest_0.9-40          httpuv_1.6.15         
##  [55] future.apply_1.11.3    goftest_1.2-3          glue_1.7.0            
##  [58] nlme_3.1-164           promises_1.3.2         grid_4.4.0            
##  [61] checkmate_2.3.2        Rtsne_0.17             cluster_2.1.6         
##  [64] reshape2_1.4.4         generics_0.1.3         gtable_0.3.6          
##  [67] spatstat.data_3.1-4    tzdb_0.4.0             hms_1.1.3             
##  [70] tidygraph_1.3.1        spatstat.geom_3.3-4    RcppAnnoy_0.0.22      
##  [73] ggrepel_0.9.6          RANN_2.6.2             pillar_1.10.1         
##  [76] spam_2.11-0            RcppHNSW_0.6.0         later_1.3.2           
##  [79] splines_4.4.0          tweenr_2.0.3           lattice_0.22-6        
##  [82] survival_3.5-8         deldir_2.0-4           tidyselect_1.2.1      
##  [85] miniUI_0.1.1.1         pbapply_1.7-2          gridExtra_2.3         
##  [88] scattermore_1.2        xfun_0.49              graphlayouts_1.2.1    
##  [91] matrixStats_1.4.1      stringi_1.8.4          lazyeval_0.2.2        
##  [94] yaml_2.3.10            evaluate_1.0.3         codetools_0.2-20      
##  [97] cli_3.6.2              uwot_0.2.2             xtable_1.8-4          
## [100] reticulate_1.40.0      munsell_0.5.1          jquerylib_0.1.4       
## [103] globals_0.16.3         spatstat.random_3.3-2  png_0.1-8             
## [106] spatstat.univar_3.1-1  parallel_4.4.0         dotCall64_1.2         
## [109] listenv_0.9.1          viridisLite_0.4.2      scales_1.3.0          
## [112] ggridges_0.5.6         leiden_0.4.3.1         rlang_1.1.3           
## [115] cowplot_1.1.3

citation()

## To cite R in publications use:
## 
##   R Core Team (2024). _R: A Language and Environment for Statistical
##   Computing_. R Foundation for Statistical Computing, Vienna, Austria.
##   <https://www.R-project.org/>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {R: A Language and Environment for Statistical Computing},
##     author = {{R Core Team}},
##     organization = {R Foundation for Statistical Computing},
##     address = {Vienna, Austria},
##     year = {2024},
##     url = {https://www.R-project.org/},
##   }
## 
## We have invested a lot of time and effort in creating R, please cite it
## when using it for data analysis. See also 'citation("pkgname")' for
## citing R packages.

Merging T Cell Clusters: Part 1 - Integration

Eileen Owens

2024-12-05