The purpose of this script is to subset T-cell clusters from a canine thymus Seurat object and canine lymph node Seurat object, and integrate those T-cell clusters together into one Seurat object for downstream analysis.
library(Seurat)
library(tidyverse)
library(patchwork)
library(pheatmap)
library(RColorBrewer)
library(data.table)
library(knitr)
library(presto)
library(clustree)
setwd("C:/Users/edlarsen/Documents/240828_scRNAseq/T_Cells")
ln_seurat <- readRDS(file = "../Cluster_Annotation/LN_Annotated.RData")
thym_seurat <- readRDS(file = "../Cluster_Annotation/THYM_Annotated.RData")
# Check original cluster IDs
kable(table(Idents(ln_seurat)))
Var1 | Freq |
---|---|
LN0_Tcell | 9328 |
LN1_BCell | 7209 |
LN2_Tcell | 4648 |
LN3_Tcell | 2720 |
LN4_CD8_NK | 1711 |
LN5_BCell | 1434 |
LN6_Cycling | 1295 |
LN7_Myeloid | 731 |
LN8_PlasmaCell | 362 |
DimPlot(ln_seurat,
reduction = "umap",
label = TRUE,
label.size = 3,
label.box = TRUE) +
plot_annotation(title = "Canine Lymph Node, Resolution: 0.1", theme = theme(plot.title = element_text(hjust = 0.5, size = 20)))
kable(table(Idents(thym_seurat)))
Var1 | Freq |
---|---|
T0_DP | 7891 |
T1_DP | 4259 |
T2_EarlySP | 2929 |
T3_DN | 1941 |
T4_Cycling | 1771 |
T5_LateSP1 | 1665 |
T6_LateSP2 | 1067 |
T7_Bcell | 492 |
T8_Cycling | 381 |
T9_MonoMacDC | 146 |
T10_Granulocytes | 112 |
DimPlot(thym_seurat,
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3) +
plot_annotation(title = "Canine Thymus, Resolution: 0.2", theme = theme(plot.title = element_text(hjust = 0.5, size = 20)))
ln_seurat$dataset = "LymphNodeDataset"
thym_seurat$dataset = "ThymDataset"
ln_seurat$OriginalClusters <- Idents(ln_seurat)
thym_seurat$OriginalClusters <- Idents(thym_seurat)
ln_Tcells <- subset(x = ln_seurat, idents = c("LN0_Tcell", "LN2_Tcell", "LN3_Tcell", "LN4_CD8_NK"))
thym_Tcells <- subset(x = thym_seurat, idents = c("T0_DP", "T1_DP", "T2_EarlySP", "T3_DN", "T4_Cycling", "T5_LateSP1", "T6_LateSP2", "T8_Cycling"))
all_Tcells <- merge(x = ln_Tcells, y = thym_Tcells, project = "MergedTCells")
kable(table(Idents(all_Tcells)))
Var1 | Freq |
---|---|
LN0_Tcell | 9328 |
LN2_Tcell | 4648 |
LN3_Tcell | 2720 |
LN4_CD8_NK | 1711 |
T0_DP | 7891 |
T1_DP | 4259 |
T2_EarlySP | 2929 |
T3_DN | 1941 |
T4_Cycling | 1771 |
T5_LateSP1 | 1665 |
T6_LateSP2 | 1067 |
T8_Cycling | 381 |
# normalize data
all_Tcells <- NormalizeData(all_Tcells,
normalization.method = "LogNormalize",
scale.factor = 10000)
# find variable features
all_Tcells <- FindVariableFeatures(all_Tcells)
# Make table of 10 most highly variable genes
top10_Tcells <- head(VariableFeatures(all_Tcells), 10)
plot1 <- VariableFeaturePlot(all_Tcells)
plot2 <- LabelPoints(
plot = plot1,
points = top10_Tcells,
repel = TRUE
) +
labs(title = "Top 10 Most Variable Features:\n Merged Canine T-cells from Lymph Node and Thymus")
plot2
# scale data
all_Tcells <- ScaleData(all_Tcells)
all_Tcells <- RunPCA(all_Tcells)
DimPlot(all_Tcells, reduction = "pca") + ggtitle("PCA: Merged Canine T-cells from Lymph Node and Thymus")
DimHeatmap(all_Tcells, dims = 1:15, cells = 500, balanced = TRUE)
To decide how many principal components to include, we rank principal components based on the percentage of variance explained by an “elbow” in the plot.
ElbowPlot(all_Tcells, ndims=40) + ggtitle("Elbow plot of principal components: \nMerged Canine T-cells from Lymph Node and Thymus")
The location of the elbow in this plots suggests that the majority of
the true signal is captured in the first 10 PCs. Based on this, the
first 10 PCs will be used to generate cell clusters.
Canonical correlation analysis (CCA) is a form of PCA, except it only identifies the greatest sources of variation in the data if it is shared or conserved across the conditions/groups, using the 3000 most variable genes from each sample.
all_Tcells <- IntegrateLayers(object = all_Tcells,
method = CCAIntegration,
orig.reduction = "pca",
new.reduction = "integrated.cca",
verbose = FALSE)
all_Tcells[["RNA"]] <- JoinLayers(all_Tcells[["RNA"]])
saveRDS(all_Tcells, "integrated_Tcells.RData")
all_Tcells <- FindNeighbors(all_Tcells, dims = 1:10) # replace with PCs calculated above
all_Tcells <- FindClusters(all_Tcells, resolution = c(0.1,0.15,0.2,0.3,0.4,0.45,0.5,0.6,0.7,0.8,0.9,1.0,1.1)) # use a value above/below 1.0 if you want to obtain a larger/smaller number of clusters (communities); setting this parameter between 0.4-1.2 typically returns good results for single-cell datasets of ~3K cells; optimal resolution often increases for larger datasets.
# Check for any columns matching the "RNA_snn_res." prefix for resolutions not in the list above, which may be leftover from prior analyses, and remove these prior to running clustree (will throw an error otherwise)
resolutions <- c(0.1,0.15,0.2,0.3,0.4,0.45,0.5,0.6,0.7,0.8,0.9,1.0,1.1) # should match FindClusters code above
cols_to_keep <- paste0("RNA_snn_res.", resolutions)
rna_snn_cols <- grep("^RNA_snn_res\\.", names(all_Tcells[[]]), value = TRUE)
cols_to_remove <- setdiff(rna_snn_cols, cols_to_keep)
## Remove
all_Tcells[[cols_to_remove]] <- NULL
clustreeT <- clustree(all_Tcells, prefix = "RNA_snn_res.")
clustreeT + ggtitle("Merged Canine T-cells from Lymph Node and Thymus") + theme(plot.title = element_text(hjust = 0.5))
all_Tcells <- RunUMAP(all_Tcells, dims = 1:10) # replace with the PCs determined from elbow plot above
umapT <- DimPlot(all_Tcells,
reduction = "umap",
ncol = 2,
group.by=glue::glue("RNA_snn_res.{c(0.1,0.15,0.2,0.3)}"),
label = TRUE,
label.size = 6)
umapT + plot_annotation(title = "Merged Canine T-cells from Lymph Node and Thymus", theme = theme(plot.title = element_text(hjust = 0.5, size = 20)))
umapT <- DimPlot(all_Tcells,
reduction = "umap",
ncol = 2,
group.by=glue::glue("RNA_snn_res.{c(0.4,0.45,0.5,0.6)}"),
label = TRUE,
label.size = 6)
umapT + plot_annotation(title = "Merged Canine T-cells from Lymph Node and Thymus", theme = theme(plot.title = element_text(hjust = 0.5, size = 20)))
DimPlot(all_Tcells,
reduction = "umap",
group.by = "OriginalClusters",
label = TRUE,
label.size = 3,
label.box = TRUE)
sessionInfo()
## R version 4.4.0 (2024-04-24 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## time zone: America/Denver
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] clustree_0.5.1 ggraph_2.2.1 presto_1.0.0 Rcpp_1.0.13
## [5] knitr_1.49 data.table_1.16.4 RColorBrewer_1.1-3 pheatmap_1.0.12
## [9] patchwork_1.3.0 lubridate_1.9.4 forcats_1.0.0 stringr_1.5.1
## [13] dplyr_1.1.4 purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
## [17] tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0 Seurat_5.1.0
## [21] SeuratObject_5.0.2 sp_2.1-4
##
## loaded via a namespace (and not attached):
## [1] rstudioapi_0.17.1 jsonlite_1.8.9 magrittr_2.0.3
## [4] spatstat.utils_3.1-1 farver_2.1.2 rmarkdown_2.29
## [7] vctrs_0.6.5 ROCR_1.0-11 memoise_2.0.1
## [10] spatstat.explore_3.3-3 htmltools_0.5.8.1 sass_0.4.9
## [13] sctransform_0.4.1 parallelly_1.40.1 KernSmooth_2.23-22
## [16] bslib_0.8.0 htmlwidgets_1.6.4 ica_1.0-3
## [19] plyr_1.8.9 plotly_4.10.4 zoo_1.8-12
## [22] cachem_1.1.0 igraph_2.1.2 mime_0.12
## [25] lifecycle_1.0.4 pkgconfig_2.0.3 Matrix_1.7-0
## [28] R6_2.5.1 fastmap_1.2.0 fitdistrplus_1.2-2
## [31] future_1.34.0 shiny_1.10.0 digest_0.6.35
## [34] colorspace_2.1-1 tensor_1.5 RSpectra_0.16-2
## [37] irlba_2.3.5.1 labeling_0.4.3 progressr_0.15.1
## [40] spatstat.sparse_3.1-0 timechange_0.3.0 httr_1.4.7
## [43] polyclip_1.10-7 abind_1.4-8 compiler_4.4.0
## [46] withr_3.0.2 backports_1.5.0 viridis_0.6.5
## [49] fastDummies_1.7.4 ggforce_0.4.2 MASS_7.3-60.2
## [52] tools_4.4.0 lmtest_0.9-40 httpuv_1.6.15
## [55] future.apply_1.11.3 goftest_1.2-3 glue_1.7.0
## [58] nlme_3.1-164 promises_1.3.2 grid_4.4.0
## [61] checkmate_2.3.2 Rtsne_0.17 cluster_2.1.6
## [64] reshape2_1.4.4 generics_0.1.3 gtable_0.3.6
## [67] spatstat.data_3.1-4 tzdb_0.4.0 hms_1.1.3
## [70] tidygraph_1.3.1 spatstat.geom_3.3-4 RcppAnnoy_0.0.22
## [73] ggrepel_0.9.6 RANN_2.6.2 pillar_1.10.1
## [76] spam_2.11-0 RcppHNSW_0.6.0 later_1.3.2
## [79] splines_4.4.0 tweenr_2.0.3 lattice_0.22-6
## [82] survival_3.5-8 deldir_2.0-4 tidyselect_1.2.1
## [85] miniUI_0.1.1.1 pbapply_1.7-2 gridExtra_2.3
## [88] scattermore_1.2 xfun_0.49 graphlayouts_1.2.1
## [91] matrixStats_1.4.1 stringi_1.8.4 lazyeval_0.2.2
## [94] yaml_2.3.10 evaluate_1.0.3 codetools_0.2-20
## [97] cli_3.6.2 uwot_0.2.2 xtable_1.8-4
## [100] reticulate_1.40.0 munsell_0.5.1 jquerylib_0.1.4
## [103] globals_0.16.3 spatstat.random_3.3-2 png_0.1-8
## [106] spatstat.univar_3.1-1 parallel_4.4.0 dotCall64_1.2
## [109] listenv_0.9.1 viridisLite_0.4.2 scales_1.3.0
## [112] ggridges_0.5.6 leiden_0.4.3.1 rlang_1.1.3
## [115] cowplot_1.1.3
citation()
## To cite R in publications use:
##
## R Core Team (2024). _R: A Language and Environment for Statistical
## Computing_. R Foundation for Statistical Computing, Vienna, Austria.
## <https://www.R-project.org/>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {R: A Language and Environment for Statistical Computing},
## author = {{R Core Team}},
## organization = {R Foundation for Statistical Computing},
## address = {Vienna, Austria},
## year = {2024},
## url = {https://www.R-project.org/},
## }
##
## We have invested a lot of time and effort in creating R, please cite it
## when using it for data analysis. See also 'citation("pkgname")' for
## citing R packages.