load libraries————————————
Read the Seurat object with readRDS(), since it is stored as an .rds file
(i.e. it was written with saveRDS(), not save())
All_samples_Merged <- readRDS("../STCAT_Annotation/All_samples_Merged_with_STCAT.rds")
1. Subset Normal CD4⁺ T Cells for Reference
# Load required library
library(Seurat)
# Restrict the merged object to the two normal CD4+ T-cell samples
# ("CD4Tcells_lab" and "CD4Tcells_10x"); these form the reference.
reference_cd4 <- subset(
  x = All_samples_Merged,
  subset = cell_line %in% c("CD4Tcells_lab", "CD4Tcells_10x")
)

# Guard clause: abort when the subset carries no SCT assay, otherwise make
# SCT the active assay.
if (!("SCT" %in% names(reference_cd4@assays))) {
  stop("SCT assay not found in the object. Please run SCTransform first.")
}
DefaultAssay(reference_cd4) <- "SCT"
# Confirm object class and assays
print(class(reference_cd4)) # Should return "Seurat"
[1] "Seurat"
attr(,"package")
[1] "SeuratObject"
print(names(reference_cd4@assays)) # List available assays
[1] "RNA" "ADT" "prediction.score.celltype.l1"
[4] "prediction.score.celltype.l2" "prediction.score.celltype.l3" "SCT"
2. Normalize & Integrate Reference (Accommodate Multiple
Donors)
# One Seurat object per cell_line value, so each sample is normalized on its own
ref_list <- SplitObject(object = reference_cd4, split.by = "cell_line")
# Run SCTransform on every per-sample object with progress output silenced
ref_list <- lapply(
  X = ref_list,
  FUN = function(sample_obj) SCTransform(sample_obj, verbose = FALSE)
)
Warning: The `slot` argument of `SetAssayData()` is deprecated as of SeuratObject 5.0.0.
Please use the `layer` argument instead.Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
Please use the `layer` argument instead.Warning: Different cells and/or features from existing assay SCTWarning: Different cells and/or features from existing assay SCT
# Select integration features shared across the per-sample objects
ref_features <- SelectIntegrationFeatures(object.list = ref_list)
# Prepare SCT integration for the selected anchor features
ref_list <- PrepSCTIntegration(object.list = ref_list, anchor.features = ref_features)
# Run PCA on each SCT-normalized subset (required for RPCA).
# PCA is computed AFTER feature selection and restricted to `ref_features`, so
# every sample is embedded on the same gene set that FindIntegrationAnchors()
# is given; previously each sample's PCA ran on its own variable features
# before the shared features were even chosen.
ref_list <- lapply(ref_list, function(x) {
  RunPCA(x, assay = "SCT", features = ref_features, verbose = FALSE)
})
| | 0 % ~calculating
|+++++++++++++++++++++++++ | 50% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=01s
# Anchor the per-sample objects to one another via reciprocal PCA ("rpca"),
# using SCT normalization and the shared integration features
ref_anchors <- FindIntegrationAnchors(
  reduction = "rpca",
  normalization.method = "SCT",
  anchor.features = ref_features,
  object.list = ref_list
)
Computing within dataset neighborhoods
| | 0 % ~calculating
|+++++++++++++++++++++++++ | 50% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=02s
Finding all pairwise anchors
| | 0 % ~calculating
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 630 anchors
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=04s
# Combine the per-sample objects into one integrated object from the anchors
reference_integrated <- IntegrateData(
  anchorset = ref_anchors,
  normalization.method = "SCT"
)
[1] 1
Warning: Different cells and/or features from existing assay SCTWarning: Layer counts isn't present in the assay object; returning NULL
[1] 2
Warning: Different cells and/or features from existing assay SCTWarning: Layer counts isn't present in the assay object; returning NULLMerging dataset 2 into 1
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Warning: Layer counts isn't present in the assay object; returning NULLWarning: Assay integrated changing from Assay to SCTAssayWarning: Layer counts isn't present in the assay object; returning NULLWarning: Different cells and/or features from existing assay SCT
# Set default assay to integrated, so calls below that do not name an assay
# (e.g. RunPCA) operate on the "integrated" assay
DefaultAssay(reference_integrated) <- "integrated"
3. Clustering & Dimensionality Reduction
# PCA on the active assay, then a UMAP on the first 18 PCs.
# return.model = TRUE stores the fitted UMAP model in the "umap" reduction;
# MapQuery(reduction.model = "umap") further down needs that stored model to
# project query cells into this embedding and fails without it.
reference_integrated <- RunPCA(reference_integrated, verbose = FALSE)
reference_integrated <- RunUMAP(reference_integrated, dims = 1:18, return.model = TRUE)
Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session12:29:11 UMAP embedding parameters a = 0.9922 b = 1.112
12:29:11 Read 8610 rows and found 18 numeric columns
12:29:11 Using Annoy for neighbor search, n_neighbors = 30
12:29:11 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
12:29:12 Writing NN index file to temp file /tmp/RtmpgsQ1lC/file1a753457555a35
12:29:12 Searching Annoy index using 1 thread, search_k = 3000
12:29:14 Annoy recall = 100%
12:29:14 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
12:29:15 Initializing from normalized Laplacian + noise (using RSpectra)
12:29:15 Commencing optimization for 500 epochs, with 361792 positive edges
12:29:15 Using rng type: pcg
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
12:29:22 Optimization finished
# Build the nearest-neighbor and SNN graphs on the same 18 PCs used for the UMAP
reference_integrated <- FindNeighbors(object = reference_integrated, dims = 1:18)
Computing nearest neighbor graph
Computing SNN
# Cluster the graph at resolution 0.3; the labels are plotted below from the
# "integrated_snn_res.0.3" metadata column
reference_integrated <- FindClusters(object = reference_integrated, resolution = 0.3)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 8610
Number of edges: 308059
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9201
Number of communities: 11
Elapsed time: 0 seconds
# Elbow plot over the first 50 PCs, as a visual check on the dims = 1:18 choice
ElbowPlot(object = reference_integrated, ndims = 50)

# Visualize UMAP colored by original donor (cell_line)
DimPlot(reference_integrated, group.by = "cell_line", reduction = "umap") +
  ggtitle("UMAP of Integrated CD4⁺ T Cells by cell_line")

# Visualize UMAP colored by cluster (FindClusters at resolution 0.3)
DimPlot(reference_integrated, group.by = "integrated_snn_res.0.3", reduction = "umap") +
  ggtitle("UMAP of Integrated CD4⁺ T Cells by Cluster (res 0.3)")

# Visualize UMAP colored by the "Prediction" metadata column
# NOTE(review): presumably the STCAT annotation carried in the input object — confirm
DimPlot(reference_integrated, group.by = "Prediction", reduction = "umap") +
  ggtitle("UMAP of Integrated CD4⁺ T Cells by Prediction")

# Visualize UMAP colored by the "predicted.celltype.l2" labels
DimPlot(reference_integrated, group.by = "predicted.celltype.l2", reduction = "umap") +
  ggtitle("UMAP of Integrated CD4⁺ T Cells by predicted.celltype.l2")

Save the integrated and clustered CD4⁺ T-cell reference object (before
Monocle3 trajectory inference):
saveRDS(reference_integrated, file = "sezary_cell_lines_mapped_to_cd4_reference_integrated_before_Monocle3.rds")
4. Trajectory and Pseudotime with Monocle3
library(monocle3)
library(SeuratWrappers)
library(Matrix)
Attaching package: ‘Matrix’
The following object is masked from ‘package:S4Vectors’:
expand
# Convert the integrated Seurat reference into a Monocle3 cell_data_set
cds <- as.cell_data_set(x = reference_integrated)
Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
Please use `rlang::check_installed()` instead.Warning: Monocle 3 trajectories require cluster partitions, which Seurat does not calculate. Please run 'cluster_cells' on your cell_data_set object
# Monocle3 needs its own clusters/partitions (Seurat does not compute them),
# so cluster on the UMAP coordinates first
cds <- cluster_cells(cds = cds, reduction_method = "UMAP")
# Learn the trajectory graph, keeping partitions separate
cds <- learn_graph(cds = cds, use_partition = TRUE)
|
| | 0%
|
|===============================================================================================================| 100%
|
| | 0%
|
|===============================================================================================================| 100%
|
| | 0%
|
|===============================================================================================================| 100%
# Naive T-cell markers used to choose trajectory roots; keep only the ones
# that are present as rows of the cell_data_set.
naive_markers <- c("CCR7", "SELL", "LEF1")
naive_markers <- naive_markers[naive_markers %in% rownames(cds)]
# With no marker left, the score below is NaN for every cell and quantile()
# fails with an unrelated-looking message, so stop with a clear one instead.
if (length(naive_markers) == 0) {
  stop(
    "None of the naive markers (CCR7, SELL, LEF1) are present in the ",
    "cell_data_set; cannot select root cells.",
    call. = FALSE
  )
}
# Extract log-normalized expression or fallback to counts log-transformed
if ("logcounts" %in% assayNames(cds)) {
  expr_mat <- assay(cds, "logcounts")
} else {
  expr_mat <- log1p(assay(cds, "counts"))
}
# Per-cell naive score: mean expression over the available markers
naive_score <- Matrix::colMeans(expr_mat[naive_markers, , drop = FALSE])
# Root cells are those scoring strictly above the 95th percentile
threshold <- quantile(naive_score, 0.95)
root_cells <- names(naive_score[naive_score > threshold])
# Heavy ties (e.g. most cells scoring the same value) can leave no cell
# strictly above the threshold; order_cells() cannot work without roots.
if (length(root_cells) == 0) {
  stop(
    "No cell has a naive score above the 95th percentile (", threshold,
    "); cannot select root cells.",
    call. = FALSE
  )
}
# Order cells from the chosen roots and copy pseudotime back to the Seurat object
cds <- order_cells(cds, root_cells = root_cells)
reference_integrated$pseudotime <- pseudotime(cds)
plot_cells(cds, color_cells_by = "pseudotime", show_trajectory_graph = TRUE)
Cells aren't colored in a way that allows them to be grouped.

# Visualize UMAP colored by the "Prediction" metadata column
# NOTE(review): presumably the STCAT annotation carried in the input object — confirm
DimPlot(reference_integrated, group.by = "Prediction", reduction = "umap") +
  ggtitle("UMAP of Integrated CD4⁺ T Cells by Prediction")

# Visualize UMAP colored by the "predicted.celltype.l2" labels
DimPlot(reference_integrated, group.by = "predicted.celltype.l2", reduction = "umap") +
  ggtitle("UMAP of Integrated CD4⁺ T Cells by predicted.celltype.l2")

Save the integrated CD4⁺ T-cell reference with Monocle3 pseudotime added
(before query projection):
saveRDS(reference_integrated, file = "sezary_cell_lines_mapped_to_cd4_reference_integrated_before_Query_Projection.rds")
5. Prepare Sézary Syndrome Cell Lines as Query
# Load required packages
library(Seurat)
library(glmGamPoi) # Recommended for memory-efficient SCTransform
Attaching package: ‘glmGamPoi’
The following object is masked from ‘package:ggplot2’:
vars
The following object is masked from ‘package:dplyr’:
vars
library(future)
# Optional: run future-enabled steps on 4 background R sessions
plan(strategy = "multisession", workers = 4)
# Raise the cap on the total size of globals exported to workers to 50 GiB
# (this limits exported objects; it is not a ceiling on overall memory use)
options(future.globals.maxSize = 50 * 1024^3)
# Step 1: Keep only the Sézary syndrome cell lines (cell_line "L1" to "L7")
query_subset <- subset(
  x = All_samples_Merged,
  subset = cell_line %in% paste0("L", 1:7)
)
# Run garbage collection; at top level this also prints memory usage
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 9283855 495.9 14812904 791.1 14812904 791.1
Vcells 2971711686 22672.4 4735597000 36129.8 4085964052 31173.5
# Step 2: Get raw counts matrix from RNA assay.
# `layer` replaces the `slot` argument, which is deprecated as of
# SeuratObject 5.0.0 (the version in use emits that warning).
query_raw <- GetAssayData(query_subset, assay = "RNA", layer = "counts")
# Step 3: Filter out genes expressed in <3 cells (saves memory).
# Matrix::rowSums is called explicitly so the sparse method is used even when
# the Matrix package is not attached; drop = FALSE keeps a matrix even if a
# single gene survives the filter.
keep_genes <- Matrix::rowSums(query_raw > 0) >= 3
query_raw_filtered <- query_raw[keep_genes, , drop = FALSE]
# Step 4: Create a new Seurat object with metadata preserved
query_cells <- CreateSeuratObject(counts = query_raw_filtered, meta.data = query_subset@meta.data)
# Step 5: Run SCTransform with glmGamPoi backend for better performance
query_cells <- SCTransform(
  query_cells,
  method = "glmGamPoi", # Faster and uses less RAM
  variable.features.n = 3000, # Optional: focus on top 3000 variable genes
  verbose = FALSE
)
6. Inject Cell Lines into Reference with MapQuery
# Find anchors between reference and query
anchors_query <- FindTransferAnchors(
  reference = reference_integrated,
  query = query_cells,
  reference.reduction = "pca",
  normalization.method = "SCT",
  dims = 1:18
)
# Map query cells to reference.
# Only the categorical cluster labels go through `refdata` here: label
# transfer treats every distinct value as a class, so continuous pseudotime is
# transferred separately below. The former "monocle3_embedding" entry was
# removed because no metadata column of that name is ever created on the
# reference.
query_mapped <- MapQuery(
  anchorset = anchors_query,
  query = query_cells,
  reference = reference_integrated,
  refdata = list(
    seurat_clusters = "seurat_clusters" # Transfer clusters
  ),
  reference.reduction = "pca",
  reduction.model = "umap"
)
# Transfer pseudotime as a continuous feature: a 1 x n_reference_cells matrix
# passed as `refdata` makes TransferData() return anchor-weighted values (an
# assay) instead of discrete predicted labels.
ref_pseudotime <- reference_integrated$pseudotime
if (!all(is.finite(ref_pseudotime))) {
  warning(
    "Reference pseudotime contains non-finite values (e.g. cells not ",
    "reachable from the root cells); transferred pseudotime may be NaN/Inf.",
    call. = FALSE
  )
}
pseudotime_ref <- matrix(
  ref_pseudotime,
  nrow = 1,
  dimnames = list("pseudotime", colnames(reference_integrated))
)
pseudotime_assay <- TransferData(
  anchorset = anchors_query,
  refdata = pseudotime_ref
)
# Store the transferred values as the numeric metadata column "pseudotime",
# which is the name the plotting code below reads from query_mapped.
pseudotime_pred <- GetAssayData(pseudotime_assay, layer = "data")["pseudotime", ]
# Defensive: strip a "_query" suffix in case the cell names carry one
names(pseudotime_pred) <- sub("_query$", "", names(pseudotime_pred))
query_mapped$pseudotime <- unname(pseudotime_pred[colnames(query_mapped)])
7. Visualization (Plot Reference and Injected Cells by Pseudotime
and Cell Line)
# Plot the reference trajectory UMAP, colored by pseudotime.
# Pseudotime is continuous, so FeaturePlot() (color gradient) is used;
# DimPlot(group.by = ...) would treat every distinct value as its own category.
FeaturePlot(reference_integrated, features = "pseudotime", reduction = "umap") +
  ggtitle("Reference CD4⁺ T Cell Trajectory (Pseudotime)")
# Overlay injected cell lines
DimPlot(query_mapped, reduction = "ref.umap", group.by = "cell_line", label = TRUE) +
  ggtitle("Injected Sézary Cell Lines on Reference Trajectory")
Visualization & Analysis
# Combined UMAP with pseudotime.
# p1 uses FeaturePlot() because pseudotime is continuous; a continuous viridis
# scale cannot be applied to DimPlot()'s discrete grouping.
p1 <- FeaturePlot(reference_integrated,
  features = "pseudotime",
  reduction = "umap"
) +
  scale_color_viridis_c(option = "magma")
# p2 draws every cell line in dark red. `cols` needs one color per group, so
# the single color is repeated for each cell_line value instead of being
# passed once (too few values for the discrete color scale).
p2 <- DimPlot(query_mapped,
  reduction = "ref.umap",
  group.by = "cell_line",
  cols = rep("darkred", length(unique(query_mapped$cell_line)))
) +
  ggtitle("Sézary Cell Lines")
p1 + p2
# Violin plot of pseudotime per cell line, with a narrow boxplot drawn on top
VlnPlot(
  object = query_mapped,
  features = "pseudotime",
  group.by = "cell_line",
  pt.size = 0.1
) + geom_boxplot(width = 0.2)
# Project cell lines on reference trajectory.
# The overlay layer needs the projected UMAP coordinates, which live in the
# "ref.umap" reduction and not in the metadata returned by query_mapped[[]];
# FetchData() returns the coordinates and pseudotime in one data frame.
FeaturePlot(query_mapped,
  features = "pseudotime",
  reduction = "ref.umap"
) +
  geom_point(
    data = FetchData(query_mapped, vars = c("refUMAP_1", "refUMAP_2", "pseudotime")),
    aes(x = refUMAP_1, y = refUMAP_2, color = pseudotime),
    size = 1.5
  )
Analyze Pseudotime Distributions Across Cell Lines
# Pseudotime of the projected cell lines on the reference UMAP
FeaturePlot(
  object = query_mapped,
  features = "pseudotime",
  reduction = "ref.umap"
) + ggtitle("Pseudotime Values of Injected Cell Lines")
# Pseudotime distribution for each cell line as violins
VlnPlot(
  object = query_mapped,
  features = "pseudotime",
  group.by = "cell_line"
) + ggtitle("Pseudotime Distribution by Cell Line")
Save the mapped query object (Sézary cell lines projected onto
reference trajectory):
saveRDS(query_mapped, file = "sezary_cell_lines_mapped_to_cd4_reference.rds")
