1. load libraries

2. Read Seurat object with load as you save it with save() function


All_samples_Merged <- readRDS("../All_samples_Merged_with_STCAT_and_cleaned.rds")

3. Clustering & Dimensionality Reduction


reference_integrated <- readRDS("../sezary_cell_lines_mapped_to_cd4_reference_integrated_before_Query_Projection.rds")

# Skip re-running SCTransform since full model is already saved

DefaultAssay(reference_integrated) <- "integrated"


# [31m🔴 RED MODIFICATION: Re-run PCA + UMAP after SCT[0m
reference_integrated <- RunPCA(reference_integrated, assay = "integrated", verbose = TRUE)
Avis : The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
Please use the `layer` argument instead.PC_ 1 
Positive:  MALAT1, ANK3, ARHGAP15, MBNL1, FOXP1, PLCL1, RUNX1, PRKCA, CAMK4, SYNE2 
       BIRC6, SERINC5, HIVEP2, TNRC6B, KLF12, MAML2, RIPOR2, TSHZ2, PITPNC1, ZBTB20 
       DOCK10, FHIT, FTX, CSGALNACT1, ITGA4, EXOC4, DLEU1, ANKRD44, ELMO1, MLLT3 
Negative:  TRBV6-2, RPL10, EEF1A1, RPS12, RPL32, RPL30, RPL41, RPS8, RPL34, RPL19 
       RPS15A, RPS27, RPS14, RPS3A, RPL11, RPL13, RPS28, RPS13, RPL39, RPLP1 
       RPS27A, RPL28, RPS23, RPS18, RPS2, EEF1B2, RPL12, TPT1, ACTG1, RPL10A 
PC_ 2 
Positive:  TRBV6-2, MALAT1, FHIT, TSHZ2, MAML2, LINC00861, LEF1, CERS6, CCR7, BACH2 
       TCF7, FOXP1, RPS8, PLCL1, ANK3, RPL30, APBA2, EEF1B2, SELL, RPL32 
       RPS3A, PITPNC1, IGF1R, HIF1A, ACTN1, IKZF1, ITGA6, PRKCA, RPS14, RPS13 
Negative:  S100A4, VIM, S100A11, CRIP1, ANXA1, FTH1, IL7R, S100A6, LGALS3, S100A10 
       LMNA, LGALS1, ANXA2, AHNAK, IL32, ITGB1, SH3BGRL3, TMSB4X, B2M, CD52 
       CYBA, CCL5, ALOX5AP, TAGLN2, KLF2, CD99, CXCR4, CLIC1, NIBAN1, ZFP36L2 
PC_ 3 
Positive:  NFKB2, NFKBIA, SRSF7, HIST1H4C, RPS27L, BTG1, PNRC1, GPR137, RELB, SQSTM1 
       SRSF3, RSRC2, ATF4, RNASEK, EIF1, HNRNPA2B1, LINC01578, SRSF2, GPX4, SNHG1 
       GADD45B, UBC, IER5, MITD1, BAX, IRF1, TMEM120A, WHAMM, RHBDD2, CREM 
Negative:  TRBV6-2, CRIP1, RPL10, VIM, PABPC1, S100A4, EEF1A1, RPL28, RPL34, RPL30 
       S100A11, RPL32, RPL12, FTH1, RPS8, FXYD5, RPL11, RPS2, RPL39, RPL41 
       RPS18, S100A10, TRBV7-2, S100A6, ANK3, RPS28, RPL10A, RPS15A, IL7R, RPS14 
PC_ 4 
Positive:  TRBV6-2, TRBV7-2, TRBV7-9, NFKBIA, TRBV18, JUN, BTG1, TRBV28, LINC01578, NFKB2 
       TRBV6-5, DDIT4, VIM, TRBV27, S100A4, RELB, TXNIP, TRBV5-6, TRDV1, LMNA 
       KLF6, CCL5, LGALS3, REL, C12orf65, TRBV12-3, LTB, SRSF7, TRBV29-1, TRBV5-5 
Negative:  AL050309.1, GMDS, RPL41, KIF5C, EEF1A1, RPLP1, RMRP, FP671120.4, RPS18, SYTL2 
       TRBV20-1, PABPC1, RPS15A, SLC20A2, RPL10, RPL12, LINC01725, RPS12, RPL13, RPL32 
       C5orf17, AC245407.2, TPT1, CCDC84, FHIT, RPS27A, RPL11, RPS2, RPS14, RPL36A 
PC_ 5 
Positive:  S100A4, ACTB, ACTG1, LGALS3, PFN1, IL32, CD74, LTB, CORO1A, TMSB10 
       S100A6, LGALS1, IFITM1, STAT1, CD52, VIM, AL136456.1, TRBV19, NME2, CCL5 
       CLIC1, HLA-DRB1, CALR, GBP5, EMP3, TRAV5, HLA-DRA, ANXA2, HSP90AB1, KLRB1 
Negative:  FTH1, PABPC1, ANXA1, KLF2, IL7R, SESN3, RPS16, TSC22D3, CXCR4, SARAF 
       ZFP36L2, PASK, SELL, LEPROTL1, CRIP1, NR3C1, CREM, PDCD4, FYN, FXYD5 
       CD55, SNED1, TAGLN2, TXNIP, SRGN, TNFAIP3, S1PR1, RGCC, SYTL3, TAGAP 
reference_integrated <- RunUMAP(reference_integrated, reduction = "pca", dims = 1:18, return.model = TRUE)
Avis : The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per sessionUMAP will return its model
12:20:27 UMAP embedding parameters a = 0.9922 b = 1.112
12:20:27 Read 8610 rows and found 18 numeric columns
12:20:27 Using Annoy for neighbor search, n_neighbors = 30
12:20:27 Building Annoy index with metric = cosine, n_trees = 50
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
12:20:28 Writing NN index file to temp file /tmp/RtmpC4q9CN/file86eb63b1d1ff3
12:20:28 Searching Annoy index using 1 thread, search_k = 3000
12:20:31 Annoy recall = 100%
12:20:32 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
12:20:33 Initializing from normalized Laplacian + noise (using RSpectra)
12:20:33 Commencing optimization for 500 epochs, with 361792 positive edges
12:20:33 Using rng type: pcg
Using method 'umap'
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
12:20:47 Optimization finished
# [31m🔴 RED MODIFICATION: Explicitly set graph.name for integrated assay[0m
reference_integrated <- FindNeighbors(reference_integrated, dims = 1:18, graph.name = "integrated_snn")
Computing nearest neighbor graph
Computing SNN
Only one graph name supplied, storing nearest-neighbor graph only
reference_integrated <- FindClusters(reference_integrated, resolution = 0.3, graph.name = "integrated_snn")
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 8610
Number of edges: 80842

Running Louvain algorithm...
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8976
Number of communities: 17
Elapsed time: 0 seconds
7 singletons identified. 10 final clusters.
ElbowPlot(reference_integrated, ndims = 50)


# Visualize UMAP colored by original donor (cell_line)
DimPlot(reference_integrated, group.by = "cell_line", reduction = "umap") +
  ggtitle("UMAP of Integrated CD4+ T Cells")



DimPlot(reference_integrated, group.by = "integrated_snn_res.0.3", reduction = "umap") +
  ggtitle("UMAP of Integrated CD4+ T Cells")



DimPlot(reference_integrated, group.by = "Prediction", reduction = "umap") +
  ggtitle("UMAP of Integrated CD4+ T Cells")



DimPlot(reference_integrated, group.by = "predicted.celltype.l2", reduction = "umap") +
  ggtitle("UMAP of Integrated CD4+ T Cells")

4. Trajectory and Pseudotime with Monocle3


cat("\n====== STEP 4: Monocle3 Pseudotime ======\n")

====== STEP 4: Monocle3 Pseudotime ======
library(monocle3)
library(SeuratWrappers)
library(Matrix)

cds <- as.cell_data_set(reference_integrated)
Avis : `PackageCheck()` was deprecated in SeuratObject 5.0.0.
Please use `rlang::check_installed()` instead.Avis : Monocle 3 trajectories require cluster partitions, which Seurat does not calculate. Please run 'cluster_cells' on your cell_data_set object
cds <- cluster_cells(cds, reduction_method = "UMAP")
cds <- learn_graph(cds, use_partition = TRUE)

  |                                                                                                                          
  |                                                                                                                    |   0%
  |                                                                                                                          
  |====================================================================================================================| 100%

  |                                                                                                                          
  |                                                                                                                    |   0%
  |                                                                                                                          
  |====================================================================================================================| 100%

  |                                                                                                                          
  |                                                                                                                    |   0%
  |                                                                                                                          
  |====================================================================================================================| 100%
naive_markers <- c("CCR7", "SELL", "LEF1", "TCF7")
naive_markers <- naive_markers[naive_markers %in% rownames(cds)]

# Extract log-normalized expression or fallback to counts log-transformed
if("logcounts" %in% assayNames(cds)) {
  expr_mat <- assay(cds, "logcounts")
} else {
  expr_mat <- log1p(assay(cds, "counts"))
}

naive_score <- Matrix::colMeans(expr_mat[naive_markers, , drop = FALSE])

# Pick top 10 cells or top 1%, whichever is more robust
#top_n <- min(10, length(naive_score))
threshold <- quantile(naive_score, 0.95)
root_cells <- names(naive_score[naive_score > threshold])

cds <- order_cells(cds, root_cells = root_cells)
reference_integrated$pseudotime <- pseudotime(cds)

plot_cells(cds, color_cells_by = "pseudotime", show_trajectory_graph = TRUE)
Cells aren't colored in a way that allows them to be grouped.

# Visualize UMAP colored by original donor (cell_line)
DimPlot(reference_integrated, group.by = "Prediction", reduction = "umap") +
  ggtitle("UMAP of Integrated CD4+ T Cells")


# Visualize UMAP colored by original donor (cell_line)
DimPlot(reference_integrated, group.by = "predicted.celltype.l2", reduction = "umap") +
  ggtitle("UMAP of Integrated CD4+ T Cells")


gc()
             used    (Mb) gc trigger    (Mb)   max used    (Mb)
Ncells   10298476   550.0   17879138   954.9   17879138   954.9
Vcells 1395875807 10649.7 2214099682 16892.3 1662399277 12683.1

Save the Reference (Sézary cell lines projected onto this reference trajectory):

saveRDS(reference_integrated, file = "sezary_reference_CD4Tcells_with_ready_for_cell_line_Projection.rds")
LS0tCnRpdGxlOiAiVHJhamVjdG9yeSBBbmFseXNpcyBvZiBSZWZlcmVuY2UgQ0Q0VGNlbGxzIG9uIHdoaWNoIHdlIHByb2plY3QgY2VsbCBsaW5lcyBvbmUgYnkgb25lIgphdXRob3I6ICJOYXNpciBNYWhtb29kIEFiYmFzaSIKZGF0ZTogImByIFN5cy5EYXRlKClgIgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazoKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwogICAgdG9jX2NvbGxhcHNlZDogeWVzCiAgd29yZF9kb2N1bWVudDoKICAgIHRvYzogeWVzCiAgaHRtbF9kb2N1bWVudDoKICAgIHRvYzogeWVzCiAgICBkZl9wcmludDogcGFnZWQKICAgIG51bWJlcl9zZWN0aW9uczogdHJ1ZQogIHBkZl9kb2N1bWVudDoKICAgIHRvYzogeWVzCi0tLQoKIyAxLiBsb2FkIGxpYnJhcmllcwpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0KCmxpYnJhcnkoU2V1cmF0KQpsaWJyYXJ5KG1vbm9jbGUzKQpsaWJyYXJ5KFNldXJhdFdyYXBwZXJzKQpsaWJyYXJ5KGhhcm1vbnkpCgoKIyBFeHRyYSBsaWJyYXJpZXMKbGlicmFyeShkcGx5cikKbGlicmFyeShwaGVhdG1hcCkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KE1hdHJpeCkKbGlicmFyeShwYXRjaHdvcmspCgoKc2V0LnNlZWQoMTIzNCkKCmBgYAoKCiMgMi4gUmVhZCBTZXVyYXQgb2JqZWN0IHdpdGggbG9hZCBhcyB5b3Ugc2F2ZSBpdCB3aXRoIHNhdmUoKSBmdW5jdGlvbgpgYGB7ciBsb2FkU2V1cmF0fQoKQWxsX3NhbXBsZXNfTWVyZ2VkIDwtIHJlYWRSRFMoIi4uL0FsbF9zYW1wbGVzX01lcmdlZF93aXRoX1NUQ0FUX2FuZF9jbGVhbmVkLnJkcyIpCgoKYGBgCgoKIyAzLiBDbHVzdGVyaW5nICYgRGltZW5zaW9uYWxpdHkgUmVkdWN0aW9uCmBgYHtyIENsdXN0ZXJ9CgpyZWZlcmVuY2VfaW50ZWdyYXRlZCA8LSByZWFkUkRTKCIuLi9zZXphcnlfY2VsbF9saW5lc19tYXBwZWRfdG9fY2Q0X3JlZmVyZW5jZV9pbnRlZ3JhdGVkX2JlZm9yZV9RdWVyeV9Qcm9qZWN0aW9uLnJkcyIpCgojIFNraXAgcmUtcnVubmluZyBTQ1RyYW5zZm9ybSBzaW5jZSBmdWxsIG1vZGVsIGlzIGFscmVhZHkgc2F2ZWQKCkRlZmF1bHRBc3NheShyZWZlcmVuY2VfaW50ZWdyYXRlZCkgPC0gImludGVncmF0ZWQiCgoKIyBbMzFt8J+UtCBSRUQgTU9ESUZJQ0FUSU9OOiBSZS1ydW4gUENBICsgVU1BUCBhZnRlciBTQ1RbMG0KcmVmZXJlbmNlX2ludGVncmF0ZWQgPC0gUnVuUENBKHJlZmVyZW5jZV9pbnRlZ3JhdGVkLCBhc3NheSA9ICJpbnRlZ3JhdGVkIiwgdmVyYm9zZSA9IFRSVUUpCnJlZmVyZW5jZV9pbnRlZ3JhdGVkIDwtIFJ1blVNQVAocmVmZXJlbmNlX2ludGVncmF0ZWQsIHJlZHVjdGlvbiA9ICJwY2EiLCBkaW1zID0gMToxOCwgcmV0dXJuLm1vZGVsID0gVFJVRSkKCiMgWzMxbfCflLQgUkVEIE1PRElGSUNBVElPTjogRXhwbGljaXRseSBzZXQgZ3JhcGgubmFtZSBmb3IgaW50ZWdyYXRlZCBhc3NheVswbQpyZWZlcmVuY2VfaW50ZWdyYXRlZCA8LSBGaW5kTmVpZ2hib3JzKHJlZmVyZW5jZV9pbnRlZ3JhdGVkLCBkaW1zID0gMToxOCwgZ3JhcGgubmFtZSA9ICJpbnRlZ3JhdGVkX3NubiIpCnJlZmVyZW5jZV9pbnRlZ3JhdGVkIDwtIEZpbmRDbHVzdGVycyhyZWZlcmVuY2VfaW50ZWdyYXRlZCwgcmVzb2x1dGlvbiA9IDAuMywgZ3JhcGgubmFtZSA9ICJpbnRlZ3JhdGVkX3NubiIpCgpFbGJvd1Bsb3QocmVmZXJlbmNlX2ludGVncmF0ZWQsIG5kaW1zID0gNTApCgojIFZpc3VhbGl6ZSBVTUFQIGNvbG9yZWQgYnkgb3JpZ2luYWwgZG9ub3IgKGNlbGxfbGluZSkKRGltUGxvdChyZWZlcmVuY2VfaW50ZWdyYXRlZCwgZ3JvdXAuYnkgPSAiY2VsbF9saW5lIiwgcmVkdWN0aW9uID0gInVtYXAiKSArCiAgZ2d0aXRsZSgiVU1BUCBvZiBJbnRlZ3JhdGVkIENENCsgVCBDZWxscyIpCgoKRGltUGxvdChyZWZlcmVuY2VfaW50ZWdyYXRlZCwgZ3JvdXAuYnkgPSAiaW50ZWdyYXRlZF9zbm5fcmVzLjAuMyIsIHJlZHVjdGlvbiA9ICJ1bWFwIikgKwogIGdndGl0bGUoIlVNQVAgb2YgSW50ZWdyYXRlZCBDRDQrIFQgQ2VsbHMiKQoKCkRpbVBsb3QocmVmZXJlbmNlX2ludGVncmF0ZWQsIGdyb3VwLmJ5ID0gIlByZWRpY3Rpb24iLCByZWR1Y3Rpb24gPSAidW1hcCIpICsKICBnZ3RpdGxlKCJVTUFQIG9mIEludGVncmF0ZWQgQ0Q0KyBUIENlbGxzIikKCgpEaW1QbG90KHJlZmVyZW5jZV9pbnRlZ3JhdGVkLCBncm91cC5ieSA9ICJwcmVkaWN0ZWQuY2VsbHR5cGUubDIiLCByZWR1Y3Rpb24gPSAidW1hcCIpICsKICBnZ3RpdGxlKCJVTUFQIG9mIEludGVncmF0ZWQgQ0Q0KyBUIENlbGxzIikKCmBgYAoKIyA0LiBUcmFqZWN0b3J5IGFuZCBQc2V1ZG90aW1lIHdpdGggTW9ub2NsZTMKYGBge3IgVHJhamVjdG9yeX0KCmNhdCgiXG49PT09PT0gU1RFUCA0OiBNb25vY2xlMyBQc2V1ZG90aW1lID09PT09PVxuIikKCmxpYnJhcnkobW9ub2NsZTMpCmxpYnJhcnkoU2V1cmF0V3JhcHBlcnMpCmxpYnJhcnkoTWF0cml4KQoKY2RzIDwtIGFzLmNlbGxfZGF0YV9zZXQocmVmZXJlbmNlX2ludGVncmF0ZWQpCmNkcyA8LSBjbHVzdGVyX2NlbGxzKGNkcywgcmVkdWN0aW9uX21ldGhvZCA9ICJVTUFQIikKY2RzIDwtIGxlYXJuX2dyYXBoKGNkcywgdXNlX3BhcnRpdGlvbiA9IFRSVUUpCgpuYWl2ZV9tYXJrZXJzIDwtIGMoIkNDUjciLCAiU0VMTCIsICJMRUYxIiwgIlRDRjciKQpuYWl2ZV9tYXJrZXJzIDwtIG5haXZlX21hcmtlcnNbbmFpdmVfbWFya2VycyAlaW4lIHJvd25hbWVzKGNkcyldCgojIEV4dHJhY3QgbG9nLW5vcm1hbGl6ZWQgZXhwcmVzc2lvbiBvciBmYWxsYmFjayB0byBjb3VudHMgbG9nLXRyYW5zZm9ybWVkCmlmKCJsb2djb3VudHMiICVpbiUgYXNzYXlOYW1lcyhjZHMpKSB7CiAgZXhwcl9tYXQgPC0gYXNzYXkoY2RzLCAibG9nY291bnRzIikKfSBlbHNlIHsKICBleHByX21hdCA8LSBsb2cxcChhc3NheShjZHMsICJjb3VudHMiKSkKfQoKbmFpdmVfc2NvcmUgPC0gTWF0cml4Ojpjb2xNZWFucyhleHByX21hdFtuYWl2ZV9tYXJrZXJzLCAsIGRyb3AgPSBGQUxTRV0pCgojIFBpY2sgdG9wIDEwIGNlbGxzIG9yIHRvcCAxJSwgd2hpY2hldmVyIGlzIG1vcmUgcm9idXN0CiN0b3BfbiA8LSBtaW4oMTAsIGxlbmd0aChuYWl2ZV9zY29yZSkpCnRocmVzaG9sZCA8LSBxdWFudGlsZShuYWl2ZV9zY29yZSwgMC45NSkKcm9vdF9jZWxscyA8LSBuYW1lcyhuYWl2ZV9zY29yZVtuYWl2ZV9zY29yZSA+IHRocmVzaG9sZF0pCgpjZHMgPC0gb3JkZXJfY2VsbHMoY2RzLCByb290X2NlbGxzID0gcm9vdF9jZWxscykKcmVmZXJlbmNlX2ludGVncmF0ZWQkcHNldWRvdGltZSA8LSBwc2V1ZG90aW1lKGNkcykKCnBsb3RfY2VsbHMoY2RzLCBjb2xvcl9jZWxsc19ieSA9ICJwc2V1ZG90aW1lIiwgc2hvd190cmFqZWN0b3J5X2dyYXBoID0gVFJVRSkKCiMgVmlzdWFsaXplIFVNQVAgY29sb3JlZCBieSBvcmlnaW5hbCBkb25vciAoY2VsbF9saW5lKQpEaW1QbG90KHJlZmVyZW5jZV9pbnRlZ3JhdGVkLCBncm91cC5ieSA9ICJQcmVkaWN0aW9uIiwgcmVkdWN0aW9uID0gInVtYXAiKSArCiAgZ2d0aXRsZSgiVU1BUCBvZiBJbnRlZ3JhdGVkIENENCsgVCBDZWxscyIpCgojIFZpc3VhbGl6ZSBVTUFQIGNvbG9yZWQgYnkgb3JpZ2luYWwgZG9ub3IgKGNlbGxfbGluZSkKRGltUGxvdChyZWZlcmVuY2VfaW50ZWdyYXRlZCwgZ3JvdXAuYnkgPSAicHJlZGljdGVkLmNlbGx0eXBlLmwyIiwgcmVkdWN0aW9uID0gInVtYXAiKSArCiAgZ2d0aXRsZSgiVU1BUCBvZiBJbnRlZ3JhdGVkIENENCsgVCBDZWxscyIpCgpnYygpCgoKCmBgYAoKIyMgU2F2ZSB0aGUgUmVmZXJlbmNlIChTw6l6YXJ5IGNlbGwgbGluZXMgcHJvamVjdGVkIG9udG8gdGhpcyByZWZlcmVuY2UgdHJhamVjdG9yeSk6CmBgYHtyIHNhdmVmaW5hbH0Kc2F2ZVJEUyhyZWZlcmVuY2VfaW50ZWdyYXRlZCwgZmlsZSA9ICJzZXphcnlfcmVmZXJlbmNlX0NENFRjZWxsc193aXRoX3JlYWR5X2Zvcl9jZWxsX2xpbmVfUHJvamVjdGlvbi5yZHMiKQpgYGAKCgo=