3. Clustering & Dimensionality Reduction
reference_integrated <- readRDS("../sezary_cell_lines_mapped_to_cd4_reference_integrated_before_Query_Projection.rds")
# Skip re-running SCTransform since full model is already saved
DefaultAssay(reference_integrated) <- "integrated"
# [31m🔴 RED MODIFICATION: Re-run PCA + UMAP after SCT[0m
reference_integrated <- RunPCA(reference_integrated, assay = "integrated", verbose = TRUE)
Avis : The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
Please use the `layer` argument instead.PC_ 1
Positive: MALAT1, ANK3, ARHGAP15, MBNL1, FOXP1, PLCL1, RUNX1, PRKCA, CAMK4, SYNE2
BIRC6, SERINC5, HIVEP2, TNRC6B, KLF12, MAML2, RIPOR2, TSHZ2, PITPNC1, ZBTB20
DOCK10, FHIT, FTX, CSGALNACT1, ITGA4, EXOC4, DLEU1, ANKRD44, ELMO1, MLLT3
Negative: TRBV6-2, RPL10, EEF1A1, RPS12, RPL32, RPL30, RPL41, RPS8, RPL34, RPL19
RPS15A, RPS27, RPS14, RPS3A, RPL11, RPL13, RPS28, RPS13, RPL39, RPLP1
RPS27A, RPL28, RPS23, RPS18, RPS2, EEF1B2, RPL12, TPT1, ACTG1, RPL10A
PC_ 2
Positive: TRBV6-2, MALAT1, FHIT, TSHZ2, MAML2, LINC00861, LEF1, CERS6, CCR7, BACH2
TCF7, FOXP1, RPS8, PLCL1, ANK3, RPL30, APBA2, EEF1B2, SELL, RPL32
RPS3A, PITPNC1, IGF1R, HIF1A, ACTN1, IKZF1, ITGA6, PRKCA, RPS14, RPS13
Negative: S100A4, VIM, S100A11, CRIP1, ANXA1, FTH1, IL7R, S100A6, LGALS3, S100A10
LMNA, LGALS1, ANXA2, AHNAK, IL32, ITGB1, SH3BGRL3, TMSB4X, B2M, CD52
CYBA, CCL5, ALOX5AP, TAGLN2, KLF2, CD99, CXCR4, CLIC1, NIBAN1, ZFP36L2
PC_ 3
Positive: NFKB2, NFKBIA, SRSF7, HIST1H4C, RPS27L, BTG1, PNRC1, GPR137, RELB, SQSTM1
SRSF3, RSRC2, ATF4, RNASEK, EIF1, HNRNPA2B1, LINC01578, SRSF2, GPX4, SNHG1
GADD45B, UBC, IER5, MITD1, BAX, IRF1, TMEM120A, WHAMM, RHBDD2, CREM
Negative: TRBV6-2, CRIP1, RPL10, VIM, PABPC1, S100A4, EEF1A1, RPL28, RPL34, RPL30
S100A11, RPL32, RPL12, FTH1, RPS8, FXYD5, RPL11, RPS2, RPL39, RPL41
RPS18, S100A10, TRBV7-2, S100A6, ANK3, RPS28, RPL10A, RPS15A, IL7R, RPS14
PC_ 4
Positive: TRBV6-2, TRBV7-2, TRBV7-9, NFKBIA, TRBV18, JUN, BTG1, TRBV28, LINC01578, NFKB2
TRBV6-5, DDIT4, VIM, TRBV27, S100A4, RELB, TXNIP, TRBV5-6, TRDV1, LMNA
KLF6, CCL5, LGALS3, REL, C12orf65, TRBV12-3, LTB, SRSF7, TRBV29-1, TRBV5-5
Negative: AL050309.1, GMDS, RPL41, KIF5C, EEF1A1, RPLP1, RMRP, FP671120.4, RPS18, SYTL2
TRBV20-1, PABPC1, RPS15A, SLC20A2, RPL10, RPL12, LINC01725, RPS12, RPL13, RPL32
C5orf17, AC245407.2, TPT1, CCDC84, FHIT, RPS27A, RPL11, RPS2, RPS14, RPL36A
PC_ 5
Positive: S100A4, ACTB, ACTG1, LGALS3, PFN1, IL32, CD74, LTB, CORO1A, TMSB10
S100A6, LGALS1, IFITM1, STAT1, CD52, VIM, AL136456.1, TRBV19, NME2, CCL5
CLIC1, HLA-DRB1, CALR, GBP5, EMP3, TRAV5, HLA-DRA, ANXA2, HSP90AB1, KLRB1
Negative: FTH1, PABPC1, ANXA1, KLF2, IL7R, SESN3, RPS16, TSC22D3, CXCR4, SARAF
ZFP36L2, PASK, SELL, LEPROTL1, CRIP1, NR3C1, CREM, PDCD4, FYN, FXYD5
CD55, SNED1, TAGLN2, TXNIP, SRGN, TNFAIP3, S1PR1, RGCC, SYTL3, TAGAP
reference_integrated <- RunUMAP(reference_integrated, reduction = "pca", dims = 1:18, return.model = TRUE)
Avis : The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per sessionUMAP will return its model
12:20:27 UMAP embedding parameters a = 0.9922 b = 1.112
12:20:27 Read 8610 rows and found 18 numeric columns
12:20:27 Using Annoy for neighbor search, n_neighbors = 30
12:20:27 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
12:20:28 Writing NN index file to temp file /tmp/RtmpC4q9CN/file86eb63b1d1ff3
12:20:28 Searching Annoy index using 1 thread, search_k = 3000
12:20:31 Annoy recall = 100%
12:20:32 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
12:20:33 Initializing from normalized Laplacian + noise (using RSpectra)
12:20:33 Commencing optimization for 500 epochs, with 361792 positive edges
12:20:33 Using rng type: pcg
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
12:20:47 Optimization finished
# [31m🔴 RED MODIFICATION: Explicitly set graph.name for integrated assay[0m
reference_integrated <- FindNeighbors(reference_integrated, dims = 1:18, graph.name = "integrated_snn")
Computing nearest neighbor graph
Computing SNN
Only one graph name supplied, storing nearest-neighbor graph only
reference_integrated <- FindClusters(reference_integrated, resolution = 0.3, graph.name = "integrated_snn")
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 8610
Number of edges: 80842
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8976
Number of communities: 17
Elapsed time: 0 seconds
7 singletons identified. 10 final clusters.
ElbowPlot(reference_integrated, ndims = 50)

# Visualize UMAP colored by original donor (cell_line)
DimPlot(reference_integrated, group.by = "cell_line", reduction = "umap") +
ggtitle("UMAP of Integrated CD4+ T Cells")

DimPlot(reference_integrated, group.by = "integrated_snn_res.0.3", reduction = "umap") +
ggtitle("UMAP of Integrated CD4+ T Cells")

DimPlot(reference_integrated, group.by = "Prediction", reduction = "umap") +
ggtitle("UMAP of Integrated CD4+ T Cells")

DimPlot(reference_integrated, group.by = "predicted.celltype.l2", reduction = "umap") +
ggtitle("UMAP of Integrated CD4+ T Cells")

LS0tCnRpdGxlOiAiVHJhamVjdG9yeSBBbmFseXNpcyBvZiBSZWZlcmVuY2UgQ0Q0VGNlbGxzIG9uIHdoaWNoIHdlIHByb2plY3QgY2VsbCBsaW5lcyBvbmUgYnkgb25lIgphdXRob3I6ICJOYXNpciBNYWhtb29kIEFiYmFzaSIKZGF0ZTogImByIFN5cy5EYXRlKClgIgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazoKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwogICAgdG9jX2NvbGxhcHNlZDogeWVzCiAgd29yZF9kb2N1bWVudDoKICAgIHRvYzogeWVzCiAgaHRtbF9kb2N1bWVudDoKICAgIHRvYzogeWVzCiAgICBkZl9wcmludDogcGFnZWQKICAgIG51bWJlcl9zZWN0aW9uczogdHJ1ZQogIHBkZl9kb2N1bWVudDoKICAgIHRvYzogeWVzCi0tLQoKIyAxLiBsb2FkIGxpYnJhcmllcwpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0KCmxpYnJhcnkoU2V1cmF0KQpsaWJyYXJ5KG1vbm9jbGUzKQpsaWJyYXJ5KFNldXJhdFdyYXBwZXJzKQpsaWJyYXJ5KGhhcm1vbnkpCgoKIyBFeHRyYSBsaWJyYXJpZXMKbGlicmFyeShkcGx5cikKbGlicmFyeShwaGVhdG1hcCkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KE1hdHJpeCkKbGlicmFyeShwYXRjaHdvcmspCgoKc2V0LnNlZWQoMTIzNCkKCmBgYAoKCiMgMi4gUmVhZCBTZXVyYXQgb2JqZWN0IHdpdGggbG9hZCBhcyB5b3Ugc2F2ZSBpdCB3aXRoIHNhdmUoKSBmdW5jdGlvbgpgYGB7ciBsb2FkU2V1cmF0fQoKQWxsX3NhbXBsZXNfTWVyZ2VkIDwtIHJlYWRSRFMoIi4uL0FsbF9zYW1wbGVzX01lcmdlZF93aXRoX1NUQ0FUX2FuZF9jbGVhbmVkLnJkcyIpCgoKYGBgCgoKIyAzLiBDbHVzdGVyaW5nICYgRGltZW5zaW9uYWxpdHkgUmVkdWN0aW9uCmBgYHtyIENsdXN0ZXJ9CgpyZWZlcmVuY2VfaW50ZWdyYXRlZCA8LSByZWFkUkRTKCIuLi9zZXphcnlfY2VsbF9saW5lc19tYXBwZWRfdG9fY2Q0X3JlZmVyZW5jZV9pbnRlZ3JhdGVkX2JlZm9yZV9RdWVyeV9Qcm9qZWN0aW9uLnJkcyIpCgojIFNraXAgcmUtcnVubmluZyBTQ1RyYW5zZm9ybSBzaW5jZSBmdWxsIG1vZGVsIGlzIGFscmVhZHkgc2F2ZWQKCkRlZmF1bHRBc3NheShyZWZlcmVuY2VfaW50ZWdyYXRlZCkgPC0gImludGVncmF0ZWQiCgoKIyBbMzFt8J+UtCBSRUQgTU9ESUZJQ0FUSU9OOiBSZS1ydW4gUENBICsgVU1BUCBhZnRlciBTQ1RbMG0KcmVmZXJlbmNlX2ludGVncmF0ZWQgPC0gUnVuUENBKHJlZmVyZW5jZV9pbnRlZ3JhdGVkLCBhc3NheSA9ICJpbnRlZ3JhdGVkIiwgdmVyYm9zZSA9IFRSVUUpCnJlZmVyZW5jZV9pbnRlZ3JhdGVkIDwtIFJ1blVNQVAocmVmZXJlbmNlX2ludGVncmF0ZWQsIHJlZHVjdGlvbiA9ICJwY2EiLCBkaW1zID0gMToxOCwgcmV0dXJuLm1vZGVsID0gVFJVRSkKCiMgWzMxbfCflLQgUkVEIE1PRElGSUNBVElPTjogRXhwbGljaXRseSBzZXQgZ3JhcGgubmFtZSBmb3IgaW50ZWdyYXRlZCBhc3NheVswbQpyZWZlcmVuY2VfaW50ZWdyYXRlZCA8LSBGaW5kTmVpZ2hib3JzKHJlZmVyZW5jZV9pbnRlZ3JhdGVkLCBkaW1zID0gMToxOCwgZ3JhcGgubmFtZSA9ICJpbnRlZ3JhdGVkX3NubiIpCnJlZmVyZW5jZV9pbnRlZ3JhdGVkIDwtIEZpbmRDbHVzdGVycyhyZWZlcmVuY2VfaW50ZWdyYXRlZCwgcmVzb2x1dGlvbiA9IDAuMywgZ3JhcGgubmFtZSA9ICJpbnRlZ3JhdGVkX3NubiIpCgpFbGJvd1Bsb3QocmVmZXJlbmNlX2ludGVncmF0ZWQsIG5kaW1zID0gNTApCgojIFZpc3VhbGl6ZSBVTUFQIGNvbG9yZWQgYnkgb3JpZ2luYWwgZG9ub3IgKGNlbGxfbGluZSkKRGltUGxvdChyZWZlcmVuY2VfaW50ZWdyYXRlZCwgZ3JvdXAuYnkgPSAiY2VsbF9saW5lIiwgcmVkdWN0aW9uID0gInVtYXAiKSArCiAgZ2d0aXRsZSgiVU1BUCBvZiBJbnRlZ3JhdGVkIENENCsgVCBDZWxscyIpCgoKRGltUGxvdChyZWZlcmVuY2VfaW50ZWdyYXRlZCwgZ3JvdXAuYnkgPSAiaW50ZWdyYXRlZF9zbm5fcmVzLjAuMyIsIHJlZHVjdGlvbiA9ICJ1bWFwIikgKwogIGdndGl0bGUoIlVNQVAgb2YgSW50ZWdyYXRlZCBDRDQrIFQgQ2VsbHMiKQoKCkRpbVBsb3QocmVmZXJlbmNlX2ludGVncmF0ZWQsIGdyb3VwLmJ5ID0gIlByZWRpY3Rpb24iLCByZWR1Y3Rpb24gPSAidW1hcCIpICsKICBnZ3RpdGxlKCJVTUFQIG9mIEludGVncmF0ZWQgQ0Q0KyBUIENlbGxzIikKCgpEaW1QbG90KHJlZmVyZW5jZV9pbnRlZ3JhdGVkLCBncm91cC5ieSA9ICJwcmVkaWN0ZWQuY2VsbHR5cGUubDIiLCByZWR1Y3Rpb24gPSAidW1hcCIpICsKICBnZ3RpdGxlKCJVTUFQIG9mIEludGVncmF0ZWQgQ0Q0KyBUIENlbGxzIikKCmBgYAoKIyA0LiBUcmFqZWN0b3J5IGFuZCBQc2V1ZG90aW1lIHdpdGggTW9ub2NsZTMKYGBge3IgVHJhamVjdG9yeX0KCmNhdCgiXG49PT09PT0gU1RFUCA0OiBNb25vY2xlMyBQc2V1ZG90aW1lID09PT09PVxuIikKCmxpYnJhcnkobW9ub2NsZTMpCmxpYnJhcnkoU2V1cmF0V3JhcHBlcnMpCmxpYnJhcnkoTWF0cml4KQoKY2RzIDwtIGFzLmNlbGxfZGF0YV9zZXQocmVmZXJlbmNlX2ludGVncmF0ZWQpCmNkcyA8LSBjbHVzdGVyX2NlbGxzKGNkcywgcmVkdWN0aW9uX21ldGhvZCA9ICJVTUFQIikKY2RzIDwtIGxlYXJuX2dyYXBoKGNkcywgdXNlX3BhcnRpdGlvbiA9IFRSVUUpCgpuYWl2ZV9tYXJrZXJzIDwtIGMoIkNDUjciLCAiU0VMTCIsICJMRUYxIiwgIlRDRjciKQpuYWl2ZV9tYXJrZXJzIDwtIG5haXZlX21hcmtlcnNbbmFpdmVfbWFya2VycyAlaW4lIHJvd25hbWVzKGNkcyldCgojIEV4dHJhY3QgbG9nLW5vcm1hbGl6ZWQgZXhwcmVzc2lvbiBvciBmYWxsYmFjayB0byBjb3VudHMgbG9nLXRyYW5zZm9ybWVkCmlmKCJsb2djb3VudHMiICVpbiUgYXNzYXlOYW1lcyhjZHMpKSB7CiAgZXhwcl9tYXQgPC0gYXNzYXkoY2RzLCAibG9nY291bnRzIikKfSBlbHNlIHsKICBleHByX21hdCA8LSBsb2cxcChhc3NheShjZHMsICJjb3VudHMiKSkKfQoKbmFpdmVfc2NvcmUgPC0gTWF0cml4Ojpjb2xNZWFucyhleHByX21hdFtuYWl2ZV9tYXJrZXJzLCAsIGRyb3AgPSBGQUxTRV0pCgojIFBpY2sgdG9wIDEwIGNlbGxzIG9yIHRvcCAxJSwgd2hpY2hldmVyIGlzIG1vcmUgcm9idXN0CiN0b3BfbiA8LSBtaW4oMTAsIGxlbmd0aChuYWl2ZV9zY29yZSkpCnRocmVzaG9sZCA8LSBxdWFudGlsZShuYWl2ZV9zY29yZSwgMC45NSkKcm9vdF9jZWxscyA8LSBuYW1lcyhuYWl2ZV9zY29yZVtuYWl2ZV9zY29yZSA+IHRocmVzaG9sZF0pCgpjZHMgPC0gb3JkZXJfY2VsbHMoY2RzLCByb290X2NlbGxzID0gcm9vdF9jZWxscykKcmVmZXJlbmNlX2ludGVncmF0ZWQkcHNldWRvdGltZSA8LSBwc2V1ZG90aW1lKGNkcykKCnBsb3RfY2VsbHMoY2RzLCBjb2xvcl9jZWxsc19ieSA9ICJwc2V1ZG90aW1lIiwgc2hvd190cmFqZWN0b3J5X2dyYXBoID0gVFJVRSkKCiMgVmlzdWFsaXplIFVNQVAgY29sb3JlZCBieSBvcmlnaW5hbCBkb25vciAoY2VsbF9saW5lKQpEaW1QbG90KHJlZmVyZW5jZV9pbnRlZ3JhdGVkLCBncm91cC5ieSA9ICJQcmVkaWN0aW9uIiwgcmVkdWN0aW9uID0gInVtYXAiKSArCiAgZ2d0aXRsZSgiVU1BUCBvZiBJbnRlZ3JhdGVkIENENCsgVCBDZWxscyIpCgojIFZpc3VhbGl6ZSBVTUFQIGNvbG9yZWQgYnkgb3JpZ2luYWwgZG9ub3IgKGNlbGxfbGluZSkKRGltUGxvdChyZWZlcmVuY2VfaW50ZWdyYXRlZCwgZ3JvdXAuYnkgPSAicHJlZGljdGVkLmNlbGx0eXBlLmwyIiwgcmVkdWN0aW9uID0gInVtYXAiKSArCiAgZ2d0aXRsZSgiVU1BUCBvZiBJbnRlZ3JhdGVkIENENCsgVCBDZWxscyIpCgpnYygpCgoKCmBgYAoKIyMgU2F2ZSB0aGUgUmVmZXJlbmNlIChTw6l6YXJ5IGNlbGwgbGluZXMgcHJvamVjdGVkIG9udG8gdGhpcyByZWZlcmVuY2UgdHJhamVjdG9yeSk6CmBgYHtyIHNhdmVmaW5hbH0Kc2F2ZVJEUyhyZWZlcmVuY2VfaW50ZWdyYXRlZCwgZmlsZSA9ICJzZXphcnlfcmVmZXJlbmNlX0NENFRjZWxsc193aXRoX3JlYWR5X2Zvcl9jZWxsX2xpbmVfUHJvamVjdGlvbi5yZHMiKQpgYGAKCgo=