1. load libraries
2. Load Seurat Object
load("../../../0-IMP-OBJECTS/All_Samples_Merged_with_10x_Azitmuth_Annotated_SCT_HPC_without_harmony_integration.robj")
All_samples_Merged
An object of class Seurat
64169 features across 59355 samples within 6 assays
Active assay: SCT (27417 features, 3000 variable features)
3 layers present: counts, data, scale.data
5 other assays present: RNA, ADT, prediction.score.celltype.l1, prediction.score.celltype.l2, prediction.score.celltype.l3
4 dimensional reductions calculated: integrated_dr, ref.umap, pca, umap
3. Data PREPERATION
ElbowPlot(All_samples_Merged)

# plot
before <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line", label = TRUE, label.box = TRUE, repel = TRUE)
DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line", label = TRUE, label.box = TRUE, repel = TRUE)

4. Harmony-integration
# run Harmony -----------
All_samples_Merged_Harmony_Integrated <- All_samples_Merged %>%
RunHarmony(group.by.vars = 'orig.ident', plot_convergence = FALSE)
Transposing data matrix
Initializing state using k-means centroids initialization
Harmony 1/10
Harmony 2/10
Harmony 3/10
Harmony converged after 3 iterations
All_samples_Merged_Harmony_Integrated@reductions
$integrated_dr
A dimensional reduction object with key integrateddr_
Number of dimensions: 50
Number of cells: 59355
Projected dimensional reduction calculated: FALSE
Jackstraw run: FALSE
Computed using assay: RNA
$ref.umap
A dimensional reduction object with key UMAP_
Number of dimensions: 2
Number of cells: 59355
Projected dimensional reduction calculated: FALSE
Jackstraw run: FALSE
Computed using assay: RNA
$pca
A dimensional reduction object with key PC_
Number of dimensions: 50
Number of cells: 59355
Projected dimensional reduction calculated: FALSE
Jackstraw run: FALSE
Computed using assay: SCT
$umap
A dimensional reduction object with key umap_
Number of dimensions: 2
Number of cells: 59355
Projected dimensional reduction calculated: FALSE
Jackstraw run: FALSE
Computed using assay: SCT
$harmony
A dimensional reduction object with key harmony_
Number of dimensions: 50
Number of cells: 59355
Projected dimensional reduction calculated: TRUE
Jackstraw run: FALSE
Computed using assay: SCT
All_samples_Merged_Harmony_Integrated.embed <- Embeddings(All_samples_Merged_Harmony_Integrated, "harmony")
All_samples_Merged_Harmony_Integrated.embed[1:10,1:10]
harmony_1 harmony_2 harmony_3 harmony_4 harmony_5
L1_AAACCTGAGGGCTTCC-1 -24.7159025 9.4383708 -2.595885 -4.8545693 9.5279361
L1_AAACCTGGTGCAGGTA-1 -7.4043559 -0.2431793 -6.512215 -1.0740249 3.0236635
L1_AAACCTGGTTAAAGTG-1 4.1966520 -3.2580142 -7.488281 2.3632919 -2.8494811
L1_AAACCTGTCAGGTAAA-1 9.3659883 4.0151201 -2.784977 -0.9320338 0.1966207
L1_AAACCTGTCCCTGACT-1 -21.8982398 5.2365914 -1.855428 -4.7009885 9.9275835
L1_AAACCTGTCCTTCAAT-1 0.6872360 -5.5511997 -6.237131 2.2476651 -1.8600064
L1_AAACCTGTCTTGCAAG-1 0.3389511 -6.3244918 -6.489758 1.6118737 -2.1777593
L1_AAACGGGAGGCTAGAC-1 4.9363084 1.9257427 -2.435222 -0.6554694 0.5666725
L1_AAACGGGAGGGTATCG-1 5.0320109 -2.5210300 -2.183697 -1.4247422 -1.3822734
L1_AAACGGGAGGGTTCCC-1 2.5703222 8.0085594 5.619477 -3.9629752 2.1930832
harmony_6 harmony_7 harmony_8 harmony_9 harmony_10
L1_AAACCTGAGGGCTTCC-1 0.6434699 3.3956215 -2.0238788 -2.8754652 -0.5210165
L1_AAACCTGGTGCAGGTA-1 0.5577080 5.5664832 -3.1247213 -0.9722501 3.3526319
L1_AAACCTGGTTAAAGTG-1 0.9342769 4.4644112 0.3335652 -1.8805763 0.9852970
L1_AAACCTGTCAGGTAAA-1 -0.4044759 0.6000904 1.1277442 -0.1430370 1.5758242
L1_AAACCTGTCCCTGACT-1 1.9262066 -1.2916080 -2.0479582 -1.3727711 -0.9952941
L1_AAACCTGTCCTTCAAT-1 1.4745182 3.4523254 -3.4832128 -3.2268001 1.2139663
L1_AAACCTGTCTTGCAAG-1 1.1331250 1.0139476 -2.9306194 -2.4875431 0.4212196
L1_AAACGGGAGGCTAGAC-1 1.1134144 2.3778052 1.8432861 0.4951815 -0.5264072
L1_AAACGGGAGGGTATCG-1 2.1885390 3.5571018 -0.2041201 0.2795494 -1.2595757
L1_AAACGGGAGGGTTCCC-1 -0.7596334 -0.6270356 3.5620757 1.6807614 -3.9461927
# Do UMAP and clustering using ** Harmony embeddings instead of PCA **
All_samples_Merged_Harmony_Integrated <- All_samples_Merged_Harmony_Integrated %>%
RunUMAP(reduction = 'harmony', dims = 1:22) %>%
FindNeighbors(reduction = "harmony", dims = 1:22) %>%
FindClusters(resolution = 0.5)
Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session
14:31:30 UMAP embedding parameters a = 0.9922 b = 1.112
14:31:30 Read 59355 rows and found 22 numeric columns
14:31:30 Using Annoy for neighbor search, n_neighbors = 30
14:31:30 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
14:31:37 Writing NN index file to temp file /tmp/Rtmp1yGWZm/file1d0762c338d97
14:31:37 Searching Annoy index using 1 thread, search_k = 3000
14:31:58 Annoy recall = 100%
14:32:00 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
14:32:04 Initializing from normalized Laplacian + noise (using RSpectra)
14:32:09 Commencing optimization for 200 epochs, with 2575120 positive edges
14:33:23 Optimization finished
Computing nearest neighbor graph
Computing SNN
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 59355
Number of edges: 1764181
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9000
Number of communities: 19
Elapsed time: 30 seconds
1 singletons identified. 18 final clusters.
# visualize
after <- DimPlot(All_samples_Merged_Harmony_Integrated, reduction = "umap", group.by = "cell_line", label = TRUE, label.box = TRUE, repel = TRUE)
before|after

DimPlot(All_samples_Merged_Harmony_Integrated,reduction = "umap", group.by = "cell_line", label = TRUE, label.box = TRUE, repel = TRUE)

DimPlot(All_samples_Merged_Harmony_Integrated,reduction = "umap", group.by = "SCT_snn_res.0.5", label = TRUE, label.box = TRUE, repel = TRUE)

DimPlot(All_samples_Merged_Harmony_Integrated, reduction = "umap", group.by = "predicted.celltype.l2", label = TRUE, label.box = TRUE, repel = TRUE)
Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
increasing max.overlaps

FeaturePlot
myfeatures <- c("CD3E", "CD4", "CD8A", "NKG7", "GNLY", "MS4A1", "CD14", "LYZ", "MS4A7", "FCGR3A", "CST3", "FCER1A")
FeaturePlot(All_samples_Merged_Harmony_Integrated, reduction = "umap", dims = 1:2, features = myfeatures, ncol = 4, order = T) + NoLegend() + NoAxes() + NoGrid()

5. Save the Seurat object as an Robj file
# save(All_samples_Merged_Harmony_Integrated, file = "All_samples_Merged_Harmony_Integrated.Robj")
LS0tCnRpdGxlOiAiSGFybW9ueSBJbnRlZ3JhdGlvbiBvZiBQQk1DMTB4IHdpdGggU0NUIG9uIHNhbXBsZXMgcGFydDIiCmF1dGhvcjogTmFzaXIgTWFobW9vZCBBYmJhc2kKZGF0ZTogImByIFN5cy5EYXRlKClgIgpvdXRwdXQ6CiAgI3JtZGZvcm1hdHM6OnJlYWR0aGVkb3duCiAgaHRtbF9ub3RlYm9vazoKICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICB0b2NfY29sbGFwc2VkOiB0cnVlCi0tLQoKIyAxLiBsb2FkIGxpYnJhcmllcwpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0KbGlicmFyeShTZXVyYXQpCmxpYnJhcnkoU2V1cmF0T2JqZWN0KQpsaWJyYXJ5KFNldXJhdERhdGEpCmxpYnJhcnkocGF0Y2h3b3JrKQpsaWJyYXJ5KGhhcm1vbnkpCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeShyZXRpY3VsYXRlKQpsaWJyYXJ5KEF6aW11dGgpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoUnRzbmUpCmxpYnJhcnkoaGFybW9ueSkKbGlicmFyeShncmlkRXh0cmEpCmBgYAoKIyAyLiBMb2FkIFNldXJhdCBPYmplY3QgCmBgYHtyIGxvYWRfc2V1cmF0fQoKIGxvYWQoIi4uLy4uLy4uLzAtSU1QLU9CSkVDVFMvQWxsX1NhbXBsZXNfTWVyZ2VkX3dpdGhfMTB4X0F6aXRtdXRoX0Fubm90YXRlZF9TQ1RfSFBDX3dpdGhvdXRfaGFybW9ueV9pbnRlZ3JhdGlvbi5yb2JqIikKIAogIEFsbF9zYW1wbGVzX01lcmdlZAoKCmBgYAoKCiMgMy4gRGF0YSBQUkVQRVJBVElPTgpgYGB7ciBkYXRhLCBmaWcuaGVpZ2h0PTYsIGZpZy53aWR0aD0xMH0KCgpFbGJvd1Bsb3QoQWxsX3NhbXBsZXNfTWVyZ2VkKQoKIyBwbG90CmJlZm9yZSA8LSBEaW1QbG90KEFsbF9zYW1wbGVzX01lcmdlZCwgcmVkdWN0aW9uID0gInVtYXAiLCBncm91cC5ieSA9ICJjZWxsX2xpbmUiLCBsYWJlbCA9IFRSVUUsIGxhYmVsLmJveCA9IFRSVUUsIHJlcGVsID0gVFJVRSkKCgoKRGltUGxvdChBbGxfc2FtcGxlc19NZXJnZWQsIHJlZHVjdGlvbiA9ICJ1bWFwIiwgZ3JvdXAuYnkgPSAiY2VsbF9saW5lIiwgbGFiZWwgPSBUUlVFLCBsYWJlbC5ib3ggPSBUUlVFLCByZXBlbCA9IFRSVUUpCgoKCmBgYAoKCiMgNC4gSGFybW9ueS1pbnRlZ3JhdGlvbgpgYGB7ciBpbnRlZ3JhdGlvbi1oYXJtb255LCBmaWcuaGVpZ2h0PTYsIGZpZy53aWR0aD0xMH0KCiMgcnVuIEhhcm1vbnkgLS0tLS0tLS0tLS0KQWxsX3NhbXBsZXNfTWVyZ2VkX0hhcm1vbnlfSW50ZWdyYXRlZCA8LSBBbGxfc2FtcGxlc19NZXJnZWQgJT4lCiAgUnVuSGFybW9ueShncm91cC5ieS52YXJzID0gJ29yaWcuaWRlbnQnLCBwbG90X2NvbnZlcmdlbmNlID0gRkFMU0UpCgpBbGxfc2FtcGxlc19NZXJnZWRfSGFybW9ueV9JbnRlZ3JhdGVkQHJlZHVjdGlvbnMKCkFsbF9zYW1wbGVzX01lcmdlZF9IYXJtb255X0ludGVncmF0ZWQuZW1iZWQgPC0gRW1iZWRkaW5ncyhBbGxfc2FtcGxlc19NZXJnZWRfSGFybW9ueV9JbnRlZ3JhdGVkLCAiaGFybW9ueSIpCkFsbF9zYW1wbGVzX01lcmdlZF9IYXJtb255X0ludGVncmF0ZWQuZW1iZWRbMToxMCwxOjEwXQoKCgojIERvIFVNQVAgYW5kIGNsdXN0ZXJpbmcgdXNpbmcgKiogSGFybW9ueSBlbWJlZGRpbmdzIGluc3RlYWQgb2YgUENBICoqCkFsbF9zYW1wbGVzX01lcmdlZF9IYXJtb255X0ludGVncmF0ZWQgPC0gQWxsX3NhbXBsZXNfTWVyZ2VkX0hhcm1vbnlfSW50ZWdyYXRlZCAlPiUKICAgUnVuVU1BUChyZWR1Y3Rpb24gPSAnaGFybW9ueScsIGRpbXMgPSAxOjIyKSAlPiUKICAgRmluZE5laWdoYm9ycyhyZWR1Y3Rpb24gPSAiaGFybW9ueSIsIGRpbXMgPSAxOjIyKSAlPiUKICAgRmluZENsdXN0ZXJzKHJlc29sdXRpb24gPSAwLjUpCgojIHZpc3VhbGl6ZSAKCgphZnRlciA8LSBEaW1QbG90KEFsbF9zYW1wbGVzX01lcmdlZF9IYXJtb255X0ludGVncmF0ZWQsIHJlZHVjdGlvbiA9ICJ1bWFwIiwgZ3JvdXAuYnkgPSAiY2VsbF9saW5lIiwgbGFiZWwgPSBUUlVFLCBsYWJlbC5ib3ggPSBUUlVFLCByZXBlbCA9IFRSVUUpCgpiZWZvcmV8YWZ0ZXIKCkRpbVBsb3QoQWxsX3NhbXBsZXNfTWVyZ2VkX0hhcm1vbnlfSW50ZWdyYXRlZCxyZWR1Y3Rpb24gPSAidW1hcCIsIGdyb3VwLmJ5ID0gImNlbGxfbGluZSIsIGxhYmVsID0gVFJVRSwgbGFiZWwuYm94ID0gVFJVRSwgcmVwZWwgPSBUUlVFKQoKCkRpbVBsb3QoQWxsX3NhbXBsZXNfTWVyZ2VkX0hhcm1vbnlfSW50ZWdyYXRlZCxyZWR1Y3Rpb24gPSAidW1hcCIsIGdyb3VwLmJ5ID0gIlNDVF9zbm5fcmVzLjAuNSIsIGxhYmVsID0gVFJVRSwgbGFiZWwuYm94ID0gVFJVRSwgcmVwZWwgPSBUUlVFKQoKRGltUGxvdChBbGxfc2FtcGxlc19NZXJnZWRfSGFybW9ueV9JbnRlZ3JhdGVkLCByZWR1Y3Rpb24gPSAidW1hcCIsIGdyb3VwLmJ5ID0gInByZWRpY3RlZC5jZWxsdHlwZS5sMiIsIGxhYmVsID0gVFJVRSwgbGFiZWwuYm94ID0gVFJVRSwgcmVwZWwgPSBUUlVFKQoKYGBgCgoKIyBGZWF0dXJlUGxvdApgYGB7ciBmZWF0dXJlcGxvdC1oYXJtb255LCBmaWcuaGVpZ2h0PTYsIGZpZy53aWR0aD0xMH0KCgpteWZlYXR1cmVzIDwtIGMoIkNEM0UiLCAiQ0Q0IiwgIkNEOEEiLCAiTktHNyIsICJHTkxZIiwgIk1TNEExIiwgIkNEMTQiLCAiTFlaIiwgIk1TNEE3IiwgIkZDR1IzQSIsICJDU1QzIiwgIkZDRVIxQSIpCgpGZWF0dXJlUGxvdChBbGxfc2FtcGxlc19NZXJnZWRfSGFybW9ueV9JbnRlZ3JhdGVkLCByZWR1Y3Rpb24gPSAidW1hcCIsIGRpbXMgPSAxOjIsIGZlYXR1cmVzID0gbXlmZWF0dXJlcywgbmNvbCA9IDQsIG9yZGVyID0gVCkgKyBOb0xlZ2VuZCgpICsgTm9BeGVzKCkgKyBOb0dyaWQoKQoKYGBgCgoKIyA1LiBTYXZlIHRoZSBTZXVyYXQgb2JqZWN0IGFzIGFuIFJvYmogZmlsZQpgYGB7ciBzYXZlUk9CSn0KCiMgc2F2ZShBbGxfc2FtcGxlc19NZXJnZWRfSGFybW9ueV9JbnRlZ3JhdGVkLCBmaWxlID0gIkFsbF9zYW1wbGVzX01lcmdlZF9IYXJtb255X0ludGVncmF0ZWQuUm9iaiIpCgoKYGBgCgoKCgo=