1. load libraries
2. Load Seurat Object
#Load Seurat Object merged from cell lines and a control after filtration
load("../0-R_Objects/CD4Tcells_SCTnormalized_done_on_HPC_inluding_Patient_origin.robj")
# Visualize before Harmony integration
DimPlot(All_samples_Merged,
reduction = "umap",
group.by = "Patient_origin",
label = TRUE,
label.box = TRUE) +
ggtitle("Before Harmony - By Cell Line")

DimPlot(All_samples_Merged,
reduction = "umap",
group.by = "cell_line",
label = TRUE,
label.box = TRUE) +
ggtitle("Before Harmony - By Cell Line")

DimPlot(All_samples_Merged,
reduction = "umap",
group.by = "SCT_snn_res.0.5",
label = TRUE,
label.box = TRUE) +
ggtitle("Before Harmony - By Clusters")

DimPlot(All_samples_Merged,
reduction = "umap",
group.by = "predicted.celltype.l1",
label = TRUE,
label.box = TRUE) +
ggtitle("Before Harmony - By Annotation.l1")

DimPlot(All_samples_Merged,
reduction = "umap",
group.by = "predicted.celltype.l2",
label = TRUE,
label.box = TRUE) +
ggtitle("Before Harmony - By Annotation.l2")

DimPlot(All_samples_Merged,
reduction = "umap",
group.by = "predicted.celltype.l3",
label = TRUE,
label.box = TRUE) +
ggtitle("Before Harmony - By Annotation.l3")

table(All_samples_Merged$predicted.celltype.l2, All_samples_Merged$SCT_snn_res.0.5)
0 1 2 3 4 5 6 7 8 9 10 11 12 13
B intermediate 0 3 0 0 0 0 2 0 0 0 0 0 2 0
B memory 8 6 1 0 85 0 30 2 0 115 4 0 1 0
CD14 Mono 0 1 0 0 0 0 4 0 0 7 0 0 0 0
CD4 CTL 0 0 0 0 0 12 0 0 0 0 0 0 0 1
CD4 Naive 0 8 0 0 0 517 0 0 1479 0 0 37 0 1
CD4 Proliferating 5448 2474 5388 2852 3954 0 3256 2863 6 1270 1407 0 93 0
CD4 TCM 871 3414 522 269 536 4214 106 29 1838 457 46 425 49 54
CD4 TEM 0 1 0 0 0 61 0 0 21 0 0 1 0 0
CD8 Proliferating 0 0 0 0 1 0 0 0 0 1 0 0 0 0
CD8 TCM 0 1 0 16 0 0 0 0 0 0 0 0 0 0
CD8 TEM 0 1 0 8 3 0 2 0 0 1 0 0 0 0
cDC1 0 0 0 0 5 0 2 0 0 0 0 0 1 0
cDC2 0 1 2 0 3 0 10 0 0 36 0 0 0 1
dnT 0 3 1 1 1 0 2 0 0 3 0 1 3 0
HSPC 57 10 1 0 211 0 678 483 0 5 358 0 2 0
NK Proliferating 4 40 23 2785 237 0 10 12 0 22 1 0 27 0
Treg 15 14 1 0 1 0 0 0 0 0 0 1 13 0
3. Perform Harmony Integration
# Perform Harmony integration
All_samples_Merged <- RunHarmony(All_samples_Merged,
group.by.vars = c("Patient_origin", "cell_line"),
reduction.use = "pca",
dim.use = 1:15,
theta = c(1, 1),
assay.use = "SCT")
Transposing data matrix
Initializing state using k-means centroids initialization
Harmony 1/10
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
*****************************Registered S3 method overwritten by 'rmarkdown':
method from
print.paged_df
*********************|
Harmony 2/10
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Harmony converged after 2 iterations
# Find neighbors using the Harmony reduction (you do not need to specify "reduction" here for FindNeighbors)
All_samples_Merged <- FindNeighbors(All_samples_Merged,reduction = "harmony", dims = 1:15) # Use the first 16 PCs from Harmony integration
Computing nearest neighbor graph
Computing SNN
# Find clusters based on the neighbors found in the Harmony space
All_samples_Merged <- FindClusters(All_samples_Merged, reduction = "harmony", resolution = c(0.5)) # Clustering based on PC space (default)
Avis : The following arguments are not used: reductionAvis : The following arguments are not used: reduction
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49372
Number of edges: 1485026
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8856
Number of communities: 14
Elapsed time: 16 seconds
# Run UMAP on the new Harmony reduction
All_samples_Merged <- RunUMAP(All_samples_Merged, reduction = "harmony", dims = 1:15, reduction.name = "umap.harmony")
Avis : The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session20:19:31 UMAP embedding parameters a = 0.9922 b = 1.112
20:19:31 Read 49372 rows and found 15 numeric columns
20:19:31 Using Annoy for neighbor search, n_neighbors = 30
20:19:31 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
20:19:37 Writing NN index file to temp file /tmp/RtmpoINXWz/file176a6f4d64d19c
20:19:37 Searching Annoy index using 1 thread, search_k = 3000
20:20:00 Annoy recall = 100%
20:20:01 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
20:20:05 Initializing from normalized Laplacian + noise (using RSpectra)
20:20:07 Commencing optimization for 200 epochs, with 2063492 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
20:20:38 Optimization finished
4. Visualize Harmony Integrated Data
# Visualization after Harmony
# By cell line
p3 <- DimPlot(All_samples_Merged,
reduction = "umap.harmony",
group.by = "cell_line",
label = TRUE,
label.box = TRUE) +
ggtitle("After Harmony - By Cell Line")
# By clusters
p4 <- DimPlot(All_samples_Merged,
reduction = "umap.harmony",
group.by = "seurat_clusters",
label = TRUE,
label.box = TRUE) +
ggtitle("After Harmony - By Clusters")
# By cell type annotations
p5 <- DimPlot(All_samples_Merged,
reduction = "umap.harmony",
group.by = "predicted.celltype.l2",
label = TRUE,
label.box = TRUE) +
ggtitle("After Harmony - Cell Type Annotations")
# Print comparison plots
p3 + p4

print(p5)

DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line", label = T, label.box = T, repel = T) +
ggtitle("Harmony Integration - By Cell Line")

DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "seurat_clusters",label = T, label.box = T, repel = T) +
ggtitle("Harmony Integration - By Clusters")

DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "predicted.celltype.l2",label = T, label.box = T, repel = T) +
ggtitle("Harmony Integration - Annotations")
