# Load the Seurat object merged from the cell lines and a control (PBMC) after
# filtration.
# NOTE: load() restores the saved objects into the current environment under
# the names they were saved with and returns only those names (a character
# vector). SS_All_samples_Merged therefore holds object names, not the Seurat
# object itself; the rest of the script uses `All_samples_Merged`.
SS_All_samples_Merged <- load("../../../0-IMP-OBJECTS/All_Samples_Merged_with_10x_Azitmuth_Annotated_SCT_HPC_without_harmony_integration.robj")
# Fail early with a clear message instead of a later "object not found" error.
if (!exists("All_samples_Merged")) {
  stop(
    "Expected object 'All_samples_Merged' is not available after load(); ",
    "the file restored: ", paste(SS_All_samples_Merged, collapse = ", "),
    call. = FALSE
  )
}
All_samples_Merged
An object of class Seurat
64169 features across 59355 samples within 6 assays
Active assay: SCT (27417 features, 3000 variable features)
3 layers present: counts, data, scale.data
5 other assays present: RNA, ADT, prediction.score.celltype.l1, prediction.score.celltype.l2, prediction.score.celltype.l3
4 dimensional reductions calculated: integrated_dr, ref.umap, pca, umap
# Overview plots of the merged object before Harmony integration, colored by
# cell line and by predicted cell type (level 2). P1/P2 pass no `reduction`, so
# DimPlot chooses the reduction itself; P3/P4 explicitly use the PCA.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
P1 <- DimPlot(
  object = All_samples_Merged,
  group.by = "cell_line",
  label = TRUE,
  label.box = TRUE
) +
  labs(title = "Colored by cellline")
P1

P2 <- DimPlot(
  object = All_samples_Merged,
  group.by = "predicted.celltype.l2"
) +
  labs(title = "Colored by celltype")
P2

P3 <- DimPlot(
  object = All_samples_Merged,
  group.by = "cell_line",
  reduction = "pca"
) +
  labs(title = "Colored by cellline")
P3

P4 <- DimPlot(
  object = All_samples_Merged,
  group.by = "predicted.celltype.l2",
  reduction = "pca"
) +
  labs(title = "Colored by celltype")
P4

# Show the four panels together in a 2 x 2 grid.
cowplot::plot_grid(P1, P2, P3, P4, nrow = 2)
# Contingency table for the merged (non-integrated) object:
# rows = predicted cell type (level 2), columns = cluster id.
cell_distribution_table <- table(
  All_samples_Merged@meta.data[["predicted.celltype.l2"]],
  All_samples_Merged@meta.data[["seurat_clusters"]]
)
# Convert the 2-D table to a data frame that keeps the row/column layout.
cell_distribution_df <- as.data.frame.matrix(cell_distribution_table)
print(cell_distribution_df)
#write.csv(cell_distribution_df, file = "test2.csv", row.names = TRUE)
# Run Harmony on the merged object with `cell_line` as the grouping variable.
# The result is stored under a new name, so `All_samples_Merged` is unchanged.
All_samples_Merged_integrated <- RunHarmony(
  object = All_samples_Merged,
  group.by.vars = "cell_line"
)
Transposing data matrix
Initializing state using k-means centroids initialization
Harmony 1/10
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Harmony 2/10
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Harmony 3/10
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Harmony converged after 3 iterations
# UMAP, neighbor graph and clustering are all computed from the Harmony
# embeddings (first 22 dimensions) rather than from the PCA.
All_samples_Merged_integrated <- RunUMAP(
  All_samples_Merged_integrated,
  reduction = "harmony",
  dims = 1:22
)
All_samples_Merged_integrated <- FindNeighbors(
  All_samples_Merged_integrated,
  reduction = "harmony",
  dims = 1:22
)
All_samples_Merged_integrated <- FindClusters(
  All_samples_Merged_integrated,
  resolution = 0.5
)
Avis : The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session15:25:38 UMAP embedding parameters a = 0.9922 b = 1.112
15:25:38 Read 59355 rows and found 22 numeric columns
15:25:38 Using Annoy for neighbor search, n_neighbors = 30
15:25:38 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:25:46 Writing NN index file to temp file /tmp/RtmpOpTF6v/file37a9016be5962
15:25:46 Searching Annoy index using 1 thread, search_k = 3000
15:26:14 Annoy recall = 100%
15:26:15 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
15:26:20 Initializing from normalized Laplacian + noise (using RSpectra)
15:26:24 Commencing optimization for 200 epochs, with 2572158 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:27:01 Optimization finished
Computing nearest neighbor graph
Computing SNN
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 59355
Number of edges: 1778039
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9024
Number of communities: 18
Elapsed time: 23 seconds
# Plots of the Harmony-integrated object.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.

# Predicted cell type (level 2) shown on each available reduction.
DimPlot(
  object = All_samples_Merged_integrated, group.by = "predicted.celltype.l2",
  label = TRUE, label.box = TRUE, repel = TRUE, reduction = "umap"
)
DimPlot(
  object = All_samples_Merged_integrated, group.by = "predicted.celltype.l2",
  label = TRUE, label.box = TRUE, repel = TRUE, reduction = "harmony"
)
DimPlot(
  object = All_samples_Merged_integrated, group.by = "predicted.celltype.l2",
  label = TRUE, label.box = TRUE, repel = TRUE, reduction = "integrated_dr"
)
DimPlot(
  object = All_samples_Merged_integrated, group.by = "predicted.celltype.l2",
  label = TRUE, label.box = TRUE, repel = TRUE, reduction = "pca"
)
DimPlot(
  object = All_samples_Merged_integrated, group.by = "predicted.celltype.l2",
  label = TRUE, label.box = TRUE, repel = TRUE, reduction = "ref.umap"
)

# Cell line and cluster membership on the UMAP.
DimPlot(
  object = All_samples_Merged_integrated, group.by = "cell_line",
  label = TRUE, label.box = TRUE, repel = TRUE, reduction = "umap"
)
DimPlot(
  object = All_samples_Merged_integrated, group.by = "seurat_clusters",
  label = TRUE, label.box = TRUE, repel = TRUE, reduction = "umap"
)
# Contingency tables for the integrated object. Each section overwrites
# cell_distribution_table / cell_distribution_df, so only the last pair
# remains in the environment afterwards.

# Cell line (rows) x cluster (columns).
cell_distribution_table <- table(
  All_samples_Merged_integrated@meta.data[["cell_line"]],
  All_samples_Merged_integrated@meta.data[["seurat_clusters"]]
)
cell_distribution_df <- as.data.frame.matrix(cell_distribution_table)
print(cell_distribution_df)
#write.csv(cell_distribution_df, file = "15-2-integration_table_HARMONY-TEST1.csv", row.names = TRUE)

# Predicted cell type, level 2 (rows) x cluster (columns).
cell_distribution_table <- table(
  All_samples_Merged_integrated@meta.data[["predicted.celltype.l2"]],
  All_samples_Merged_integrated@meta.data[["seurat_clusters"]]
)
cell_distribution_df <- as.data.frame.matrix(cell_distribution_table)
print(cell_distribution_df)
#write.csv(cell_distribution_df, file = "15-2-integration_table_HARMONY-TEST1_annotationbased.csv", row.names = TRUE)

# Predicted cell type, level 1 (rows) x cluster (columns).
cell_distribution_table <- table(
  All_samples_Merged_integrated@meta.data[["predicted.celltype.l1"]],
  All_samples_Merged_integrated@meta.data[["seurat_clusters"]]
)
cell_distribution_df <- as.data.frame.matrix(cell_distribution_table)
print(cell_distribution_df)
#write.csv(cell_distribution_df, file = "15-2-integration_table_HARMONY-TEST1_annotationbased_l1.csv", row.names = TRUE)

# Predicted cell type, level 2 (rows) x cell line (columns).
cell_distribution_table <- table(
  All_samples_Merged_integrated@meta.data[["predicted.celltype.l2"]],
  All_samples_Merged_integrated@meta.data[["cell_line"]]
)
cell_distribution_df <- as.data.frame.matrix(cell_distribution_table)
print(cell_distribution_df)
#write.csv(cell_distribution_df, file = "15-2-integration_table_HARMONY-TEST1_annotationbased_cellline.csv", row.names = TRUE)
# save
#save(All_samples_Merged_integrated,file = "15-2_All_samples_Merged_integrated.Robj")
```