# Load the Seurat object merged from the cell lines and a PBMC control
# (after filtering).
# NOTE: load() restores objects under the names they were saved with and
# returns only a character vector of those names. `SS_All_samples_Merged`
# therefore holds the restored object *names*; the Seurat object used by the
# rest of this script is `All_samples_Merged`.
SS_All_samples_Merged <- load("../../../0-IMP-OBJECTS/All_Samples_Merged_with_10x_Azitmuth_Annotated_SCT_HPC_without_harmony_integration.robj")
# Fail fast if the file did not restore the object the analysis depends on.
stopifnot("All_samples_Merged" %in% SS_All_samples_Merged)
All_samples_Merged
An object of class Seurat
64169 features across 59355 samples within 6 assays
Active assay: SCT (27417 features, 3000 variable features)
3 layers present: counts, data, scale.data
5 other assays present: RNA, ADT, prediction.score.celltype.l1, prediction.score.celltype.l2, prediction.score.celltype.l3
4 dimensional reductions calculated: integrated_dr, ref.umap, pca, umap
# Use the "cell_line" metadata column as the active identity, so the violin
# plots below are split by cell line.
Idents(All_samples_Merged) <- "cell_line"

# QC violins for nFeature_RNA, nCount_RNA and percent.mt, three panels wide.
VlnPlot(
  All_samples_Merged,
  features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
  ncol = 3
)

# Same metrics plus percent.rb, four panels wide with small points.
# The `&` operator adds the theme to the combined plot (smaller panel titles).
VlnPlot(
  All_samples_Merged,
  features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
  ncol = 4,
  pt.size = 0.1
) &
  theme(plot.title = element_text(size = 10))
# nCount_RNA against nFeature_RNA, with a linear-model trend line on top.
FeatureScatter(
  All_samples_Merged,
  feature1 = "nCount_RNA",
  feature2 = "nFeature_RNA"
) +
  geom_smooth(method = "lm")
`geom_smooth()` using formula = 'y ~ x'
# FeatureScatter is typically used to visualize feature-feature relationships
# for anything calculated by the object, i.e. columns in the object metadata,
# PC scores, etc.
# Here: nCount_RNA against percent.mt, with a linear-model trend line on top.
FeatureScatter(
  All_samples_Merged,
  feature1 = "nCount_RNA",
  feature2 = "percent.mt"
) +
  geom_smooth(method = "lm")
`geom_smooth()` using formula = 'y ~ x'
# nCount_RNA against nFeature_RNA, with a linear-model trend line on top.
# NOTE(review): this call repeats the nCount_RNA vs nFeature_RNA scatter that
# was already drawn earlier in this script; kept so the output is unchanged.
FeatureScatter(
  All_samples_Merged,
  feature1 = "nCount_RNA",
  feature2 = "nFeature_RNA"
) +
  geom_smooth(method = "lm")
`geom_smooth()` using formula = 'y ~ x'
# Elbow plot over the first 50 dimensions.
ElbowPlot(All_samples_Merged, ndims = 50)

# TEST-1
# Works on the reduction stored under the name "pca" in All_samples_Merged.
# Each PC's standard deviation expressed as a percentage of the summed
# standard deviations.
pct <- (
  All_samples_Merged[["pca"]]@stdev / sum(All_samples_Merged[["pca"]]@stdev)
) * 100
# Running total of those percentages.
cumu <- cumsum(pct)
# -diff(pct) is the drop from each PC to the next one. Take the last position
# where that drop is still larger than 0.1 and step one PC further -> co2.
co2 <- sort(which(-diff(pct) > 0.1), decreasing = TRUE)[1] + 1
co2
[1] 22
# TEST-2
# Choose the number of PCs to keep: compute two cutoffs from the PCA standard
# deviations and keep the smaller one.
stdv <- All_samples_Merged[["pca"]]@stdev
sum.stdv <- sum(stdv)
# Each PC's standard deviation as a percentage of the summed standard deviations.
percent.stdv <- (stdv / sum.stdv) * 100
cumulative <- cumsum(percent.stdv)
# co1: first PC where the cumulative percentage exceeds 90 while the PC itself
# contributes less than 5 (NA if no PC satisfies both conditions).
co1 <- which(cumulative > 90 & percent.stdv < 5)[1]
# co2: last PC-to-next-PC drop larger than 0.1, plus one.
# head(x, -1) / tail(x, -1) pair every PC with its successor explicitly. The
# previous `x[1:length(x) - 1]` parsed as `x[(1:n) - 1]`, i.e. `x[0:(n - 1)]`,
# and only worked because a zero index is silently dropped.
co2 <- sort(
  which((head(percent.stdv, -1) - tail(percent.stdv, -1)) > 0.1),
  decreasing = TRUE
)[1] + 1
min.pc <- min(co1, co2)
min.pc
[1] 22
# Create a data frame with one row per PC: its percentage, the cumulative
# percentage, and its rank (position).
plot_df <- data.frame(
  pct = percent.stdv,
  cumu = cumulative,
  rank = seq_along(percent.stdv)
)
# Elbow plot to visualize the cutoff. The aesthetics are mapped to the columns
# of plot_df (the data actually passed to ggplot) instead of the same-valued
# global vectors; PCs ranked above min.pc get a different colour.
ggplot(plot_df, aes(cumu, pct, label = rank, color = rank > min.pc)) +
  geom_text() +
  geom_vline(xintercept = 90, color = "grey") +
  geom_hline(yintercept = min(plot_df$pct[plot_df$pct > 5]), color = "grey") +
  # Keep the axis titles the figure had when it was mapped to the global vectors.
  labs(x = "cumulative", y = "percent.stdv") +
  theme_bw()
# Four views of the object before integration: default reduction (P1, P2) and
# PCA (P3, P4), each coloured by cell line and by predicted.celltype.l2.
P1 <- DimPlot(
  object = All_samples_Merged,
  group.by = "cell_line",
  label = TRUE,
  label.box = TRUE
) +
  labs(title = "Colored by cellline")
P1

P2 <- DimPlot(
  object = All_samples_Merged,
  group.by = "predicted.celltype.l2"
) +
  labs(title = "Colored by celltype")
P2

P3 <- DimPlot(
  object = All_samples_Merged,
  group.by = "cell_line",
  reduction = "pca"
) +
  labs(title = "Colored by cellline")
P3

P4 <- DimPlot(
  object = All_samples_Merged,
  group.by = "predicted.celltype.l2",
  reduction = "pca"
) +
  labs(title = "Colored by celltype")
P4

# All four panels together in a 2 x 2 grid.
cowplot::plot_grid(P1, P2, P3, P4, nrow = 2)
# Cross-tabulate predicted.celltype.l2 (rows) against seurat_clusters (columns)
# for the object as loaded, i.e. before the Harmony step further down.
cell_distribution_table <- table(
  All_samples_Merged$predicted.celltype.l2,
  All_samples_Merged$seurat_clusters
)
# Convert the contingency table to a wide data frame and show it.
cell_distribution_df <- as.data.frame.matrix(cell_distribution_table)
print(cell_distribution_df)
0 1 2 3 4 5 6 7 8 9 10 11
ASDC 0 0 0 0 0 0 0 0 0 0 0 0
B intermediate 0 0 0 0 0 0 0 0 0 0 0 0
B memory 0 8 0 0 6 0 81 0 0 0 0 1
B naive 0 0 0 0 0 0 0 0 0 0 0 0
CD14 Mono 0 0 0 0 0 1 0 0 0 0 0 0
CD16 Mono 0 0 0 0 0 0 0 0 0 0 0 0
CD4 CTL 0 0 0 0 0 0 0 0 0 0 0 0
CD4 Naive 0 0 0 0 0 488 0 0 0 0 0 856
CD4 Proliferating 4009 2960 1418 2803 2611 0 1820 2416 2175 2063 1431 0
CD4 TCM 32 841 4 22 31 1870 537 8 198 0 262 634
CD4 TEM 0 0 0 0 0 6 0 0 0 0 0 0
CD8 Naive 0 0 0 0 0 316 0 0 0 0 0 693
CD8 Proliferating 0 0 0 0 0 0 1 0 0 0 0 0
CD8 TCM 0 0 0 0 0 59 0 0 0 0 16 36
CD8 TEM 0 0 0 0 0 3 3 0 1 0 6 1
cDC1 0 0 0 0 0 0 4 0 0 0 0 0
cDC2 0 0 0 0 1 0 3 0 0 0 0 0
dnT 0 0 0 0 0 6 1 0 0 0 0 0
gdT 0 0 0 0 0 0 0 0 0 0 0 7
HSPC 1 2 0 482 644 0 82 52 0 120 0 0
ILC 0 0 0 0 0 0 0 0 0 0 0 0
MAIT 0 0 0 0 0 0 0 0 0 0 0 0
NK 0 0 0 0 0 1 0 0 0 0 0 0
NK Proliferating 10 1 2108 11 8 0 13 5 38 213 677 0
NK_CD56bright 0 0 0 0 0 0 0 0 0 0 0 0
pDC 0 0 0 0 0 0 0 0 0 0 0 0
Plasmablast 0 0 0 0 0 0 0 0 0 0 0 0
Platelet 0 0 0 0 0 0 0 0 0 0 0 0
Treg 1 2 0 0 0 54 2 0 1 0 0 45
12 13 14 15 16 17 18 19 20 21 22 23 24
ASDC 0 0 0 0 0 0 0 0 0 0 0 0 0
B intermediate 0 0 0 0 0 0 0 0 1 438 0 178 17
B memory 0 116 0 2 0 4 0 0 0 162 0 69 4
B naive 0 0 0 0 0 0 0 0 1 453 0 678 0
CD14 Mono 2190 7 0 2 0 0 0 0 2 0 0 0 758
CD16 Mono 6 0 0 0 0 0 0 0 0 0 0 0 2
CD4 CTL 0 0 0 0 0 0 0 0 0 0 0 0 0
CD4 Naive 0 0 5 0 0 0 0 619 29 0 5 0 0
CD4 Proliferating 0 1319 0 1319 24 1363 263 0 0 0 4 0 0
CD4 TCM 0 459 1662 486 1775 41 1349 308 922 31 891 2 35
CD4 TEM 0 0 49 0 0 0 1 1 6 0 15 0 0
CD8 Naive 0 0 0 0 0 0 0 304 33 1 7 0 1
CD8 Proliferating 0 1 0 0 0 0 0 0 0 0 0 0 0
CD8 TCM 0 0 143 0 1 0 0 39 21 2 69 0 0
CD8 TEM 0 1 23 0 0 0 0 3 3 1 19 0 0
cDC1 0 0 0 0 0 0 0 0 0 0 0 0 0
cDC2 22 36 0 2 0 0 0 0 0 0 0 0 1
dnT 0 3 2 1 0 0 1 2 23 1 14 0 2
gdT 0 0 0 0 0 0 0 3 1 0 4 0 0
HSPC 0 6 0 0 0 356 0 0 0 4 0 0 0
ILC 0 0 0 0 0 0 0 0 0 1 0 0 0
MAIT 0 0 0 0 0 0 0 0 1 0 3 0 0
NK 0 0 0 0 0 0 0 0 0 0 0 2 0
NK Proliferating 0 27 0 13 0 1 0 0 0 0 1 0 0
NK_CD56bright 0 0 0 0 0 0 0 0 0 0 0 0 0
pDC 0 0 0 0 0 0 0 0 0 0 0 0 0
Plasmablast 0 0 0 0 0 0 0 0 0 1 0 1 0
Platelet 0 0 0 0 0 0 0 0 0 0 0 0 1
Treg 0 0 89 1 0 0 0 4 63 2 43 0 0
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
ASDC 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0
B intermediate 0 0 2 0 0 0 50 0 2 0 0 0 0 2 0 0 0
B memory 0 0 11 19 0 0 33 0 2 0 0 0 1 3 0 0 0
B naive 0 0 0 0 0 0 46 0 0 0 0 2 0 0 0 0 0
CD14 Mono 645 0 0 4 0 0 0 0 0 167 19 4 0 7 0 2 4
CD16 Mono 1 0 0 0 0 0 1 0 0 0 0 116 0 0 0 0 0
CD4 CTL 0 0 0 0 0 16 0 0 0 1 0 0 0 0 0 0 0
CD4 Naive 0 0 0 0 0 0 7 33 0 1 0 0 0 0 0 0 0
CD4 Proliferating 0 1 409 373 0 0 3 3 88 0 0 0 73 0 63 0 0
CD4 TCM 0 10 26 75 0 32 65 170 53 20 0 0 20 2 5 0 0
CD4 TEM 0 9 0 0 0 7 0 0 0 0 0 0 0 0 0 0 0
CD8 Naive 0 1 0 0 0 0 1 14 0 2 0 0 0 0 0 0 0
CD8 Proliferating 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
CD8 TCM 0 29 0 0 0 60 0 2 0 0 0 0 0 0 0 0 0
CD8 TEM 0 162 0 2 9 150 0 3 0 1 0 0 0 0 0 0 0
cDC1 0 0 2 1 0 0 0 0 1 0 13 0 0 21 0 0 0
cDC2 0 0 4 5 0 0 0 0 0 0 101 1 0 53 0 0 0
dnT 0 0 0 3 0 1 5 13 3 0 1 0 0 0 0 0 0
gdT 0 52 0 0 0 26 0 0 0 0 0 0 0 0 0 0 0
HSPC 0 0 41 4 0 0 34 0 4 0 0 0 1 1 0 0 0
ILC 0 3 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0
MAIT 0 220 0 0 0 14 0 2 0 2 0 0 0 0 0 0 0
NK 1 20 0 0 410 90 0 2 0 8 0 0 0 0 0 0 0
NK Proliferating 0 0 10 2 2 0 1 0 26 0 0 0 0 0 0 0 0
NK_CD56bright 0 6 0 0 7 1 0 2 0 0 0 0 0 0 0 0 0
pDC 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 56 0
Plasmablast 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
Platelet 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 31
Treg 0 0 0 0 0 0 12 9 23 1 0 0 0 1 0 0 0
42 43
ASDC 0 0
B intermediate 6 0
B memory 1 0
B naive 12 0
CD14 Mono 11 0
CD16 Mono 0 0
CD4 CTL 0 0
CD4 Naive 0 0
CD4 Proliferating 0 0
CD4 TCM 0 0
CD4 TEM 0 0
CD8 Naive 0 0
CD8 Proliferating 0 0
CD8 TCM 0 0
CD8 TEM 0 0
cDC1 0 0
cDC2 0 0
dnT 0 0
gdT 0 0
HSPC 0 0
ILC 0 0
MAIT 0 0
NK 0 0
NK Proliferating 0 0
NK_CD56bright 0 0
pDC 0 0
Plasmablast 0 16
Platelet 0 0
Treg 0 0
#write.csv(cell_distribution_df, file = "test2.csv", row.names = TRUE)
# Run Harmony on the merged object, using the "cell_line" metadata column as
# the grouping variable to correct for.
All_samples_Merged_integrated <- RunHarmony(
  All_samples_Merged,
  group.by.vars = "cell_line"
)
Transposing data matrix
Initializing state using k-means centroids initialization
Harmony 1/10
Harmony 2/10
Harmony 3/10
Harmony converged after 3 iterations
# UMAP, neighbour graph and clustering are all computed on the Harmony
# embeddings (first 22 dimensions) rather than on the PCA.
All_samples_Merged_integrated <- RunUMAP(
  All_samples_Merged_integrated,
  reduction = "harmony",
  dims = 1:22
)
All_samples_Merged_integrated <- FindNeighbors(
  All_samples_Merged_integrated,
  reduction = "harmony",
  dims = 1:22
)
# Cluster at twelve resolutions. The values are written out literally on
# purpose, so they stay exactly as typed.
All_samples_Merged_integrated <- FindClusters(
  All_samples_Merged_integrated,
  resolution = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.1, 1.2)
)
Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session
23:44:40 UMAP embedding parameters a = 0.9922 b = 1.112
23:44:40 Read 59355 rows and found 22 numeric columns
23:44:40 Using Annoy for neighbor search, n_neighbors = 30
23:44:40 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
23:44:47 Writing NN index file to temp file /tmp/RtmpAWHUvW/file7d2b5c21b994
23:44:47 Searching Annoy index using 1 thread, search_k = 3000
23:45:07 Annoy recall = 100%
23:45:09 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
23:45:14 Initializing from normalized Laplacian + noise (using RSpectra)
23:45:18 Commencing optimization for 200 epochs, with 2570480 positive edges
23:46:32 Optimization finished
Computing nearest neighbor graph
Computing SNN
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 59355
Number of edges: 1777503
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9663
Number of communities: 9
Elapsed time: 22 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 59355
Number of edges: 1777503
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9482
Number of communities: 13
Elapsed time: 31 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 59355
Number of edges: 1777503
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9324
Number of communities: 16
Elapsed time: 21 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 59355
Number of edges: 1777503
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9176
Number of communities: 17
Elapsed time: 27 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 59355
Number of edges: 1777503
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9053
Number of communities: 18
Elapsed time: 22 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 59355
Number of edges: 1777503
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8958
Number of communities: 19
Elapsed time: 28 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 59355
Number of edges: 1777503
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8874
Number of communities: 21
Elapsed time: 31 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 59355
Number of edges: 1777503
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8788
Number of communities: 20
Elapsed time: 29 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 59355
Number of edges: 1777503
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8707
Number of communities: 22
Elapsed time: 29 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 59355
Number of edges: 1777503
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8630
Number of communities: 24
Elapsed time: 24 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 59355
Number of edges: 1777503
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8556
Number of communities: 26
Elapsed time: 26 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 59355
Number of edges: 1777503
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8498
Number of communities: 29
Elapsed time: 26 seconds
# Predicted cell types (level 2) on the "umap" reduction of the integrated object.
DimPlot(
  object = All_samples_Merged_integrated,
  group.by = "predicted.celltype.l2",
  label = TRUE,
  label.box = TRUE,
  repel = TRUE,
  reduction = "umap"
)
Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
# Predicted cell types (level 2) on the "harmony" reduction.
DimPlot(
  object = All_samples_Merged_integrated,
  group.by = "predicted.celltype.l2",
  label = TRUE,
  label.box = TRUE,
  repel = TRUE,
  reduction = "harmony"
)
Warning: ggrepel: 24 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
# Predicted cell types (level 2) on the "integrated_dr" reduction.
DimPlot(
  object = All_samples_Merged_integrated,
  group.by = "predicted.celltype.l2",
  label = TRUE,
  label.box = TRUE,
  repel = TRUE,
  reduction = "integrated_dr"
)
Warning: ggrepel: 10 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
# Predicted cell types (level 2) on the "pca" reduction.
DimPlot(
  object = All_samples_Merged_integrated,
  group.by = "predicted.celltype.l2",
  label = TRUE,
  label.box = TRUE,
  repel = TRUE,
  reduction = "pca"
)
Warning: ggrepel: 21 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
# Predicted cell types (level 2) on the "ref.umap" reduction.
DimPlot(
  object = All_samples_Merged_integrated,
  group.by = "predicted.celltype.l2",
  label = TRUE,
  label.box = TRUE,
  repel = TRUE,
  reduction = "ref.umap"
)

# Cell line of origin on the "umap" reduction.
DimPlot(
  object = All_samples_Merged_integrated,
  group.by = "cell_line",
  label = TRUE,
  label.box = TRUE,
  repel = TRUE,
  reduction = "umap"
)

# Cluster assignment (seurat_clusters) on the "umap" reduction.
DimPlot(
  object = All_samples_Merged_integrated,
  group.by = "seurat_clusters",
  label = TRUE,
  label.box = TRUE,
  repel = TRUE,
  reduction = "umap"
)
# Cross-tabulate cell_line (rows) against seurat_clusters (columns) after
# integration.
# NOTE(review): seurat_clusters presumably reflects the last resolution passed
# to FindClusters (1.2) - the printed table has 29 clusters, matching the log
# for that run; confirm before interpreting.
cell_distribution_table <- table(
  All_samples_Merged_integrated$cell_line,
  All_samples_Merged_integrated$seurat_clusters
)
# Convert the contingency table to a wide data frame and show it.
cell_distribution_df <- as.data.frame.matrix(cell_distribution_table)
print(cell_distribution_df)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
L1 20 18 1 0 1076 3276 897 6 37 14 1 1 0 0 0
L2 129 61 2 5 2435 109 2584 13 228 63 6 0 0 0 2
L3 2385 2297 6 147 8 0 5 7 171 103 987 4 0 0 141
L4 1776 1549 4 213 14 7 16 78 762 482 701 141 1 1 85
L5 1305 1246 8 2205 10 0 8 34 393 221 150 0 0 0 102
L6 994 1389 15 466 17 0 19 51 830 464 175 1 0 0 451
L7 942 988 9 1015 43 0 45 159 982 704 133 0 0 0 176
PBMC 14 3 2349 6 17 21 9 1887 5 580 6 965 35 433 15
PBMC_10x 4 3 2032 0 78 237 2 1196 6 428 4 941 1821 994 4
15 16 17 18 19 20 21 22 23 24 25 26 27 28
L1 0 0 3 0 3 28 29 349 66 0 0 0 0 0
L2 1 5 1 0 1 36 17 27 145 0 0 0 65 0
L3 121 0 1 0 3 29 10 0 0 0 0 0 0 3
L4 241 1 2 1 6 19 14 8 2 18 2 0 0 6
L5 305 0 0 0 10 16 6 2 0 0 1 0 0 0
L6 119 4 0 0 3 102 42 6 0 0 0 0 0 0
L7 79 3 0 0 7 28 17 1 0 0 0 0 0 0
PBMC 18 274 9 697 301 132 325 3 14 142 79 1 2 12
PBMC_10x 0 591 856 23 373 135 48 33 11 59 128 123 0 32
#write.csv(cell_distribution_df, file = "15-2-integration_table_HARMONY-TEST1.csv", row.names = TRUE)
# Cross-tabulate predicted.celltype.l2 (rows) against seurat_clusters (columns)
# for the integrated object.
cell_distribution_table <- table(
  All_samples_Merged_integrated$predicted.celltype.l2,
  All_samples_Merged_integrated$seurat_clusters
)
# Convert the contingency table to a wide data frame and show it.
cell_distribution_df <- as.data.frame.matrix(cell_distribution_table)
print(cell_distribution_df)
0 1 2 3 4 5 6 7 8 9 10 11
ASDC 0 0 0 0 0 0 0 0 0 0 0 0
B intermediate 0 0 1 0 0 1 0 1 0 0 0 572
B memory 8 0 1 5 6 0 0 26 0 132 1 190
B naive 1 1 1 0 0 0 0 0 0 1 0 1163
CD14 Mono 0 0 1 0 79 263 1 3 0 12 1 23
CD16 Mono 1 0 0 0 0 0 0 0 0 0 0 0
CD4 CTL 0 0 0 0 1 0 0 0 0 4 0 0
CD4 Naive 0 0 1700 0 0 0 0 46 0 12 1 6
CD4 Proliferating 6360 6641 25 3427 2839 466 1507 105 3114 809 1943 4
CD4 TCM 732 12 2455 155 186 2905 10 2820 28 1490 137 50
CD4 TEM 0 0 3 0 0 0 0 52 0 30 0 0
CD8 Naive 0 0 102 0 0 0 2 9 0 3 2 2
CD8 Proliferating 0 0 0 0 0 0 0 0 0 2 0 0
CD8 TCM 2 0 37 0 0 9 0 127 0 127 2 2
CD8 TEM 0 1 1 0 1 3 0 47 0 121 0 1
cDC1 0 0 0 0 0 0 0 0 0 1 0 0
cDC2 1 0 0 1 0 0 0 2 0 39 0 0
dnT 2 1 5 1 3 0 0 23 0 8 2 0
gdT 0 0 6 0 0 0 0 7 0 23 0 0
HSPC 457 720 1 452 5 0 0 1 46 10 48 10
ILC 0 0 0 0 0 0 0 1 0 0 0 1
MAIT 2 0 1 0 0 0 0 18 0 162 0 0
NK 0 0 1 0 2 1 1 24 0 18 0 2
NK Proliferating 1 177 2 15 571 2 2063 0 225 3 26 0
NK_CD56bright 0 0 0 0 0 0 0 8 0 2 0 0
pDC 0 0 0 0 0 0 0 0 0 0 0 2
Plasmablast 0 0 0 0 0 0 0 0 0 0 0 19
Platelet 0 0 0 0 1 0 0 0 0 0 0 0
Treg 2 1 83 1 4 0 1 111 1 50 0 6
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
ASDC 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0
B intermediate 4 17 5 3 0 0 4 0 1 4 0 0 82 1 0
B memory 0 6 60 13 0 0 2 1 2 3 1 0 65 1 0
B naive 2 8 0 0 4 0 0 0 1 3 2 0 2 0 2
CD14 Mono 1830 3 0 0 9 844 673 1 0 4 31 25 2 9 5
CD16 Mono 6 0 0 0 0 1 2 0 0 0 0 0 0 0 116
CD4 CTL 0 0 0 0 12 0 0 0 0 0 0 0 0 0 0
CD4 Naive 0 26 0 0 0 1 0 204 32 14 0 0 1 0 0
CD4 Proliferating 0 0 602 747 7 3 0 17 125 40 25 142 0 1 0
CD4 TCM 0 39 267 69 11 19 29 427 254 344 362 62 9 2 0
CD4 TEM 0 1 1 0 4 0 0 0 0 3 0 0 0 0 0
CD8 Naive 0 1193 1 0 0 1 1 29 13 15 0 0 0 0 0
CD8 Proliferating 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
CD8 TCM 0 125 1 0 29 0 0 6 1 5 4 0 0 0 0
CD8 TEM 0 4 2 0 201 1 0 1 4 2 0 1 0 0 0
cDC1 0 0 4 3 0 0 2 0 0 0 0 0 0 32 0
cDC2 12 0 7 1 0 1 0 0 0 0 0 1 0 163 1
dnT 0 2 1 1 0 0 3 0 14 16 0 0 0 0 0
gdT 0 2 0 0 55 0 0 0 0 0 0 0 0 0 0
HSPC 0 0 15 36 0 0 4 1 2 3 2 0 1 0 0
ILC 0 0 0 0 3 0 0 0 1 1 0 0 0 0 0
MAIT 0 0 0 0 57 1 0 0 1 0 0 0 0 0 0
NK 1 2 0 0 479 0 0 0 1 1 0 1 0 0 0
NK Proliferating 0 0 10 11 3 0 0 0 40 10 1 6 0 0 0
NK_CD56bright 0 0 0 0 4 0 0 0 2 0 0 0 0 0 0
pDC 0 0 0 0 0 0 0 0 0 0 0 0 54 0 0
Plasmablast 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Platelet 2 0 0 0 0 0 1 0 0 0 0 0 0 0 0
Treg 0 0 0 0 0 0 0 20 31 40 1 0 0 1 0
27 28
ASDC 0 0
B intermediate 0 0
B memory 0 0
B naive 1 0
CD14 Mono 0 4
CD16 Mono 0 0
CD4 CTL 0 0
CD4 Naive 0 0
CD4 Proliferating 62 0
CD4 TCM 4 0
CD4 TEM 0 0
CD8 Naive 0 0
CD8 Proliferating 0 0
CD8 TCM 0 0
CD8 TEM 0 0
cDC1 0 0
cDC2 0 0
dnT 0 0
gdT 0 0
HSPC 0 20
ILC 0 0
MAIT 0 0
NK 0 0
NK Proliferating 0 1
NK_CD56bright 0 0
pDC 0 0
Plasmablast 0 0
Platelet 0 28
Treg 0 0
#write.csv(cell_distribution_df, file = "1-integration_table_HARMONY-TEST1_annotationbased.csv", row.names = TRUE)
# Cross-tabulate predicted.celltype.l1 (rows) against seurat_clusters (columns)
# for the integrated object.
cell_distribution_table <- table(
  All_samples_Merged_integrated$predicted.celltype.l1,
  All_samples_Merged_integrated$seurat_clusters
)
# Convert the contingency table to a wide data frame and show it.
cell_distribution_df <- as.data.frame.matrix(cell_distribution_table)
print(cell_distribution_df)
0 1 2 3 4 5 6 7 8 9 10 11 12 13
B 8 1 3 3 6 2 0 26 0 132 1 1946 6 31
CD4 T 7096 6654 4270 3585 3030 3357 1518 3145 3143 2393 2082 64 0 70
CD8 T 2 1 138 0 1 26 2 177 0 253 4 5 0 1320
DC 3 0 0 7 1 0 0 4 0 51 0 2 12 0
Mono 1 0 1 0 79 262 1 3 0 9 1 23 1836 3
NK 1 177 3 15 573 3 2064 32 225 23 26 2 1 1
other 456 720 1 447 5 0 0 1 46 5 48 11 2 0
other T 2 1 10 0 3 0 0 43 0 193 1 0 0 3
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
B 65 16 5 0 9 1 4 11 3 0 150 2 2 1 0
CD4 T 872 817 35 24 30 667 453 443 388 203 9 8 0 66 1
CD8 T 3 0 232 2 1 37 11 23 4 2 0 0 0 0 0
DC 13 5 0 1 2 0 0 1 0 1 57 193 1 0 0
Mono 0 0 9 844 674 1 0 4 31 25 2 7 121 0 4
NK 10 11 485 0 0 0 44 11 1 7 0 0 0 0 1
other 12 35 3 0 3 1 2 2 2 0 1 0 0 0 47
other T 1 0 109 1 2 0 11 13 0 0 0 0 0 0 0
#write.csv(cell_distribution_df, file = "15-2-integration_table_HARMONY-TEST1_annotationbased_l1.csv", row.names = TRUE)
# Cross-tabulate predicted.celltype.l2 (rows) against cell_line (columns) for
# the integrated object.
cell_distribution_table <- table(
  All_samples_Merged_integrated$predicted.celltype.l2,
  All_samples_Merged_integrated$cell_line
)
# Convert the contingency table to a wide data frame and show it.
cell_distribution_df <- as.data.frame.matrix(cell_distribution_table)
print(cell_distribution_df)
L1 L2 L3 L4 L5 L6 L7 PBMC PBMC_10x
ASDC 0 0 0 0 0 0 0 0 3
B intermediate 0 0 2 54 2 2 0 457 179
B memory 0 0 11 34 38 82 120 164 74
B naive 0 0 0 41 0 0 0 459 692
CD14 Mono 0 0 1 14 5 0 6 755 3042
CD16 Mono 0 0 0 0 0 0 0 2 124
CD4 CTL 0 0 0 0 0 0 0 16 1
CD4 Naive 0 0 0 7 0 0 0 524 1512
CD4 Proliferating 2461 2852 5452 5391 4732 4002 4115 0 6
CD4 TCM 3320 270 887 562 178 557 517 4609 1978
CD4 TEM 1 0 0 0 0 0 0 68 25
CD8 Naive 0 0 0 0 0 0 0 361 1012
CD8 Proliferating 0 0 0 0 0 1 1 0 0
CD8 TCM 1 16 0 0 0 0 0 286 174
CD8 TEM 1 8 0 0 2 3 1 181 195
cDC1 0 0 0 0 2 6 0 21 13
cDC2 0 0 0 4 11 3 35 52 124
dnT 2 3 0 1 2 5 2 38 29
gdT 0 0 0 0 0 0 0 26 67
HSPC 0 0 60 7 1035 213 490 17 12
ILC 0 0 0 1 0 0 0 3 3
MAIT 0 0 0 0 0 0 0 14 228
NK 0 0 0 1 0 0 0 89 444
NK Proliferating 38 2785 6 24 11 259 38 1 5
NK_CD56bright 0 0 0 0 0 0 0 1 15
pDC 0 0 0 0 0 0 0 0 56
Plasmablast 0 0 0 0 0 0 0 9 10
Platelet 0 0 0 0 0 0 0 1 31
Treg 1 1 9 9 4 15 6 200 108
#write.csv(cell_distribution_df, file = "15-2-integration_table_HARMONY-TEST1_annotationbased_cellline.csv", row.names = TRUE)
# save
#save(All_samples_Merged_integrated,file = "All_samples_PBMC10X_Harmony_Integrated.Robj")
```