merged_seurat_filtered
An object of class Seurat
36724 features across 49193 samples within 5 assays
Active assay: RNA (36601 features, 0 variable features)
2 layers present: counts, data
4 other assays present: ADT, prediction.score.celltype.l1, prediction.score.celltype.l2, prediction.score.celltype.l3
2 dimensional reductions calculated: integrated_dr, ref.umap
# Use the existing "cell_line" metadata column as the active identity class,
# so that the QC plots below are grouped per cell line.
Idents(object = All_samples_Merged) <- "cell_line"

# Percentage of counts coming from features whose name starts with RPS/RPL,
# stored as the "percent.rb" metadata column.
All_samples_Merged[["percent.rb"]] <- PercentageFeatureSet(
  All_samples_Merged,
  pattern = "^RP[SL]"
)

# Four-panel QC violin plot. The mitochondrial column is "percent.mito" --
# the name used by every other QC call in this script and by ScaleData() --
# not "percent.mt".
# `&` (rather than `+`) applies the theme to every panel of the combined plot.
VlnPlot(
  All_samples_Merged,
  features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.rb"),
  ncol = 4,
  pt.size = 0.1
) &
  theme(plot.title = element_text(size = 10))
# Three-panel QC violin: genes per cell, UMIs per cell, mitochondrial fraction.
VlnPlot(
  All_samples_Merged,
  features = c("nFeature_RNA", "nCount_RNA", "percent.mito"),
  ncol = 3
)

# FeatureScatter is typically used to visualize feature-feature relationships
# for anything calculated by the object, i.e. columns in object metadata,
# PC scores etc. Each scatter below gets a linear fit overlaid.
# Exact duplicate plot calls from the original notebook have been removed:
# each metric pair is now drawn once.

# Mitochondrial vs. ribosomal fraction
FeatureScatter(
  All_samples_Merged,
  feature1 = "percent.mito",
  feature2 = "percent.rb"
) +
  geom_smooth(method = "lm")

# Sequencing depth vs. number of detected genes
FeatureScatter(
  All_samples_Merged,
  feature1 = "nCount_RNA",
  feature2 = "nFeature_RNA"
) +
  geom_smooth(method = "lm")

# Sequencing depth vs. mitochondrial fraction
FeatureScatter(
  All_samples_Merged,
  feature1 = "nCount_RNA",
  feature2 = "percent.mito"
) +
  geom_smooth(method = "lm")
Warning: The following features are not present in the object: MLF1IP, not searching for symbol synonymsWarning: The following features are not present in the object: FAM64A, HN1, not searching for symbol synonyms
Idents(object = All_samples_Merged) <- "cell_line"

# perform standard workflow steps to figure out if we see any batch effects ----
All_samples_Merged <- NormalizeData(
  object = All_samples_Merged,
  verbose = FALSE
)

# Select the 3000 most variable features with the "vst" method
All_samples_Merged <- FindVariableFeatures(
  object = All_samples_Merged,
  selection.method = "vst",
  nfeatures = 3000,
  verbose = FALSE
)

# Scale the data while regressing out ribosomal fraction, mitochondrial
# fraction and the cell-cycle difference score.
# NOTE(review): "CC.Difference" is not computed in the visible part of this
# file -- it must already exist in the object's metadata.
# The stray trailing comma (an empty extra argument) has been removed.
All_samples_Merged <- ScaleData(
  object = All_samples_Merged,
  vars.to.regress = c("percent.rb", "percent.mito", "CC.Difference")
)
Regressing out percent.rb, percent.mito, CC.Difference
|
| | 0%
|
|= | 1%
|
|== | 2%
|
|=== | 3%
|
|==== | 4%
|
|===== | 5%
|
|====== | 6%
|
|======= | 7%
|
|======== | 8%
|
|========= | 9%
|
|========== | 10%
# Variable features of the RNA assay, fetched through the public accessor
# instead of reaching into the assay's internal slot.
Variables_genes <- VariableFeatures(All_samples_Merged, assay = "RNA")

# Exclude genes whose symbol starts with "HLA-", "XIST", "TRBV" or "TRAV"
# so they do not drive the principal components.
Variables_genes_after_exclusion <- Variables_genes[
  !grepl("^HLA-|^XIST|^TRBV|^TRAV", Variables_genes)
]

# These are now standard steps in the Seurat workflow for visualization and
# clustering.
# `do.print`, `pcs.print` and `genes.print` are Seurat v2 argument names that
# current RunPCA() does not use (the captured output shows the default 30
# genes per side). Their current equivalents are `verbose`, `ndims.print`
# and `nfeatures.print`.
All_samples_Merged <- RunPCA(
  All_samples_Merged,
  features = Variables_genes_after_exclusion,
  npcs = 50,
  verbose = TRUE,
  ndims.print = 1:5,
  nfeatures.print = 15
)
PC_ 1
Positive: SPI1, TYROBP, FCER1G, HCK, DOCK4, C15orf48, RAB31, ANPEP, CD14, PID1
PILRA, S100A9, THBS1, EREG, S100A8, CXCL8, MAFB, LYN, CYP27A1, RAB13
CSF2RA, CYBB, SDC2, SLC43A2, PNRC1, CXCL16, LYZ, RNF130, TXNIP, MS4A7
Negative: NPM1, SEC11C, FABP5, IL2RA, HSPD1, CCND2, MTHFD2, JPT1, CD70, C12orf75
TUBA1B, PTTG1, UBE2S, HSP90AB1, HMGB2, TYMS, SRM, BATF3, LGALS1, RPS4X
CD74, NME1, CYC1, ENO1, ATP5MC3, MTDH, YBX3, PSAT1, PRDX1, HDGFL3
PC_ 2
Positive: MARCKS, IL2RA, FAM107B, KYNU, GK, CD74, MSC, HDGFL3, SLC7A11, IFNGR2
TNFRSF4, KRT7, RBM47, EGFL6, CTSH, SEC11C, HCK, DOCK4, YBX3, SYT4
ZEB2, SPI1, MINDY3, FCER1G, SQSTM1, ANPEP, C15orf48, MIIP, CXCL8, TIMP1
Negative: KIR3DL1, KIR2DL3, XCL1, KLRC1, CD7, XCL2, EPCAM, KIR2DL4, KIR3DL2, MATK
KRT86, TRGV2, KRT81, CST7, CXCR3, GZMM, KLRK1, ESYT2, CLEC2B, MYO1E
IFITM1, ZBTB16, TSPOAP1, PRKCH, TRGV4, RPS15, TOX, KLRF2, LTB, ID3
PC_ 3
Positive: PAGE5, RBPMS, TENM3, LMNA, CDKN2A, PPBP, PPP2R2B, NDUFV2, VAMP5, IQCG
STAT1, RPL22L1, ERAP2, PLD1, FAM241A, SPOCK1, FAM50B, PIM2, CTAG2, SLC7A11-AS1
TNFSF10, ZC2HC1A, CD74, IGFBP3, PLAAT3, AC010967.1, C1orf162, CCDC50, CD2, RAP1A
Negative: CYBA, HACD1, SCCPDH, TNFRSF4, LY6E, EGFL6, CORO1B, SPINT2, RHOC, PTP4A3
BACE2, C12orf75, CAPG, APRT, PLPP1, SYT4, CTSC, TIGIT, GGH, DBN1
GAS5, GYPC, PON2, FAH, PHLDA2, KIR3DL1, HSPB1, RPL27A, CDK6, HIST1H1B
PC_ 4
Positive: RPS4Y1, BTG1, RPS27, TCF7, TRBC2, PNRC1, LINC00861, PIK3IP1, GIMAP5, SELL
YPEL3, CCR7, GIMAP7, LBH, IL7R, SESN3, PCED1B-AS1, FCMR, ZFP36, PBXIP1
TRIM22, MALAT1, PASK, BIRC3, GIMAP1, GIMAP4, ANK3, CD79A, RALGPS2, BTG2
Negative: TXN, PRDX1, TUBA1B, TUBB, PFN1, TYMS, KIR3DL2, STMN1, ANXA2, NME1
WDR34, FTL, RPL22L1, EIF4A1, TUBA1C, RPS15, HMGB2, CDKN2A, C1QBP, PPBP
RRM2, BID, CCNA2, TK1, CYP1B1, MT2A, DPP4, SLC7A11, ACAT1, ATP5MC3
PC_ 5
Positive: WFDC1, S100A4, IL32, S100A6, DUSP4, EGLN3, S100A11, F2R, ENTPD1, AHNAK
LINC02694, TP73, ITM2A, GATA3, PTGDR2, AL136456.1, FXYD5, GPAT3, MAL, RPS6KA5
TNFSF10, LINC02752, RNF213, PHLDA1, CD2, VIM, RGS9, FLNA, HOXC9, PALLD
Negative: CD79A, COL19A1, MS4A1, FCER2, AFF3, MIR155HG, LTA, BANK1, CD79B, RXFP1
DNAJC12, TCL1A, CD19, CCL17, IGHM, CXXC5, SPIB, GNG7, RUBCNL, SLC35F3
LINC00926, RALGPS2, C7orf50, FCRLA, NIBAN3, ARHGAP24, PPID, HVCN1, CD83, CCL5
# Inspect the standard deviation of all 50 PCs to judge how many to keep
ElbowPlot(
  object = All_samples_Merged,
  ndims = 50
)
NA
NA
library(ggplot2)
library(RColorBrewer)

# Number of cells per cell line. These are cell counts (table frequencies),
# not UMI counts, so the column is named accordingly.
cell_counts <- as.data.frame(table(All_samples_Merged$cell_line))
colnames(cell_counts) <- c("cell_line", "n_cells")

# Build a palette with exactly one colour per cell line instead of assuming
# there are 10. "Set3" provides between 3 and 12 colours; beyond 12 the
# palette is interpolated.
n_cell_lines <- nrow(cell_counts)
set3_max <- 12L
if (n_cell_lines <= set3_max) {
  cell_line_colors <- brewer.pal(max(3L, n_cell_lines), "Set3")[
    seq_len(n_cell_lines)
  ]
} else {
  cell_line_colors <- colorRampPalette(brewer.pal(set3_max, "Set3"))(
    n_cell_lines
  )
}

# Bar chart of cells per cell line with the count printed above each bar
ncells <- ggplot(
  cell_counts,
  aes(x = cell_line, y = n_cells, fill = cell_line)
) +
  geom_col() +
  theme_classic() +
  geom_text(aes(label = n_cells), vjust = -0.25) +
  scale_fill_manual(values = cell_line_colors) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5) # centre the title
  ) +
  ggtitle("Filtered cells per sample") +
  xlab("Cell lines") +
  ylab("Frequency")

print(ncells)
# TEST-1
# Pick a PC cut-off from the "pca" reduction produced by RunPCA().
pca_stdev <- Stdev(All_samples_Merged, reduction = "pca")

# Each PC's share of the summed standard deviations, in percent
pct <- pca_stdev / sum(pca_stdev) * 100
cumu <- cumsum(pct) # cumulative percentage across PCs

# Drop in percentage between each PC and the next one (PC_i - PC_{i+1})
pct_drop <- pct[-length(pct)] - pct[-1]

# co2: one past the last PC whose drop to the following PC exceeds 0.1
# percentage points (NA if no drop exceeds the threshold).
co2 <- sort(which(pct_drop > 0.1), decreasing = TRUE)[1] + 1
co2
[1] 17
# TEST-2
# get significant PCs
stdv <- Stdev(All_samples_Merged, reduction = "pca")
sum.stdv <- sum(stdv)
percent.stdv <- (stdv / sum.stdv) * 100
cumulative <- cumsum(percent.stdv)

# co1: first PC at which the cumulative share exceeds 90% while the PC's own
# share is below 5%.
co1 <- which(cumulative > 90 & percent.stdv < 5)[1]

# co2: one past the last PC whose drop to the next PC exceeds 0.1 points.
# The original indexed with `1:length(x) - 1`, which parses as `(1:n) - 1`
# (i.e. 0:(n-1)) and only worked because R silently ignores index 0;
# head()/tail() state the intended "all but last" / "all but first" directly.
pc_drop <- head(percent.stdv, -1) - tail(percent.stdv, -1)
co2 <- sort(which(pc_drop > 0.1), decreasing = TRUE)[1] + 1

# Use the more conservative (smaller) of the two cut-offs
min.pc <- min(co1, co2)
min.pc
[1] 17
# Create a dataframe with one row per PC: its share, cumulative share and rank
plot_df <- data.frame(
  pct = percent.stdv,
  cumu = cumulative,
  rank = seq_along(percent.stdv)
)

# Elbow plot to visualize the cut-off.
# The aesthetics refer to columns of `plot_df` rather than to the global
# vectors they were copied from, so the plot cannot drift out of sync with
# the data frame. Axis labels are set explicitly to keep the original titles.
ggplot(plot_df, aes(cumu, pct, label = rank, color = rank > min.pc)) +
  geom_text() +
  geom_vline(xintercept = 90, color = "grey") +
  geom_hline(
    yintercept = min(plot_df$pct[plot_df$pct > 5]),
    color = "grey"
  ) +
  labs(x = "cumulative", y = "percent.stdv") +
  theme_bw()
NA
NA
NA
# Build the neighbour graph on the first 17 dimensions (the cut-off chosen
# above), without progress messages.
All_samples_Merged <- FindNeighbors(
  object = All_samples_Merged,
  dims = seq_len(17),
  verbose = FALSE
)

# understanding resolution: cluster at ten resolutions from 0.1 to 1 so the
# effect of the parameter can be compared afterwards.
All_samples_Merged <- FindClusters(
  object = All_samples_Merged,
  resolution = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)
)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49193
Number of edges: 1633833
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9880
Number of communities: 11
Elapsed time: 8 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49193
Number of edges: 1633833
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9773
Number of communities: 13
Elapsed time: 9 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49193
Number of edges: 1633833
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9669
Number of communities: 15
Elapsed time: 10 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49193
Number of edges: 1633833
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9571
Number of communities: 16
Elapsed time: 10 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49193
Number of edges: 1633833
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9487
Number of communities: 19
Elapsed time: 10 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49193
Number of edges: 1633833
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9403
Number of communities: 20
Elapsed time: 9 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49193
Number of edges: 1633833
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9321
Number of communities: 20
Elapsed time: 9 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49193
Number of edges: 1633833
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9243
Number of communities: 22
Elapsed time: 9 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49193
Number of edges: 1633833
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9179
Number of communities: 23
Elapsed time: 9 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49193
Number of edges: 1633833
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9126
Number of communities: 24
Elapsed time: 9 seconds
# Compute the UMAP embedding from the same 17 dimensions used for clustering
All_samples_Merged <- RunUMAP(
  All_samples_Merged,
  dims = seq_len(17)
)
Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session16:43:32 UMAP embedding parameters a = 0.9922 b = 1.112
16:43:32 Read 49193 rows and found 17 numeric columns
16:43:32 Using Annoy for neighbor search, n_neighbors = 30
16:43:32 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
16:43:35 Writing NN index file to temp file /tmp/RtmpCvwIf9/filef40c932408d84
16:43:35 Searching Annoy index using 1 thread, search_k = 3000
16:43:46 Annoy recall = 100%
16:43:47 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
16:43:49 Initializing from normalized Laplacian + noise (using RSpectra)
16:43:59 Commencing optimization for 200 epochs, with 2070272 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
16:44:15 Optimization finished
# plot
# UMAP coloured by cell line before integration. The plot is stored as
# `before` for the side-by-side comparison later and printed from that same
# object, instead of building the identical DimPlot a second time.
before <- DimPlot(
  All_samples_Merged,
  reduction = "umap",
  group.by = "cell_line",
  label = TRUE,
  label.box = TRUE,
  repel = TRUE
)
before
NA
NA
NA
# Draw a clustering tree across the resolutions computed by FindClusters();
# the prefix selects the "RNA_snn_res.<resolution>" metadata columns.
library(clustree)
clustree(All_samples_Merged, prefix = "RNA_snn_res.")
# Cross-tabulate the predicted level-2 cell type labels against the clusters
# found at resolution 0.1 (rows = predicted label, columns = cluster id).
# NOTE(review): predicted.celltype.l2 presumably comes from an earlier
# reference-mapping step not shown in this file -- confirm.
table(All_samples_Merged$predicted.celltype.l2, All_samples_Merged$RNA_snn_res.0.1)
0 1 2 3 4 5 6 7 8 9 10
ASDC 0 0 0 0 0 0 0 0 0 1 0
B intermediate 0 0 0 0 0 0 0 0 0 502 14
B memory 0 0 0 0 0 0 0 0 0 142 2
B naive 0 0 0 0 0 0 0 0 0 569 1
CD14 Mono 0 0 0 0 0 0 0 0 0 2 715
CD16 Mono 0 0 0 0 0 0 0 0 0 2 74
CD4 CTL 0 6 0 0 0 0 0 0 0 0 0
CD4 Naive 0 705 0 0 1 0 0 0 0 3 0
CD4 Proliferating 5206 1 5347 3011 2419 3943 3969 3111 1372 0 0
CD4 TCM 1016 4364 571 286 3209 627 519 144 51 49 12
CD4 TEM 0 46 0 0 23 0 0 0 0 0 0
CD8 Naive 6 380 2 0 0 19 2 3 1 1 1
CD8 TCM 0 255 0 10 148 0 0 0 0 1 0
CD8 TEM 0 209 0 8 0 0 0 0 0 0 0
cDC2 11 0 0 0 0 78 168 67 12 81 1
dnT 1 51 0 2 3 0 4 0 0 2 0
gdT 0 13 0 0 0 0 0 0 0 0 0
HSPC 173 8 9 0 18 669 303 785 406 8 0
ILC 0 2 0 0 0 0 0 0 0 0 0
MAIT 0 56 0 0 0 0 0 0 0 1 0
NK 0 92 0 0 0 0 0 0 0 0 0
NK Proliferating 8 0 11 2615 19 14 193 4 0 0 0
pDC 0 0 0 0 0 0 0 0 0 1 0
Plasmablast 0 0 0 0 0 0 0 0 0 9 0
Platelet 0 0 0 0 0 0 0 0 0 0 7
Treg 11 173 0 0 1 0 19 1 0 2 0
table(All_samples_Merged$predicted.celltype.l2, All_samples_Merged$RNA_snn_res.0.2)
0 1 2 3 4 5 6 7 8 9 10 11 12
ASDC 0 0 0 0 0 0 0 0 0 0 0 1 0
B intermediate 0 0 0 0 0 0 0 0 0 501 14 1 0
B memory 0 0 0 0 0 0 0 0 0 141 2 1 0
B naive 0 0 0 0 0 0 0 0 0 569 1 0 0
CD14 Mono 0 0 0 0 0 0 0 0 0 0 715 2 0
CD16 Mono 0 0 0 0 0 0 0 0 0 0 74 2 0
CD4 CTL 0 6 0 0 0 0 0 0 0 0 0 0 0
CD4 Naive 0 701 0 0 1 0 0 0 0 3 0 0 4
CD4 Proliferating 5206 1 5347 3011 2419 3945 3967 3111 1372 0 0 0 0
CD4 TCM 1016 4303 571 286 3209 627 519 144 51 48 12 1 61
CD4 TEM 0 46 0 0 23 0 0 0 0 0 0 0 0
CD8 Naive 6 377 2 0 0 19 2 3 1 1 1 0 3
CD8 TCM 0 254 0 10 148 0 0 0 0 1 0 0 1
CD8 TEM 0 209 0 8 0 0 0 0 0 0 0 0 0
cDC2 11 0 0 0 0 78 168 67 12 0 1 81 0
dnT 1 50 0 2 3 0 4 0 0 2 0 0 1
gdT 0 13 0 0 0 0 0 0 0 0 0 0 0
HSPC 173 8 9 0 19 669 303 785 406 1 0 6 0
ILC 0 2 0 0 0 0 0 0 0 0 0 0 0
MAIT 0 56 0 0 0 0 0 0 0 1 0 0 0
NK 0 91 0 0 0 0 0 0 0 0 0 0 1
NK Proliferating 8 0 11 2615 19 14 193 4 0 0 0 0 0
pDC 0 0 0 0 0 0 0 0 0 1 0 0 0
Plasmablast 0 0 0 0 0 0 0 0 0 9 0 0 0
Platelet 0 0 0 0 0 0 0 0 0 0 7 0 0
Treg 11 168 0 0 1 0 19 1 0 2 0 0 5
table(All_samples_Merged$predicted.celltype.l2, All_samples_Merged$RNA_snn_res.0.3)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
ASDC 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
B intermediate 0 0 0 0 0 0 0 0 0 501 14 0 0 1 0
B memory 0 0 0 0 0 0 0 0 0 141 2 0 0 1 0
B naive 0 0 0 0 0 0 0 0 0 569 1 0 0 0 0
CD14 Mono 0 0 0 0 0 0 0 0 0 0 715 0 0 2 0
CD16 Mono 0 0 0 0 0 0 0 0 0 0 74 0 0 2 0
CD4 CTL 0 0 0 1 0 0 0 0 0 0 0 5 0 0 0
CD4 Naive 0 0 0 701 1 0 0 0 0 3 0 0 0 0 4
CD4 Proliferating 5206 5347 3011 1 2419 3942 3909 3111 1372 0 0 0 61 0 0
CD4 TCM 1016 571 286 4290 3209 626 481 144 51 48 12 13 39 1 61
CD4 TEM 0 0 0 43 23 0 0 0 0 0 0 3 0 0 0
CD8 Naive 6 2 0 377 0 19 2 3 1 1 1 0 0 0 3
CD8 TCM 0 0 10 224 148 0 0 0 0 1 0 30 0 0 1
CD8 TEM 0 0 8 27 0 0 0 0 0 0 0 182 0 0 0
cDC2 11 0 0 0 0 78 164 67 12 0 1 0 4 81 0
dnT 1 0 2 50 3 0 0 0 0 2 0 0 4 0 1
gdT 0 0 0 0 0 0 0 0 0 0 0 13 0 0 0
HSPC 173 9 0 8 18 669 294 785 406 1 0 0 9 7 0
ILC 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0
MAIT 0 0 0 6 0 0 0 0 0 1 0 50 0 0 0
NK 0 0 0 0 0 0 0 0 0 0 0 91 0 0 1
NK Proliferating 8 11 2615 0 19 14 180 4 0 0 0 0 13 0 0
pDC 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
Plasmablast 0 0 0 0 0 0 0 0 0 9 0 0 0 0 0
Platelet 0 0 0 0 0 0 0 0 0 0 7 0 0 0 0
Treg 11 0 0 168 1 0 0 1 0 2 0 0 19 0 5
table(All_samples_Merged$predicted.celltype.l2, All_samples_Merged$RNA_snn_res.0.4)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
ASDC 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
B intermediate 0 0 0 0 0 0 0 0 0 0 501 14 0 0 1 0
B memory 0 0 0 0 0 0 0 0 0 0 141 2 0 0 1 0
B naive 0 0 0 0 0 0 0 0 0 0 569 1 0 0 0 0
CD14 Mono 0 0 0 0 0 0 0 0 0 0 0 715 0 0 2 0
CD16 Mono 0 0 0 0 0 0 0 0 0 0 0 74 0 0 2 0
CD4 CTL 0 0 0 0 1 0 0 0 0 0 0 0 0 5 0 0
CD4 Naive 0 0 0 1 694 0 0 0 0 0 3 0 11 0 0 0
CD4 Proliferating 5191 5347 3011 2419 0 3907 3148 2709 1198 1372 0 0 62 0 0 15
CD4 TCM 1006 571 286 3208 3955 481 145 26 599 51 48 12 434 14 2 10
CD4 TEM 0 0 0 23 42 0 0 0 0 0 0 0 1 3 0 0
CD8 Naive 6 2 0 0 362 2 3 1 18 1 1 1 18 0 0 0
CD8 TCM 0 0 10 148 221 0 0 0 0 0 1 0 4 30 0 0
CD8 TEM 0 0 8 0 27 0 0 0 0 0 0 0 0 182 0 0
cDC2 11 0 0 0 0 164 67 7 71 12 0 1 4 0 81 0
dnT 0 0 2 3 14 0 0 0 0 0 2 0 41 0 0 1
gdT 0 0 0 0 0 0 0 0 0 0 0 0 0 13 0 0
HSPC 163 9 0 5 4 294 785 662 7 406 1 0 13 0 20 10
ILC 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0
MAIT 0 0 0 0 4 0 0 0 0 0 1 0 0 52 0 0
NK 0 0 0 0 0 0 0 0 0 0 0 0 1 91 0 0
NK Proliferating 6 11 2615 19 0 180 4 5 9 0 0 0 13 0 0 2
pDC 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
Plasmablast 0 0 0 0 0 0 0 0 0 0 9 0 0 0 0 0
Platelet 0 0 0 0 0 0 0 0 0 0 0 7 0 0 0 0
Treg 2 0 0 1 145 0 1 0 0 0 2 0 47 0 0 9
table(All_samples_Merged$predicted.celltype.l2, All_samples_Merged$RNA_snn_res.0.5)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
ASDC 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
B intermediate 0 0 0 0 0 0 0 0 0 0 0 501 14 0 0 0 0 1
B memory 0 0 0 0 0 0 0 0 0 0 0 141 2 0 0 0 0 1
B naive 0 0 0 0 0 0 0 0 0 0 0 569 1 0 0 0 0 0
CD14 Mono 0 0 0 0 0 0 0 0 0 0 0 0 715 0 0 0 0 2
CD16 Mono 0 0 0 0 0 0 0 0 0 0 0 0 74 0 0 0 0 2
CD4 CTL 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 5 0 0
CD4 Naive 0 0 0 696 0 0 0 0 1 0 0 3 0 9 0 0 0 0
CD4 Proliferating 5191 3011 5328 0 3907 3146 2686 2336 83 1221 1374 0 0 1 19 0 61 0
CD4 TCM 1006 286 190 3975 481 145 26 850 2358 599 51 48 12 376 381 13 39 2
CD4 TEM 0 0 0 42 0 0 0 4 19 0 0 0 0 1 0 3 0 0
CD8 Naive 6 0 1 363 2 3 1 0 0 18 1 1 1 17 1 0 0 0
CD8 TCM 0 10 0 221 0 0 0 37 111 0 0 1 0 4 0 30 0 0
CD8 TEM 0 8 0 27 0 0 0 0 0 0 0 0 0 0 0 182 0 0
cDC2 11 0 0 0 164 67 7 0 0 71 12 0 1 0 0 0 4 81
dnT 0 2 0 14 0 0 0 3 0 0 0 2 0 37 0 0 4 0
gdT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 13 0 0
HSPC 163 0 8 4 294 785 662 4 1 7 406 1 0 4 1 0 9 20
ILC 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0
MAIT 0 0 0 5 0 0 0 0 0 0 0 1 0 1 0 50 0 0
NK 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 91 0 0
NK Proliferating 6 2615 11 0 180 4 5 19 0 9 0 0 0 0 0 0 13 0
pDC 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
Plasmablast 0 0 0 0 0 0 0 0 0 0 0 9 0 0 0 0 0 0
Platelet 0 0 0 0 0 0 0 0 0 0 0 0 7 0 0 0 0 0
Treg 2 0 0 145 0 1 0 1 0 0 0 2 0 28 0 0 19 0
18
ASDC 0
B intermediate 0
B memory 0
B naive 0
CD14 Mono 0
CD16 Mono 0
CD4 CTL 0
CD4 Naive 0
CD4 Proliferating 15
CD4 TCM 10
CD4 TEM 0
CD8 Naive 0
CD8 TCM 0
CD8 TEM 0
cDC2 0
dnT 1
gdT 0
HSPC 10
ILC 0
MAIT 0
NK 0
NK Proliferating 2
pDC 0
Plasmablast 0
Platelet 0
Treg 9
table(All_samples_Merged$predicted.celltype.l2, All_samples_Merged$RNA_snn_res.0.6)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
ASDC 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
B intermediate 0 0 0 0 0 0 0 0 0 0 0 501 14 0 0 0 0 0
B memory 0 0 0 0 0 0 0 0 0 0 0 141 2 0 0 0 0 0
B naive 0 0 0 0 0 0 0 0 0 0 0 569 1 0 0 0 0 0
CD14 Mono 0 0 0 0 0 0 0 0 0 0 0 0 715 0 0 0 0 0
CD16 Mono 0 0 0 0 0 0 0 0 0 0 0 0 74 0 0 0 0 0
CD4 CTL 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 5 0
CD4 Naive 0 0 0 696 0 0 0 0 1 0 0 3 0 0 9 0 0 0
CD4 Proliferating 5191 3011 5328 0 3907 2792 2683 2345 74 1223 1372 0 0 357 0 19 0 62
CD4 TCM 1006 286 187 3996 481 30 26 839 2369 599 51 48 12 115 348 384 14 45
CD4 TEM 0 0 0 42 0 0 0 4 19 0 0 0 0 0 1 0 3 0
CD8 Naive 6 0 1 366 2 0 1 0 0 18 1 1 1 3 14 1 0 0
CD8 TCM 0 10 0 221 0 0 0 35 113 0 0 1 0 0 4 0 30 0
CD8 TEM 0 8 0 27 0 0 0 0 0 0 0 0 0 0 0 0 182 0
cDC2 11 0 0 0 164 51 7 0 0 71 12 0 1 16 0 0 0 4
dnT 0 2 0 14 0 0 0 3 0 0 0 2 0 0 36 0 0 5
gdT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 13 0
HSPC 163 0 8 4 294 784 662 4 1 7 406 1 0 1 0 1 0 13
ILC 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0
MAIT 0 0 0 4 0 0 0 0 0 0 0 1 0 0 0 0 52 0
NK 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 91 0
NK Proliferating 6 2615 11 0 180 4 5 19 0 9 0 0 0 0 0 0 0 13
pDC 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
Plasmablast 0 0 0 0 0 0 0 0 0 0 0 9 0 0 0 0 0 0
Platelet 0 0 0 0 0 0 0 0 0 0 0 0 7 0 0 0 0 0
Treg 2 0 0 147 0 0 0 1 0 0 0 2 0 1 21 0 0 24
18 19
ASDC 1 0
B intermediate 1 0
B memory 1 0
B naive 0 0
CD14 Mono 2 0
CD16 Mono 2 0
CD4 CTL 0 0
CD4 Naive 0 0
CD4 Proliferating 0 15
CD4 TCM 2 10
CD4 TEM 0 0
CD8 Naive 0 0
CD8 TCM 0 0
CD8 TEM 0 0
cDC2 81 0
dnT 0 1
gdT 0 0
HSPC 20 10
ILC 0 0
MAIT 0 0
NK 0 0
NK Proliferating 0 2
pDC 0 0
Plasmablast 0 0
Platelet 0 0
Treg 0 9
table(All_samples_Merged$predicted.celltype.l2, All_samples_Merged$RNA_snn_res.0.7)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
ASDC 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
B intermediate 0 0 0 0 0 0 0 0 0 0 0 501 14 0 0 0 0 0
B memory 0 0 0 0 0 0 0 0 0 0 0 141 2 0 0 0 0 0
B naive 0 0 0 0 0 0 0 0 0 0 0 569 1 0 0 0 0 0
CD14 Mono 0 0 0 0 0 0 0 0 0 0 0 0 715 0 0 0 0 0
CD16 Mono 0 0 0 0 0 0 0 0 0 0 0 0 74 0 0 0 0 0
CD4 CTL 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 5 0
CD4 Naive 0 0 0 696 0 0 0 0 1 0 0 3 0 0 9 0 0 0
CD4 Proliferating 5191 3011 5328 0 3867 2792 2709 2335 84 1197 1372 0 0 357 1 19 0 101
CD4 TCM 1006 286 187 3975 478 30 26 828 2380 599 51 48 12 115 375 384 14 42
CD4 TEM 0 0 0 42 0 0 0 4 19 0 0 0 0 0 1 0 3 0
CD8 Naive 6 0 1 363 2 0 1 0 0 18 1 1 1 3 17 1 0 0
CD8 TCM 0 10 0 221 0 0 0 35 113 0 0 1 0 0 4 0 30 0
CD8 TEM 0 8 0 27 0 0 0 0 0 0 0 0 0 0 0 0 182 0
cDC2 11 0 0 0 162 51 7 0 0 71 12 0 1 16 0 0 0 6
dnT 0 2 0 14 0 0 0 3 0 0 0 2 0 0 37 0 0 4
gdT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 13 0
HSPC 163 0 8 3 294 784 662 4 1 7 406 1 0 1 4 1 0 9
ILC 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0
MAIT 0 0 0 4 0 0 0 0 0 0 0 1 0 0 0 0 52 0
NK 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 91 0
NK Proliferating 6 2615 11 0 172 4 5 19 0 9 0 0 0 0 0 0 0 21
pDC 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
Plasmablast 0 0 0 0 0 0 0 0 0 0 0 9 0 0 0 0 0 0
Platelet 0 0 0 0 0 0 0 0 0 0 0 0 7 0 0 0 0 0
Treg 2 0 0 145 0 0 0 1 0 0 0 2 0 1 28 0 0 19
18 19
ASDC 1 0
B intermediate 1 0
B memory 1 0
B naive 0 0
CD14 Mono 2 0
CD16 Mono 2 0
CD4 CTL 0 0
CD4 Naive 0 0
CD4 Proliferating 0 15
CD4 TCM 2 10
CD4 TEM 0 0
CD8 Naive 0 0
CD8 TCM 0 0
CD8 TEM 0 0
cDC2 81 0
dnT 0 1
gdT 0 0
HSPC 21 10
ILC 0 0
MAIT 0 0
NK 0 0
NK Proliferating 0 2
pDC 0 0
Plasmablast 0 0
Platelet 0 0
Treg 0 9
table(All_samples_Merged$predicted.celltype.l2, All_samples_Merged$RNA_snn_res.0.8)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
ASDC 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
B intermediate 0 0 0 0 0 0 0 0 0 0 0 0 0 0 501 14 0 0
B memory 0 0 0 0 0 0 0 0 0 0 0 0 0 0 141 2 0 0
B naive 0 0 0 0 0 0 0 0 0 0 0 0 0 0 569 1 0 0
CD14 Mono 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 715 0 0
CD16 Mono 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 74 0 0
CD4 CTL 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 5
CD4 Naive 0 0 0 0 0 0 0 66 0 635 1 0 0 0 3 0 0 0
CD4 Proliferating 3011 3867 4218 2792 3254 2679 2330 1 1937 0 89 1227 1372 1129 0 0 357 0
CD4 TCM 286 478 51 30 27 26 819 2551 979 1739 2389 599 51 520 48 12 115 13
CD4 TEM 0 0 0 0 0 0 4 42 0 1 19 0 0 0 0 0 0 3
CD8 Naive 0 2 0 0 0 1 0 60 6 317 0 18 1 2 1 1 3 0
CD8 TCM 10 0 0 0 0 0 35 209 0 17 113 0 0 0 1 0 0 28
CD8 TEM 8 0 0 0 0 0 0 28 0 0 0 0 0 0 0 0 0 181
cDC2 0 162 0 51 2 7 0 0 9 0 0 71 12 0 0 1 16 0
dnT 2 0 0 0 0 0 3 47 0 3 0 0 0 0 2 0 0 0
gdT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 13
HSPC 0 294 8 784 155 662 4 4 8 3 1 7 406 1 1 0 1 0
ILC 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0
MAIT 0 0 0 0 0 0 0 6 0 0 0 0 0 0 1 0 0 50
NK 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 91
NK Proliferating 2615 172 10 4 6 5 19 0 0 0 0 9 0 1 0 0 0 0
pDC 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
Plasmablast 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9 0 0 0
Platelet 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7 0 0
Treg 0 0 0 0 1 0 1 125 0 43 0 0 0 0 2 0 1 0
18 19 20 21
ASDC 0 1 0 0
B intermediate 0 1 0 0
B memory 0 1 0 0
B naive 0 0 0 0
CD14 Mono 0 2 0 0
CD16 Mono 0 2 0 0
CD4 CTL 0 0 0 0
CD4 Naive 0 0 4 0
CD4 Proliferating 101 0 0 15
CD4 TCM 42 2 61 10
CD4 TEM 0 0 0 0
CD8 Naive 0 0 3 0
CD8 TCM 0 0 1 0
CD8 TEM 0 0 0 0
cDC2 6 81 0 0
dnT 4 0 1 1
gdT 0 0 0 0
HSPC 9 21 0 10
ILC 0 0 0 0
MAIT 0 0 0 0
NK 0 0 1 0
NK Proliferating 21 0 0 2
pDC 0 0 0 0
Plasmablast 0 0 0 0
Platelet 0 0 0 0
Treg 19 0 5 10
table(All_samples_Merged$predicted.celltype.l2, All_samples_Merged$RNA_snn_res.0.9)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
ASDC 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
B intermediate 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 501 14 0
B memory 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 141 2 0
B naive 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 569 1 0
CD14 Mono 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 715 0
CD16 Mono 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 74 0
CD4 CTL 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
CD4 Naive 0 0 0 0 0 0 0 59 635 0 1 0 0 0 0 3 0 0
CD4 Proliferating 3011 4185 2736 2700 3167 2345 2024 0 0 2264 74 1602 1244 1372 1162 0 0 375
CD4 TCM 286 50 28 26 27 847 979 2252 1703 5 2361 473 600 51 521 48 12 116
CD4 TEM 0 0 0 0 0 4 0 41 1 0 19 0 0 0 0 0 0 0
CD8 Naive 0 0 0 1 0 0 6 45 317 0 0 2 18 1 2 1 1 3
CD8 TCM 10 0 0 0 0 36 0 203 18 0 112 0 0 0 0 1 0 0
CD8 TEM 8 0 0 0 0 0 0 27 0 0 0 0 0 0 0 0 0 0
cDC2 0 0 51 7 2 0 9 0 0 0 0 162 71 12 0 0 1 16
dnT 2 0 0 0 0 3 0 11 3 0 0 0 0 0 0 2 0 0
gdT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
HSPC 0 8 784 662 155 4 8 0 3 187 1 107 7 406 1 1 0 1
ILC 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0
MAIT 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 1 0 0
NK 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
NK Proliferating 2615 10 4 5 6 19 0 0 0 157 0 15 9 0 1 0 0 0
pDC 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
Plasmablast 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9 0 0
Platelet 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7 0
Treg 0 0 0 0 1 1 0 102 43 0 0 0 0 0 0 2 0 1
18 19 20 21 22
ASDC 0 0 0 1 0
B intermediate 0 0 0 1 0
B memory 0 0 0 1 0
B naive 0 0 0 0 0
CD14 Mono 0 0 0 2 0
CD16 Mono 0 0 0 2 0
CD4 CTL 0 5 0 0 0
CD4 Naive 11 0 0 0 0
CD4 Proliferating 1 0 102 0 15
CD4 TCM 395 14 42 2 10
CD4 TEM 1 3 0 0 0
CD8 Naive 18 0 0 0 0
CD8 TCM 4 30 0 0 0
CD8 TEM 0 182 0 0 0
cDC2 0 0 6 81 0
dnT 37 0 4 0 1
gdT 0 13 0 0 0
HSPC 4 0 9 21 10
ILC 0 0 0 0 0
MAIT 0 52 0 0 0
NK 1 91 0 0 0
NK Proliferating 0 0 21 0 2
pDC 0 0 0 0 0
Plasmablast 0 0 0 0 0
Platelet 0 0 0 0 0
Treg 28 0 19 0 10
table(All_samples_Merged$predicted.celltype.l2, All_samples_Merged$RNA_snn_res.1)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
ASDC 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
B intermediate 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 501 14
B memory 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 141 2
B naive 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 569 1
CD14 Mono 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 715
CD16 Mono 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 74
CD4 CTL 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
CD4 Naive 0 0 0 0 0 0 0 60 635 1 0 0 0 0 0 0 3 0
CD4 Proliferating 4134 2734 2264 3261 2686 2334 1930 0 0 85 1752 2155 747 1220 1372 1213 0 0
CD4 TCM 45 28 284 27 26 828 979 2276 1687 2380 477 4 2 599 51 526 48 12
CD4 TEM 0 0 0 0 0 4 0 41 1 19 0 0 0 0 0 0 0 0
CD8 Naive 0 0 0 0 1 0 6 45 317 0 2 0 0 18 1 2 1 1
CD8 TCM 0 0 10 0 0 35 0 203 18 113 0 0 0 0 0 0 1 0
CD8 TEM 0 0 8 0 0 0 0 27 0 0 0 0 0 0 0 0 0 0
cDC2 0 51 0 2 7 0 9 0 0 0 164 0 0 71 12 0 0 1
dnT 0 0 2 0 0 3 0 11 3 0 0 0 0 0 0 0 2 0
gdT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
HSPC 8 784 0 155 662 4 8 0 3 1 107 187 0 7 406 1 1 0
ILC 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0
MAIT 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 1 0
NK 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
NK Proliferating 10 4 966 6 5 19 0 0 0 0 25 155 1649 9 0 1 0 0
pDC 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
Plasmablast 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9 0
Platelet 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7
Treg 0 0 0 1 0 1 0 102 43 0 0 0 0 0 0 0 2 0
18 19 20 21 22 23
ASDC 0 0 0 0 1 0
B intermediate 0 0 0 0 1 0
B memory 0 0 0 0 1 0
B naive 0 0 0 0 0 0
CD14 Mono 0 0 0 0 2 0
CD16 Mono 0 0 0 0 2 0
CD4 CTL 0 0 5 0 0 0
CD4 Naive 0 10 0 0 0 0
CD4 Proliferating 415 1 0 61 0 15
CD4 TCM 117 387 14 39 2 10
CD4 TEM 0 1 3 0 0 0
CD8 Naive 3 18 0 0 0 0
CD8 TCM 0 4 30 0 0 0
CD8 TEM 0 0 182 0 0 0
cDC2 16 0 0 4 81 0
dnT 0 37 0 4 0 1
gdT 0 0 13 0 0 0
HSPC 1 4 0 9 21 10
ILC 0 0 0 0 0 0
MAIT 0 0 52 0 0 0
NK 0 1 91 0 0 0
NK Proliferating 0 0 0 13 0 2
pDC 0 0 0 0 0 0
Plasmablast 0 0 0 0 0 0
Platelet 0 0 0 0 0 0
Treg 1 28 0 19 0 10
# run Harmony -----------
# Integrate across the groups in the 'orig.ident' metadata column; the result
# is kept in a separate object so the un-integrated one stays available.
All_samples_Merged.harmony <- RunHarmony(
  All_samples_Merged,
  group.by.vars = 'orig.ident',
  plot_convergence = FALSE
)
Transposing data matrix
Initializing state using k-means centroids initialization
Harmony 1/10
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Harmony 2/10
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Harmony 3/10
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Harmony converged after 3 iterations
# List the dimensional reductions stored in the object to confirm that a
# "harmony" reduction was added alongside the existing ones.
All_samples_Merged.harmony@reductions
$integrated_dr
A dimensional reduction object with key integrateddr_
Number of dimensions: 50
Number of cells: 49193
Projected dimensional reduction calculated: FALSE
Jackstraw run: FALSE
Computed using assay: RNA
$ref.umap
A dimensional reduction object with key UMAP_
Number of dimensions: 2
Number of cells: 49193
Projected dimensional reduction calculated: FALSE
Jackstraw run: FALSE
Computed using assay: RNA
$pca
A dimensional reduction object with key PC_
Number of dimensions: 50
Number of cells: 49193
Projected dimensional reduction calculated: FALSE
Jackstraw run: FALSE
Computed using assay: RNA
$umap
A dimensional reduction object with key umap_
Number of dimensions: 2
Number of cells: 49193
Projected dimensional reduction calculated: FALSE
Jackstraw run: FALSE
Computed using assay: RNA
$harmony
A dimensional reduction object with key harmony_
Number of dimensions: 50
Number of cells: 49193
Projected dimensional reduction calculated: TRUE
Jackstraw run: FALSE
Computed using assay: RNA
# Pull the cell-by-dimension matrix of the "harmony" reduction and look at
# its top-left 10 x 10 corner.
All_samples_Merged.harmony.embed <- Embeddings(
  object = All_samples_Merged.harmony,
  reduction = "harmony"
)
All_samples_Merged.harmony.embed[seq_len(10), seq_len(10)]
harmony_1 harmony_2 harmony_3 harmony_4 harmony_5 harmony_6 harmony_7
L1_AAACCTGAGGGCTTCC-1 21.56244733 9.868702 -4.0174771 -7.94158999 3.0739480 0.9387805 2.436339
L1_AAACCTGGTGCAGGTA-1 5.14952820 -4.701175 -0.0155166 2.65451303 7.3121708 -6.4188836 1.762186
L1_AAACCTGGTTAAAGTG-1 0.05158189 -10.464501 -4.8046129 -2.32140205 6.3555470 -0.9085890 3.418133
L1_AAACCTGTCAGGTAAA-1 12.76395119 1.385307 -4.1782353 -6.49845857 1.2501412 0.5047437 -0.649038
L1_AAACCTGTCCCTGACT-1 23.03747278 9.108082 -3.1995286 -5.21196428 1.3911921 0.9191888 -1.730269
L1_AAACCTGTCCTTCAAT-1 3.14703622 -7.715660 -5.3762496 -3.35197579 4.3830014 -0.7946471 2.192075
L1_AAACCTGTCTTGCAAG-1 4.86674698 -7.692752 -5.4123418 -2.60206386 5.6578043 -2.7672873 2.398720
L1_AAACGGGAGGCTAGAC-1 6.14064874 -1.305362 -3.6118773 -4.53951843 2.9519808 1.0211799 1.146628
L1_AAACGGGAGGGTATCG-1 2.49450857 -2.627244 -0.8153037 0.29580250 5.0091363 -3.5007965 1.416005
L1_AAACGGGAGGGTTCCC-1 1.14463806 7.007878 2.7785099 0.05781279 -0.9112137 0.9857917 -1.878294
harmony_8 harmony_9 harmony_10
L1_AAACCTGAGGGCTTCC-1 -1.6732088 0.7306888 -0.6665029
L1_AAACCTGGTGCAGGTA-1 8.1348959 0.9153502 1.9750565
L1_AAACCTGGTTAAAGTG-1 5.1158163 1.5619024 -2.3101974
L1_AAACCTGTCAGGTAAA-1 -0.1217165 -1.3594819 -1.4987351
L1_AAACCTGTCCCTGACT-1 -2.7439452 -0.1149423 -0.5856063
L1_AAACCTGTCCTTCAAT-1 4.4926701 0.7441967 1.5451011
L1_AAACCTGTCTTGCAAG-1 6.6934033 2.0389352 0.8621451
L1_AAACGGGAGGCTAGAC-1 -1.8649246 0.3168664 -2.3005340
L1_AAACGGGAGGGTATCG-1 2.9221759 0.8803822 -3.6148133
L1_AAACGGGAGGGTTCCC-1 -6.1736648 -1.2823246 -3.8366808
# Do UMAP and clustering using ** Harmony embeddings instead of PCA **
# Same three steps as before, written as successive assignments: embed,
# build the neighbour graph, then cluster at resolution 0.5 -- each on the
# first 17 Harmony dimensions.
All_samples_Merged.harmony <- RunUMAP(
  All_samples_Merged.harmony,
  reduction = 'harmony',
  dims = 1:17
)
All_samples_Merged.harmony <- FindNeighbors(
  All_samples_Merged.harmony,
  reduction = "harmony",
  dims = 1:17
)
All_samples_Merged.harmony <- FindClusters(
  All_samples_Merged.harmony,
  resolution = 0.5
)
17:02:31 UMAP embedding parameters a = 0.9922 b = 1.112
17:02:31 Read 49193 rows and found 17 numeric columns
17:02:31 Using Annoy for neighbor search, n_neighbors = 30
17:02:31 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
17:02:35 Writing NN index file to temp file /tmp/RtmpCvwIf9/filef40c919d65246
17:02:35 Searching Annoy index using 1 thread, search_k = 3000
17:02:46 Annoy recall = 100%
17:02:47 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
17:02:49 Initializing from normalized Laplacian + noise (using RSpectra)
17:02:51 Commencing optimization for 200 epochs, with 2086822 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
17:03:07 Optimization finished
Computing nearest neighbor graph
Computing SNN
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49193
Number of edges: 1536941
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9125
Number of communities: 13
Elapsed time: 10 seconds
# visualize
# UMAP coloured by cell line after Harmony, shown next to the pre-integration
# plot stored earlier in `before`.
after <- DimPlot(
  All_samples_Merged.harmony,
  reduction = "umap",
  group.by = "cell_line",
  label = TRUE,
  label.box = TRUE,
  repel = TRUE
)
before | after

# Post-integration clusters (resolution 0.5)
DimPlot(
  All_samples_Merged.harmony,
  reduction = "umap",
  group.by = "RNA_snn_res.0.5",
  label = TRUE,
  label.box = TRUE,
  repel = TRUE
)

# Predicted level-2 cell type labels on the integrated embedding
DimPlot(
  All_samples_Merged.harmony,
  reduction = "umap",
  group.by = "predicted.celltype.l2",
  label = TRUE,
  label.box = TRUE,
  repel = TRUE
)

# Marker panel. "CD26" is not found in the default search locations and is
# taken from the 'ADT' assay instead (see the warning this call emits).
myfeatures <- c(
  "CD3E", "CD4", "CD8A", "CD8B", "GNLY", "MS4A1", "CD14", "LYZ", "MS4A7",
  "FOXP3", "TIGIT", "KIR3DL2", "GZMA", "CCL17", "CCL5", "CD52", "CD7", "CD26"
)

# With several features FeaturePlot() returns a combined multi-panel plot.
# `+` would only modify the last panel, so `&` is used to strip legend, axes
# and grid from every panel.
FeaturePlot(
  All_samples_Merged.harmony,
  reduction = "umap",
  dims = 1:2,
  features = myfeatures,
  ncol = 4,
  order = TRUE
) &
  NoLegend() &
  NoAxes() &
  NoGrid()
Warning: Could not find CD26 in the default search locations, found in 'ADT' assay instead
# save(All_samples_Merged.harmony, file = "All_samples_Merged_Harmony_logNormalize.Robj")