#Load Seurat Object merged from cell lines and a control(PBMC) after filtration
load("../0-R_Objects/SS_CD4_Tcells_Azimuth_Annotated_PBMC10x_final_for_SCT_and_Integration.robj")
All_samples_Merged <- filtered_seurat
rm(filtered_seurat)
# Display basic metadata summary
head(All_samples_Merged@meta.data)
# Check if columns such as `orig.ident`, `nCount_RNA`, `nFeature_RNA`, `nUMI`, `ngene`, and any other necessary columns exist
required_columns <- c("orig.ident", "nCount_RNA", "nFeature_RNA", "nUMI", "ngene")
missing_columns <- setdiff(required_columns, colnames(All_samples_Merged@meta.data))
if (length(missing_columns) > 0) {
cat("Missing columns:", paste(missing_columns, collapse = ", "), "\n")
} else {
cat("All required columns are present.\n")
}
All required columns are present.
# Check cell counts and features
cat("Number of cells:", ncol(All_samples_Merged), "\n")
Number of cells: 49372
cat("Number of features:", nrow(All_samples_Merged), "\n")
Number of features: 36601
# Verify that each `orig.ident` label has the correct number of cells
cat("Cell counts per group:\n")
Cell counts per group:
print(table(All_samples_Merged$orig.ident))
L1 L2 L3 L4 L5 L6 L7 PBMC PBMC10x
5825 5935 6428 6007 6022 5148 5331 5171 3505
# Check that the cell IDs are unique (which ensures no issues from merging)
if (any(duplicated(colnames(All_samples_Merged)))) {
cat("Warning: There are duplicated cell IDs.\n")
} else {
cat("Cell IDs are unique.\n")
}
Cell IDs are unique.
# Check the assay consistency for RNA
DefaultAssay(All_samples_Merged) <- "RNA"
# Check dimensions of the RNA counts layer using the new method
cat("Dimensions of the RNA counts layer:", dim(GetAssayData(All_samples_Merged, layer = "counts")), "\n")
Dimensions of the RNA counts layer: 36601 49372
cat("Dimensions of the RNA data layer:", dim(GetAssayData(All_samples_Merged, layer = "data")), "\n")
Dimensions of the RNA data layer: 36601 49372
# Check the ADT assay (optional)
if ("ADT" %in% names(All_samples_Merged@assays)) {
cat("ADT assay is present.\n")
cat("Dimensions of the ADT counts layer:", dim(GetAssayData(All_samples_Merged, assay = "ADT", layer = "counts")), "\n")
} else {
cat("ADT assay is not present.\n")
}
ADT assay is present.
Dimensions of the ADT counts layer: 56 49372
# Remove the percent.mito column
All_samples_Merged$percent.mito <- NULL
# Set identity classes to an existing column in meta data
Idents(object = All_samples_Merged) <- "cell_line"
All_samples_Merged[["percent.rb"]] <- PercentageFeatureSet(All_samples_Merged,
pattern = "^RP[SL]")
# Convert 'percent.mt' to numeric, replacing "NaN" with 0
All_samples_Merged$percent.rb <- replace(as.numeric(All_samples_Merged$percent.rb), is.na(All_samples_Merged$percent.rb), 0)
# The [[ operator can add columns to object metadata. This is a great place to stash QC stats
All_samples_Merged[["percent.mt"]] <- PercentageFeatureSet(All_samples_Merged, pattern = "^MT-")
# Convert 'percent.mt' to numeric, replacing "NaN" with 0
All_samples_Merged$percent.mt <- replace(as.numeric(All_samples_Merged$percent.mt), is.na(All_samples_Merged$percent.mt), 0)
VlnPlot(All_samples_Merged, features = c("nFeature_RNA",
"nCount_RNA",
"percent.mt",
"percent.rb"),
ncol = 4, pt.size = 0.1) &
theme(plot.title = element_text(size=10))
FeatureScatter(All_samples_Merged, feature1 = "percent.mt",
feature2 = "percent.rb")
VlnPlot(All_samples_Merged, features = c("nFeature_RNA",
"nCount_RNA",
"percent.mt"),
ncol = 3)
FeatureScatter(All_samples_Merged,
feature1 = "percent.mt",
feature2 = "percent.rb") +
geom_smooth(method = 'lm')
FeatureScatter(All_samples_Merged,
feature1 = "nCount_RNA",
feature2 = "nFeature_RNA") +
geom_smooth(method = 'lm')
##FeatureScatter is typically used to visualize feature-feature relationships ##for anything calculated by the object, ##i.e. columns in object metadata, PC scores etc.
FeatureScatter(All_samples_Merged,
feature1 = "nCount_RNA",
feature2 = "percent.mt")+
geom_smooth(method = 'lm')
FeatureScatter(All_samples_Merged,
feature1 = "nCount_RNA",
feature2 = "nFeature_RNA")+
geom_smooth(method = 'lm')
options(future.globals.maxSize = 8000 * 1024^2) # Set to 8000 MiB (about 8 GB)
# Apply SCTransform
All_samples_Merged <- SCTransform(All_samples_Merged,
vars.to.regress = c("percent.mt","percent.rb", "nCount_RNA"),
verbose = FALSE)
Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.
table(All_samples_Merged$Patient_origin)
0 1 2 3 NA
3505 11760 12435 16501 5171
Variables_genes <- All_samples_Merged@assays$SCT@var.features
# Exclude genes starting with "HLA-" AND "Xist" AND "TRBV, TRAV"
Variables_genes_after_exclusion <- Variables_genes[!grepl("^HLA-|^XIST|^TRBV|^TRAV", Variables_genes)]
# Set the seed for clustering steps
set.seed(123)
# These are now standard steps in the Seurat workflow for visualization and clustering
All_samples_Merged <- RunPCA(All_samples_Merged,
features = Variables_genes_after_exclusion,
do.print = TRUE,
pcs.print = 1:5,
genes.print = 15,
npcs = 50)
PC_ 1
Positive: CCL17, CA2, TNFRSF4, SYT4, MIR155HG, SEC11C, C12orf75, CCL5, EGFL6, IL2RA
CA10, CD74, IGHE, LTA, KRT7, STC1, PRG4, TIGIT, ALOX5AP, THY1
CFI, RXFP1, HDGFL3, EEF1A2, RANBP17, ONECUT2, MIIP, BACE2, SLC35F3, BATF3
Negative: CD7, MALAT1, XCL1, IL7R, RPS4Y1, XCL2, KIR3DL1, CD52, LTB, KIR2DL3
TXNIP, TCF7, KLF2, GIMAP7, MT1G, KLRC1, CST7, KIR2DL4, SELL, LEF1
PTPRC, FYB1, TMSB4X, LINC00861, ESYT2, PRKCH, B2M, PNRC1, SARAF, RPS27
PC_ 2
Positive: CCL17, XCL1, CD7, KIR3DL1, XCL2, LTB, CST7, CA2, MT1G, KLRC1
TNFRSF4, KIR2DL4, PLPP1, SPINT2, CYBA, KIR2DL3, SYT4, MATK, KRT81, GZMM
KRT86, ESYT2, HIST1H1B, MYO1E, EPCAM, TRGV2, C12orf75, KLRK1, CORO1B, CXCR3
Negative: PPBP, CD74, MT2A, CD70, PAGE5, RPL22L1, LMNA, TENM3, LGALS3, FABP5
STAT1, RBPMS, CCDC50, GSTP1, GAPDH, PPP2R2B, IQCG, FTL, MACROD2, SLC7A11-AS1
CTAG2, SPOCK1, LGALS1, BASP1, NDUFV2, ANXA1, VIM, CDKN2A, FCER2, PIM2
PC_ 3
Positive: MALAT1, IL7R, RPS4Y1, TXNIP, TNFRSF4, SELL, TCF7, EEF1A2, LINC00861, BTG1
FYB1, IL2RA, KLF2, RPS27, GIMAP7, PNRC1, JUN, RIPOR2, PIK3IP1, ANK3
SARAF, FOXP1, GIMAP5, MAML2, MTRNR2L12, ZBTB20, LEF1, PCED1B-AS1, RPS29, FHIT
Negative: XCL1, KIR3DL1, XCL2, PPBP, CD7, MT2A, CST7, KIR2DL3, LTB, NKG7
ACTB, CD74, KRT1, MT1G, KLRC1, GAPDH, HIST1H4C, KIR3DL2, GZMA, IL32
ID3, KIR2DL4, ESYT2, C1QBP, CXCR3, IFITM2, TRGV2, NME2, KRT81, EPCAM
PC_ 4
Positive: CCL17, PPBP, CD7, MIR155HG, MALAT1, XCL1, LTA, RPS4Y1, IL7R, CA2
CCL5, FCER2, TXNIP, TCF7, XCL2, MAML2, LINC00861, CA10, SLC7A11-AS1, FHIT
STC1, RPL22L1, DNAJC12, ENPP2, CSMD1, MGST3, RXFP1, GSTP1, MT2A, AC068672.2
Negative: S100A4, EEF1A2, KRT1, WFDC1, S100A6, TNFRSF4, S100A11, PHLDA2, LGALS1, IL2RA
FN1, CDKN1A, IL32, TTC29, HIST1H1C, DUSP4, GAS5, EGLN3, MIIP, TNFRSF18
CYBA, CORO1B, GATA3, GZMA, PXYLP1, GPAT3, VIM, LINC02752, CEBPD, NKG7
PC_ 5
Positive: PPBP, EEF1A2, FABP5, IL2RA, CD7, GSTP1, MIIP, RDH10, TNFRSF4, XCL1
ENPP2, RPS4Y1, DNAJC12, HSP90AA1, AC068672.2, HSPE1, HSPD1, PGAM1, MGST1, DDIT4
IFITM1, EIF5A, C1QBP, CSMD1, FTL, IFITM2, FN1, FAM162A, PHLDA2, LY6E
Negative: CCL17, KRT1, GZMA, MT2A, CD74, LGALS3, TTC29, CCL1, NKG7, GZMB
SERPINE1, CSF2, MALAT1, RYR2, PTGIS, CCL4, CYP1B1, AC114977.1, S100A4, TENM3
TNFSF10, MAL, LINC02752, NCR3, PLD1, CEBPD, GAS5, RUNX1, TSC22D3, IL32
# determine dimensionality of the data
ElbowPlot(All_samples_Merged, ndims = 50)
NA
NA
library(ggplot2)
library(RColorBrewer)
# Assuming you have 10 different cell lines, generating a color palette with 10 colors
cell_line_colors <- brewer.pal(10, "Set3")
# Assuming All_samples_Merged$cell_line is a factor or character vector containing cell line names
data <- as.data.frame(table(All_samples_Merged$cell_line))
colnames(data) <- c("cell_line", "nUMI") # Change column name to nUMI
ncells <- ggplot(data, aes(x = cell_line, y = nUMI, fill = cell_line)) +
geom_col() +
theme_classic() +
geom_text(aes(label = nUMI),
position = position_dodge(width = 0.9),
vjust = -0.25) +
scale_fill_manual(values = cell_line_colors) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.5)) + # Adjust the title position
ggtitle("Filtered cells per sample") +
xlab("Cell lines") + # Adjust x-axis label
ylab("Frequency") # Adjust y-axis label
print(ncells)
# TEST-1
# given that the output of RunPCA is "pca"
# replace "so" by the name of your seurat object
pct <- All_samples_Merged[["pca"]]@stdev / sum(All_samples_Merged[["pca"]]@stdev) * 100
cumu <- cumsum(pct) # Calculate cumulative percents for each PC
# Determine the difference between variation of PC and subsequent PC
co2 <- sort(which((pct[-length(pct)] - pct[-1]) > 0.1), decreasing = T)[1] + 1
# last point where change of % of variation is more than 0.1%. -> co2
co2
[1] 15
# TEST-2
# get significant PCs
stdv <- All_samples_Merged[["pca"]]@stdev
sum.stdv <- sum(All_samples_Merged[["pca"]]@stdev)
percent.stdv <- (stdv / sum.stdv) * 100
cumulative <- cumsum(percent.stdv)
co1 <- which(cumulative > 90 & percent.stdv < 5)[1]
co2 <- sort(which((percent.stdv[1:length(percent.stdv) - 1] -
percent.stdv[2:length(percent.stdv)]) > 0.1),
decreasing = T)[1] + 1
min.pc <- min(co1, co2)
min.pc
[1] 15
# Create a dataframe with values
plot_df <- data.frame(pct = percent.stdv,
cumu = cumulative,
rank = 1:length(percent.stdv))
# Elbow plot to visualize
ggplot(plot_df, aes(cumulative, percent.stdv, label = rank, color = rank > min.pc)) +
geom_text() +
geom_vline(xintercept = 90, color = "grey") +
geom_hline(yintercept = min(percent.stdv[percent.stdv > 5]), color = "grey") +
theme_bw()
NA
NA
NA
# Set the seed for clustering steps
set.seed(123)
All_samples_Merged <- FindNeighbors(All_samples_Merged,
dims = 1:16,
verbose = FALSE)
# understanding resolution
All_samples_Merged <- FindClusters(All_samples_Merged,
resolution = c(0.4, 0.5, 0.6, 0.7,0.8))
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49372
Number of edges: 1623811
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9550
Number of communities: 13
Elapsed time: 15 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49372
Number of edges: 1623811
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9459
Number of communities: 18
Elapsed time: 16 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49372
Number of edges: 1623811
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9383
Number of communities: 21
Elapsed time: 15 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49372
Number of edges: 1623811
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9318
Number of communities: 23
Elapsed time: 14 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49372
Number of edges: 1623811
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9260
Number of communities: 23
Elapsed time: 16 seconds
# Set the seed for clustering steps
set.seed(123)
# non-linear dimensionality reduction --------------
All_samples_Merged <- RunUMAP(All_samples_Merged,
dims = 1:16,
verbose = FALSE)
Avis : The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session
# note that you can set `label = TRUE` or use the Label Clusters function to help label
# individual clusters
DimPlot(All_samples_Merged,group.by = "cell_line",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(All_samples_Merged,group.by = "predicted.celltype.l2",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(All_samples_Merged,
group.by = "SCT_snn_res.0.4",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(All_samples_Merged,
group.by = "SCT_snn_res.0.5",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(All_samples_Merged,
group.by = "SCT_snn_res.0.6",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(All_samples_Merged,
group.by = "SCT_snn_res.0.7",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(All_samples_Merged,
group.by = "SCT_snn_res.0.8",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
# Set identity classes to an existing column in meta data
Idents(object = All_samples_Merged) <- "SCT_snn_res.0.4"
cluster_table <- table(Idents(All_samples_Merged))
barplot(cluster_table, main = "Number of Cells in Each Cluster",
xlab = "Cluster",
ylab = "Number of Cells",
col = rainbow(length(cluster_table)))
print(cluster_table)
0 1 2 3 4 5 6 7 8 9 10 11 12
6236 5931 5922 5852 5150 5140 5004 3831 3369 1753 547 418 219
table(All_samples_Merged$predicted.celltype.l2, All_samples_Merged$SCT_snn_res.0.4)
0 1 2 3 4 5 6 7 8 9 10 11 12
B intermediate 0 3 0 0 0 0 0 0 0 0 2 2 0
B memory 8 3 0 1 113 0 81 22 0 2 19 2 1
CD14 Mono 0 0 0 0 7 1 0 4 0 0 0 0 0
CD4 CTL 0 1 0 0 0 11 0 0 1 0 0 0 0
CD4 Naive 0 7 0 0 0 522 0 0 1480 0 0 33 0
CD4 Proliferating 5340 2471 2848 5308 4023 3 3928 3030 6 1348 431 98 177
CD4 TCM 829 3390 267 515 448 4528 549 113 1860 42 30 220 39
CD4 TEM 0 1 0 0 0 61 0 0 22 0 0 0 0
CD8 Proliferating 0 0 0 0 1 0 1 0 0 0 0 0 0
CD8 TCM 0 1 16 0 0 0 0 0 0 0 0 0 0
CD8 TEM 0 1 6 0 1 0 3 2 0 0 0 2 0
cDC1 0 0 0 0 0 0 5 0 0 0 3 0 0
cDC2 0 0 0 2 35 2 3 8 0 1 2 0 0
dnT 0 1 0 1 3 1 1 2 0 0 0 6 0
HSPC 54 9 0 1 486 1 203 640 0 359 48 3 1
NK Proliferating 5 39 2785 23 33 0 228 10 0 1 12 25 0
Treg 0 4 0 1 0 10 2 0 0 0 0 27 1
clustree(All_samples_Merged, prefix = "SCT_snn_res.")
# InstallData("pbmcref")
#
# # The RunAzimuth function can take a Seurat object as input
# All_samples_Merged <- RunAzimuth(All_samples_Merged, reference = "pbmcref")
DimPlot(All_samples_Merged, group.by = "predicted.celltype.l1",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(All_samples_Merged, group.by = "predicted.celltype.l1",
reduction = "umap",
label.size = 3,
repel = T,
label = F)
DimPlot(All_samples_Merged, group.by = "predicted.celltype.l2",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(All_samples_Merged, group.by = "predicted.celltype.l2",
reduction = "umap",
label.size = 3,
repel = T,
label = F)
DimPlot(All_samples_Merged, group.by = "predicted.celltype.l2",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
table(All_samples_Merged$predicted.celltype.l2, All_samples_Merged$SCT_snn_res.0.2)
Error in `x[[i, drop = TRUE]]`:
! 'SCT_snn_res.0.2' not found in this Seurat object
Did you mean "SCT_snn_res.0.4"?
Backtrace:
1. base::table(All_samples_Merged$predicted.celltype.l2, All_samples_Merged$SCT_snn_res.0.2)
3. SeuratObject:::`$.Seurat`(All_samples_Merged, SCT_snn_res.0.2)
5. SeuratObject:::`[[.Seurat`(x, i, drop = TRUE)
#save(All_samples_Merged, file = "../0-R_Objects/CD4Tcells_annotated_excluding_nonCd4Tcells_SCTnormalized_ready_for_Harmony.robj")
# library(Seurat)
#
# # Assuming 'All_samples_Merged' is your Seurat object
# # Access the metadata
# metadata <- All_samples_Merged@meta.data
#
# # Rename values in the 'Patient_origin' column
# metadata$Patient_origin <- as.character(metadata$Patient_origin) # Convert to character if not already
# metadata$Patient_origin[metadata$Patient_origin == "0"] <- "PBMC_10x"
# metadata$Patient_origin[metadata$Patient_origin == "NA"] <- "PBMC"
#
# # Reassign the updated metadata back to the Seurat object
# All_samples_Merged@meta.data <- metadata
#
# # Check the changes
# table(All_samples_Merged$Patient_origin)
# Run Harmony, adjusting for batch effect using "cell_line" or another grouping variable
All_samples_Merged <- RunHarmony(
All_samples_Merged,
group.by.vars = c ("cell_line"), # Replace with the metadata column specifying batch or cell line
theta = c(0.5),
assay.use="SCT")
Transposing data matrix
Initializing state using k-means centroids initialization
Harmony 1/10
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Harmony 2/10
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Harmony converged after 2 iterations
# Check results in harmony embeddings
harmony_embeddings <- Embeddings(All_samples_Merged, reduction = "harmony")
head(harmony_embeddings)
harmony_1 harmony_2 harmony_3 harmony_4 harmony_5 harmony_6 harmony_7 harmony_8
L1_AAACCTGAGGGCTTCC-1 2.617301 -2.5740128 -6.9785092 -9.963976 1.9262977 0.3073531 2.7630107 -0.7615943
L1_AAACCTGGTGCAGGTA-1 -12.182202 5.1357982 -1.6922551 -11.213984 -11.3307965 18.8776481 3.1648233 0.8259918
L1_AAACCTGGTTAAAGTG-1 -16.673313 7.1358449 -5.7061418 -9.484867 -5.0466918 17.4927945 3.3020776 -8.9290459
L1_AAACCTGTCAGGTAAA-1 2.198191 -0.3845826 -8.0554504 -1.794563 4.3935921 2.4783219 0.3225535 -5.1666687
L1_AAACCTGTCCCTGACT-1 1.311607 -1.7761901 0.2349638 -4.394121 0.2072502 2.2932423 1.2836667 -1.2376690
L1_AAACCTGTCCTTCAAT-1 -14.093713 6.1187128 1.4139170 -4.618444 -8.4606039 17.7027820 5.3210451 -7.0832953
harmony_9 harmony_10 harmony_11 harmony_12 harmony_13 harmony_14 harmony_15 harmony_16
L1_AAACCTGAGGGCTTCC-1 1.80777686 -4.2688348 -5.6315585 -4.285121 1.0516199 7.76571570 2.11259817 -1.41256126
L1_AAACCTGGTGCAGGTA-1 6.57957683 -2.9356613 0.7045351 1.081364 3.3610418 -1.40638545 1.36950652 -3.39401110
L1_AAACCTGGTTAAAGTG-1 -3.75207937 0.8643659 -6.4896378 8.437656 -10.3235928 -3.32271030 -2.30024257 -5.88039243
L1_AAACCTGTCAGGTAAA-1 1.41409690 2.3493113 -0.3296328 -0.844764 -0.5923472 -6.22029403 -0.69792485 -0.08510601
L1_AAACCTGTCCCTGACT-1 0.05744622 0.4557658 -3.4698779 -3.563556 -0.9555034 2.10047318 0.05370564 -3.34644807
L1_AAACCTGTCCTTCAAT-1 6.44855266 -3.1693495 -3.3092398 -2.039784 0.9710283 0.03889259 0.65286278 -6.89057172
harmony_17 harmony_18 harmony_19 harmony_20 harmony_21 harmony_22 harmony_23 harmony_24
L1_AAACCTGAGGGCTTCC-1 -1.4770978 3.1559593 1.803226 -1.13159473 2.038408 -8.0970709 -0.2460011 0.9657454
L1_AAACCTGGTGCAGGTA-1 -4.4044820 1.1263228 5.365147 1.66770742 -2.497596 -0.6011762 -3.2296104 -0.7940466
L1_AAACCTGGTTAAAGTG-1 -5.1686191 1.7687423 3.158138 1.89925394 -2.260360 2.8856891 -3.9905323 2.3630676
L1_AAACCTGTCAGGTAAA-1 0.7466516 -0.2194188 1.738237 -0.07844498 -1.324692 0.4325034 1.3093719 0.6940631
L1_AAACCTGTCCCTGACT-1 0.4775565 4.5184571 -3.751190 -1.26895757 3.547170 -3.1059550 1.4264755 3.5471360
L1_AAACCTGTCCTTCAAT-1 -5.0595886 3.6315631 2.730217 1.83782883 -1.949829 -0.2926833 -3.1520561 2.9018668
harmony_25 harmony_26 harmony_27 harmony_28 harmony_29 harmony_30 harmony_31 harmony_32
L1_AAACCTGAGGGCTTCC-1 -3.4493806 -2.196457 -4.985321 2.3578722 -0.3527857 2.2414364 -0.6067463 -3.1828105
L1_AAACCTGGTGCAGGTA-1 -0.6700085 -2.843145 -1.517122 5.3136481 1.1478346 -1.6728444 5.4647074 -4.0017484
L1_AAACCTGGTTAAAGTG-1 3.6082368 1.920973 -1.857737 0.4592548 3.2061711 2.6117362 0.6099752 1.2792353
L1_AAACCTGTCAGGTAAA-1 -3.0721768 1.204302 1.315480 0.9678093 -0.6399830 0.9166997 0.2968259 0.5700036
L1_AAACCTGTCCCTGACT-1 -3.7645588 -0.820939 1.170662 -4.3941303 -0.8557999 3.7813045 -2.8122822 4.4274641
L1_AAACCTGTCCTTCAAT-1 0.5363743 -3.392188 -3.249564 1.6581933 1.8461549 1.2579632 0.5433156 2.7811773
harmony_33 harmony_34 harmony_35 harmony_36 harmony_37 harmony_38 harmony_39 harmony_40
L1_AAACCTGAGGGCTTCC-1 2.2540092 1.0788363 -0.2834453 0.5309295 2.806917 2.5203044 -1.39193701 -0.1799803
L1_AAACCTGGTGCAGGTA-1 -0.6802385 1.0587013 -6.7150577 -1.1149637 1.006155 -4.3146141 -0.58423232 0.7540082
L1_AAACCTGGTTAAAGTG-1 0.8123532 -2.6644868 5.2208927 2.5664639 -1.140591 1.6672885 -0.49443068 -2.5417894
L1_AAACCTGTCAGGTAAA-1 -1.5078551 0.6039702 -2.3785528 -4.2319542 1.800322 -0.6884364 -0.02694323 0.1976129
L1_AAACCTGTCCCTGACT-1 2.2981680 -0.4327490 4.6097258 -0.8509594 2.097483 3.5128042 2.54085459 1.3511861
L1_AAACCTGTCCTTCAAT-1 5.9363468 -1.5386663 7.1007397 2.9077585 2.243753 1.1726497 1.22165578 -2.2191215
harmony_41 harmony_42 harmony_43 harmony_44 harmony_45 harmony_46 harmony_47 harmony_48
L1_AAACCTGAGGGCTTCC-1 0.9244181 0.7196574 -0.8388805 -1.357065 0.4260469 1.6670977 0.81475716 -2.37825609
L1_AAACCTGGTGCAGGTA-1 -1.6400091 0.5963385 0.4558914 3.568463 -1.5790209 1.1544696 0.09991011 -0.04473024
L1_AAACCTGGTTAAAGTG-1 -0.2049213 0.3347294 -1.0847022 -1.909873 0.2281176 -1.9578460 0.16353840 -0.40066519
L1_AAACCTGTCAGGTAAA-1 -0.3308064 1.5609197 -1.3259031 2.109858 -2.6234817 -0.5868533 2.99142588 0.76452653
L1_AAACCTGTCCCTGACT-1 4.2628969 0.4448722 0.3824148 -3.344080 -0.2231855 0.6270584 -0.12491723 -2.91670050
L1_AAACCTGTCCTTCAAT-1 3.6465740 0.9232823 -2.7476304 1.915347 -3.0860900 3.1274649 2.68333553 1.79132213
harmony_49 harmony_50
L1_AAACCTGAGGGCTTCC-1 -2.10834671 -2.25379787
L1_AAACCTGGTGCAGGTA-1 0.63865505 -0.91264656
L1_AAACCTGGTTAAAGTG-1 -0.01666697 1.23983607
L1_AAACCTGTCAGGTAAA-1 2.38645083 0.05929537
L1_AAACCTGTCCCTGACT-1 0.18960362 0.35566159
L1_AAACCTGTCCTTCAAT-1 0.92261970 -2.17149757
# Set the seed for clustering steps
set.seed(123)
# Run UMAP on Harmony embeddings
All_samples_Merged <- RunUMAP(All_samples_Merged, reduction = "harmony", dims = 1:16)
21:02:57 UMAP embedding parameters a = 0.9922 b = 1.112
21:02:57 Read 49372 rows and found 16 numeric columns
21:02:57 Using Annoy for neighbor search, n_neighbors = 30
21:02:57 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
21:03:03 Writing NN index file to temp file /tmp/RtmpavrX7A/file17299bab4823e
21:03:03 Searching Annoy index using 1 thread, search_k = 3000
21:03:25 Annoy recall = 100%
21:03:26 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
21:03:30 Initializing from normalized Laplacian + noise (using RSpectra)
21:03:32 Commencing optimization for 200 epochs, with 2044208 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
21:04:01 Optimization finished
# Set the seed for clustering steps
set.seed(123)
# Optionally, find neighbors and clusters (if you plan to do clustering analysis)
All_samples_Merged <- FindNeighbors(All_samples_Merged, reduction = "harmony", dims = 1:16)
Computing nearest neighbor graph
Computing SNN
All_samples_Merged <- FindClusters(All_samples_Merged, resolution = 0.5) # Adjust resolution as needed
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49372
Number of edges: 1574085
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9126
Number of communities: 12
Elapsed time: 15 seconds
# Visualize UMAP
DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line", label = TRUE, pt.size = 0.5) +
ggtitle("UMAP of Harmony-Integrated Data")
# Visualize UMAP with batch/cell line information
DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line", label = TRUE, pt.size = 0.5) +
ggtitle("UMAP - Colored by Cell Line (After Harmony Integration)")
# Visualize UMAP with clusters
DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters", label = TRUE, pt.size = 0.5) +
ggtitle("UMAP - Clustered Data (After Harmony Integration)")
# Visualize specific cell types or other metadata
DimPlot(All_samples_Merged, reduction = "umap", group.by = "predicted.celltype.l2", label = TRUE, pt.size = 0.5) +
ggtitle("UMAP - Cell Types After Harmony Integration")
NA
NA
NA
NA
NA
NA
#save(All_samples_Merged, file = "../../../0-IMP-OBJECTS/All_Samples_Merged_with_10x_Azitmuth_Annotated_SCT_HPC_without_harmony_integration.robj")