Loading required package: SeuratObject
Loading required package: sp
Attaching package: 'SeuratObject'
The following objects are masked from 'package:base':
intersect, t
── Installed datasets ──────────────────────────────── SeuratData v0.2.2.9001 ──
✔ pbmcref 1.0.0 ✔ pbmcsca 3.0.0
────────────────────────────────────── Key ─────────────────────────────────────
✔ Dataset loaded successfully
❯ Dataset built with a newer version of Seurat than installed
❓ Unknown version of Seurat installed
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats 1.0.0 ✔ readr 2.1.5
✔ ggplot2 3.5.1 ✔ stringr 1.5.1
✔ lubridate 1.9.3 ✔ tibble 3.2.1
✔ purrr 1.0.2 ✔ tidyr 1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Attaching package: 'magrittr'
The following object is masked from 'package:purrr':
set_names
The following object is masked from 'package:tidyr':
extract
Attaching package: 'dbplyr'
The following objects are masked from 'package:dplyr':
ident, sql
Registered S3 method overwritten by 'SeuratDisk':
method from
as.sparse.H5Group Seurat
Attaching shinyBS
Loading required package: ggraph
Attaching package: 'ggraph'
The following object is masked from 'package:sp':
geometry
#Load Seurat Object merged from cell lines and a control(PBMC) after filtration
load("../0-R_Objects/CD4Tcells_annotated_excluding_nonCd4Tcells_ready_for_SCT.robj")
All_samples_Merged <- filtered_seurat
rm(filtered_seurat)
# Display basic metadata summary
head(All_samples_Merged@meta.data)
# Check if columns such as `orig.ident`, `nCount_RNA`, `nFeature_RNA`, `nUMI`, `ngene`, and any other necessary columns exist
required_columns <- c("orig.ident", "nCount_RNA", "nFeature_RNA", "nUMI", "ngene")
missing_columns <- setdiff(required_columns, colnames(All_samples_Merged@meta.data))
if (length(missing_columns) > 0) {
cat("Missing columns:", paste(missing_columns, collapse = ", "), "\n")
} else {
cat("All required columns are present.\n")
}
All required columns are present.
# Check cell counts and features
cat("Number of cells:", ncol(All_samples_Merged), "\n")
Number of cells: 49360
cat("Number of features:", nrow(All_samples_Merged), "\n")
Number of features: 26179
# Verify that each `orig.ident` label has the correct number of cells
cat("Cell counts per group:\n")
Cell counts per group:
print(table(All_samples_Merged$orig.ident))
L1 L2 L3 L4 L5 L6 L7 PBMC PBMC10x
5825 5935 6427 6007 6017 5148 5325 5171 3505
# Check that the cell IDs are unique (which ensures no issues from merging)
if (any(duplicated(colnames(All_samples_Merged)))) {
cat("Warning: There are duplicated cell IDs.\n")
} else {
cat("Cell IDs are unique.\n")
}
Cell IDs are unique.
# Check the assay consistency for RNA
DefaultAssay(All_samples_Merged) <- "RNA"
# Check dimensions of the RNA counts layer using the new method
cat("Dimensions of the RNA counts layer:", dim(GetAssayData(All_samples_Merged, layer = "counts")), "\n")
Dimensions of the RNA counts layer: 36601 49360
cat("Dimensions of the RNA data layer:", dim(GetAssayData(All_samples_Merged, layer = "data")), "\n")
Dimensions of the RNA data layer: 36601 49360
# Check the ADT assay (optional)
if ("ADT" %in% names(All_samples_Merged@assays)) {
cat("ADT assay is present.\n")
cat("Dimensions of the ADT counts layer:", dim(GetAssayData(All_samples_Merged, assay = "ADT", layer = "counts")), "\n")
} else {
cat("ADT assay is not present.\n")
}
ADT assay is present.
Dimensions of the ADT counts layer: 56 49360
# Remove the percent.mito column
All_samples_Merged$percent.mito <- NULL
Warning: Cannot find cell-level meta data named percent.mito
# Set identity classes to an existing column in meta data
Idents(object = All_samples_Merged) <- "cell_line"
All_samples_Merged[["percent.rb"]] <- PercentageFeatureSet(All_samples_Merged,
pattern = "^RP[SL]")
# Convert 'percent.mt' to numeric, replacing "NaN" with 0
All_samples_Merged$percent.rb <- replace(as.numeric(All_samples_Merged$percent.rb), is.na(All_samples_Merged$percent.rb), 0)
# The [[ operator can add columns to object metadata. This is a great place to stash QC stats
All_samples_Merged[["percent.mt"]] <- PercentageFeatureSet(All_samples_Merged, pattern = "^MT-")
# Convert 'percent.mt' to numeric, replacing "NaN" with 0
All_samples_Merged$percent.mt <- replace(as.numeric(All_samples_Merged$percent.mt), is.na(All_samples_Merged$percent.mt), 0)
VlnPlot(All_samples_Merged, features = c("nFeature_RNA",
"nCount_RNA",
"percent.mt",
"percent.rb"),
ncol = 4, pt.size = 0.1) &
theme(plot.title = element_text(size=10))
FeatureScatter(All_samples_Merged, feature1 = "percent.mt",
feature2 = "percent.rb")
VlnPlot(All_samples_Merged, features = c("nFeature_RNA",
"nCount_RNA",
"percent.mt"),
ncol = 3)
FeatureScatter(All_samples_Merged,
feature1 = "percent.mt",
feature2 = "percent.rb") +
geom_smooth(method = 'lm')
`geom_smooth()` using formula = 'y ~ x'
FeatureScatter(All_samples_Merged,
feature1 = "nCount_RNA",
feature2 = "nFeature_RNA") +
geom_smooth(method = 'lm')
`geom_smooth()` using formula = 'y ~ x'
##FeatureScatter is typically used to visualize feature-feature relationships ##for anything calculated by the object, ##i.e. columns in object metadata, PC scores etc.
FeatureScatter(All_samples_Merged,
feature1 = "nCount_RNA",
feature2 = "percent.mt")+
geom_smooth(method = 'lm')
`geom_smooth()` using formula = 'y ~ x'
FeatureScatter(All_samples_Merged,
feature1 = "nCount_RNA",
feature2 = "nFeature_RNA")+
geom_smooth(method = 'lm')
`geom_smooth()` using formula = 'y ~ x'
options(future.globals.maxSize = 8000 * 1024^2) # Set to 8000 MiB (about 8 GB)
# Apply SCTransform
All_samples_Merged <- SCTransform(All_samples_Merged,
verbose = FALSE)
Warning: Different cells and/or features from existing assay SCT
Variables_genes <- All_samples_Merged@assays$SCT@var.features
# Exclude genes starting with "HLA-" AND "Xist" AND "TRBV, TRAV"
Variables_genes_after_exclusion <- Variables_genes[!grepl("^HLA-|^XIST|^TRBV|^TRAV", Variables_genes)]
# Set the seed for clustering steps
set.seed(123)
# These are now standard steps in the Seurat workflow for visualization and clustering
All_samples_Merged <- RunPCA(All_samples_Merged,
features = Variables_genes_after_exclusion,
do.print = TRUE,
pcs.print = 1:5,
genes.print = 15,
npcs = 50)
PC_ 1
Positive: MALAT1, RPS4Y1, IL7R, RPS27, GIMAP7, TXNIP, TCF7, CD52, KLF2, SELL
KRT1, CD7, LTB, EEF1A1, GIMAP5, GIMAP4, LINC00861, RPS12, LEF1, FYB1
PNRC1, RPS29, BTG1, RPL41, MTRNR2L12, RPS28, RPL39, RIPOR2, RPL13, FOXP1
Negative: CCL17, CD74, PPBP, LGALS1, TNFRSF4, IL2RA, FABP5, CA2, MIR155HG, CD70
SEC11C, C12orf75, GAPDH, MT2A, SYT4, CCL5, VIM, TUBA1B, LGALS3, EGFL6
UBE2S, MIIP, MTHFD2, BATF3, TXN, EEF1A2, NPM1, FTL, KRT7, H2AFZ
PC_ 2
Positive: PPBP, XCL1, MT2A, CD74, XCL2, KIR3DL1, CD7, KIR2DL3, ACTB, GAPDH
CST7, IFITM2, IL32, TMSB4X, PAGE5, HIST1H4C, MT1G, KLRC1, RPL22L1, STMN1
KIR3DL2, ID3, KIR2DL4, GSTP1, HIST1H1E, S100A4, ESYT2, ANXA2, SRGN, TUBB
Negative: CCL17, CA2, TNFRSF4, RPS4Y1, MALAT1, SYT4, IL7R, EGFL6, CCL5, CA10
TXNIP, C12orf75, IGHE, TCF7, PRG4, BTG1, SEC11C, SELL, MIR155HG, STC1
TIGIT, RPS27, LINC00861, THY1, CFI, MAP4K4, JUN, RANBP17, ONECUT2, KRT7
PC_ 3
Positive: CD74, PPBP, MALAT1, IL7R, RPS4Y1, B2M, MT2A, CD70, LGALS3, PAGE5
TXNIP, LMNA, TENM3, ANXA1, STAT1, SELL, TSC22D3, LINC00861, FYB1, PIM2
RBPMS, TCF7, AHNAK, LGALS1, BTG1, CD2, CCDC50, SPOCK1, FHIT, CCR7
Negative: CCL17, XCL1, KIR3DL1, CD7, KRT1, XCL2, CST7, LTB, MT1G, NKG7
KLRC1, KIR2DL4, TTC29, SPINT2, CORO1B, GAS5, KIR2DL3, CYBA, GZMM, RAB25
TRGV2, MATK, CA2, EPCAM, KRT81, RPLP1, PLPP1, RPL27A, CEBPD, HIST1H4C
PC_ 4
Positive: KRT1, TTC29, RPLP1, RPL13, GAS5, GZMA, LINC02752, RPS4X, CEBPD, EEF1A1
PPBP, WFDC1, AC069410.1, SP5, TBX4, RPS12, IL4, TNS4, PLCB1, RPS15
IFNGR1, CD74, RPLP0, EEF2, RPS28, EGLN3, FAM9C, RPS8, RPS18, HSPB1
Negative: XCL1, CD7, XCL2, KIR3DL1, KIR2DL3, MT1G, KLRC1, IFITM2, KIR2DL4, S100A4
MALAT1, TMSB4X, KRT81, IL7R, LSP1, RPS4Y1, KRT86, MYO1E, PRKCH, KLRK1
IFITM1, MATK, ESYT2, HIST1H4C, CAPG, PTPN6, TNFRSF18, LGALS1, EPCAM, PLPP1
PC_ 5
Positive: EEF1A2, TNFRSF4, IL2RA, WFDC1, PHLDA2, FN1, S100A4, MIIP, S100A11, HIST1H1C
PXYLP1, S100A6, DUSP4, LGALS1, GPAT3, TIGIT, RDH10, TNFRSF18, CDKN1A, AL136456.1
HOXC9, GATA3, CEP135, MAP1B, TP73, HIST1H2BK, HACD1, GAS2L1, TMEM163, PTGDR2
Negative: CCL17, PPBP, MT2A, CD74, XCL1, LTA, CA2, MIR155HG, CD7, CCL5
CA10, XCL2, MGST3, FCER2, STC1, KIR2DL3, RPL22L1, IQCG, RXFP1, KIR3DL1
CFI, PAGE5, RANBP17, AL590550.1, RYR2, STAT1, RBPMS, SLC7A11-AS1, NDUFV2, RAP1A
# determine dimensionality of the data
ElbowPlot(All_samples_Merged, ndims = 50)
library(ggplot2)
library(RColorBrewer)
# Assuming you have 10 different cell lines, generating a color palette with 10 colors
cell_line_colors <- brewer.pal(10, "Set3")
# Assuming All_samples_Merged$cell_line is a factor or character vector containing cell line names
data <- as.data.frame(table(All_samples_Merged$cell_line))
colnames(data) <- c("cell_line", "nUMI") # Change column name to nUMI
ncells <- ggplot(data, aes(x = cell_line, y = nUMI, fill = cell_line)) +
geom_col() +
theme_classic() +
geom_text(aes(label = nUMI),
position = position_dodge(width = 0.9),
vjust = -0.25) +
scale_fill_manual(values = cell_line_colors) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.5)) + # Adjust the title position
ggtitle("Filtered cells per sample") +
xlab("Cell lines") + # Adjust x-axis label
ylab("Frequency") # Adjust y-axis label
print(ncells)
# TEST-1
# given that the output of RunPCA is "pca"
# replace "so" by the name of your seurat object
pct <- All_samples_Merged[["pca"]]@stdev / sum(All_samples_Merged[["pca"]]@stdev) * 100
cumu <- cumsum(pct) # Calculate cumulative percents for each PC
# Determine the difference between variation of PC and subsequent PC
co2 <- sort(which((pct[-length(pct)] - pct[-1]) > 0.1), decreasing = T)[1] + 1
# last point where change of % of variation is more than 0.1%. -> co2
co2
[1] 15
# TEST-2
# get significant PCs
stdv <- All_samples_Merged[["pca"]]@stdev
sum.stdv <- sum(All_samples_Merged[["pca"]]@stdev)
percent.stdv <- (stdv / sum.stdv) * 100
cumulative <- cumsum(percent.stdv)
co1 <- which(cumulative > 90 & percent.stdv < 5)[1]
co2 <- sort(which((percent.stdv[1:length(percent.stdv) - 1] -
percent.stdv[2:length(percent.stdv)]) > 0.1),
decreasing = T)[1] + 1
min.pc <- min(co1, co2)
min.pc
[1] 15
# Create a dataframe with values
plot_df <- data.frame(pct = percent.stdv,
cumu = cumulative,
rank = 1:length(percent.stdv))
# Elbow plot to visualize
ggplot(plot_df, aes(cumulative, percent.stdv, label = rank, color = rank > min.pc)) +
geom_text() +
geom_vline(xintercept = 90, color = "grey") +
geom_hline(yintercept = min(percent.stdv[percent.stdv > 5]), color = "grey") +
theme_bw()
# Set the seed for clustering steps
set.seed(123)
All_samples_Merged <- FindNeighbors(All_samples_Merged,
dims = 1:16,
verbose = FALSE)
# understanding resolution
All_samples_Merged <- FindClusters(All_samples_Merged,
resolution = c(0.4, 0.5, 0.6, 0.7,0.8))
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49360
Number of edges: 1612882
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9568
Number of communities: 17
Elapsed time: 14 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49360
Number of edges: 1612882
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9485
Number of communities: 19
Elapsed time: 15 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49360
Number of edges: 1612882
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9417
Number of communities: 23
Elapsed time: 12 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49360
Number of edges: 1612882
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9351
Number of communities: 25
Elapsed time: 15 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49360
Number of edges: 1612882
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9293
Number of communities: 24
Elapsed time: 12 seconds
# Set the seed for clustering steps
set.seed(123)
# non-linear dimensionality reduction --------------
All_samples_Merged <- RunUMAP(All_samples_Merged,
dims = 1:16,
verbose = FALSE)
Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session
# note that you can set `label = TRUE` or use the Label Clusters function to help label
# individual clusters
DimPlot(All_samples_Merged,group.by = "cell_line",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(All_samples_Merged,group.by = "predicted.celltype.l2",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(All_samples_Merged,
group.by = "SCT_snn_res.0.4",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(All_samples_Merged,
group.by = "SCT_snn_res.0.5",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(All_samples_Merged,
group.by = "SCT_snn_res.0.6",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(All_samples_Merged,
group.by = "SCT_snn_res.0.7",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(All_samples_Merged,
group.by = "SCT_snn_res.0.8",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
# Set identity classes to an existing column in meta data
Idents(object = All_samples_Merged) <- "SCT_snn_res.0.4"
cluster_table <- table(Idents(All_samples_Merged))
barplot(cluster_table, main = "Number of Cells in Each Cluster",
xlab = "Cluster",
ylab = "Number of Cells",
col = rainbow(length(cluster_table)))
print(cluster_table)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
6267 5916 5285 5184 4983 4348 3787 3380 3303 1836 1802 1469 696 557 305 122
16
120
table(All_samples_Merged$predicted.celltype.l2, All_samples_Merged$SCT_snn_res.0.4)
0 1 2 3 4 5 6 7 8 9 10 11
B intermediate 0 0 0 0 0 0 0 0 0 0 0 0
B memory 8 0 0 0 83 0 22 0 0 112 3 0
CD4 CTL 0 0 0 11 0 0 0 1 0 0 0 0
CD4 Naive 0 0 0 523 0 0 0 1482 0 0 0 0
CD4 Proliferating 5351 2846 5176 7 3906 1023 2986 4 2798 1227 1392 1437
CD4 TCM 846 266 69 4574 549 3312 119 1871 23 427 37 1
CD4 TEM 0 0 0 61 0 1 0 22 0 0 0 0
CD8 Proliferating 0 0 0 0 1 0 0 0 0 1 0 0
CD8 TCM 0 16 0 0 0 1 0 0 0 0 0 0
CD8 TEM 0 6 0 2 3 1 2 0 0 1 0 0
cDC1 0 0 0 0 5 0 0 0 0 0 0 0
cDC2 0 0 0 0 3 0 8 0 0 35 1 0
dnT 0 0 0 2 2 2 2 0 0 2 0 0
HSPC 54 0 3 0 203 0 638 0 474 6 368 0
NK Proliferating 6 2782 24 3 225 7 10 0 8 25 1 31
Treg 2 0 13 1 3 1 0 0 0 0 0 0
12 13 14 15 16
B intermediate 0 2 5 0 0
B memory 1 17 5 1 0
CD4 CTL 0 0 1 0 0
CD4 Naive 0 0 7 0 30
CD4 Proliferating 221 439 103 95 0
CD4 TCM 470 33 118 25 90
CD4 TEM 0 0 0 0 0
CD8 Proliferating 0 0 0 0 0
CD8 TCM 0 0 0 0 0
CD8 TEM 0 0 0 0 0
cDC1 0 3 0 0 0
cDC2 2 4 0 0 0
dnT 1 0 4 0 0
HSPC 0 47 11 1 0
NK Proliferating 0 12 27 0 0
Treg 1 0 24 0 0
clustree(All_samples_Merged, prefix = "SCT_snn_res.")
# InstallData("pbmcref")
#
# # The RunAzimuth function can take a Seurat object as input
# All_samples_Merged <- RunAzimuth(All_samples_Merged, reference = "pbmcref")
DimPlot(All_samples_Merged, group.by = "predicted.celltype.l1",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(All_samples_Merged, group.by = "predicted.celltype.l1",
reduction = "umap",
label.size = 3,
repel = T,
label = F)
DimPlot(All_samples_Merged, group.by = "predicted.celltype.l2",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(All_samples_Merged, group.by = "predicted.celltype.l2",
reduction = "umap",
label.size = 3,
repel = T,
label = F)
DimPlot(All_samples_Merged, group.by = "predicted.celltype.l2",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
table(All_samples_Merged$predicted.celltype.l2, All_samples_Merged$SCT_snn_res.0.2)
0 1 2 3 4 5 6 7 8 9 10 11
B intermediate 0 0 0 0 2 0 3 2 0 0 0 0
B memory 9 1 0 0 88 119 4 28 0 3 0 0
CD4 CTL 0 0 0 0 0 0 12 0 0 0 0 1
CD4 Naive 0 0 0 0 0 0 531 0 1480 0 30 1
CD4 Proliferating 5453 5387 2852 2461 4064 4119 3 3217 5 1450 0 0
CD4 TCM 879 523 267 3320 616 480 4604 108 1839 48 91 55
CD4 TEM 0 0 0 1 0 0 61 0 22 0 0 0
CD8 Proliferating 0 0 0 0 1 1 0 0 0 0 0 0
CD8 TCM 0 0 16 1 0 0 0 0 0 0 0 0
CD8 TEM 0 0 8 1 3 1 0 2 0 0 0 0
cDC1 0 0 0 0 6 0 0 2 0 0 0 0
cDC2 0 2 0 0 3 35 0 10 0 1 0 2
dnT 0 1 1 1 6 3 1 2 0 0 0 0
HSPC 57 1 0 0 215 489 8 672 0 363 0 0
NK Proliferating 5 23 2785 39 264 34 0 9 0 2 0 0
Treg 15 1 0 1 25 0 3 0 0 0 0 0
save(All_samples_Merged, file = "../0-R_Objects/CD4Tcells_annotated_excluding_nonCd4Tcells_SCTnormalized_ready_for_Harmony.robj")
# Load required libraries
library(Seurat)
library(harmony)
Loading required package: Rcpp
library(ggplot2)
# Run Harmony, adjusting for batch effect using "cell_line" or another grouping variable
All_samples_Merged <- RunHarmony(
All_samples_Merged,
group.by.vars = "cell_line", # Replace with the metadata column specifying batch or cell line
theta = 0.5,
assay.use="SCT")
Transposing data matrix
Initializing state using k-means centroids initialization
Harmony 1/10
Harmony 2/10
Harmony converged after 2 iterations
# Check results in harmony embeddings
harmony_embeddings <- Embeddings(All_samples_Merged, reduction = "harmony")
head(harmony_embeddings)
harmony_1 harmony_2 harmony_3 harmony_4 harmony_5
L1_AAACCTGAGGGCTTCC-1 23.91351 -1.532878 -7.6361306 18.23082 2.072152
L1_AAACCTGGTGCAGGTA-1 24.26755 4.144872 -19.0012969 28.50732 5.451155
L1_AAACCTGGTTAAAGTG-1 17.83184 8.710217 -21.6388215 22.45712 4.665238
L1_AAACCTGTCAGGTAAA-1 16.32201 3.677017 -9.8687875 15.15608 2.228064
L1_AAACCTGTCCCTGACT-1 24.95998 -5.155462 -0.3109214 12.30670 1.267936
L1_AAACCTGTCCTTCAAT-1 22.58655 4.460051 -18.1320245 24.60922 4.542139
harmony_6 harmony_7 harmony_8 harmony_9 harmony_10
L1_AAACCTGAGGGCTTCC-1 1.54399555 3.0133502 -1.7735331 2.8998416 4.314101
L1_AAACCTGGTGCAGGTA-1 8.95070506 -6.1461350 9.4154896 5.2014203 5.962379
L1_AAACCTGGTTAAAGTG-1 4.26853519 -5.2933873 4.6744231 -2.7231907 -6.526178
L1_AAACCTGTCAGGTAAA-1 -2.40104454 3.7541904 -2.0645335 -2.1622743 2.205870
L1_AAACCTGTCCCTGACT-1 0.09339488 -0.2825693 -0.6162508 1.1081485 2.375654
L1_AAACCTGTCCTTCAAT-1 4.81878591 -9.6932500 5.7353211 0.2637625 3.119703
harmony_11 harmony_12 harmony_13 harmony_14 harmony_15
L1_AAACCTGAGGGCTTCC-1 1.6944705 -1.240337 0.7484595 -1.288225 5.385517
L1_AAACCTGGTGCAGGTA-1 2.2310350 5.583691 0.1168129 1.314351 -5.668360
L1_AAACCTGGTTAAAGTG-1 -5.1239869 2.065266 4.9916132 18.875831 -7.970709
L1_AAACCTGTCAGGTAAA-1 0.2152444 -3.033954 3.3348114 1.339715 1.698702
L1_AAACCTGTCCCTGACT-1 -1.3783679 -1.626816 0.2171765 0.992034 3.815175
L1_AAACCTGTCCTTCAAT-1 0.2948632 3.274638 -0.7288815 4.079627 -4.760190
harmony_16 harmony_17 harmony_18 harmony_19 harmony_20
L1_AAACCTGAGGGCTTCC-1 -0.01637501 -2.079224 -9.12724080 7.1133353 -2.707350
L1_AAACCTGGTGCAGGTA-1 -3.05476631 -9.531739 -7.41922287 2.7856250 -5.291010
L1_AAACCTGGTTAAAGTG-1 -0.97445074 -7.613931 -5.71184481 2.2399871 -2.880838
L1_AAACCTGTCAGGTAAA-1 -1.76793149 -2.141121 -0.06249675 0.1699334 -1.903440
L1_AAACCTGTCCCTGACT-1 1.44401246 -4.879160 -3.92861091 6.0107236 1.983531
L1_AAACCTGTCCTTCAAT-1 -1.94902299 -9.019891 -6.84514399 4.2525096 -2.060634
harmony_21 harmony_22 harmony_23 harmony_24 harmony_25
L1_AAACCTGAGGGCTTCC-1 -1.51722219 -2.0518292 -10.9145275 -2.969247 -3.09315864
L1_AAACCTGGTGCAGGTA-1 -1.54626302 -2.9586571 -0.1760673 5.167177 0.99817809
L1_AAACCTGGTTAAAGTG-1 -3.57212791 -1.6933570 3.3254064 8.628619 -2.16724211
L1_AAACCTGTCAGGTAAA-1 0.09332724 -0.9948012 1.7769526 -3.662372 -0.03476946
L1_AAACCTGTCCCTGACT-1 -1.76305438 2.0793722 -6.0617226 -4.779955 -4.06360076
L1_AAACCTGTCCTTCAAT-1 -3.64366782 -1.4343277 0.3289138 4.065389 -2.28644027
harmony_26 harmony_27 harmony_28 harmony_29 harmony_30
L1_AAACCTGAGGGCTTCC-1 0.32581173 3.8517254 1.8508846 4.3700963 6.2355266
L1_AAACCTGGTGCAGGTA-1 -0.50081441 3.4765933 3.4670147 -1.9296726 6.2672550
L1_AAACCTGGTTAAAGTG-1 0.05233127 -3.3572984 0.5539682 -0.3589735 -1.1582049
L1_AAACCTGTCAGGTAAA-1 1.43672806 -1.5947224 -1.1896353 -2.5558137 0.2619503
L1_AAACCTGTCCCTGACT-1 2.43356119 0.9815154 -1.4103041 1.8643661 -2.9652746
L1_AAACCTGTCCTTCAAT-1 1.29613674 2.0986986 0.6435490 0.9137773 1.2621962
harmony_31 harmony_32 harmony_33 harmony_34 harmony_35
L1_AAACCTGAGGGCTTCC-1 -3.300070 -0.3802022 0.9888985 5.93080461 -1.2533985
L1_AAACCTGGTGCAGGTA-1 1.307805 4.9495812 5.8688662 -0.55390956 0.4953402
L1_AAACCTGGTTAAAGTG-1 -2.607976 -1.1289525 -0.9691122 -1.34664890 1.7718922
L1_AAACCTGTCAGGTAAA-1 -1.057791 1.2288326 -2.6791549 -0.23077521 -1.1078223
L1_AAACCTGTCCCTGACT-1 -4.448978 -5.0922422 -3.9084826 0.06494545 -1.1045282
L1_AAACCTGTCCTTCAAT-1 -1.772732 -5.3081921 1.4208485 -1.00693984 -0.5721547
harmony_36 harmony_37 harmony_38 harmony_39 harmony_40
L1_AAACCTGAGGGCTTCC-1 -0.7307101 1.91255775 -1.0816583 -3.5082091 -1.09637999
L1_AAACCTGGTGCAGGTA-1 -8.4255248 1.95500027 -3.7926972 1.4385267 1.21614864
L1_AAACCTGGTTAAAGTG-1 5.3383304 -0.16435386 -1.3488185 -2.8593892 -0.01271721
L1_AAACCTGTCAGGTAAA-1 -3.8873613 -1.83198455 0.0966273 2.4599165 -0.55885853
L1_AAACCTGTCCCTGACT-1 5.2487056 -0.01978973 1.7974629 -0.7513981 -4.54239431
L1_AAACCTGTCCTTCAAT-1 6.8911152 2.37436864 -0.3360580 -1.2148025 -1.65939542
harmony_41 harmony_42 harmony_43 harmony_44 harmony_45
L1_AAACCTGAGGGCTTCC-1 1.619259 2.3014878 -1.6358935 2.1934358 1.522109
L1_AAACCTGGTGCAGGTA-1 0.741981 1.8157008 -0.7395320 -0.6410387 -5.618954
L1_AAACCTGGTTAAAGTG-1 -2.440635 0.9033992 0.1374661 -0.5033722 1.630184
L1_AAACCTGTCAGGTAAA-1 1.560952 0.8531869 0.8270163 -2.3695360 -2.475027
L1_AAACCTGTCCCTGACT-1 2.337286 -2.2757646 0.8910871 2.4973960 2.474064
L1_AAACCTGTCCTTCAAT-1 -2.892782 -2.2442776 1.7544697 -1.9059912 -2.018893
harmony_46 harmony_47 harmony_48 harmony_49 harmony_50
L1_AAACCTGAGGGCTTCC-1 -0.02042831 -1.0181409 -2.7355636 -7.2194315 -4.852359
L1_AAACCTGGTGCAGGTA-1 -1.19637697 0.9745353 -2.4222709 -1.2042825 -2.824948
L1_AAACCTGGTTAAAGTG-1 -0.49453017 -2.2926609 0.3771045 -1.2441376 2.252297
L1_AAACCTGTCAGGTAAA-1 -3.77511403 0.8878575 4.6043015 0.6029046 2.390522
L1_AAACCTGTCCCTGACT-1 -0.58493741 -0.2921591 -0.7010308 -3.5333577 1.170021
L1_AAACCTGTCCTTCAAT-1 -3.40636447 2.3036606 0.7709116 -1.5313024 -2.252222
# Set the seed for clustering steps
set.seed(123)
# Run UMAP on Harmony embeddings
All_samples_Merged <- RunUMAP(All_samples_Merged, reduction = "harmony", dims = 1:16)
19:13:14 UMAP embedding parameters a = 0.9922 b = 1.112
19:13:14 Read 49360 rows and found 16 numeric columns
19:13:14 Using Annoy for neighbor search, n_neighbors = 30
19:13:14 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
19:13:20 Writing NN index file to temp file /tmp/RtmpvPQZFB/file278b05d281e1e
19:13:20 Searching Annoy index using 1 thread, search_k = 3000
19:13:36 Annoy recall = 100%
19:13:38 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
19:13:43 Initializing from normalized Laplacian + noise (using RSpectra)
19:13:49 Commencing optimization for 200 epochs, with 2039474 positive edges
19:14:51 Optimization finished
# Set the seed for clustering steps
set.seed(123)
# Optionally, find neighbors and clusters (if you plan to do clustering analysis)
All_samples_Merged <- FindNeighbors(All_samples_Merged, reduction = "harmony", dims = 1:16)
Computing nearest neighbor graph
Computing SNN
All_samples_Merged <- FindClusters(All_samples_Merged, resolution = 0.5) # Adjust resolution as needed
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49360
Number of edges: 1597969
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9232
Number of communities: 13
Elapsed time: 23 seconds
# Visualize UMAP
DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line", label = TRUE, pt.size = 0.5) +
ggtitle("UMAP of Harmony-Integrated Data")
# Visualize UMAP with batch/cell line information
DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line", label = TRUE, pt.size = 0.5) +
ggtitle("UMAP - Colored by Cell Line (After Harmony Integration)")
# Visualize UMAP with clusters
DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters", label = TRUE, pt.size = 0.5) +
ggtitle("UMAP - Clustered Data (After Harmony Integration)")
# Visualize specific cell types or other metadata
DimPlot(All_samples_Merged, reduction = "umap", group.by = "predicted.celltype.l2", label = TRUE, pt.size = 0.5) +
ggtitle("UMAP - Cell Types After Harmony Integration")
# Generate a DimPlot for cell cycle phases
DimPlot(All_samples_Merged, reduction = "umap", group.by = "Phase", label = TRUE, repel = TRUE) +
ggtitle("UMAP Colored by Cell Cycle Phase")
# Alternatively, visualize individual scores (e.g., S.Score and G2M.Score)
FeaturePlot(All_samples_Merged, features = c("S.Score", "G2M.Score"), reduction = "umap", min.cutoff = "q10", max.cutoff = "q90") +
ggtitle("UMAP Colored by Cell Cycle Scores")
#save(All_samples_Merged, file = "../../../0-IMP-OBJECTS/All_Samples_Merged_with_10x_Azitmuth_Annotated_SCT_HPC_without_harmony_integration.robj")