Loading required package: SeuratObject
Loading required package: sp
Attaching package: 'SeuratObject'
The following objects are masked from 'package:base':
intersect, t
── Installed datasets ──────────────────────────────── SeuratData v0.2.2.9001 ──
✔ pbmcref 1.0.0 ✔ pbmcsca 3.0.0
────────────────────────────────────── Key ─────────────────────────────────────
✔ Dataset loaded successfully
❯ Dataset built with a newer version of Seurat than installed
❓ Unknown version of Seurat installed
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats 1.0.0 ✔ readr 2.1.5
✔ ggplot2 3.5.1 ✔ stringr 1.5.1
✔ lubridate 1.9.3 ✔ tibble 3.2.1
✔ purrr 1.0.2 ✔ tidyr 1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Attaching package: 'magrittr'
The following object is masked from 'package:purrr':
set_names
The following object is masked from 'package:tidyr':
extract
Attaching package: 'dbplyr'
The following objects are masked from 'package:dplyr':
ident, sql
Registered S3 method overwritten by 'SeuratDisk':
method from
as.sparse.H5Group Seurat
Attaching shinyBS
Loading required package: ggraph
Attaching package: 'ggraph'
The following object is masked from 'package:sp':
geometry
# Load the filtered Seurat object merged from the cell lines and a PBMC control.
# load() returns the names of the objects it restored; check that the expected
# object is among them so that a wrong or outdated .robj file fails here with a
# clear message instead of silently reusing a `filtered_seurat` that happens to
# be left over in the workspace.
loaded_objects <- load("0-imp_Robj/SS_CD4_Tcells_Azimuth_Annotated_PBMC10x_excluding_nonCD4_cells_from_Control_Bcells_from_L4.robj")
if (!"filtered_seurat" %in% loaded_objects) {
  stop(
    "Expected object 'filtered_seurat' was not found in the loaded file; found: ",
    paste(loaded_objects, collapse = ", "),
    call. = FALSE
  )
}
# Working alias used by the rest of the script
All_samples_Merged <- filtered_seurat
# Load necessary libraries
library(Seurat)
# Peek at the first rows of the per-cell metadata table
head(All_samples_Merged@meta.data)
# Confirm that the expected QC columns exist in the metadata and report any
# that are absent
required_columns <- c("orig.ident", "nCount_RNA", "nFeature_RNA", "nUMI", "ngene")
missing_columns <- required_columns[
  !required_columns %in% colnames(All_samples_Merged@meta.data)
]
if (length(missing_columns) == 0) {
  cat("All required columns are present.\n")
} else {
  cat("Missing columns:", paste(missing_columns, collapse = ", "), "\n")
}
All required columns are present.
# Report the number of cells (second dimension of the object)
cat("Number of cells:", dim(All_samples_Merged)[2], "\n")
Number of cells: 49388
# Report the number of features (first dimension of the object)
cat("Number of features:", dim(All_samples_Merged)[1], "\n")
Number of features: 36601
# Header line for the per-`orig.ident` cell counts printed next
writeLines("Cell counts per group:")
Cell counts per group:
# Tabulate how many cells carry each `orig.ident` label
cells_per_group <- table(All_samples_Merged$orig.ident)
print(cells_per_group)
L1 L2 L3 L4 L5 L6 L7 PBMC PBMC10x
5825 5935 6428 6023 6022 5148 5331 5171 3505
# Merging samples can produce clashing barcodes, so make sure every cell ID
# occurs exactly once (anyDuplicated() returns 0 when there is no repeat)
if (anyDuplicated(colnames(All_samples_Merged)) == 0) {
  cat("Cell IDs are unique.\n")
} else {
  cat("Warning: There are duplicated cell IDs.\n")
}
Cell IDs are unique.
# Make RNA the active assay before inspecting its layers
DefaultAssay(All_samples_Merged) <- "RNA"
# Print the dimensions of the RNA counts layer (layer-based accessor)
rna_counts_dim <- dim(GetAssayData(object = All_samples_Merged, layer = "counts"))
cat("Dimensions of the RNA counts layer:", rna_counts_dim, "\n")
Dimensions of the RNA counts layer: 36601 49388
# Print the dimensions of the RNA data layer
rna_data_dim <- dim(GetAssayData(object = All_samples_Merged, layer = "data"))
cat("Dimensions of the RNA data layer:", rna_data_dim, "\n")
Dimensions of the RNA data layer: 36601 49388
# The ADT assay is optional: report its counts dimensions only when it exists
has_adt <- "ADT" %in% names(All_samples_Merged@assays)
if (!has_adt) {
  cat("ADT assay is not present.\n")
} else {
  cat("ADT assay is present.\n")
  adt_counts_dim <- dim(
    GetAssayData(All_samples_Merged, assay = "ADT", layer = "counts")
  )
  cat("Dimensions of the ADT counts layer:", adt_counts_dim, "\n")
}
ADT assay is present.
Dimensions of the ADT counts layer: 56 49388
# InstallData("pbmcref")
#
# # The RunAzimuth function can take a Seurat object as input
# All_samples_Merged <- RunAzimuth(All_samples_Merged, reference = "pbmcref")
# Drop the existing percent.mito metadata column
All_samples_Merged$percent.mito <- NULL
# Use the `cell_line` metadata column as the active identity
Idents(object = All_samples_Merged) <- "cell_line"

# Percentage of counts from features matching ^RP[SL] (ribosomal protein genes),
# stored as `percent.rb`
All_samples_Merged[["percent.rb"]] <- PercentageFeatureSet(
  object = All_samples_Merged,
  pattern = "^RP[SL]"
)
# Coerce percent.rb to numeric and replace missing values (NA/NaN) with 0
rb_values <- as.numeric(All_samples_Merged$percent.rb)
rb_values[is.na(rb_values)] <- 0
All_samples_Merged$percent.rb <- rb_values

# Percentage of counts from features matching ^MT- (mitochondrial genes),
# stored as `percent.mt`
All_samples_Merged[["percent.mt"]] <- PercentageFeatureSet(
  object = All_samples_Merged,
  pattern = "^MT-"
)
# Coerce percent.mt to numeric and replace missing values (NA/NaN) with 0
mt_values <- as.numeric(All_samples_Merged$percent.mt)
mt_values[is.na(mt_values)] <- 0
All_samples_Merged$percent.mt <- mt_values
# QC metrics shown in the violin plots below
qc_features <- c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb")

# All four QC metrics side by side, with smaller panel titles
VlnPlot(
  All_samples_Merged,
  features = qc_features,
  ncol = 4,
  pt.size = 0.1
) &
  theme(plot.title = element_text(size = 10))

# Mitochondrial vs ribosomal percentage per cell
FeatureScatter(
  All_samples_Merged,
  feature1 = "percent.mt",
  feature2 = "percent.rb"
)

# The first three metrics only (no percent.rb), default point size
VlnPlot(All_samples_Merged, features = qc_features[1:3], ncol = 3)

# Same mito-vs-ribo scatter with a linear fit overlaid
FeatureScatter(
  All_samples_Merged,
  feature1 = "percent.mt",
  feature2 = "percent.rb"
) +
  geom_smooth(method = "lm")
`geom_smooth()` using formula = 'y ~ x'
# UMI count vs detected-gene count per cell, with a linear fit
FeatureScatter(
  object = All_samples_Merged,
  feature1 = "nCount_RNA",
  feature2 = "nFeature_RNA"
) +
  geom_smooth(method = "lm")
`geom_smooth()` using formula = 'y ~ x'
# FeatureScatter visualises feature-feature relationships for anything stored
# on the object (metadata columns, PC scores, ...).
# Here: UMI count vs mitochondrial percentage, with a linear fit.
FeatureScatter(
  object = All_samples_Merged,
  feature1 = "nCount_RNA",
  feature2 = "percent.mt"
) +
  geom_smooth(method = "lm")
`geom_smooth()` using formula = 'y ~ x'
# UMI count vs detected-gene count per cell, with a linear fit
# NOTE(review): this is the same plot call as the nCount_RNA/nFeature_RNA
# scatter drawn a few chunks earlier - confirm both are needed.
FeatureScatter(
  object = All_samples_Merged,
  feature1 = "nCount_RNA",
  feature2 = "nFeature_RNA"
) +
  geom_smooth(method = "lm")
`geom_smooth()` using formula = 'y ~ x'
Running SCTransform on assay: RNA
Running SCTransform on layer: counts
vst.flavor='v2' set. Using model with fixed slope and excluding poisson genes.
Variance stabilizing transformation of count matrix of size 26179 by 49388
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 2000 genes, 5000 cells
Found 487 outliers - those will be ignored in fitting/regularization step
Second step: Get residuals using fitted parameters for 26179 genes
Computing corrected count matrix for 26179 genes
Calculating gene attributes
Wall clock passed: Time difference of 8.097963 mins
Determine variable features
Getting residuals for block 1(of 10) for counts dataset
Getting residuals for block 2(of 10) for counts dataset
Getting residuals for block 3(of 10) for counts dataset
Getting residuals for block 4(of 10) for counts dataset
Getting residuals for block 5(of 10) for counts dataset
Getting residuals for block 6(of 10) for counts dataset
Getting residuals for block 7(of 10) for counts dataset
Getting residuals for block 8(of 10) for counts dataset
Getting residuals for block 9(of 10) for counts dataset
Getting residuals for block 10(of 10) for counts dataset
Finished calculating residuals for counts
Set default assay to SCT
Warning: The following features are not present in the object: MLF1IP, not
searching for symbol synonyms
Warning: The following features are not present in the object: FAM64A, HN1, not
searching for symbol synonyms
# Normalise with SCTransform while regressing out the ribosomal percentage,
# the mitochondrial percentage and CC.Difference.
# NOTE(review): CC.Difference is not created in the visible part of this
# script - presumably a cell-cycle score difference added in an earlier chunk.
sct_covariates <- c("percent.rb", "percent.mt", "CC.Difference")
All_samples_Merged <- SCTransform(
  object = All_samples_Merged,
  vars.to.regress = sct_covariates,
  do.scale = TRUE,
  do.center = TRUE,
  verbose = TRUE
)
Running SCTransform on assay: RNA
Running SCTransform on layer: counts
vst.flavor='v2' set. Using model with fixed slope and excluding poisson genes.
Variance stabilizing transformation of count matrix of size 26179 by 49388
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 2000 genes, 5000 cells
Found 487 outliers - those will be ignored in fitting/regularization step
Second step: Get residuals using fitted parameters for 26179 genes
Computing corrected count matrix for 26179 genes
Calculating gene attributes
Wall clock passed: Time difference of 6.631343 mins
Determine variable features
Regressing out percent.rb, percent.mt, CC.Difference
Centering and scaling data matrix
Getting residuals for block 1(of 10) for counts dataset
Getting residuals for block 2(of 10) for counts dataset
Getting residuals for block 3(of 10) for counts dataset
Getting residuals for block 4(of 10) for counts dataset
Getting residuals for block 5(of 10) for counts dataset
Getting residuals for block 6(of 10) for counts dataset
Getting residuals for block 7(of 10) for counts dataset
Getting residuals for block 8(of 10) for counts dataset
Getting residuals for block 9(of 10) for counts dataset
Getting residuals for block 10(of 10) for counts dataset
Regressing out percent.rb, percent.mt, CC.Difference
Centering and scaling data matrix
Finished calculating residuals for counts
Set default assay to SCT
# Variable features selected by SCTransform
Variables_genes <- All_samples_Merged@assays$SCT@var.features
# Drop genes whose symbol starts with HLA-, XIST, TRBV or TRAV so they do not
# drive the principal components
Variables_genes_after_exclusion <- Variables_genes[
  !grepl("^HLA-|^XIST|^TRBV|^TRAV", Variables_genes)
]
# PCA on the remaining variable genes (50 components).
# The printing options use the current RunPCA argument names: the former
# do.print / pcs.print / genes.print are Seurat v2-era names that newer
# versions do not recognise, so the requested 15 genes per PC was not honoured
# (the default of 30 was printed instead).
All_samples_Merged <- RunPCA(
  All_samples_Merged,
  features = Variables_genes_after_exclusion,
  verbose = TRUE,
  ndims.print = 1:5,
  nfeatures.print = 15,
  npcs = 50
)
PC_ 1
Positive: MALAT1, RPS27, PTPRC, SARAF, RPS4Y1, TCF7, RPS29, RPL39, RPL34, FYB1
GIMAP7, ITM2B, PNRC1, LEF1, BTG1, B2M, FOXP1, GIMAP5, IL7R, CD52
EVL, YPEL3, CD3E, RIPOR2, LINC00861, SELL, ITK, PIK3IP1, ZFP36L2, ABLIM1
Negative: NPM1, PPIA, RAN, GAPDH, NME2, PRELID1, PRDX1, RPS2, HSP90AB1, HSPD1
TUBA1B, RPLP0, TPI1, VDAC1, HMGA1, SRM, H2AFZ, TXN, ATP5F1B, CYC1
UBE2S, ENO1, PPP1R14B, NME1, CCT8, JPT1, SNRPD1, HNRNPAB, MIF, SEC11C
PC_ 2
Positive: KIR3DL1, CST7, EPCAM, KIR2DL3, TRGV2, RPL27A, MATK, KIR3DL2, DAD1, XCL1
C1QBP, KLRC1, PFN1, KIR2DL4, MYO1E, RAB25, CXCR3, EIF4A1, NDUFA4, GZMM
CD7, ESYT2, XCL2, KLRK1, ZBTB16, KRT86, RCBTB2, PTPN6, CHCHD2, SPINT2
Negative: RPL30, FAM107B, SEC11C, MT-ND3, LRRFIP1, ELL2, MTHFD2, CD2, YBX3, CFLAR
VIM, RAD21, CD74, SMAP2, HDGFL3, ANXA1, TAP1, TMEM173, IL2RA, RPL39
EEF1A1, HTATIP2, AHNAK, MTLN, CD58, CCR7, KRT7, CD70, RPL34, BATF3
PC_ 3
Positive: PAGE5, NDUFV2, RPL35A, RBPMS, CDKN2A, RPL22L1, CD74, LMNA, TENM3, KIF2A
STMN1, PSMB2, PSMB9, ANXA2, GPX4, PPP2R2B, FAM50B, ANXA5, GAPDH, IFI27L2
FAM241A, RPL11, VAMP5, PPBP, NEURL1, RPS3A, SH3KBP1, PLD1, HEBP2, SPOCK1
Negative: TNFRSF4, C12orf75, HACD1, EGFL6, TIGIT, BACE2, ARPC2, SYT4, NET1, LY6E
PXYLP1, GGH, GRIA4, MAP1B, UBE2D2, CCL17, SCCPDH, PON2, PTP4A3, CYBA
ADGRB3, KRT7, ACTN1, MCTP2, PLEKHH2, EEF1A2, SMIM3, PRKCSH, MSC, HDGFL3
PC_ 4
Positive: EIF5A, HSPE1, ATP5MC3, ODC1, CHCHD10, MT-ND3, CYCS, RPL34, PPID, PPBP
CYC1, GCSH, RPS4Y1, FCER2, FKBP4, CD7, DNAJC12, GSTP1, HSPD1, FKBP11
FAM162A, RPS29, RPL39, PRELID3B, TCF7, HSP90B1, MTDH, SET, TOMM40, SLC7A11-AS1
Negative: RPS4X, EGLN3, GAS5, KRT1, LINC02752, WFDC1, TTC29, TBX4, RPLP1, IFNGR1
AC069410.1, PLCB1, IL32, TNS4, SP5, RPL13, S100A11, FAM9C, SEMA4A, IL4
S100A4, NKG7, LINC00469, S100A6, HSPB1, VIM, CEBPD, VIPR2, PTGIS, NPTX1
PC_ 5
Positive: TMSB4X, TMSB10, LGALS1, S100A11, TP73, COTL1, S100A4, GPAT3, TMEM163, S100A6
HOXC9, LSP1, IFITM2, GPAT2, TAGLN2, DUSP4, LIME1, EEF1A2, GAS2L1, TNFRSF18
QPRT, MIIP, CEP135, RBM38, PXYLP1, PGAM1, LAPTM5, ENTPD1, MRPS6, IRX3
Negative: CCL17, MIR155HG, MAP4K4, RXFP1, MYO1D, CFI, CA10, LRBA, CA2, THY1
FRMD4A, AL590550.1, IGHE, PRKCA, RUNX1, HS3ST1, EZH2, LTA, NFIB, MGST3
RANBP17, SNTB1, IMMP2L, CCL5, AKAP12, AC100801.1, SLC35F3, AFP, ONECUT2, AL023574.1
# Elbow plot over all 50 computed PCs to judge the dimensionality of the data
ElbowPlot(object = All_samples_Merged, ndims = 50)
library(ggplot2)
library(RColorBrewer)
# Bar chart of the number of filtered cells per cell line.
# A palette of 10 Set3 colours is generated; this assumes there are at most
# 10 cell lines.
cell_line_colors <- brewer.pal(10, "Set3")
# table() gives one row per cell line with its number of cells. The count
# column is named `n_cells` because it is a cell frequency, not a UMI total.
data <- as.data.frame(table(All_samples_Merged$cell_line))
colnames(data) <- c("cell_line", "n_cells")
ncells <- ggplot(data, aes(x = cell_line, y = n_cells, fill = cell_line)) +
  geom_col() +
  theme_classic() +
  # Print the count just above each bar
  geom_text(
    aes(label = n_cells),
    position = position_dodge(width = 0.9),
    vjust = -0.25
  ) +
  scale_fill_manual(values = cell_line_colors) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5) # centre the title
  ) +
  ggtitle("Filtered cells per sample") +
  xlab("Cell lines") +
  ylab("Frequency")
print(ncells)
# TEST-1: pick a PC cut-off from the standard deviations stored in the "pca"
# reduction produced by RunPCA.
# Each PC's share of the summed standard deviations, in percent
pct <- All_samples_Merged[["pca"]]@stdev / sum(All_samples_Merged[["pca"]]@stdev) * 100
cumu <- cumsum(pct) # cumulative percentage across PCs
# Drop in percentage from each PC to the next one: pct[i] - pct[i + 1]
pc_drop <- pct[-length(pct)] - pct[-1]
# co2 = 1 + index of the last PC whose drop to the following PC is still
# larger than 0.1 percentage points (NA when no drop exceeds 0.1)
co2 <- sort(which(pc_drop > 0.1), decreasing = TRUE)[1] + 1
co2
[1] 16
# TEST-2: number of PCs to keep, as the smaller of two cut-offs.
stdv <- All_samples_Merged[["pca"]]@stdev
sum.stdv <- sum(stdv)
# Each PC's share of the summed standard deviations, in percent
percent.stdv <- (stdv / sum.stdv) * 100
cumulative <- cumsum(percent.stdv)
# co1: first PC at which the cumulative share exceeds 90% while the PC itself
# contributes less than 5%
co1 <- which(cumulative > 90 & percent.stdv < 5)[1]
# co2: 1 + the last PC whose drop to the next PC exceeds 0.1 percentage points.
# Negative indexing is used for "all but the last" / "all but the first":
# `1:length(x) - 1` parses as `(1:length(x)) - 1`, i.e. 0:(n - 1), which only
# gave the right elements because index 0 is silently dropped.
pc_drop <- percent.stdv[-length(percent.stdv)] - percent.stdv[-1]
co2 <- sort(which(pc_drop > 0.1), decreasing = TRUE)[1] + 1
min.pc <- min(co1, co2)
min.pc
[1] 16
# Collect the per-PC percentage, the cumulative percentage and the PC rank
plot_df <- data.frame(
  pct = percent.stdv,
  cumu = cumulative,
  rank = seq_along(percent.stdv)
)
# Elbow-style plot: each PC is drawn as its rank number, coloured by whether
# it lies beyond the chosen cut-off `min.pc`.
# The aesthetics refer to columns of `plot_df` rather than to the global
# vectors they were copied from, so the plot depends only on the data frame
# passed in. labs() keeps the axis titles that the plot had before.
ggplot(plot_df, aes(x = cumu, y = pct, label = rank, color = rank > min.pc)) +
  geom_text() +
  # Reference lines: 90% cumulative share, and the smallest share above 5%
  geom_vline(xintercept = 90, color = "grey") +
  geom_hline(yintercept = min(plot_df$pct[plot_df$pct > 5]), color = "grey") +
  labs(x = "cumulative", y = "percent.stdv") +
  theme_bw()
# Build the neighbour graph on the first 16 PCs
All_samples_Merged <- FindNeighbors(
  object = All_samples_Merged,
  dims = seq_len(16),
  verbose = FALSE
)
# Cluster at a sweep of resolutions to see how the granularity changes
cluster_resolutions <- c(
  0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.5, 2
)
All_samples_Merged <- FindClusters(
  object = All_samples_Merged,
  resolution = cluster_resolutions
)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49388
Number of edges: 1630788
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9871
Number of communities: 11
Elapsed time: 20 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49388
Number of edges: 1630788
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9766
Number of communities: 12
Elapsed time: 14 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49388
Number of edges: 1630788
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9663
Number of communities: 13
Elapsed time: 22 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49388
Number of edges: 1630788
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9567
Number of communities: 16
Elapsed time: 20 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49388
Number of edges: 1630788
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9474
Number of communities: 17
Elapsed time: 15 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49388
Number of edges: 1630788
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9382
Number of communities: 17
Elapsed time: 14 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49388
Number of edges: 1630788
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9300
Number of communities: 19
Elapsed time: 21 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49388
Number of edges: 1630788
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9219
Number of communities: 20
Elapsed time: 13 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49388
Number of edges: 1630788
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9142
Number of communities: 22
Elapsed time: 14 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49388
Number of edges: 1630788
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9073
Number of communities: 23
Elapsed time: 16 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49388
Number of edges: 1630788
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8968
Number of communities: 27
Elapsed time: 13 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49388
Number of edges: 1630788
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8827
Number of communities: 30
Elapsed time: 17 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49388
Number of edges: 1630788
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8615
Number of communities: 37
Elapsed time: 19 seconds
# Non-linear dimensionality reduction: UMAP on the first 16 dimensions
All_samples_Merged <- RunUMAP(
  object = All_samples_Merged,
  dims = seq_len(16),
  verbose = FALSE
)
Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session
# Labelled UMAP plots, one per grouping column: cell line, predicted level-2
# cell type, and the clustering obtained at every resolution of the sweep.
# (`label = TRUE` draws the group labels on the plot; LabelClusters() is an
# alternative way to add them.)
umap_groupings <- c(
  "cell_line",
  "predicted.celltype.l2",
  # Resolution suffixes are written as strings so the column names are exact
  paste0(
    "SCT_snn_res.",
    c("0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7", "0.8", "0.9",
      "1", "1.2", "1.5", "2")
  )
)
for (grouping in umap_groupings) {
  # print() is needed because plots are not auto-printed inside a loop
  print(
    DimPlot(
      All_samples_Merged,
      group.by = grouping,
      reduction = "umap",
      label.size = 3,
      repel = TRUE,
      label = TRUE,
      label.box = TRUE
    )
  )
}
# Use the resolution-0.7 clustering column as the active identity
Idents(object = All_samples_Merged) <- "SCT_snn_res.0.7"
# Number of cells in each cluster of the active identity
cluster_table <- table(Idents(All_samples_Merged))
n_clusters <- length(cluster_table)
# One bar per cluster, each in its own rainbow colour
barplot(
  cluster_table,
  col = rainbow(n_clusters),
  main = "Number of Cells in Each Cluster",
  xlab = "Cluster",
  ylab = "Number of Cells"
)
print(cluster_table)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
6259 5930 5418 4925 4898 3976 3487 3344 3336 2338 1895 1895 443 348 289 256
16 17 18
198 80 73
# Cross-tabulate the predicted level-2 cell types (rows) against the
# resolution-0.2 clusters (columns).
# NOTE(review): the active identity was just set to SCT_snn_res.0.7, but this
# table uses SCT_snn_res.0.2 - confirm which resolution is intended here.
table(All_samples_Merged$predicted.celltype.l2, All_samples_Merged$SCT_snn_res.0.2)
0 1 2 3 4 5 6 7 8 9 10 11
B intermediate 0 0 0 0 0 2 3 2 0 0 0 0
B memory 9 1 0 0 118 88 4 29 0 3 0 0
CD14 Mono 0 2 0 0 6 0 0 5 0 0 0 13
CD4 CTL 0 0 0 0 0 0 12 0 0 0 0 1
CD4 Naive 0 0 0 0 0 0 529 0 1479 0 33 1
CD4 Proliferating 5452 5387 2852 2461 4132 4063 2 3222 5 1434 1 0
CD4 TCM 879 523 269 3319 482 614 4533 109 1838 48 161 55
CD4 TEM 0 0 0 1 0 0 61 0 22 0 0 0
CD8 Proliferating 0 0 0 0 1 1 0 0 0 0 0 0
CD8 TCM 0 0 16 1 0 0 0 0 0 0 0 0
CD8 TEM 0 0 8 1 1 3 0 2 0 0 0 0
cDC1 0 0 0 0 0 6 0 2 0 0 0 0
cDC2 0 2 0 0 35 3 0 10 0 1 0 2
dnT 0 1 1 1 3 5 0 2 0 0 2 0
HSPC 57 1 0 0 489 215 8 672 0 363 0 0
ILC 0 0 0 0 0 0 1 0 0 0 0 0
NK 0 0 0 0 0 0 0 0 0 0 0 1
NK Proliferating 6 23 2785 39 34 263 0 10 0 1 0 0
Treg 15 1 0 1 0 25 2 0 0 0 1 0
# Clustering tree across all metadata columns that start with "SCT_snn_res."
clustree(
  All_samples_Merged,
  prefix = "SCT_snn_res."
)
# InstallData("pbmcref")
#
# # The RunAzimuth function can take a Seurat object as input
# All_samples_Merged <- RunAzimuth(All_samples_Merged, reference = "pbmcref")
# UMAP coloured by the predicted cell-type annotations (level 1, then level 2),
# each drawn once with boxed labels and once without labels.
# The former third level-2 plot was an exact repeat of the labelled level-2
# plot and is no longer drawn twice.
for (annotation in c("predicted.celltype.l1", "predicted.celltype.l2")) {
  for (show_labels in c(TRUE, FALSE)) {
    # print() is needed because plots are not auto-printed inside a loop
    print(
      DimPlot(
        All_samples_Merged,
        group.by = annotation,
        reduction = "umap",
        label.size = 3,
        repel = TRUE,
        label = show_labels,
        label.box = show_labels # boxes only make sense when labels are drawn
      )
    )
  }
}
# Cross-tabulate the predicted level-2 cell types (rows) against the
# resolution-0.2 clusters (columns).
# NOTE(review): the identical table() call already appears earlier in this
# script - confirm whether the repeat is needed.
table(All_samples_Merged$predicted.celltype.l2, All_samples_Merged$SCT_snn_res.0.2)
0 1 2 3 4 5 6 7 8 9 10 11
B intermediate 0 0 0 0 0 2 3 2 0 0 0 0
B memory 9 1 0 0 118 88 4 29 0 3 0 0
CD14 Mono 0 2 0 0 6 0 0 5 0 0 0 13
CD4 CTL 0 0 0 0 0 0 12 0 0 0 0 1
CD4 Naive 0 0 0 0 0 0 529 0 1479 0 33 1
CD4 Proliferating 5452 5387 2852 2461 4132 4063 2 3222 5 1434 1 0
CD4 TCM 879 523 269 3319 482 614 4533 109 1838 48 161 55
CD4 TEM 0 0 0 1 0 0 61 0 22 0 0 0
CD8 Proliferating 0 0 0 0 1 1 0 0 0 0 0 0
CD8 TCM 0 0 16 1 0 0 0 0 0 0 0 0
CD8 TEM 0 0 8 1 1 3 0 2 0 0 0 0
cDC1 0 0 0 0 0 6 0 2 0 0 0 0
cDC2 0 2 0 0 35 3 0 10 0 1 0 2
dnT 0 1 1 1 3 5 0 2 0 0 2 0
HSPC 57 1 0 0 489 215 8 672 0 363 0 0
ILC 0 0 0 0 0 0 1 0 0 0 0 0
NK 0 0 0 0 0 0 0 0 0 0 0 1
NK Proliferating 6 23 2785 39 34 263 0 10 0 1 0 0
Treg 15 1 0 1 0 25 2 0 0 0 1 0
# Persist the object as it stands here, before the Harmony step that follows
save(
  list = "All_samples_Merged",
  file = "0-imp_Robj/All_Samples_Merged_with_10x_Azitmuth_Annotated_SCT_HPC_without_harmony_integration_removed_nonCD4cells_from_control_and_Bcells_from_L4.robj"
)
# Load required libraries
library(Seurat)
library(harmony)
Loading required package: Rcpp
library(ggplot2)
# Run Harmony to adjust for batch effects, grouping cells by the `cell_line`
# metadata column (swap in another column to correct for a different batch
# variable).
# The argument list no longer ends with a dangling comma: a trailing comma
# passes an extra empty argument, which only works by accident and can fail
# with "argument is missing" depending on how the callee forwards arguments.
All_samples_Merged <- RunHarmony(
  object = All_samples_Merged,
  group.by.vars = "cell_line"
)
Transposing data matrix
Initializing state using k-means centroids initialization
Harmony 1/10
Harmony 2/10
Harmony converged after 2 iterations
# Pull the embedding matrix of the "harmony" reduction and show its first rows
harmony_embeddings <- Embeddings(
  object = All_samples_Merged,
  reduction = "harmony"
)
head(harmony_embeddings, n = 6L)
harmony_1 harmony_2 harmony_3 harmony_4 harmony_5
L1_AAACCTGAGGGCTTCC-1 -3.946316 -2.508155 -2.4240290 -4.613461629 0.6824009
L1_AAACCTGGTGCAGGTA-1 7.932996 6.182147 -2.7156319 -9.764427287 -2.7174208
L1_AAACCTGGTTAAAGTG-1 5.143768 12.136837 -0.6697718 -6.154052811 1.1991903
L1_AAACCTGTCAGGTAAA-1 -5.906256 4.345782 -0.4280688 0.006767983 2.4929947
L1_AAACCTGTCCCTGACT-1 -1.396560 -4.652100 -4.8989151 -2.415140279 -1.2644573
L1_AAACCTGTCCTTCAAT-1 9.240531 7.752498 -2.1561278 -5.099642137 -1.1200950
harmony_6 harmony_7 harmony_8 harmony_9 harmony_10
L1_AAACCTGAGGGCTTCC-1 0.6548586 2.7330522 -1.3976666 0.8526287 -5.9366004
L1_AAACCTGGTGCAGGTA-1 7.7393352 0.2195172 0.6409612 -1.2086542 0.6341195
L1_AAACCTGGTTAAAGTG-1 2.7041438 2.2646208 -3.1204253 -4.6629926 3.4089257
L1_AAACCTGTCAGGTAAA-1 0.3419882 -2.3898699 -0.7430938 -1.9668005 3.2719306
L1_AAACCTGTCCCTGACT-1 0.2413912 1.7604939 -1.0221144 0.6831098 -2.7059917
L1_AAACCTGTCCTTCAAT-1 3.4681024 4.1983354 -0.7913665 -2.3587834 0.5029439
harmony_11 harmony_12 harmony_13 harmony_14 harmony_15
L1_AAACCTGAGGGCTTCC-1 1.09734133 -1.11966293 0.5650691 3.4603120 -0.61804833
L1_AAACCTGGTGCAGGTA-1 -1.77167474 0.02927627 0.2412102 -1.0769775 0.64390016
L1_AAACCTGGTTAAAGTG-1 3.56761986 1.98908005 -3.1179809 -3.0918491 4.51226324
L1_AAACCTGTCAGGTAAA-1 -0.01361045 2.05988920 -2.3980160 -0.7500199 -0.05927066
L1_AAACCTGTCCCTGACT-1 1.45008085 -0.36589820 -0.2210595 1.4067893 -1.72598157
L1_AAACCTGTCCTTCAAT-1 0.78530670 -0.78169083 0.7590315 -0.4170808 -0.29408267
harmony_16 harmony_17 harmony_18 harmony_19 harmony_20
L1_AAACCTGAGGGCTTCC-1 1.7890849 1.20836778 0.1104041 1.5260299 1.93287816
L1_AAACCTGGTGCAGGTA-1 -1.7466190 0.99133009 0.4312158 -3.4682531 2.39977788
L1_AAACCTGGTTAAAGTG-1 -0.8698648 -3.07801628 0.4807754 -2.2235734 0.95192869
L1_AAACCTGTCAGGTAAA-1 -0.5201749 -0.04999637 0.5597765 -0.3327123 -2.67998091
L1_AAACCTGTCCCTGACT-1 3.9717544 -4.10281464 -0.4117770 1.5410567 1.62617349
L1_AAACCTGTCCTTCAAT-1 -0.3618518 -0.57159067 1.0456638 -1.1688303 -0.01292879
harmony_21 harmony_22 harmony_23 harmony_24 harmony_25
L1_AAACCTGAGGGCTTCC-1 2.0687588 -5.2176605 4.6975199 -0.63324143 -2.3460211
L1_AAACCTGGTGCAGGTA-1 0.2594868 -2.8425078 -0.1725967 0.01843536 -0.5040802
L1_AAACCTGGTTAAAGTG-1 1.9407041 -2.3826576 0.7748255 1.40275826 -2.4241848
L1_AAACCTGTCAGGTAAA-1 -0.2233824 0.8541429 -1.4905314 -0.29876965 0.4194252
L1_AAACCTGTCCCTGACT-1 4.3979689 -4.4520455 1.2556013 0.02869466 -0.3115114
L1_AAACCTGTCCTTCAAT-1 0.2078221 -3.8691883 3.1275159 -2.97922514 0.5299079
harmony_26 harmony_27 harmony_28 harmony_29 harmony_30
L1_AAACCTGAGGGCTTCC-1 -0.13506414 -1.0233044 0.2896614 0.6969961 0.5484630
L1_AAACCTGGTGCAGGTA-1 -0.08270374 -0.2275749 -0.7267052 -0.6788381 0.1380896
L1_AAACCTGGTTAAAGTG-1 -0.84467953 -0.8558217 1.6861167 2.3833663 1.4711955
L1_AAACCTGTCAGGTAAA-1 -1.08269906 -0.2349961 -0.6329669 -0.8757767 0.3869736
L1_AAACCTGTCCCTGACT-1 1.13534579 0.6363151 0.9269910 0.9019161 0.4593291
L1_AAACCTGTCCTTCAAT-1 0.31646918 -1.9918477 0.2644108 -1.3678664 0.6603832
harmony_31 harmony_32 harmony_33 harmony_34 harmony_35
L1_AAACCTGAGGGCTTCC-1 2.1423188 -0.1884545 -1.1419913 -0.01535074 2.0850260
L1_AAACCTGGTGCAGGTA-1 -1.7959080 -1.5464014 2.6169301 -0.88924657 -2.1314655
L1_AAACCTGGTTAAAGTG-1 4.1994005 2.2431240 -0.9348459 0.35412511 0.6562207
L1_AAACCTGTCAGGTAAA-1 -0.6828722 -0.8392218 1.0889914 0.24072407 1.6185513
L1_AAACCTGTCCCTGACT-1 1.6088479 1.9181070 -2.2860843 0.73087720 1.3170961
L1_AAACCTGTCCTTCAAT-1 1.4379649 -0.3406085 1.7042741 0.84500396 3.3555968
harmony_36 harmony_37 harmony_38 harmony_39 harmony_40
L1_AAACCTGAGGGCTTCC-1 0.2114291 -0.8392132 -0.4588096 0.113644189 -0.4462939
L1_AAACCTGGTGCAGGTA-1 -0.5949871 1.2264657 1.4725473 0.524993863 -2.4467969
L1_AAACCTGGTTAAAGTG-1 1.7360139 0.4287940 0.3949197 -0.008575602 0.8446195
L1_AAACCTGTCAGGTAAA-1 0.4736172 -0.6607751 1.0578908 0.082098449 0.7363728
L1_AAACCTGTCCCTGACT-1 -1.0700470 -1.0538671 -1.8320737 -1.441252199 1.1423997
L1_AAACCTGTCCTTCAAT-1 -1.9566326 0.6140278 0.1619540 0.585458201 -0.8692424
harmony_41 harmony_42 harmony_43 harmony_44 harmony_45
L1_AAACCTGAGGGCTTCC-1 -0.4948753 -0.4528220 -0.09411515 0.4819189 1.2748695
L1_AAACCTGGTGCAGGTA-1 -1.2446649 -0.3841748 -1.25327091 0.8593334 -0.2586643
L1_AAACCTGGTTAAAGTG-1 1.4504816 1.9327587 -1.39201268 -0.5475156 -1.9893492
L1_AAACCTGTCAGGTAAA-1 -0.9413772 0.4116694 2.38706727 0.3244899 -0.1713571
L1_AAACCTGTCCCTGACT-1 1.8484362 -0.3151039 0.31272488 0.9531137 0.3261051
L1_AAACCTGTCCTTCAAT-1 2.7369868 2.0800403 2.48822876 1.1462056 0.8252289
harmony_46 harmony_47 harmony_48 harmony_49
L1_AAACCTGAGGGCTTCC-1 0.35501680 -0.07629787 -1.0487671 0.27402502
L1_AAACCTGGTGCAGGTA-1 -1.52705850 -1.63870019 0.7180720 -0.37843916
L1_AAACCTGGTTAAAGTG-1 2.11995412 -0.42004369 2.1809904 0.50038186
L1_AAACCTGTCAGGTAAA-1 -0.08247225 -1.13375218 1.3721518 -0.34928834
L1_AAACCTGTCCCTGACT-1 -0.15069807 -0.47566698 -1.9488859 -0.04742065
L1_AAACCTGTCCTTCAAT-1 1.56445910 -2.16990542 0.5506724 0.58588326
harmony_50
L1_AAACCTGAGGGCTTCC-1 0.73932739
L1_AAACCTGGTGCAGGTA-1 0.98447221
L1_AAACCTGGTTAAAGTG-1 -1.16215733
L1_AAACCTGTCAGGTAAA-1 -0.03914474
L1_AAACCTGTCCCTGACT-1 -0.10928903
L1_AAACCTGTCCTTCAAT-1 -1.40244134
# Compute the UMAP from the first 16 dimensions of the Harmony embedding.
# NOTE(review): no reduction.name is given, so this presumably replaces the
# PCA-based "umap" computed earlier in the script - confirm that is intended.
All_samples_Merged <- RunUMAP(
  object = All_samples_Merged,
  reduction = "harmony",
  dims = seq_len(16)
)
18:17:16 UMAP embedding parameters a = 0.9922 b = 1.112
18:17:16 Read 49388 rows and found 16 numeric columns
18:17:16 Using Annoy for neighbor search, n_neighbors = 30
18:17:16 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
18:17:22 Writing NN index file to temp file /tmp/Rtmpv9VRsw/file2779d66841664
18:17:22 Searching Annoy index using 1 thread, search_k = 3000
18:17:40 Annoy recall = 100%
18:17:42 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
18:17:47 Initializing from normalized Laplacian + noise (using RSpectra)
18:17:49 Commencing optimization for 200 epochs, with 2095818 positive edges
18:18:53 Optimization finished
# Optional clustering on the integrated data, step 1: neighbour graph built
# from the first 16 Harmony dimensions
All_samples_Merged <- FindNeighbors(
  object = All_samples_Merged,
  reduction = "harmony",
  dims = seq_len(16)
)
Computing nearest neighbor graph
Computing SNN
# Step 2: cluster at resolution 0.5 (adjust as needed).
# NOTE(review): a resolution-0.5 clustering was already computed before
# Harmony; this call presumably writes to the same SCT_snn_res.0.5 column and
# replaces it, leaving the other SCT_snn_res.* columns pre-Harmony - confirm.
All_samples_Merged <- FindClusters(
  object = All_samples_Merged,
  resolution = 0.5
)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 49388
Number of edges: 1512086
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8971
Number of communities: 14
Elapsed time: 20 seconds
# UMAP after Harmony, coloured by cell line
DimPlot(
  object = All_samples_Merged,
  reduction = "umap",
  group.by = "cell_line",
  label = TRUE,
  pt.size = 0.5
) +
  ggtitle("UMAP of Harmony-Integrated Data")

# Same view (cell line colouring) under a more descriptive title
DimPlot(
  object = All_samples_Merged,
  reduction = "umap",
  group.by = "cell_line",
  label = TRUE,
  pt.size = 0.5
) +
  ggtitle("UMAP - Colored by Cell Line (After Harmony Integration)")

# Coloured by the current `seurat_clusters` assignment
DimPlot(
  object = All_samples_Merged,
  reduction = "umap",
  group.by = "seurat_clusters",
  label = TRUE,
  pt.size = 0.5
) +
  ggtitle("UMAP - Clustered Data (After Harmony Integration)")

# Coloured by the predicted level-2 cell type
DimPlot(
  object = All_samples_Merged,
  reduction = "umap",
  group.by = "predicted.celltype.l2",
  label = TRUE,
  pt.size = 0.5
) +
  ggtitle("UMAP - Cell Types After Harmony Integration")

# Disabled save of the final object.
# NOTE(review): the file name below says "without_harmony_integration", but
# Harmony has been run by this point - rename before enabling this line.
#save(All_samples_Merged, file = "../../../0-IMP-OBJECTS/All_Samples_Merged_with_10x_Azitmuth_Annotated_SCT_HPC_without_harmony_integration.robj")