# Load CITE-seq H5 file
male_list <- Read10X_h5(
"Normal_PBMC_Samples-23-02-2026/10x_sample1_Same_PBMC_10x_incudingCITESEQ_TCR_Male27/sc5p_v2_hs_PBMC_10k_multi_5gex_5fb_b_t_count_filtered_feature_bc_matrix.h5",
use.names = TRUE
)
names(male_list)
[1] "Gene Expression" "Antibody Capture"
cd4t_s1 <- CreateSeuratObject(
counts = male_list[["Gene Expression"]],
project = "CD4T_10x_S1",
min.cells = 3, min.features = 200
)
adt_counts <- male_list[["Antibody Capture"]][, colnames(cd4t_s1)]
rownames(adt_counts) <- gsub("_", "-", rownames(adt_counts))
cd4t_s1[["ADT"]] <- CreateAssay5Object(counts = adt_counts)
cd4t_s1$dataset <- "CD4T_10x_S1"
cd4t_s1
An object of class Seurat
20245 features across 10529 samples within 2 assays
Active assay: RNA (20226 features, 0 variable features)
1 layer present: counts
1 other assay present: ADT
female_data <- Read10X_h5(
"Normal_PBMC_Samples-23-02-2026/10x_sample2_TCR_available_Female25-30/10k_PBMC_5pv2_nextgem_Chromium_X_intron_10k_PBMC_5pv2_nextgem_Chromium_X_intron_count_sample_feature_bc_matrix.h5",
use.names = TRUE
)
counts_matrix <- if(is.list(female_data) && length(female_data)>0) female_data[[1]] else female_data
cd4t_s2 <- CreateSeuratObject(
counts = counts_matrix,
project = "CD4T_10x_S2",
min.cells = 3, min.features = 200
)
cd4t_s2$dataset <- "CD4T_10x_S2"
cd4t_s2
An object of class Seurat
23144 features across 9731 samples within 1 assay
Active assay: RNA (23144 features, 0 variable features)
1 layer present: counts
All_samples_Merged <- readRDS(
"/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/0-Seurat_RDS_Final_OBJECT/All_samples_Merged_with_Renamed_Clusters_final-26-10-2025.rds"
)
cd4t_lab <- subset(All_samples_Merged, subset = orig.ident == "CD4T_lab")
cd4t_lab$dataset <- "CD4T_lab"
DefaultAssay(cd4t_lab) <- "RNA"
cd4t_lab
An object of class Seurat
62900 features across 5106 samples within 6 assays
Active assay: RNA (36601 features, 0 variable features)
2 layers present: data, counts
5 other assays present: ADT, prediction.score.celltype.l1, prediction.score.celltype.l2, prediction.score.celltype.l3, SCT
5 dimensional reductions calculated: integrated_dr, ref.umap, pca, umap, harmony
compute_qc <- function(obj) {
DefaultAssay(obj) <- "RNA"
obj[["percent.mt"]] <- PercentageFeatureSet(obj, pattern = "^MT-")
obj$percent.mt <- replace(as.numeric(obj$percent.mt), is.na(obj$percent.mt), 0)
obj
}
cd4t_s1 <- compute_qc(cd4t_s1)
cd4t_s2 <- compute_qc(cd4t_s2)
cd4t_lab <- compute_qc(cd4t_lab)
pre_qc <- merge(cd4t_s1, y = list(cd4t_s2, cd4t_lab))
VlnPlot(pre_qc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
group.by = "dataset", pt.size = 0, ncol = 3)
p1 <- FeatureScatter(pre_qc, "nCount_RNA", "nFeature_RNA", group.by = "dataset")
p2 <- FeatureScatter(pre_qc, "nCount_RNA", "percent.mt", group.by = "dataset")
p3 <- FeatureScatter(pre_qc, "nFeature_RNA", "percent.mt", group.by = "dataset")
p1 | p2 | p3
# Your Sezary Syndrome filtering (exact)
your_filter <- function(obj) {
subset(obj, subset =
nFeature_RNA > 500 & nFeature_RNA < 7000 &
nCount_RNA < 40000 & percent.mt < 7.5 & percent.mt > 0.1
)
}
cd4t_s1 <- your_filter(cd4t_s1)
cd4t_s2 <- your_filter(cd4t_s2)
cd4t_lab <- your_filter(cd4t_lab)
cat("=== CELLS AFTER YOUR FILTERING ===\n")
=== CELLS AFTER YOUR FILTERING ===
cat("CD4T_10x_S1:", ncol(cd4t_s1), "\n")
CD4T_10x_S1: 9867
cat("CD4T_10x_S2:", ncol(cd4t_s2), "\n")
CD4T_10x_S2: 6861
cat("CD4T_lab: ", ncol(cd4t_lab), "\n")
CD4T_lab: 5106
cat("=== COMPLETE NORMALIZATION (S1/S2/Lab + Dummy ADT) ===\n")
=== COMPLETE NORMALIZATION (S1/S2/Lab + Dummy ADT) ===
# 1. RNA Normalization (ALL 3 samples)
cd4t_s1 <- NormalizeData(cd4t_s1, verbose = FALSE); cat("✅ RNA S1\n")
✅ RNA S1
cd4t_s2 <- NormalizeData(cd4t_s2, verbose = FALSE); cat("✅ RNA S2\n")
✅ RNA S2
cd4t_lab <- NormalizeData(cd4t_lab, verbose = FALSE); cat("✅ RNA Lab\n")
✅ RNA Lab
# 2. Universal ADT Normalization (Real + Dummy)
normalize_all_adt <- function() {
for(sample_name in c("cd4t_s1", "cd4t_s2", "cd4t_lab")) {
obj <- get(sample_name)
# All now have ADT (real or dummy) ✓
if("ADT" %in% names(obj@assays)) {
cat("ADT normalizing:", sample_name, "\n")
DefaultAssay(obj) <- "ADT"
obj <- NormalizeData(obj, normalization.method = "CLR", margin = 2, verbose = FALSE)
cat(" →", sample_name, "ADT CLR ✓\n")
}
# Always reset to RNA
DefaultAssay(obj) <- "RNA"
assign(sample_name, obj, envir = .GlobalEnv)
}
}
normalize_all_adt()
ADT normalizing: cd4t_s1
→ cd4t_s1 ADT CLR ✓
ADT normalizing: cd4t_lab
→ cd4t_lab ADT CLR ✓
cat("\n🎯 ALL NORMALIZED → READY FOR HARMONY!\n")
🎯 ALL NORMALIZED → READY FOR HARMONY!
# Quick verification
cat("\nActive assays:\n")
Active assays:
cat("S1: ", DefaultAssay(cd4t_s1), "\n")
S1: RNA
cat("S2: ", DefaultAssay(cd4t_s2), "\n")
S2: RNA
cat("Lab:", DefaultAssay(cd4t_lab), "\n")
Lab: RNA
# ✅ 1. Seurat v5 global option (safe)
options(Seurat.object.assay.version = "v5")
# ✅ 2. Universal V5 converter (production-ready)
convert_to_v5 <- function(obj, assay_name = "ADT") {
if(assay_name %in% names(obj@assays)) {
# Convert legacy Assay → Assay5
if(!inherits(obj[[assay_name]], "Assay5")) {
cat("Converting", assay_name, "to v5...\n")
old_assay <- obj[[assay_name]]
new_assay <- CreateAssay5Object(counts = old_assay@counts)
obj[[assay_name]] <- new_assay
}
} else {
# Add dummy ADT if missing (V5 standard)
cat("Adding dummy ADT (v5)...\n")
dummy <- Matrix::Matrix(0, nrow = 28, ncol = ncol(obj),
dimnames = list(paste0("Dummy_ADT_", 1:28),
colnames(obj)))
obj[["ADT"]] <- CreateAssay5Object(counts = dummy)
}
return(obj)
}
# ✅ 3. Convert ALL samples
cd4t_s1 <- convert_to_v5(cd4t_s1); cat("S1 ✓\n")
S1 ✓
cd4t_s2 <- convert_to_v5(cd4t_s2); cat("S2 ✓\n") # Gets dummy
Adding dummy ADT (v5)...
S2 ✓
cd4t_lab <- convert_to_v5(cd4t_lab); cat("Lab ✓\n")
Converting ADT to v5...
Lab ✓
cat("\n✅ ALL OBJECTS NOW SEURAT V5!\n")
✅ ALL OBJECTS NOW SEURAT V5!
# Seurat v5 LayerData (universal access)
for(name in c("cd4t_s1", "cd4t_s2", "cd4t_lab")) {
obj <- get(name)
adt_counts <- LayerData(obj[["ADT"]], layer = "counts")
cat("\n", name, "- ADT:", nrow(adt_counts), "×", ncol(adt_counts), "\n")
total <- sum(adt_counts)
cat(" Sum:", total, ifelse(total==0, "DUMMY ✓", "REAL ✓"), "\n")
}
cd4t_s1 - ADT: 19 × 9867
Sum: 27462748 REAL ✓
cd4t_s2 - ADT: 28 × 6861
Sum: 0 DUMMY ✓
cd4t_lab - ADT: 28 × 5106
Sum: 6401555 REAL ✓
# FIXED: Proper object names + cell IDs
healthy <- merge(cd4t_s1, y = list(cd4t_s2, cd4t_lab),
add.cell.ids = c("CD4T_10x_S1", "CD4T_10x_S2", "CD4T_lab"),
merge.data = FALSE)
DefaultAssay(healthy) <- "RNA"
# Preprocessing
healthy <- FindVariableFeatures(healthy, nfeatures = 3000)
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
healthy <- ScaleData(healthy, vars.to.regress = "percent.mt")
|
| | 0%
|
|= | 0%
|
|= | 1%
|
|== | 1%
|
|== | 2%
|
|=== | 2%
|
|=== | 3%
|
|==== | 3%
|
|==== | 4%
|
|===== | 4%
|
|===== | 5%
|
|====== | 5%
|
|======= | 5%
|
|======= | 6%
|
|======== | 6%
|
|======== | 7%
|
|========= | 7%
|
|========= | 8%
|
|========== | 8%
|
|========== | 9%
|
|=========== | 9%
|
|=========== | 10%
|
|============ | 10%
|
|============= | 11%
|
|============== | 11%
|
|============== | 12%
|
|=============== | 12%
|
|=============== | 13%
|
|================ | 13%
|
|================ | 14%
|
|================= | 14%
|
|================= | 15%
|
|================== | 15%
|
|================== | 16%
|
|=================== | 16%
|
|==================== | 16%
|
|==================== | 17%
|
|===================== | 17%
|
|===================== | 18%
|
|====================== | 18%
|
|====================== | 19%
|
|======================= | 19%
|
|======================= | 20%
|
|======================== | 20%
|
|======================== | 21%
|
|========================= | 21%
|
|========================== | 21%
|
|========================== | 22%
|
|=========================== | 22%
|
|=========================== | 23%
|
|============================ | 23%
|
|============================ | 24%
|
|============================= | 24%
|
|============================= | 25%
|
|============================== | 25%
|
|============================== | 26%
|
|=============================== | 26%
|
|================================ | 26%
|
|================================ | 27%
|
|================================= | 27%
|
|================================= | 28%
|
|================================== | 28%
|
|================================== | 29%
|
|=================================== | 29%
|
|=================================== | 30%
|
|==================================== | 30%
|
|==================================== | 31%
|
|===================================== | 31%
|
|===================================== | 32%
|
|====================================== | 32%
|
|======================================= | 32%
|
|======================================= | 33%
|
|======================================== | 33%
|
|======================================== | 34%
|
|========================================= | 34%
|
|========================================= | 35%
|
|========================================== | 35%
|
|========================================== | 36%
|
|=========================================== | 36%
|
|=========================================== | 37%
|
|============================================ | 37%
|
|============================================= | 37%
|
|============================================= | 38%
|
|============================================== | 38%
|
|============================================== | 39%
|
|=============================================== | 39%
|
|=============================================== | 40%
|
|================================================ | 40%
|
|================================================ | 41%
|
|================================================= | 41%
|
|================================================= | 42%
|
|================================================== | 42%
|
|=================================================== | 42%
|
|=================================================== | 43%
|
|==================================================== | 43%
|
|==================================================== | 44%
|
|===================================================== | 44%
|
|===================================================== | 45%
|
|====================================================== | 45%
|
|====================================================== | 46%
|
|======================================================= | 46%
|
|======================================================= | 47%
|
|======================================================== | 47%
|
|========================================================= | 48%
|
|========================================================== | 48%
|
|========================================================== | 49%
|
|=========================================================== | 49%
|
|=========================================================== | 50%
|
|============================================================ | 50%
|
|============================================================ | 51%
|
|============================================================= | 51%
|
|============================================================= | 52%
|
|============================================================== | 52%
|
|=============================================================== | 53%
|
|================================================================ | 53%
|
|================================================================ | 54%
|
|================================================================= | 54%
|
|================================================================= | 55%
|
|================================================================== | 55%
|
|================================================================== | 56%
|
|=================================================================== | 56%
|
|=================================================================== | 57%
|
|==================================================================== | 57%
|
|==================================================================== | 58%
|
|===================================================================== | 58%
|
|====================================================================== | 58%
|
|====================================================================== | 59%
|
|======================================================================= | 59%
|
|======================================================================= | 60%
|
|======================================================================== | 60%
|
|======================================================================== | 61%
|
|========================================================================= | 61%
|
|========================================================================= | 62%
|
|========================================================================== | 62%
|
|========================================================================== | 63%
|
|=========================================================================== | 63%
|
|============================================================================ | 63%
|
|============================================================================ | 64%
|
|============================================================================= | 64%
|
|============================================================================= | 65%
|
|============================================================================== | 65%
|
|============================================================================== | 66%
|
|=============================================================================== | 66%
|
|=============================================================================== | 67%
|
|================================================================================ | 67%
|
|================================================================================ | 68%
|
|================================================================================= | 68%
|
|================================================================================== | 68%
|
|================================================================================== | 69%
|
|=================================================================================== | 69%
|
|=================================================================================== | 70%
|
|==================================================================================== | 70%
|
|==================================================================================== | 71%
|
|===================================================================================== | 71%
|
|===================================================================================== | 72%
|
|====================================================================================== | 72%
|
|====================================================================================== | 73%
|
|======================================================================================= | 73%
|
|======================================================================================= | 74%
|
|======================================================================================== | 74%
|
|========================================================================================= | 74%
|
|========================================================================================= | 75%
|
|========================================================================================== | 75%
|
|========================================================================================== | 76%
|
|=========================================================================================== | 76%
|
|=========================================================================================== | 77%
|
|============================================================================================ | 77%
|
|============================================================================================ | 78%
|
|============================================================================================= | 78%
|
|============================================================================================= | 79%
|
|============================================================================================== | 79%
|
|=============================================================================================== | 79%
|
|=============================================================================================== | 80%
|
|================================================================================================ | 80%
|
|================================================================================================ | 81%
|
|================================================================================================= | 81%
|
|================================================================================================= | 82%
|
|================================================================================================== | 82%
|
|================================================================================================== | 83%
|
|=================================================================================================== | 83%
|
|=================================================================================================== | 84%
|
|==================================================================================================== | 84%
|
|===================================================================================================== | 84%
|
|===================================================================================================== | 85%
|
|====================================================================================================== | 85%
|
|====================================================================================================== | 86%
|
|======================================================================================================= | 86%
|
|======================================================================================================= | 87%
|
|======================================================================================================== | 87%
|
|======================================================================================================== | 88%
|
|========================================================================================================= | 88%
|
|========================================================================================================= | 89%
|
|========================================================================================================== | 89%
|
|=========================================================================================================== | 90%
|
|============================================================================================================ | 90%
|
|============================================================================================================ | 91%
|
|============================================================================================================= | 91%
|
|============================================================================================================= | 92%
|
|============================================================================================================== | 92%
|
|============================================================================================================== | 93%
|
|=============================================================================================================== | 93%
|
|=============================================================================================================== | 94%
|
|================================================================================================================ | 94%
|
|================================================================================================================ | 95%
|
|================================================================================================================= | 95%
|
|================================================================================================================== | 95%
|
|================================================================================================================== | 96%
|
|=================================================================================================================== | 96%
|
|=================================================================================================================== | 97%
|
|==================================================================================================================== | 97%
|
|==================================================================================================================== | 98%
|
|===================================================================================================================== | 98%
|
|===================================================================================================================== | 99%
|
|====================================================================================================================== | 99%
|
|====================================================================================================================== | 100%
|
|=======================================================================================================================| 100%
|
| | 0%
|
|======================================== | 33%
|
|=============================================================================== | 67%
|
|=======================================================================================================================| 100%
healthy <- RunPCA(healthy, npcs = 50)
ElbowPlot(healthy, ndims = 50)
library(ggplot2)
library(RColorBrewer)
# Assuming you have 10 different cell lines, generating a color palette with 10 colors
cell_line_colors <- brewer.pal(10, "Set3")
# Assuming All_samples_Merged$cell_line is a factor or character vector containing cell line names
data <- as.data.frame(table(healthy$dataset))
colnames(data) <- c("cell_line", "nUMI") # Change column name to nUMI
ncells <- ggplot(data, aes(x = cell_line, y = nUMI, fill = cell_line)) +
geom_col() +
theme_classic() +
geom_text(aes(label = nUMI),
position = position_dodge(width = 0.9),
vjust = -0.25) +
scale_fill_manual(values = cell_line_colors) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.5)) + # Adjust the title position
ggtitle("Filtered cells per sample") +
xlab("PBMC samples") + # Adjust x-axis label
ylab("Frequency") # Adjust y-axis label
print(ncells)
# TEST-1
# given that the output of RunPCA is "pca"
# replace "so" by the name of your seurat object
pct <- All_samples_Merged[["pca"]]@stdev / sum(All_samples_Merged[["pca"]]@stdev) * 100
cumu <- cumsum(pct) # Calculate cumulative percents for each PC
# Determine the difference between variation of PC and subsequent PC
co2 <- sort(which((pct[-length(pct)] - pct[-1]) > 0.1), decreasing = T)[1] + 1
# last point where change of % of variation is more than 0.1%. -> co2
co2
[1] 15
# TEST-2
# get significant PCs
stdv <- All_samples_Merged[["pca"]]@stdev
sum.stdv <- sum(All_samples_Merged[["pca"]]@stdev)
percent.stdv <- (stdv / sum.stdv) * 100
cumulative <- cumsum(percent.stdv)
co1 <- which(cumulative > 90 & percent.stdv < 5)[1]
co2 <- sort(which((percent.stdv[1:length(percent.stdv) - 1] -
percent.stdv[2:length(percent.stdv)]) > 0.1),
decreasing = T)[1] + 1
min.pc <- min(co1, co2)
min.pc
[1] 15
# Create a dataframe with values
plot_df <- data.frame(pct = percent.stdv,
cumu = cumulative,
rank = 1:length(percent.stdv))
# Elbow plot to visualize
ggplot(plot_df, aes(cumulative, percent.stdv, label = rank, color = rank > min.pc)) +
geom_text() +
geom_vline(xintercept = 90, color = "grey") +
geom_hline(yintercept = min(percent.stdv[percent.stdv > 5]), color = "grey") +
theme_bw()
p1 | p2
p3
p4
p5
DimPlot(healthy, group.by = "predicted.celltype.l1", label = TRUE,
repel = TRUE, ncol = 2, label.size = 3) + NoLegend() +
ggtitle("Azimuth Cell Types")
DimPlot(healthy, group.by = "predicted.celltype.l2", label = TRUE,
repel = TRUE, ncol = 2, label.size = 3) + NoLegend() +
ggtitle("Azimuth Cell Types")
DimPlot(healthy, group.by = "predicted.celltype.l3", label = TRUE,
repel = TRUE, ncol = 2, label.size = 3) + NoLegend() +
ggtitle("Azimuth Cell Types")
# Cell type table
table(healthy$predicted.celltype.l1, healthy$dataset)
CD4T_10x_S1 CD4T_10x_S2 CD4T_lab
B 924 479 0
CD4 T 3510 3445 5105
CD8 T 1370 1200 1
DC 204 96 0
Mono 3073 1332 0
NK 450 118 0
other 46 41 0
other T 290 150 0
# Cell type table
table(healthy$predicted.celltype.l2, healthy$dataset)
CD4T_10x_S1 CD4T_10x_S2 CD4T_lab
ASDC 5 4 0
B intermediate 141 300 0
B memory 88 81 0
B naive 682 82 0
CD14 Mono 2956 1161 0
CD16 Mono 119 171 0
CD4 CTL 1 0 11
CD4 Naive 1367 381 359
CD4 Proliferating 9 4 0
CD4 TCM 2011 2861 4655
CD4 TEM 29 55 75
CD8 Naive 987 501 0
CD8 Proliferating 0 1 0
CD8 TCM 184 185 1
CD8 TEM 203 503 0
cDC1 13 5 0
cDC2 129 52 0
dnT 7 6 0
gdT 74 45 0
HSPC 12 15 0
ILC 3 0 0
MAIT 210 115 0
NK 435 105 0
NK Proliferating 2 1 0
NK_CD56bright 13 12 0
pDC 57 35 0
Plasmablast 12 16 0
Platelet 31 26 0
Treg 87 138 5
# Cell type table
table(healthy$predicted.celltype.l3, healthy$dataset)
CD4T_10x_S1 CD4T_10x_S2 CD4T_lab
ASDC_mDC 2 3 0
ASDC_pDC 2 1 0
B intermediate kappa 68 46 0
B intermediate lambda 77 254 0
B memory kappa 44 44 0
B memory lambda 42 32 0
B naive kappa 628 84 0
B naive lambda 52 2 0
CD14 Mono 2956 1162 0
CD16 Mono 119 172 0
CD4 CTL 3 0 11
CD4 Naive 1371 382 363
CD4 Proliferating 9 4 0
CD4 TCM_1 1519 1863 4181
CD4 TCM_2 113 148 248
CD4 TCM_3 371 856 226
CD4 TEM_1 12 26 8
CD4 TEM_2 19 11 13
CD4 TEM_3 2 7 49
CD8 Naive 974 376 0
CD8 Naive_2 16 121 0
CD8 Proliferating 0 1 0
CD8 TCM_1 116 118 1
CD8 TCM_2 45 39 0
CD8 TCM_3 16 12 0
CD8 TEM_1 69 273 2
CD8 TEM_2 64 179 0
CD8 TEM_3 19 12 0
CD8 TEM_4 15 0 0
CD8 TEM_5 1 5 0
CD8 TEM_6 32 38 0
cDC1 13 5 0
cDC2_1 43 25 0
cDC2_2 86 26 0
dnT_1 0 1 0
dnT_2 7 7 0
gdT_1 59 16 0
gdT_3 17 41 1
HSPC 12 15 0
ILC 3 0 0
MAIT 211 115 0
NK Proliferating 2 1 0
NK_1 5 1 0
NK_2 375 102 0
NK_3 23 1 0
NK_4 32 2 0
NK_CD56bright 13 12 0
pDC 58 35 0
Plasma 12 16 0
Platelet 31 26 0
Treg Memory 58 134 3
Treg Naive 31 9 0
library(SingleR)
library(celldex)
immune = celldex::DatabaseImmuneCellExpressionData()
singler.immune <- SingleR(test = sce, ref = immune, assay.type.test=1,
labels = immune$label.fine)
head(singler.immune)
DataFrame with 6 rows and 4 columns
scores labels delta.next pruned.labels
<matrix> <character> <numeric> <character>
CD4T_10x_S1_AAACCTGAGACAGACC-1 0.136658:0.452373:0.402454:... Monocytes, CD14+ 0.51005410 Monocytes, CD14+
CD4T_10x_S1_AAACCTGAGCGATAGC-1 0.151751:0.130657:0.145904:... T cells, CD4+, naive 0.02091751 T cells, CD4+, naive
CD4T_10x_S1_AAACCTGAGCGGCTTC-1 0.125310:0.444552:0.387669:... Monocytes, CD14+ 0.05688369 Monocytes, CD14+
CD4T_10x_S1_AAACCTGAGGATCGCA-1 0.165954:0.510133:0.456105:... Monocytes, CD14+ 0.05402872 Monocytes, CD14+
CD4T_10x_S1_AAACCTGAGTCACGCC-1 0.155126:0.175873:0.192265:... T cells, CD4+, Th2 0.00910584 T cells, CD4+, Th2
CD4T_10x_S1_AAACCTGAGTTCGCAT-1 0.120224:0.131059:0.142412:... T cells, CD4+, TFH 0.00482739 T cells, CD4+, TFH
hpca <- celldex::HumanPrimaryCellAtlasData()
singler.hpca <- SingleR(test = sce, ref = hpca, assay.type.test=1,
labels = hpca$label.fine)
head(singler.hpca)
DataFrame with 6 rows and 4 columns
scores labels delta.next pruned.labels
<matrix> <character> <numeric> <character>
CD4T_10x_S1_AAACCTGAGACAGACC-1 0.159103:0.320996:0.277669:... Monocyte:CD16- 0.1528401 Monocyte:CD16-
CD4T_10x_S1_AAACCTGAGCGATAGC-1 0.179729:0.301868:0.277188:... T_cell:CD4+_Naive 0.0751722 T_cell:CD4+_Naive
CD4T_10x_S1_AAACCTGAGCGGCTTC-1 0.176758:0.316453:0.265663:... Monocyte:CD16- 0.2338980 Monocyte:CD16-
CD4T_10x_S1_AAACCTGAGGATCGCA-1 0.176412:0.328262:0.274236:... Monocyte:CD16- 0.2199498 Monocyte:CD16-
CD4T_10x_S1_AAACCTGAGTCACGCC-1 0.192698:0.341920:0.319796:... T_cell:CD4+_central_.. 0.0472252 T_cell:CD4+_central_..
CD4T_10x_S1_AAACCTGAGTTCGCAT-1 0.153027:0.248264:0.225423:... T_cell:CD4+_central_.. 0.1357707 T_cell:CD4+_central_..
library(dplyr) # must be attached for across(), where(), n() to resolve
dir.create("annotation_tables", showWarnings = FALSE)
# ── Azimuth tables (L1, L2, L3) ──────────────────────────────────────────────
write.csv(table(healthy$predicted.celltype.l1, healthy$dataset),
"annotation_tables/azimuth_l1_by_dataset.csv")
write.csv(table(healthy$predicted.celltype.l2, healthy$dataset),
"annotation_tables/azimuth_l2_by_dataset.csv")
write.csv(table(healthy$predicted.celltype.l3, healthy$dataset),
"annotation_tables/azimuth_l3_by_dataset.csv")
# ── SingleR tables ────────────────────────────────────────────────────────────
write.csv(table(healthy$singler.hpca, healthy$dataset),
"annotation_tables/singler_hpca_by_dataset.csv")
write.csv(table(healthy$singler.immune, healthy$dataset),
"annotation_tables/singler_immune_by_dataset.csv")
# ── Full metadata summary ─────────────────────────────────────────────────────
metadata_summary <- healthy@meta.data %>%
dplyr::select(dataset, seurat_clusters,
predicted.celltype.l1, predicted.celltype.l2, predicted.celltype.l3,
singler.hpca, singler.immune,
nFeature_RNA, nCount_RNA, percent.mt, S.Score, G2M.Score) %>%
dplyr::mutate(seurat_clusters = as.character(seurat_clusters)) %>% # avoid numeric mean on clusters
dplyr::group_by(dataset) %>%
dplyr::summarise(
across(where(is.numeric),
\(x) mean(x, na.rm = TRUE)),
across(where(is.character) | where(is.factor),
\(x) {
tb <- table(x)
paste0(names(which.max(tb)),
" (", round(max(prop.table(tb)), 3), ")")
}),
n_cells = n(),
.groups = "drop"
)
write.csv(metadata_summary, "annotation_tables/metadata_summary.csv", row.names = FALSE)
# ── Cell counts overview ──────────────────────────────────────────────────────
datasets <- sort(unique(healthy$dataset))
cell_summary <- data.frame(
Dataset = datasets,
Total_Cells = as.integer(table(healthy$dataset)[datasets]),
Azimuth_L1_Tcells = sapply(datasets, \(d)
sum(grepl("T", healthy$predicted.celltype.l1[healthy$dataset == d],
ignore.case = TRUE), na.rm = TRUE)),
HPCA_CD4 = sapply(datasets, \(d)
sum(grepl("CD4", healthy$singler.hpca[healthy$dataset == d],
ignore.case = TRUE), na.rm = TRUE))
)
write.csv(cell_summary, "annotation_tables/cell_counts_summary.csv", row.names = FALSE)
cat("✅ SAVED TABLES TO: annotation_tables/\n")
✅ SAVED TABLES TO: annotation_tables/
cat("• Azimuth L1/L2/L3 by dataset\n")
• Azimuth L1/L2/L3 by dataset
cat("• SingleR HPCA/Immune by dataset\n")
• SingleR HPCA/Immune by dataset
cat("• Full metadata summary\n")
• Full metadata summary
cat("• Cell counts overview\n")
• Cell counts overview
# Final validation tables
cat("=== ANNOTATION SUMMARY ===\n")
=== ANNOTATION SUMMARY ===
print(table(healthy$predicted.celltype.l1, healthy$dataset))
CD4T_10x_S1 CD4T_10x_S2 CD4T_lab
B 924 479 0
CD4 T 3510 3445 5105
CD8 T 1370 1200 1
DC 204 96 0
Mono 3073 1332 0
NK 450 118 0
other 46 41 0
other T 290 150 0
print(table(healthy$singler.hpca, healthy$dataset))
CD4T_10x_S1 CD4T_10x_S2 CD4T_lab
B_cell 77 15 0
B_cell:immature 71 35 0
B_cell:Memory 83 123 1
B_cell:Naive 661 282 0
B_cell:Plasma_cell 8 13 0
CMP 9 12 0
GMP 18 7 0
MEP 2 0 0
Monocyte 18 3 0
Monocyte:anti-FcgRIIB 0 0 2
Monocyte:CD14+ 18 5 0
Monocyte:CD16- 2891 1086 0
Monocyte:CD16+ 221 271 0
Monocyte:leukotriene_D4 2 3 0
Monocyte:S._typhimurium_flagellin 0 1 0
Neutrophil 0 1 0
Neutrophil:commensal_E._coli_MG1655 0 4 0
Neutrophil:uropathogenic_E._coli_UTI89 0 0 1
NK_cell 236 143 0
NK_cell:CD56hiCD62L+ 9 13 1
NK_cell:IL2 173 13 0
Platelets 28 20 0
Pre-B_cell_CD34- 14 2 0
Pro-B_cell_CD34+ 2 1 0
T_cell:CD4+ 58 32 1
T_cell:CD4+_central_memory 1272 1787 1830
T_cell:CD4+_effector_memory 478 690 210
T_cell:CD4+_Naive 3178 1903 2933
T_cell:CD8+ 276 251 10
T_cell:CD8+_Central_memory 0 5 2
T_cell:CD8+_effector_memory 12 68 52
T_cell:CD8+_effector_memory_RA 0 5 2
T_cell:CD8+_naive 1 11 14
T_cell:gamma-delta 3 1 0
T_cell:Treg:Naive 1 2 3
print(table(healthy$singler.immune, healthy$dataset))
CD4T_10x_S1 CD4T_10x_S2 CD4T_lab
B cells, naive 832 445 0
Monocytes, CD14+ 3005 1209 0
Monocytes, CD16+ 112 165 0
NK cells 601 372 11
T cells, CD4+, memory TREG 148 131 132
T cells, CD4+, naive 1830 1112 1368
T cells, CD4+, naive TREG 168 121 119
T cells, CD4+, naive, stimulated 0 0 136
T cells, CD4+, TFH 733 772 1963
T cells, CD4+, Th1 223 620 160
T cells, CD4+, Th1_17 306 329 105
T cells, CD4+, Th17 273 458 447
T cells, CD4+, Th2 155 320 552
T cells, CD8+, naive 1230 680 48
T cells, CD8+, naive, stimulated 0 0 57
# Multi-annotation consensus plot
p1 <- DimPlot(healthy, group.by = "singler.hpca", label = TRUE, repel = TRUE) + ggtitle("HPCA")
p2 <- DimPlot(healthy, group.by = "singler.immune", label = TRUE, repel = TRUE) + ggtitle("Immune")
p3 <- DimPlot(healthy, group.by = "predicted.celltype.l1", label = TRUE) + ggtitle("Azimuth L1")
p1
p2
p3
DimPlot(healthy, group.by = "predicted.celltype.l2", label = TRUE) + ggtitle("Azimuth L1")
DimPlot(healthy, group.by = "predicted.celltype.l3", label = TRUE) + ggtitle("Azimuth L1")
# SAVE for trajectory RMD
#saveRDS(healthy, file = "healthy_full_annotated_harmony.rds")
cat("✅ SAVED: healthy_full_annotated_harmony.rds (", ncol(healthy), "cells)\n")
✅ SAVED: healthy_full_annotated_harmony.rds ( 21834 cells)
cat("Load next RMD: healthy <- readRDS('healthy_full_annotated_harmony.rds')\n")
Load next RMD: healthy <- readRDS('healthy_full_annotated_harmony.rds')
```