Load RDS with all annotations
Idents(All_samples_Merged) <- "seurat_clusters"
# Ensure clusters are ordered 0-13
All_samples_Merged$seurat_clusters <- factor(All_samples_Merged$seurat_clusters,
levels = as.character(0:13))
cat("Total cells:", ncol(All_samples_Merged), "\n")
Total cells: 49305
cat("Total clusters:", length(unique(All_samples_Merged$seurat_clusters)), "\n\n")
Total clusters: 14
cat("Clusters (ordered 0-18):\n")
Clusters (ordered 0-18):
print(table(All_samples_Merged$seurat_clusters))
0 1 2 3 4 5 6 7 8 9 10 11 12 13
6789 5275 4663 4661 4086 3634 3536 3409 3338 3273 3212 1675 1063 691
cat("\n\nSamples (ordered):\n")
Samples (ordered):
print(table(All_samples_Merged$cell_line))
L1 L2 L3 L4 L5 L6 L7 CD4Tcells_lab
5825 5935 6428 6006 6022 5148 5331 5106
CD4Tcells_10x
3504
Create Annotation Summary Table
Create Annotation Summary Table by Clusters
library(dplyr)
library(knitr)
library(kableExtra)
# Extract dominant label per cluster per method
cols_needed <- c("seurat_clusters", "predicted.id", "predicted.celltype.l2",
"singler.immune", "scATOMIC_annotation")
annotation_table_cluster <- All_samples_Merged@meta.data[, cols_needed] %>%
group_by(seurat_clusters) %>%
summarise(
scPred = names(sort(table(predicted.id), decreasing = TRUE))[1],
Azimuth = names(sort(table(predicted.celltype.l2), decreasing = TRUE))[1],
SingleR = names(sort(table(singler.immune), decreasing = TRUE))[1],
scATOMIC = names(sort(table(scATOMIC_annotation), decreasing = TRUE))[1],
n_cells = n(),
.groups = "drop"
) %>%
arrange(seurat_clusters)
# Add consensus column
annotation_table_cluster <- annotation_table_cluster %>%
mutate(
Consensus = (scPred == Azimuth & Azimuth == SingleR & SingleR == scATOMIC)
)
# Display
cat("\n=== ANNOTATION BY CLUSTER ===\n\n")
=== ANNOTATION BY CLUSTER ===
kable(annotation_table_cluster, caption = "Dominant Cell Type Annotation per Cluster") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE, font_size = 10) %>%
column_spec(7, background = ifelse(annotation_table_cluster$Consensus, "#d4edda", "#f8d7da"))
Dominant Cell Type Annotation per Cluster
| seurat_clusters |
scPred |
Azimuth |
SingleR |
scATOMIC |
n_cells |
Consensus |
| 0 |
Plasma cell |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Lung Cancer Cell |
6789 |
FALSE |
| 1 |
CD4 T cell |
NK Proliferating |
NK cells |
Effector/Memory CD4+ T cells |
5275 |
FALSE |
| 2 |
Plasma cell |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Lung Cancer Cell |
4663 |
FALSE |
| 3 |
CD4 T cell |
CD4 TCM |
T cells, CD4+, naive |
Naive CD4+ T cells |
4661 |
FALSE |
| 4 |
Plasma cell |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Lung Cancer Cell |
4086 |
FALSE |
| 5 |
CD4 T cell |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Effector/Memory CD4+ T cells |
3634 |
FALSE |
| 6 |
Plasma cell |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Lung Cancer Cell |
3536 |
FALSE |
| 7 |
CD4 T cell |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Effector/Memory CD4+ T cells |
3409 |
FALSE |
| 8 |
Plasma cell |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Lung Cancer Cell |
3338 |
FALSE |
| 9 |
CD4 T cell |
CD4 TCM |
T cells, CD4+, memory TREG |
Effector/Memory CD4+ T cells |
3273 |
FALSE |
| 10 |
CD4 T cell |
CD4 TCM |
T cells, CD4+, TFH |
Naive CD4+ T cells |
3212 |
FALSE |
| 11 |
CD4 T cell |
CD4 TCM |
T cells, CD4+, memory TREG |
Effector/Memory CD4+ T cells |
1675 |
FALSE |
| 12 |
Plasma cell |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Effector/Memory CD4+ T cells |
1063 |
FALSE |
| 13 |
Plasma cell |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Lung Cancer Cell |
691 |
FALSE |
# Save
write.csv(annotation_table_cluster, "CellLines_annotation_BY_CLUSTER_04-02-2026.csv",
row.names = FALSE)
cat("\n✓ Annotation table by cluster saved\n")
✓ Annotation table by cluster saved
Create Annotation Summary Table - BY CELL LINE
library(dplyr)
library(knitr)
library(kableExtra)
# Extract dominant label per CELL LINE per method
cols_needed <- c("cell_line", "predicted.id", "predicted.celltype.l2",
"singler.immune", "scATOMIC_annotation")
annotation_table_cellline <- All_samples_Merged@meta.data[, cols_needed] %>%
group_by(cell_line) %>%
summarise(
scPred = names(sort(table(predicted.id), decreasing = TRUE))[1],
Azimuth = names(sort(table(predicted.celltype.l2), decreasing = TRUE))[1],
SingleR = names(sort(table(singler.immune), decreasing = TRUE))[1],
scATOMIC = names(sort(table(scATOMIC_annotation), decreasing = TRUE))[1],
n_cells = n(),
.groups = "drop"
) %>%
arrange(cell_line)
# Add consensus column
annotation_table_cellline <- annotation_table_cellline %>%
mutate(
Consensus = (scPred == Azimuth & Azimuth == SingleR & SingleR == scATOMIC)
)
# Display
cat("\n=== ANNOTATION BY CELL LINE ===\n\n")
=== ANNOTATION BY CELL LINE ===
kable(annotation_table_cellline, caption = "Dominant Cell Type Annotation per Cell Line") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE, font_size = 10) %>%
column_spec(7, background = ifelse(annotation_table_cellline$Consensus, "#d4edda", "#f8d7da"))
Dominant Cell Type Annotation per Cell Line
| cell_line |
scPred |
Azimuth |
SingleR |
scATOMIC |
n_cells |
Consensus |
| L1 |
CD4 T cell |
CD4 TCM |
T cells, CD4+, Th2 |
Effector/Memory CD4+ T cells |
5825 |
FALSE |
| L2 |
CD4 T cell |
NK Proliferating |
NK cells |
Effector/Memory CD4+ T cells |
5935 |
FALSE |
| L3 |
Plasma cell |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Lung Cancer Cell |
6428 |
FALSE |
| L4 |
Plasma cell |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Lung Cancer Cell |
6006 |
FALSE |
| L5 |
Plasma cell |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Lung Cancer Cell |
6022 |
FALSE |
| L6 |
cDC |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Effector/Memory CD4+ T cells |
5148 |
FALSE |
| L7 |
Plasma cell |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Lung Cancer Cell |
5331 |
FALSE |
| CD4Tcells_lab |
CD4 T cell |
CD4 TCM |
T cells, CD4+, TFH |
Naive CD4+ T cells |
5106 |
FALSE |
| CD4Tcells_10x |
CD4 T cell |
CD4 TCM |
T cells, CD4+, naive |
Naive CD4+ T cells |
3504 |
FALSE |
# Save
write.csv(annotation_table_cellline, "CellLines_annotation_BY_CELLLINE_04-02-2026.csv",
row.names = FALSE)
cat("\n✓ Annotation table by cell line saved\n")
✓ Annotation table by cell line saved
Create Annotation Summary Table - Combined Table (Cluster × Cell Line)
library(dplyr)
library(knitr)
library(kableExtra)
# Extract dominant label per CLUSTER × CELL LINE × METHOD
cols_needed <- c("seurat_clusters", "cell_line", "predicted.id", "predicted.celltype.l2",
"singler.immune", "scATOMIC_annotation")
annotation_table_combined <- All_samples_Merged@meta.data[, cols_needed] %>%
group_by(seurat_clusters, cell_line) %>%
summarise(
scPred = names(sort(table(predicted.id), decreasing = TRUE))[1],
Azimuth = names(sort(table(predicted.celltype.l2), decreasing = TRUE))[1],
SingleR = names(sort(table(singler.immune), decreasing = TRUE))[1],
scATOMIC = names(sort(table(scATOMIC_annotation), decreasing = TRUE))[1],
n_cells = n(),
.groups = "drop"
) %>%
arrange(cell_line, seurat_clusters)
# Add consensus column
annotation_table_combined <- annotation_table_combined %>%
mutate(
Consensus = (scPred == Azimuth & Azimuth == SingleR & SingleR == scATOMIC)
)
# Display (first 20 rows)
cat("\n=== ANNOTATION BY CLUSTER × CELL LINE (First 20 rows) ===\n\n")
=== ANNOTATION BY CLUSTER × CELL LINE (First 20 rows) ===
kable(head(annotation_table_combined, 20),
caption = "Dominant Annotation per Cluster × Cell Line") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE, font_size = 9) %>%
column_spec(8, background = ifelse(head(annotation_table_combined$Consensus, 20),
"#d4edda", "#f8d7da"))
Dominant Annotation per Cluster × Cell Line
| seurat_clusters |
cell_line |
scPred |
Azimuth |
SingleR |
scATOMIC |
n_cells |
Consensus |
| 0 |
L1 |
CD4 T cell |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Effector/Memory CD4+ T cells |
7 |
FALSE |
| 1 |
L1 |
CD4 T cell |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Effector/Memory CD4+ T cells |
91 |
FALSE |
| 2 |
L1 |
CD4 T cell |
CD4 TCM |
T cells, CD4+, Th2 |
Effector/Memory CD4+ T cells |
101 |
FALSE |
| 4 |
L1 |
B cell |
CD4 TCM |
T cells, CD4+, memory TREG |
Effector/Memory CD4+ T cells |
1 |
FALSE |
| 5 |
L1 |
CD4 T cell |
CD4 Proliferating |
T cells, CD4+, Th2 |
Effector/Memory CD4+ T cells |
2514 |
FALSE |
| 6 |
L1 |
CD4 T cell |
CD4 TCM |
T cells, CD4+, Th2 |
Effector/Memory CD4+ T cells |
44 |
FALSE |
| 7 |
L1 |
CD4 T cell |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Effector/Memory CD4+ T cells |
518 |
FALSE |
| 9 |
L1 |
CD4 T cell |
CD4 TCM |
T cells, CD4+, Th2 |
Effector/Memory CD4+ T cells |
2052 |
FALSE |
| 10 |
L1 |
CD4 T cell |
CD4 Proliferating |
T cells, CD4+, Th1 |
Effector/Memory CD4+ T cells |
8 |
FALSE |
| 11 |
L1 |
CD4 T cell |
CD4 TCM |
T cells, CD4+, Th2 |
Effector/Memory CD4+ T cells |
406 |
FALSE |
| 12 |
L1 |
CD4 T cell |
CD4 TCM |
T cells, CD4+, memory TREG |
Effector/Memory CD4+ T cells |
82 |
FALSE |
| 13 |
L1 |
CD4 T cell |
CD4 Proliferating |
T cells, CD4+, Th2 |
Effector/Memory CD4+ T cells |
1 |
FALSE |
| 0 |
L2 |
CD4 T cell |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Effector/Memory CD4+ T cells |
2 |
FALSE |
| 1 |
L2 |
CD4 T cell |
NK Proliferating |
NK cells |
Effector/Memory CD4+ T cells |
5113 |
FALSE |
| 2 |
L2 |
CD4 T cell |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Effector/Memory CD4+ T cells |
13 |
FALSE |
| 4 |
L2 |
CD4 T cell |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Effector/Memory CD4+ T cells |
1 |
FALSE |
| 5 |
L2 |
CD4 T cell |
NK Proliferating |
T cells, CD4+, memory TREG |
Effector/Memory CD4+ T cells |
99 |
FALSE |
| 6 |
L2 |
CD4 T cell |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Effector/Memory CD4+ T cells |
7 |
FALSE |
| 7 |
L2 |
CD4 T cell |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Effector/Memory CD4+ T cells |
189 |
FALSE |
| 9 |
L2 |
CD4 T cell |
CD4 Proliferating |
T cells, CD4+, memory TREG |
Effector/Memory CD4+ T cells |
487 |
FALSE |
# Save
write.csv(annotation_table_combined,
"CellLines_annotation_BY_CLUSTER_AND_CELLLINE_04-02-2026.csv",
row.names = FALSE)
cat("\n✓ Combined annotation table (cluster × cell line) saved\n")
✓ Combined annotation table (cluster × cell line) saved
cat("Total rows:", nrow(annotation_table_combined), "\n")
Total rows: 103
Create Annotation Summary
library(purrr) # Make sure this is loaded
# Define annotation methods
methods <- c(
"predicted.id", # scPred
"predicted.celltype.l2", # Azimuth (l2 prediction)
"singler.immune", # SingleR
"scATOMIC_annotation" # scATOMIC
)
# Create summary - most common label per cluster for each method
annotation_summary <- map_dfr(methods, function(m) {
df <- All_samples_Merged@meta.data
df %>%
filter(!is.na(.data[[m]])) %>%
group_by(seurat_clusters, label = .data[[m]]) %>%
summarise(n = n(), .groups = "drop") %>%
group_by(seurat_clusters) %>%
slice_max(n, n = 1, with_ties = FALSE) %>%
mutate(method = m)
})
# Rename methods for display
annotation_summary <- annotation_summary %>%
mutate(method = recode(method,
"predicted.id" = "scPred",
"predicted.celltype.l2" = "Azimuth.l2",
"singler.immune" = "SingleR(Immune)",
"scATOMIC_annotation" = "scATOMIC"))
# Set method order
annotation_summary$method <- factor(annotation_summary$method,
levels = c("scPred", "Azimuth.l2",
"SingleR(Immune)", "scATOMIC"))
# Ensure cluster order
annotation_summary$seurat_clusters <- factor(annotation_summary$seurat_clusters,
levels = as.character(0:13)) # Adjust range
Summary Statistics
# Cell counts per method
cat("\n=== Cells Annotated Per Method ===\n\n")
=== Cells Annotated Per Method ===
method_cols <- c("predicted.id", "predicted.celltype.l2", "singler.immune", "scATOMIC_annotation")
for(m in method_cols){
n_annotated <- sum(!is.na(All_samples_Merged@meta.data[[m]]))
cat(sprintf("%-25s: %5d cells (%.1f%%)\n",
m, n_annotated, n_annotated/ncol(All_samples_Merged)*100))
}
predicted.id : 49305 cells (100.0%)
predicted.celltype.l2 : 49305 cells (100.0%)
singler.immune : 49272 cells (99.9%)
scATOMIC_annotation : 49305 cells (100.0%)
# Number of unique labels per method
cat("\n=== Unique Labels Per Method ===\n\n")
=== Unique Labels Per Method ===
annotation_summary %>%
group_by(method) %>%
summarise(n_unique_labels = n_distinct(label)) %>%
print()
# A tibble: 4 × 2
method n_unique_labels
<fct> <int>
1 scPred 2
2 Azimuth.l2 3
3 SingleR(Immune) 4
4 scATOMIC 3
Basic Heatmap Visualization
library(ggplot2)
ggplot(annotation_summary, aes(x = seurat_clusters, y = method, fill = label)) +
geom_tile(color = "white", linewidth = 0.5) +
scale_fill_discrete() +
labs(
x = "Seurat Clusters",
y = "Annotation Method",
fill = "Assigned Cell Type",
title = "Cross-Method Comparison of Cell Type Annotations"
) +
theme_minimal(base_size = 14) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
axis.title = element_text(face = "bold"),
plot.title = element_text(hjust = 0.5, face = "bold", size = 16)
)

Enhanced Heatmap with Custom Colors
library(RColorBrewer)
# Generate color palette
num_colors <- length(unique(annotation_summary$label))
my_colors <- colorRampPalette(brewer.pal(8, "Set2"))(num_colors)
p_heatmap <- ggplot(annotation_summary, aes(x = seurat_clusters, y = method, fill = label)) +
geom_tile(color = "white", linewidth = 0.5) +
scale_fill_manual(values = my_colors) +
scale_x_discrete(drop = FALSE) + # Keep all cluster levels
labs(
x = "Seurat Clusters",
y = "Annotation Method",
fill = "Assigned Cell Type",
title = "Cross-Method Comparison of Cell Type Annotations"
) +
theme_minimal(base_size = 14) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
axis.title = element_text(face = "bold"),
plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
legend.position = "right"
)
print(p_heatmap)

# Save high-quality versions
ggsave("CellLines_annotation_heatmap_04-02-2026.png", plot = p_heatmap,
width = 14, height = 5, dpi = 300, bg = "white")
ggsave("CellLines_annotation_heatmap_04-02-2026.pdf", plot = p_heatmap,
width = 14, height = 5)
cat("\n✓ Heatmap saved as PNG and PDF\n")
✓ Heatmap saved as PNG and PDF
MANUSCRIPT FIGURE
MANUSCRIPT FIGURE: Text-Based Heatmap BY CLUSTERS
library(ggplot2)
library(dplyr)
library(tidyr)
# Prepare data by CLUSTERS
cols_needed <- c("seurat_clusters", "predicted.id", "predicted.celltype.l2",
"singler.immune", "scATOMIC_annotation")
heatmap_data_clusters <- All_samples_Merged@meta.data[, cols_needed] %>%
group_by(seurat_clusters) %>%
summarise(
scPred = names(sort(table(predicted.id), decreasing = TRUE))[1],
Azimuth = names(sort(table(predicted.celltype.l2), decreasing = TRUE))[1],
SingleR = names(sort(table(singler.immune), decreasing = TRUE))[1],
scATOMIC = names(sort(table(scATOMIC_annotation), decreasing = TRUE))[1],
.groups = "drop"
) %>%
pivot_longer(cols = c(scPred, Azimuth, SingleR, scATOMIC),
names_to = "Method", values_to = "CellType") %>%
mutate(
Method = factor(Method, levels = c("scATOMIC", "SingleR", "Azimuth", "scPred")),
seurat_clusters = factor(seurat_clusters, levels = as.character(0:13)), # CHANGE RANGE
CellType_display = ifelse(nchar(CellType) > 20,
paste0(substr(CellType, 1, 17), "..."),
CellType)
)
# Create manuscript figure BY CLUSTERS
p_manuscript_clusters <- ggplot(heatmap_data_clusters,
aes(x = seurat_clusters, y = Method, label = CellType_display)) +
geom_tile(color = "grey20", fill = "white", linewidth = 0.8) +
geom_text(size = 3, hjust = 0.5, vjust = 0.5, fontface = "bold") +
labs(
title = "Discordance in Automated Cell Type Annotation - Cell Lines by Clusters",
x = "Seurat Cluster",
y = "Annotation Method"
) +
theme_minimal(base_size = 16) +
theme(
axis.text.x = element_text(face = "bold", size = 14),
axis.text.y = element_text(face = "bold", size = 14),
axis.title = element_text(size = 16, face = "bold"),
plot.title = element_text(hjust = 0.5, face = "bold", size = 18),
panel.grid = element_blank(),
plot.margin = margin(10, 10, 10, 10)
)
print(p_manuscript_clusters)

# Save high-quality versions
ggsave("FigureX_CellLines_Annotation_BY_CLUSTERS.png",
plot = p_manuscript_clusters, width = 30, height = 5, dpi = 600, bg = "white")
ggsave("FigureX_CellLines_Annotation_BY_CLUSTERS.pdf",
plot = p_manuscript_clusters, width = 30, height = 5)
ggsave("FigureX_CellLines_Annotation_BY_CLUSTERS.tiff",
plot = p_manuscript_clusters, width = 30, height = 5, dpi = 600, bg = "white")
cat("\n✓ Manuscript figure BY CLUSTERS saved (PNG, PDF, TIFF at 600 DPI)\n")
✓ Manuscript figure BY CLUSTERS saved (PNG, PDF, TIFF at 600 DPI)
MANUSCRIPT FIGURE: Text-Based Heatmap BY CELL LINES
# Prepare data by CELL LINES
cols_needed_cellline <- c("cell_line", "predicted.id", "predicted.celltype.l2",
"singler.immune", "scATOMIC_annotation")
heatmap_data_celllines <- All_samples_Merged@meta.data[, cols_needed_cellline] %>%
group_by(cell_line) %>%
summarise(
scPred = names(sort(table(predicted.id), decreasing = TRUE))[1],
Azimuth = names(sort(table(predicted.celltype.l2), decreasing = TRUE))[1],
SingleR = names(sort(table(singler.immune), decreasing = TRUE))[1],
scATOMIC = names(sort(table(scATOMIC_annotation), decreasing = TRUE))[1],
.groups = "drop"
) %>%
pivot_longer(cols = c(scPred, Azimuth, SingleR, scATOMIC),
names_to = "Method", values_to = "CellType") %>%
mutate(
Method = factor(Method, levels = c("scATOMIC", "SingleR", "Azimuth", "scPred")),
CellType_display = ifelse(nchar(CellType) > 20,
paste0(substr(CellType, 1, 17), "..."),
CellType)
)
# Create manuscript figure BY CELL LINES
p_manuscript_celllines <- ggplot(heatmap_data_celllines,
aes(x = cell_line, y = Method, label = CellType_display)) +
geom_tile(color = "grey20", fill = "white", linewidth = 0.8) +
geom_text(size = 3, hjust = 0.5, vjust = 0.5, fontface = "bold") +
labs(
title = "Discordance in Automated Cell Type Annotation - By Cell Line",
x = "Cell Line",
y = "Annotation Method"
) +
theme_minimal(base_size = 16) +
theme(
axis.text.x = element_text(face = "bold", size = 14, angle = 45, hjust = 1),
axis.text.y = element_text(face = "bold", size = 14),
axis.title = element_text(size = 16, face = "bold"),
plot.title = element_text(hjust = 0.5, face = "bold", size = 18),
panel.grid = element_blank(),
plot.margin = margin(10, 10, 10, 10)
)
print(p_manuscript_celllines)

# Save high-quality versions
ggsave("FigureX_CellLines_Annotation_BY_CELLLINE.png",
plot = p_manuscript_celllines, width = 14, height = 5, dpi = 600, bg = "white")
ggsave("FigureX_CellLines_Annotation_BY_CELLLINE.pdf",
plot = p_manuscript_celllines, width = 14, height = 5)
ggsave("FigureX_CellLines_Annotation_BY_CELLLINE.tiff",
plot = p_manuscript_celllines, width = 14, height = 5, dpi = 600, bg = "white")
cat("\n✓ Manuscript figure BY CELL LINE saved (PNG, PDF, TIFF at 600 DPI)\n")
✓ Manuscript figure BY CELL LINE saved (PNG, PDF, TIFF at 600 DPI)
COMBINED MANUSCRIPT FIGURE: Clusters × Cell Lines (Optional)
# Prepare data by CLUSTER × CELL LINE
cols_needed_combined <- c("seurat_clusters", "cell_line", "predicted.id", "predicted.celltype.l2",
"singler.immune", "scATOMIC_annotation")
heatmap_data_combined <- All_samples_Merged@meta.data[, cols_needed_combined] %>%
group_by(seurat_clusters, cell_line) %>%
summarise(
scPred = names(sort(table(predicted.id), decreasing = TRUE))[1],
Azimuth = names(sort(table(predicted.celltype.l2), decreasing = TRUE))[1],
SingleR = names(sort(table(singler.immune), decreasing = TRUE))[1],
scATOMIC = names(sort(table(scATOMIC_annotation), decreasing = TRUE))[1],
.groups = "drop"
) %>%
pivot_longer(cols = c(scPred, Azimuth, SingleR, scATOMIC),
names_to = "Method", values_to = "CellType") %>%
mutate(
Method = factor(Method, levels = c("scATOMIC", "SingleR", "Azimuth", "scPred")),
seurat_clusters = factor(seurat_clusters, levels = as.character(0:13)),
combined_label = paste0("C", seurat_clusters, "_", cell_line),
CellType_display = ifelse(nchar(CellType) > 15,
paste0(substr(CellType, 1, 12), "..."),
CellType)
)
# Create combined figure
p_manuscript_combined <- ggplot(heatmap_data_combined,
aes(x = combined_label, y = Method, label = CellType_display)) +
geom_tile(color = "grey20", fill = "white", linewidth = 0.5) +
geom_text(size = 2, hjust = 0.5, vjust = 0.5, fontface = "bold") +
labs(
title = "Cell Type Annotation - By Cluster × Cell Line",
x = "Cluster_CellLine",
y = "Annotation Method"
) +
theme_minimal(base_size = 14) +
theme(
axis.text.x = element_text(face = "bold", size = 8, angle = 90, hjust = 1, vjust = 0.5),
axis.text.y = element_text(face = "bold", size = 12),
axis.title = element_text(size = 14, face = "bold"),
plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
panel.grid = element_blank(),
plot.margin = margin(10, 10, 10, 10)
)
print(p_manuscript_combined)

# Save
ggsave("FigureX_CellLines_Annotation_CLUSTERS_x_CELLLINES.png",
plot = p_manuscript_combined, width = 24, height = 6, dpi = 600, bg = "white")
ggsave("FigureX_CellLines_Annotation_CLUSTERS_x_CELLLINES.pdf",
plot = p_manuscript_combined, width = 24, height = 6)
cat("\n✓ Combined manuscript figure (Cluster × Cell Line) saved\n")
✓ Combined manuscript figure (Cluster × Cell Line) saved
---
title: "Cross-Method Annotation Comparison - Cell lines Sézary Syndrome Dataset"
author: Nasir Mahmood Abbasi
date: "`r Sys.Date()`"
output:
  html_notebook:
    number_sections: true
    toc: true
    toc_float:
      collapsed: true
    theme: journal
---



# Load libraries
```{r setup, include=FALSE}
# Global knitr defaults applied to every chunk in this notebook; individual
# chunks below override fig.width / fig.height in their headers where needed.
knitr::opts_chunk$set(
  # Show the chunk source next to its output
  echo = TRUE,
  # Hide warnings and package messages in the rendered document
  warning = FALSE,
  message = FALSE,
  # Default figure size and resolution
  fig.width = 14,
  fig.height = 8,
  dpi = 300
)
# load libraries
    library(Seurat)
    library(dplyr)
    library(patchwork)
    library(ggplot2)
    library(pheatmap)
    library(scPred)
    library(celldex)
    library(SingleR)
    library(remotes)
    library(presto)
    library(SeuratDisk)
    library(SeuratData)
    library(Azimuth)

```




# Load RDS with all annotations
```{r}
# Load the merged Seurat object that already carries every annotation column
All_samples_Merged <- readRDS("/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/2-Benchmarking-Annotation_methods_05-11-2025/All_samples_Merged_Benchmarked_05-11-2025.rds")

# Expected cluster labels, in display order
cluster_levels <- as.character(0:13)

# Re-level the cluster column so tables and plots are ordered 0-13.
# factor() silently converts any label outside `cluster_levels` to NA, so
# count the cells that would be lost this way and report them.
clusters_before <- All_samples_Merged$seurat_clusters
All_samples_Merged$seurat_clusters <- factor(clusters_before,
                                              levels = cluster_levels)
n_unmatched <- sum(is.na(All_samples_Merged$seurat_clusters) &
                     !is.na(clusters_before))
if (n_unmatched > 0) {
  # cat() rather than warning(): the setup chunk sets warning = FALSE, which
  # would hide a warning in the rendered notebook.
  cat("NOTE:", n_unmatched,
      "cells had a cluster label outside the expected levels and are now NA\n")
}

# Set identities only after re-levelling so they are taken from the ordered
# factor rather than from the column as stored in the RDS
Idents(All_samples_Merged) <- "seurat_clusters"

cat("Total cells:", ncol(All_samples_Merged), "\n")
cat("Total clusters:", length(unique(All_samples_Merged$seurat_clusters)), "\n\n")
# Derive the range in the heading from the levels so it cannot drift from them
cat(sprintf("Clusters (ordered %s-%s):\n",
            cluster_levels[1], tail(cluster_levels, 1)))
print(table(All_samples_Merged$seurat_clusters))

cat("\n\nSamples (ordered):\n")
print(table(All_samples_Merged$cell_line))
```
# Create Annotation Summary Table
## Create Annotation Summary Table by Clusters
```{r }
library(dplyr)
library(knitr)
library(kableExtra)

# Most frequent non-NA label in `x` (table() ignores NA by default).
# Returns NA_character_ when the group holds no non-NA label, so an empty
# group is never reported under an arbitrary zero-count factor level.
dominant_label <- function(x) {
  counts <- table(x)
  if (length(counts) == 0L || max(counts) == 0L) {
    return(NA_character_)
  }
  names(sort(counts, decreasing = TRUE))[1]
}

# Extract dominant label per cluster per method
cols_needed <- c("seurat_clusters", "predicted.id", "predicted.celltype.l2",
                 "singler.immune", "scATOMIC_annotation")

annotation_table_cluster <- All_samples_Merged@meta.data[, cols_needed] %>%
  group_by(seurat_clusters) %>%
  summarise(
    scPred = dominant_label(predicted.id),
    Azimuth = dominant_label(predicted.celltype.l2),
    SingleR = dominant_label(singler.immune),
    scATOMIC = dominant_label(scATOMIC_annotation),
    n_cells = n(),
    .groups = "drop"
  ) %>%
  arrange(seurat_clusters)

# Add consensus column: TRUE only when all four dominant labels are the same
# string. A missing label makes the comparison NA; coalesce() turns that into
# FALSE so the colour lookup below never receives NA.
# NOTE(review): the methods appear to use different label vocabularies (e.g.
# "CD4 T cell" vs "CD4 TCM"), so an exact string match may never be TRUE
# without harmonising the labels first - confirm this is intended.
annotation_table_cluster <- annotation_table_cluster %>%
  mutate(
    Consensus = coalesce(
      scPred == Azimuth & Azimuth == SingleR & SingleR == scATOMIC,
      FALSE
    )
  )

# Locate the Consensus column by name instead of a hard-coded position
consensus_col <- which(names(annotation_table_cluster) == "Consensus")

# Display
cat("\n=== ANNOTATION BY CLUSTER ===\n\n")
kable(annotation_table_cluster, caption = "Dominant Cell Type Annotation per Cluster") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE, font_size = 10) %>%
  column_spec(consensus_col,
              background = ifelse(annotation_table_cluster$Consensus,
                                  "#d4edda", "#f8d7da"))

# Save
write.csv(annotation_table_cluster, "CellLines_annotation_BY_CLUSTER_04-02-2026.csv",
          row.names = FALSE)

cat("\n✓ Annotation table by cluster saved\n")

```


## Create Annotation Summary Table - BY CELL LINE
```{r }

library(dplyr)
library(knitr)
library(kableExtra)

# Most frequent non-NA label in `x` (table() ignores NA by default).
# Returns NA_character_ when the group holds no non-NA label, so an empty
# group is never reported under an arbitrary zero-count factor level.
dominant_label <- function(x) {
  counts <- table(x)
  if (length(counts) == 0L || max(counts) == 0L) {
    return(NA_character_)
  }
  names(sort(counts, decreasing = TRUE))[1]
}

# Extract dominant label per CELL LINE per method
cols_needed <- c("cell_line", "predicted.id", "predicted.celltype.l2",
                 "singler.immune", "scATOMIC_annotation")

annotation_table_cellline <- All_samples_Merged@meta.data[, cols_needed] %>%
  group_by(cell_line) %>%
  summarise(
    scPred = dominant_label(predicted.id),
    Azimuth = dominant_label(predicted.celltype.l2),
    SingleR = dominant_label(singler.immune),
    scATOMIC = dominant_label(scATOMIC_annotation),
    n_cells = n(),
    .groups = "drop"
  ) %>%
  arrange(cell_line)

# Add consensus column: TRUE only when all four dominant labels are the same
# string. A missing label makes the comparison NA; coalesce() turns that into
# FALSE so the colour lookup below never receives NA.
# NOTE(review): the methods appear to use different label vocabularies, so an
# exact string match may never be TRUE without harmonising the labels first -
# confirm this is intended.
annotation_table_cellline <- annotation_table_cellline %>%
  mutate(
    Consensus = coalesce(
      scPred == Azimuth & Azimuth == SingleR & SingleR == scATOMIC,
      FALSE
    )
  )

# Locate the Consensus column by name instead of a hard-coded position
consensus_col <- which(names(annotation_table_cellline) == "Consensus")

# Display
cat("\n=== ANNOTATION BY CELL LINE ===\n\n")
kable(annotation_table_cellline, caption = "Dominant Cell Type Annotation per Cell Line") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE, font_size = 10) %>%
  column_spec(consensus_col,
              background = ifelse(annotation_table_cellline$Consensus,
                                  "#d4edda", "#f8d7da"))

# Save
write.csv(annotation_table_cellline, "CellLines_annotation_BY_CELLLINE_04-02-2026.csv",
          row.names = FALSE)

cat("\n✓ Annotation table by cell line saved\n")

```

## Create Annotation Summary Table - Combined Table (Cluster × Cell Line)
```{r }
library(dplyr)
library(knitr)
library(kableExtra)

# Most frequent non-NA label in `x` (table() ignores NA by default).
# Returns NA_character_ when the group holds no non-NA label, so an empty
# group is never reported under an arbitrary zero-count factor level. This
# matters here because some cluster x cell-line groups contain a single cell.
dominant_label <- function(x) {
  counts <- table(x)
  if (length(counts) == 0L || max(counts) == 0L) {
    return(NA_character_)
  }
  names(sort(counts, decreasing = TRUE))[1]
}

# Extract dominant label per CLUSTER × CELL LINE × METHOD
cols_needed <- c("seurat_clusters", "cell_line", "predicted.id", "predicted.celltype.l2",
                 "singler.immune", "scATOMIC_annotation")

annotation_table_combined <- All_samples_Merged@meta.data[, cols_needed] %>%
  group_by(seurat_clusters, cell_line) %>%
  summarise(
    scPred = dominant_label(predicted.id),
    Azimuth = dominant_label(predicted.celltype.l2),
    SingleR = dominant_label(singler.immune),
    scATOMIC = dominant_label(scATOMIC_annotation),
    n_cells = n(),
    .groups = "drop"
  ) %>%
  arrange(cell_line, seurat_clusters)

# Add consensus column: TRUE only when all four dominant labels are the same
# string. A missing label makes the comparison NA; coalesce() turns that into
# FALSE so the colour lookup below never receives NA.
# NOTE(review): the methods appear to use different label vocabularies, so an
# exact string match may never be TRUE without harmonising the labels first -
# confirm this is intended.
annotation_table_combined <- annotation_table_combined %>%
  mutate(
    Consensus = coalesce(
      scPred == Azimuth & Azimuth == SingleR & SingleR == scATOMIC,
      FALSE
    )
  )

# Take the preview once so the table and its colour vector always cover the
# same rows, and locate the Consensus column by name rather than position
preview_rows <- head(annotation_table_combined, 20)
consensus_col <- which(names(preview_rows) == "Consensus")

# Display (first 20 rows)
cat("\n=== ANNOTATION BY CLUSTER × CELL LINE (First 20 rows) ===\n\n")
kable(preview_rows,
      caption = "Dominant Annotation per Cluster × Cell Line") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE, font_size = 9) %>%
  column_spec(consensus_col,
              background = ifelse(preview_rows$Consensus,
                                  "#d4edda", "#f8d7da"))

# Save (full table, not just the preview)
write.csv(annotation_table_combined,
          "CellLines_annotation_BY_CLUSTER_AND_CELLLINE_04-02-2026.csv",
          row.names = FALSE)

cat("\n✓ Combined annotation table (cluster × cell line) saved\n")
cat("Total rows:", nrow(annotation_table_combined), "\n")

```
# Create Annotation Summary
```{r }
library(purrr)  # Make sure this is loaded

# Metadata column of each annotation method, mapped to its display name.
# The order here is also the order of the `method` factor levels below.
method_display_names <- c(
  "predicted.id"          = "scPred",
  "predicted.celltype.l2" = "Azimuth.l2",      # Azimuth (l2 prediction)
  "singler.immune"        = "SingleR(Immune)",
  "scATOMIC_annotation"   = "scATOMIC"
)

# Define annotation methods (metadata column names)
methods <- names(method_display_names)

# Create summary - most common label per cluster for each method.
# Cells with a missing label for a method are ignored for that method.
annotation_summary <- map_dfr(methods, function(m) {
  All_samples_Merged@meta.data %>%
    filter(!is.na(.data[[m]])) %>%
    group_by(seurat_clusters, label = .data[[m]]) %>%
    summarise(n = n(), .groups = "drop") %>%
    group_by(seurat_clusters) %>%
    # Keep exactly one row per cluster, even when counts are tied
    slice_max(n, n = 1, with_ties = FALSE) %>%
    # Drop the grouping so the combined table is a plain tibble
    ungroup() %>%
    mutate(method = m)
})

# Rename methods for display, fix the method order, and ensure cluster order
annotation_summary <- annotation_summary %>%
  mutate(
    method = factor(method,
                    levels = names(method_display_names),
                    labels = unname(method_display_names)),
    seurat_clusters = factor(seurat_clusters,
                             levels = as.character(0:13))  # Adjust range
  )


```

## Summary Statistics
```{r }
# How many cells carry a (non-NA) label from each annotation method
cat("\n=== Cells Annotated Per Method ===\n\n")
method_cols <- c("predicted.id", "predicted.celltype.l2", "singler.immune", "scATOMIC_annotation")
total_cells <- ncol(All_samples_Merged)
n_annotated <- vapply(
  method_cols,
  function(col) sum(!is.na(All_samples_Merged@meta.data[[col]])),
  integer(1)
)
# sprintf() is vectorised, so this emits one formatted line per method
cat(
  sprintf("%-25s: %5d cells (%.1f%%)\n",
          method_cols, n_annotated, n_annotated / total_cells * 100),
  sep = ""
)

# How many distinct dominant labels each method assigns across clusters
cat("\n=== Unique Labels Per Method ===\n\n")
labels_per_method <- annotation_summary %>%
  group_by(method) %>%
  summarise(n_unique_labels = n_distinct(label))
print(labels_per_method)

```
# Basic Heatmap Visualization
```{r, fig.height= 5, fig.width= 18}
library(ggplot2)

# Tile grid: one column per cluster, one row per annotation method, each tile
# filled by the dominant label that method assigned to the cluster
annotation_summary %>%
  ggplot(aes(x = seurat_clusters, y = method, fill = label)) +
  geom_tile(color = "white", linewidth = 0.5) +
  scale_fill_discrete() +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
    axis.title = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  labs(
    title = "Cross-Method Comparison of Cell Type Annotations",
    x = "Seurat Clusters",
    y = "Annotation Method",
    fill = "Assigned Cell Type"
  )
```

## Enhanced Heatmap with Custom Colors
```{r, fig.height= 5, fig.width= 18 }
library(RColorBrewer)

# One fill colour per distinct label, interpolated from the 8-colour Set2 palette
set2_ramp <- colorRampPalette(brewer.pal(8, "Set2"))
num_colors <- length(unique(annotation_summary$label))
my_colors <- set2_ramp(num_colors)

# Theme shared by the on-screen and saved versions of the heatmap
heatmap_theme <- theme_minimal(base_size = 14) +
  theme(
    legend.position = "right",
    plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
    axis.title = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

p_heatmap <- annotation_summary %>%
  ggplot(aes(x = seurat_clusters, y = method, fill = label)) +
  geom_tile(color = "white", linewidth = 0.5) +
  scale_fill_manual(values = my_colors) +
  # drop = FALSE keeps every cluster level on the axis, even without a tile
  scale_x_discrete(drop = FALSE) +
  labs(
    title = "Cross-Method Comparison of Cell Type Annotations",
    x = "Seurat Clusters",
    y = "Annotation Method",
    fill = "Assigned Cell Type"
  ) +
  heatmap_theme

print(p_heatmap)

# Write the figure to disk as a 300-dpi PNG and as a PDF
ggsave("CellLines_annotation_heatmap_04-02-2026.png",
       plot = p_heatmap, width = 14, height = 5, dpi = 300, bg = "white")
ggsave("CellLines_annotation_heatmap_04-02-2026.pdf",
       plot = p_heatmap, width = 14, height = 5)

cat("\n✓ Heatmap saved as PNG and PDF\n")


```
# MANUSCRIPT FIGURE
## MANUSCRIPT FIGURE: Text-Based Heatmap BY CLUSTERS
```{r, fig.height= 5, fig.width= 30 }
library(ggplot2)
library(dplyr)
library(tidyr)

# Manuscript figure: one tile per (cluster, annotation method), labelled with
# the most frequent cell-type call that method produced within the cluster.

# Most frequent value of `x` (table() drops NA by default).
dominant_label <- function(x) {
  names(sort(table(x), decreasing = TRUE))[1]
}

# Shorten labels longer than `max_chars` to `max_chars` characters in total,
# ending in "...", so the text fits inside a tile.
truncate_label <- function(x, max_chars) {
  ifelse(nchar(x) > max_chars,
         paste0(substr(x, 1, max_chars - 3), "..."),
         x)
}

# Prepare data by CLUSTERS
cols_needed <- c("seurat_clusters", "predicted.id", "predicted.celltype.l2",
                 "singler.immune", "scATOMIC_annotation")
cluster_meta <- All_samples_Merged@meta.data[, cols_needed]

# Derive the cluster order from the data instead of hard-coding 0:13, so no
# cluster is silently turned into NA when the clustering changes. Numeric ids
# are sorted numerically (0, 1, 2, ..., 10, 11) rather than alphabetically;
# non-numeric ids fall back to alphabetical order.
cluster_ids <- unique(as.character(cluster_meta$seurat_clusters))
cluster_ids <- cluster_ids[!is.na(cluster_ids)]
cluster_numbers <- suppressWarnings(as.integer(cluster_ids))
cluster_levels <- if (anyNA(cluster_numbers)) {
  sort(cluster_ids)
} else {
  cluster_ids[order(cluster_numbers)]
}

heatmap_data_clusters <- cluster_meta %>%
  group_by(seurat_clusters) %>%
  summarise(
    scPred = dominant_label(predicted.id),
    Azimuth = dominant_label(predicted.celltype.l2),
    SingleR = dominant_label(singler.immune),
    scATOMIC = dominant_label(scATOMIC_annotation),
    .groups = "drop"
  ) %>%
  pivot_longer(cols = c(scPred, Azimuth, SingleR, scATOMIC),
               names_to = "Method", values_to = "CellType") %>%
  mutate(
    # Level order fixes the row order of the methods on the y axis
    Method = factor(Method, levels = c("scATOMIC", "SingleR", "Azimuth", "scPred")),
    seurat_clusters = factor(seurat_clusters, levels = cluster_levels),
    CellType_display = truncate_label(CellType, 20)
  )

# Create manuscript figure BY CLUSTERS
p_manuscript_clusters <- ggplot(heatmap_data_clusters,
                       aes(x = seurat_clusters, y = Method, label = CellType_display)) +
  geom_tile(color = "grey20", fill = "white", linewidth = 0.8) +
  geom_text(size = 3, hjust = 0.5, vjust = 0.5, fontface = "bold") +
  labs(
    title = "Discordance in Automated Cell Type Annotation - Cell Lines by Clusters",
    x = "Seurat Cluster",
    y = "Annotation Method"
  ) +
  theme_minimal(base_size = 16) +
  theme(
    axis.text.x = element_text(face = "bold", size = 14),
    axis.text.y = element_text(face = "bold", size = 14),
    axis.title = element_text(size = 16, face = "bold"),
    plot.title = element_text(hjust = 0.5, face = "bold", size = 18),
    panel.grid = element_blank(),
    plot.margin = margin(10, 10, 10, 10)
  )

print(p_manuscript_clusters)

# Save high-quality versions
ggsave("FigureX_CellLines_Annotation_BY_CLUSTERS.png",
       plot = p_manuscript_clusters, width = 30, height = 5, dpi = 600, bg = "white")
ggsave("FigureX_CellLines_Annotation_BY_CLUSTERS.pdf",
       plot = p_manuscript_clusters, width = 30, height = 5)
ggsave("FigureX_CellLines_Annotation_BY_CLUSTERS.tiff",
       plot = p_manuscript_clusters, width = 30, height = 5, dpi = 600, bg = "white")

cat("\n✓ Manuscript figure BY CLUSTERS saved (PNG, PDF, TIFF at 600 DPI)\n")

```

##  MANUSCRIPT FIGURE: Text-Based Heatmap BY CELL LINES
```{r, fig.height= 5, fig.width= 18 }
# Manuscript figure: one tile per (cell line, annotation method), labelled
# with the most frequent cell-type call that method produced for the line.

# Most frequent value of `x` (table() drops NA by default).
dominant_label <- function(x) {
  names(sort(table(x), decreasing = TRUE))[1]
}

# Shorten labels longer than `max_chars` to `max_chars` characters in total,
# ending in "...", so the text fits inside a tile.
truncate_label <- function(x, max_chars) {
  ifelse(nchar(x) > max_chars,
         paste0(substr(x, 1, max_chars - 3), "..."),
         x)
}

# Metadata columns required for the per-cell-line summary
cols_needed_cellline <- c(
  "cell_line",
  "predicted.id",
  "predicted.celltype.l2",
  "singler.immune",
  "scATOMIC_annotation"
)

# Step 1: dominant label per cell line for each annotation method (wide form)
dominant_by_cellline <- All_samples_Merged@meta.data[, cols_needed_cellline] %>%
  group_by(cell_line) %>%
  summarise(
    scPred = dominant_label(predicted.id),
    Azimuth = dominant_label(predicted.celltype.l2),
    SingleR = dominant_label(singler.immune),
    scATOMIC = dominant_label(scATOMIC_annotation),
    .groups = "drop"
  )

# Step 2: long form (one row per cell line x method) plus display labels
heatmap_data_celllines <- dominant_by_cellline %>%
  pivot_longer(
    cols = c(scPred, Azimuth, SingleR, scATOMIC),
    names_to = "Method",
    values_to = "CellType"
  ) %>%
  mutate(
    # Level order fixes the row order of the methods on the y axis
    Method = factor(Method, levels = c("scATOMIC", "SingleR", "Azimuth", "scPred")),
    CellType_display = truncate_label(CellType, 20)
  )

# Text-only heatmap: white tiles with the (possibly truncated) label inside
p_manuscript_celllines <- ggplot(
  data = heatmap_data_celllines,
  mapping = aes(x = cell_line, y = Method, label = CellType_display)
) +
  geom_tile(fill = "white", colour = "grey20", linewidth = 0.8) +
  geom_text(fontface = "bold", size = 3, hjust = 0.5, vjust = 0.5) +
  labs(
    x = "Cell Line",
    y = "Annotation Method",
    title = "Discordance in Automated Cell Type Annotation - By Cell Line"
  ) +
  theme_minimal(base_size = 16) +
  theme(
    plot.title = element_text(size = 18, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 16, face = "bold"),
    axis.text.x = element_text(size = 14, face = "bold", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 14, face = "bold"),
    panel.grid = element_blank(),
    plot.margin = margin(10, 10, 10, 10)
  )

print(p_manuscript_celllines)

# Export at publication quality: PNG and TIFF at 600 dpi, plus vector PDF
ggsave(
  filename = "FigureX_CellLines_Annotation_BY_CELLLINE.png",
  plot = p_manuscript_celllines,
  width = 14, height = 5, dpi = 600, bg = "white"
)
ggsave(
  filename = "FigureX_CellLines_Annotation_BY_CELLLINE.pdf",
  plot = p_manuscript_celllines,
  width = 14, height = 5
)
ggsave(
  filename = "FigureX_CellLines_Annotation_BY_CELLLINE.tiff",
  plot = p_manuscript_celllines,
  width = 14, height = 5, dpi = 600, bg = "white"
)

cat("\n✓ Manuscript figure BY CELL LINE saved (PNG, PDF, TIFF at 600 DPI)\n")

```
##  COMBINED MANUSCRIPT FIGURE: Clusters × Cell Lines (Optional)
```{r, fig.height= 5, fig.width= 48 }
# Combined figure: one tile per (cluster x cell line, annotation method),
# labelled with the most frequent cell-type call within that combination.

# Most frequent value of `x` (table() drops NA by default).
dominant_label <- function(x) {
  names(sort(table(x), decreasing = TRUE))[1]
}

# Shorten labels longer than `max_chars` to `max_chars` characters in total,
# ending in "...", so the text fits inside a tile.
truncate_label <- function(x, max_chars) {
  ifelse(nchar(x) > max_chars,
         paste0(substr(x, 1, max_chars - 3), "..."),
         x)
}

# Prepare data by CLUSTER × CELL LINE
cols_needed_combined <- c("seurat_clusters", "cell_line", "predicted.id", "predicted.celltype.l2",
                          "singler.immune", "scATOMIC_annotation")
combined_meta <- All_samples_Merged@meta.data[, cols_needed_combined]

# Derive the cluster order from the data instead of hard-coding 0:13, so no
# cluster is silently turned into NA when the clustering changes. Numeric ids
# are sorted numerically; non-numeric ids fall back to alphabetical order.
cluster_ids <- unique(as.character(combined_meta$seurat_clusters))
cluster_ids <- cluster_ids[!is.na(cluster_ids)]
cluster_numbers <- suppressWarnings(as.integer(cluster_ids))
cluster_levels <- if (anyNA(cluster_numbers)) {
  sort(cluster_ids)
} else {
  cluster_ids[order(cluster_numbers)]
}

heatmap_data_combined <- combined_meta %>%
  group_by(seurat_clusters, cell_line) %>%
  summarise(
    scPred = dominant_label(predicted.id),
    Azimuth = dominant_label(predicted.celltype.l2),
    SingleR = dominant_label(singler.immune),
    scATOMIC = dominant_label(scATOMIC_annotation),
    .groups = "drop"
  ) %>%
  pivot_longer(cols = c(scPred, Azimuth, SingleR, scATOMIC),
               names_to = "Method", values_to = "CellType") %>%
  mutate(
    # Level order fixes the row order of the methods on the y axis
    Method = factor(Method, levels = c("scATOMIC", "SingleR", "Azimuth", "scPred")),
    seurat_clusters = factor(seurat_clusters, levels = cluster_levels),
    combined_label = paste0("C", seurat_clusters, "_", cell_line),
    # A plain character x variable is laid out alphabetically by ggplot
    # (C0, C1, C10, C11, ..., C2), which discards the cluster ordering above.
    # Store the label as a factor whose levels follow cluster order, then
    # cell-line order, so the axis reads C0_*, C1_*, C2_*, ...
    combined_label = factor(
      combined_label,
      levels = unique(combined_label[order(seurat_clusters, cell_line)])
    ),
    CellType_display = truncate_label(CellType, 15)
  )

# Create combined figure
p_manuscript_combined <- ggplot(heatmap_data_combined,
                       aes(x = combined_label, y = Method, label = CellType_display)) +
  geom_tile(color = "grey20", fill = "white", linewidth = 0.5) +
  geom_text(size = 2, hjust = 0.5, vjust = 0.5, fontface = "bold") +
  labs(
    title = "Cell Type Annotation - By Cluster × Cell Line",
    x = "Cluster_CellLine",
    y = "Annotation Method"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(face = "bold", size = 8, angle = 90, hjust = 1, vjust = 0.5),
    axis.text.y = element_text(face = "bold", size = 12),
    axis.title = element_text(size = 14, face = "bold"),
    plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
    panel.grid = element_blank(),
    plot.margin = margin(10, 10, 10, 10)
  )

print(p_manuscript_combined)

# Save
ggsave("FigureX_CellLines_Annotation_CLUSTERS_x_CELLLINES.png",
       plot = p_manuscript_combined, width = 24, height = 6, dpi = 600, bg = "white")
ggsave("FigureX_CellLines_Annotation_CLUSTERS_x_CELLLINES.pdf",
       plot = p_manuscript_combined, width = 24, height = 6)

cat("\n✓ Combined manuscript figure (Cluster × Cell Line) saved\n")

```







# sessionInfo()
```{r}

# Print the R version, platform and attached/loaded package versions so the
# environment used for this analysis is captured in the rendered output.
sessionInfo()


```


