1 Load libraries

2 Load RDS with all annotations

# Order the cluster labels numerically (0-13) BEFORE setting the identities,
# so that the active identities are taken from the ordered factor.
cluster_levels <- as.character(0:13)

# factor() silently converts any label that is not listed in `levels` to NA,
# which would drop cells from every per-cluster summary. Fail loudly instead.
observed_clusters <- unique(as.character(All_samples_Merged$seurat_clusters))
unexpected_clusters <- observed_clusters[
  !is.na(observed_clusters) & !(observed_clusters %in% cluster_levels)
]
if (length(unexpected_clusters) > 0) {
  stop(
    "Cluster labels outside 0-13: ",
    paste(unexpected_clusters, collapse = ", "),
    call. = FALSE
  )
}

All_samples_Merged$seurat_clusters <- factor(
  All_samples_Merged$seurat_clusters,
  levels = cluster_levels
)

Idents(All_samples_Merged) <- "seurat_clusters"




cat("Total cells:", ncol(All_samples_Merged), "\n")
Total cells: 49305 
cat("Total clusters:", length(unique(All_samples_Merged$seurat_clusters)), "\n\n")
Total clusters: 14 
cat("Clusters (ordered 0-13):\n")
Clusters (ordered 0-13):
print(table(All_samples_Merged$seurat_clusters))

   0    1    2    3    4    5    6    7    8    9   10   11   12   13 
6789 5275 4663 4661 4086 3634 3536 3409 3338 3273 3212 1675 1063  691 
cat("\n\nSamples (ordered):\n")


Samples (ordered):
print(table(All_samples_Merged$cell_line))

           L1            L2            L3            L4            L5            L6            L7 CD4Tcells_lab 
         5825          5935          6428          6006          6022          5148          5331          5106 
CD4Tcells_10x 
         3504 

3 Create Annotation Summary Table

3.1 Create Annotation Summary Table by Clusters

library(dplyr)
library(knitr)
library(kableExtra)

# Metadata columns used below: the cluster id plus one label column per
# annotation method (scPred, Azimuth level 2, SingleR, scATOMIC).
cols_needed <- c(
  "seurat_clusters",
  "predicted.id",
  "predicted.celltype.l2",
  "singler.immune",
  "scATOMIC_annotation"
)

# One row per cluster: for each method, the first label after sorting the
# label counts in decreasing order, plus the number of cells in the cluster.
# Consensus is TRUE only when all four label strings are exactly equal.
annotation_table_cluster <- All_samples_Merged@meta.data[, cols_needed] |>
  group_by(seurat_clusters) |>
  summarise(
    scPred = names(sort(table(predicted.id), decreasing = TRUE))[1],
    Azimuth = names(sort(table(predicted.celltype.l2), decreasing = TRUE))[1],
    SingleR = names(sort(table(singler.immune), decreasing = TRUE))[1],
    scATOMIC = names(sort(table(scATOMIC_annotation), decreasing = TRUE))[1],
    n_cells = n(),
    .groups = "drop"
  ) |>
  arrange(seurat_clusters) |>
  mutate(
    Consensus = scPred == Azimuth & Azimuth == SingleR & SingleR == scATOMIC
  )

# Section banner printed above the rendered table
cat("\n=== ANNOTATION BY CLUSTER ===\n\n")

=== ANNOTATION BY CLUSTER ===
# Render the per-cluster table. Column 7 (Consensus) is shaded green where
# Consensus is TRUE and red where it is FALSE.
kable(
  annotation_table_cluster,
  caption = "Dominant Cell Type Annotation per Cluster"
) |>
  kable_styling(
    full_width = FALSE,
    font_size = 10,
    bootstrap_options = c("striped", "hover", "condensed")
  ) |>
  column_spec(
    column = 7,
    background = ifelse(annotation_table_cluster$Consensus, "#d4edda", "#f8d7da")
  )
Dominant Cell Type Annotation per Cluster
seurat_clusters scPred Azimuth SingleR scATOMIC n_cells Consensus
0 Plasma cell CD4 Proliferating T cells, CD4+, memory TREG Lung Cancer Cell 6789 FALSE
1 CD4 T cell NK Proliferating NK cells Effector/Memory CD4+ T cells 5275 FALSE
2 Plasma cell CD4 Proliferating T cells, CD4+, memory TREG Lung Cancer Cell 4663 FALSE
3 CD4 T cell CD4 TCM T cells, CD4+, naive Naive CD4+ T cells 4661 FALSE
4 Plasma cell CD4 Proliferating T cells, CD4+, memory TREG Lung Cancer Cell 4086 FALSE
5 CD4 T cell CD4 Proliferating T cells, CD4+, memory TREG Effector/Memory CD4+ T cells 3634 FALSE
6 Plasma cell CD4 Proliferating T cells, CD4+, memory TREG Lung Cancer Cell 3536 FALSE
7 CD4 T cell CD4 Proliferating T cells, CD4+, memory TREG Effector/Memory CD4+ T cells 3409 FALSE
8 Plasma cell CD4 Proliferating T cells, CD4+, memory TREG Lung Cancer Cell 3338 FALSE
9 CD4 T cell CD4 TCM T cells, CD4+, memory TREG Effector/Memory CD4+ T cells 3273 FALSE
10 CD4 T cell CD4 TCM T cells, CD4+, TFH Naive CD4+ T cells 3212 FALSE
11 CD4 T cell CD4 TCM T cells, CD4+, memory TREG Effector/Memory CD4+ T cells 1675 FALSE
12 Plasma cell CD4 Proliferating T cells, CD4+, memory TREG Effector/Memory CD4+ T cells 1063 FALSE
13 Plasma cell CD4 Proliferating T cells, CD4+, memory TREG Lung Cancer Cell 691 FALSE

# Export the per-cluster summary as CSV (without a row-name column) and
# print a confirmation message.
write.csv(
  x = annotation_table_cluster,
  file = "CellLines_annotation_BY_CLUSTER_04-02-2026.csv",
  row.names = FALSE
)
cat("\n✓ Annotation table by cluster saved\n")

✓ Annotation table by cluster saved

3.2 Create Annotation Summary Table - BY CELL LINE


library(dplyr)
library(knitr)
library(kableExtra)

# Metadata columns used below: the cell line plus one label column per
# annotation method (scPred, Azimuth level 2, SingleR, scATOMIC).
cols_needed <- c(
  "cell_line",
  "predicted.id",
  "predicted.celltype.l2",
  "singler.immune",
  "scATOMIC_annotation"
)

# One row per cell line: for each method, the first label after sorting the
# label counts in decreasing order, plus the number of cells in the cell line.
# Consensus is TRUE only when all four label strings are exactly equal.
annotation_table_cellline <- All_samples_Merged@meta.data[, cols_needed] |>
  group_by(cell_line) |>
  summarise(
    scPred = names(sort(table(predicted.id), decreasing = TRUE))[1],
    Azimuth = names(sort(table(predicted.celltype.l2), decreasing = TRUE))[1],
    SingleR = names(sort(table(singler.immune), decreasing = TRUE))[1],
    scATOMIC = names(sort(table(scATOMIC_annotation), decreasing = TRUE))[1],
    n_cells = n(),
    .groups = "drop"
  ) |>
  arrange(cell_line) |>
  mutate(
    Consensus = scPred == Azimuth & Azimuth == SingleR & SingleR == scATOMIC
  )

# Section banner printed above the rendered table
cat("\n=== ANNOTATION BY CELL LINE ===\n\n")

=== ANNOTATION BY CELL LINE ===
# Render the per-cell-line table. Column 7 (Consensus) is shaded green where
# Consensus is TRUE and red where it is FALSE.
kable(
  annotation_table_cellline,
  caption = "Dominant Cell Type Annotation per Cell Line"
) |>
  kable_styling(
    full_width = FALSE,
    font_size = 10,
    bootstrap_options = c("striped", "hover", "condensed")
  ) |>
  column_spec(
    column = 7,
    background = ifelse(annotation_table_cellline$Consensus, "#d4edda", "#f8d7da")
  )
Dominant Cell Type Annotation per Cell Line
cell_line scPred Azimuth SingleR scATOMIC n_cells Consensus
L1 CD4 T cell CD4 TCM T cells, CD4+, Th2 Effector/Memory CD4+ T cells 5825 FALSE
L2 CD4 T cell NK Proliferating NK cells Effector/Memory CD4+ T cells 5935 FALSE
L3 Plasma cell CD4 Proliferating T cells, CD4+, memory TREG Lung Cancer Cell 6428 FALSE
L4 Plasma cell CD4 Proliferating T cells, CD4+, memory TREG Lung Cancer Cell 6006 FALSE
L5 Plasma cell CD4 Proliferating T cells, CD4+, memory TREG Lung Cancer Cell 6022 FALSE
L6 cDC CD4 Proliferating T cells, CD4+, memory TREG Effector/Memory CD4+ T cells 5148 FALSE
L7 Plasma cell CD4 Proliferating T cells, CD4+, memory TREG Lung Cancer Cell 5331 FALSE
CD4Tcells_lab CD4 T cell CD4 TCM T cells, CD4+, TFH Naive CD4+ T cells 5106 FALSE
CD4Tcells_10x CD4 T cell CD4 TCM T cells, CD4+, naive Naive CD4+ T cells 3504 FALSE

# Export the per-cell-line summary as CSV (without a row-name column) and
# print a confirmation message.
write.csv(
  x = annotation_table_cellline,
  file = "CellLines_annotation_BY_CELLLINE_04-02-2026.csv",
  row.names = FALSE
)
cat("\n✓ Annotation table by cell line saved\n")

✓ Annotation table by cell line saved

3.3 Create Annotation Summary Table - Combined Table (Cluster × Cell Line)

library(dplyr)
library(knitr)
library(kableExtra)

# Metadata columns used below: both grouping variables plus one label column
# per annotation method (scPred, Azimuth level 2, SingleR, scATOMIC).
cols_needed <- c("seurat_clusters", "cell_line", "predicted.id", "predicted.celltype.l2", 
                 "singler.immune", "scATOMIC_annotation")

# Most frequent non-missing label in `x`.
# Returns NA_character_ when the group has no usable label at all (every value
# NA, or only zero-count factor levels). Cluster x cell-line groups can hold a
# single cell, so such groups are possible; without the guard a zero-count
# factor level could be reported as the "dominant" label.
dominant_label <- function(x) {
  counts <- table(x)
  if (length(counts) == 0L || all(counts == 0L)) {
    return(NA_character_)
  }
  names(sort(counts, decreasing = TRUE))[1]
}

# One row per cluster x cell line: dominant label of each method and group size
annotation_table_combined <- All_samples_Merged@meta.data[, cols_needed] %>%
  group_by(seurat_clusters, cell_line) %>%
  summarise(
    scPred = dominant_label(predicted.id),
    Azimuth = dominant_label(predicted.celltype.l2),
    SingleR = dominant_label(singler.immune),
    scATOMIC = dominant_label(scATOMIC_annotation),
    n_cells = n(),
    .groups = "drop"
  ) %>%
  arrange(cell_line, seurat_clusters)

# Add consensus column.
# `%in% TRUE` maps an NA comparison (a method without any label in the group)
# to FALSE, so the CSV and the table colouring never contain NA.
# NOTE(review): the four methods use different label vocabularies
# (e.g. "CD4 T cell" vs "CD4 TCM"), so exact string equality is FALSE for every
# row shown; consider mapping labels to a shared vocabulary first.
annotation_table_combined <- annotation_table_combined %>%
  mutate(
    Consensus = (scPred == Azimuth & Azimuth == SingleR & SingleR == scATOMIC) %in% TRUE
  )

# Display (first 20 rows)
cat("\n=== ANNOTATION BY CLUSTER × CELL LINE (First 20 rows) ===\n\n")

=== ANNOTATION BY CLUSTER × CELL LINE (First 20 rows) ===
# Render the first 20 rows of the cluster x cell-line table. Column 8
# (Consensus) is shaded green where Consensus is TRUE and red where FALSE.
kable(
  head(annotation_table_combined, 20),
  caption = "Dominant Annotation per Cluster × Cell Line"
) |>
  kable_styling(
    full_width = FALSE,
    font_size = 9,
    bootstrap_options = c("striped", "hover", "condensed")
  ) |>
  column_spec(
    column = 8,
    background = ifelse(
      head(annotation_table_combined$Consensus, 20),
      "#d4edda",
      "#f8d7da"
    )
  )
Dominant Annotation per Cluster × Cell Line
seurat_clusters cell_line scPred Azimuth SingleR scATOMIC n_cells Consensus
0 L1 CD4 T cell CD4 Proliferating T cells, CD4+, memory TREG Effector/Memory CD4+ T cells 7 FALSE
1 L1 CD4 T cell CD4 Proliferating T cells, CD4+, memory TREG Effector/Memory CD4+ T cells 91 FALSE
2 L1 CD4 T cell CD4 TCM T cells, CD4+, Th2 Effector/Memory CD4+ T cells 101 FALSE
4 L1 B cell CD4 TCM T cells, CD4+, memory TREG Effector/Memory CD4+ T cells 1 FALSE
5 L1 CD4 T cell CD4 Proliferating T cells, CD4+, Th2 Effector/Memory CD4+ T cells 2514 FALSE
6 L1 CD4 T cell CD4 TCM T cells, CD4+, Th2 Effector/Memory CD4+ T cells 44 FALSE
7 L1 CD4 T cell CD4 Proliferating T cells, CD4+, memory TREG Effector/Memory CD4+ T cells 518 FALSE
9 L1 CD4 T cell CD4 TCM T cells, CD4+, Th2 Effector/Memory CD4+ T cells 2052 FALSE
10 L1 CD4 T cell CD4 Proliferating T cells, CD4+, Th1 Effector/Memory CD4+ T cells 8 FALSE
11 L1 CD4 T cell CD4 TCM T cells, CD4+, Th2 Effector/Memory CD4+ T cells 406 FALSE
12 L1 CD4 T cell CD4 TCM T cells, CD4+, memory TREG Effector/Memory CD4+ T cells 82 FALSE
13 L1 CD4 T cell CD4 Proliferating T cells, CD4+, Th2 Effector/Memory CD4+ T cells 1 FALSE
0 L2 CD4 T cell CD4 Proliferating T cells, CD4+, memory TREG Effector/Memory CD4+ T cells 2 FALSE
1 L2 CD4 T cell NK Proliferating NK cells Effector/Memory CD4+ T cells 5113 FALSE
2 L2 CD4 T cell CD4 Proliferating T cells, CD4+, memory TREG Effector/Memory CD4+ T cells 13 FALSE
4 L2 CD4 T cell CD4 Proliferating T cells, CD4+, memory TREG Effector/Memory CD4+ T cells 1 FALSE
5 L2 CD4 T cell NK Proliferating T cells, CD4+, memory TREG Effector/Memory CD4+ T cells 99 FALSE
6 L2 CD4 T cell CD4 Proliferating T cells, CD4+, memory TREG Effector/Memory CD4+ T cells 7 FALSE
7 L2 CD4 T cell CD4 Proliferating T cells, CD4+, memory TREG Effector/Memory CD4+ T cells 189 FALSE
9 L2 CD4 T cell CD4 Proliferating T cells, CD4+, memory TREG Effector/Memory CD4+ T cells 487 FALSE

# Export the full cluster x cell-line summary as CSV (without a row-name
# column) and print a confirmation message.
write.csv(
  x = annotation_table_combined,
  file = "CellLines_annotation_BY_CLUSTER_AND_CELLLINE_04-02-2026.csv",
  row.names = FALSE
)
cat("\n✓ Combined annotation table (cluster × cell line) saved\n")

✓ Combined annotation table (cluster × cell line) saved
cat("Total rows:", nrow(annotation_table_combined), "\n")
Total rows: 103 

4 Create Annotation Summary

library(purrr)  # Make sure this is loaded

# Annotation methods: metadata column of each method, named by the label used
# for display. Keeping the mapping in ONE place means the renaming and the
# factor level order below cannot drift apart.
method_labels <- c(
  "scPred"          = "predicted.id",           # scPred
  "Azimuth.l2"      = "predicted.celltype.l2",  # Azimuth (l2 prediction)
  "SingleR(Immune)" = "singler.immune",         # SingleR
  "scATOMIC"        = "scATOMIC_annotation"     # scATOMIC
)

# Define annotation methods (metadata column names)
methods <- unname(method_labels)

# Create summary - most common label per cluster for each method.
# Cells without a label for the method are excluded; when several labels tie
# for the top count only one row is kept (with_ties = FALSE).
annotation_summary <- map(methods, function(m) {
  All_samples_Merged@meta.data %>%
    filter(!is.na(.data[[m]])) %>%
    group_by(seurat_clusters, label = .data[[m]]) %>%
    summarise(n = n(), .groups = "drop") %>%
    group_by(seurat_clusters) %>%
    slice_max(n, n = 1, with_ties = FALSE) %>%
    ungroup() %>%  # do not leak the per-cluster grouping into the result
    mutate(method = m)
}) %>%
  bind_rows()

# Rename methods for display and fix their order in a single step
annotation_summary <- annotation_summary %>%
  mutate(
    method = factor(
      names(method_labels)[match(method, method_labels)],
      levels = names(method_labels)
    )
  )

# Ensure cluster order (0-13); adjust the range if the clustering changes
annotation_summary$seurat_clusters <- factor(annotation_summary$seurat_clusters, 
                                             levels = as.character(0:13))

4.1 Summary Statistics

# Cell counts per method
cat("\n=== Cells Annotated Per Method ===\n\n")

=== Cells Annotated Per Method ===
# For every annotation column, print how many cells carry a non-NA label and
# what percentage of all cells in the object that represents.
method_cols <- c(
  "predicted.id",
  "predicted.celltype.l2",
  "singler.immune",
  "scATOMIC_annotation"
)
for (m in method_cols) {
  n_annotated <- sum(!is.na(All_samples_Merged@meta.data[[m]]))
  cat(
    sprintf(
      "%-25s: %5d cells (%.1f%%)\n",
      m,
      n_annotated,
      n_annotated / ncol(All_samples_Merged) * 100
    )
  )
}
predicted.id             : 49305 cells (100.0%)
predicted.celltype.l2    : 49305 cells (100.0%)
singler.immune           : 49272 cells (99.9%)
scATOMIC_annotation      : 49305 cells (100.0%)
# Number of unique labels per method
cat("\n=== Unique Labels Per Method ===\n\n")

=== Unique Labels Per Method ===
# Count how many different dominant labels each method assigns across clusters
print(
  summarise(
    group_by(annotation_summary, method),
    n_unique_labels = n_distinct(label)
  )
)
# A tibble: 4 × 2
  method          n_unique_labels
  <fct>                     <int>
1 scPred                        2
2 Azimuth.l2                    3
3 SingleR(Immune)               4
4 scATOMIC                      3

5 Basic Heatmap Visualization

library(ggplot2)

# Tile plot: one tile per (cluster, method), coloured by the dominant label,
# using the default discrete fill palette.
ggplot(
  data = annotation_summary,
  mapping = aes(x = seurat_clusters, y = method, fill = label)
) +
  geom_tile(linewidth = 0.5, color = "white") +
  scale_fill_discrete() +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
    axis.title = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  labs(
    title = "Cross-Method Comparison of Cell Type Annotations",
    x = "Seurat Clusters",
    y = "Annotation Method",
    fill = "Assigned Cell Type"
  )

5.1 Enhanced Heatmap with Custom Colors

library(RColorBrewer)

# Build one colour per distinct label by interpolating the 8-colour
# "Set2" Brewer palette to the required length.
num_colors <- length(unique(annotation_summary$label))
set2_ramp <- colorRampPalette(brewer.pal(8, "Set2"))
my_colors <- set2_ramp(num_colors)

# Same tile layout as the basic heatmap, with the custom palette and with
# unused cluster levels kept on the x axis (drop = FALSE).
p_heatmap <- ggplot(
  data = annotation_summary,
  mapping = aes(x = seurat_clusters, y = method, fill = label)
) +
  geom_tile(linewidth = 0.5, color = "white") +
  scale_fill_manual(values = my_colors) +
  scale_x_discrete(drop = FALSE) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "right",
    plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
    axis.title = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  labs(
    title = "Cross-Method Comparison of Cell Type Annotations",
    x = "Seurat Clusters",
    y = "Annotation Method",
    fill = "Assigned Cell Type"
  )

print(p_heatmap)


# Write the figure to disk: PNG at 300 dpi on a white background, plus PDF
ggsave(
  filename = "CellLines_annotation_heatmap_04-02-2026.png",
  plot = p_heatmap,
  width = 14,
  height = 5,
  dpi = 300,
  bg = "white"
)
ggsave(
  filename = "CellLines_annotation_heatmap_04-02-2026.pdf",
  plot = p_heatmap,
  width = 14,
  height = 5
)

cat("\n✓ Heatmap saved as PNG and PDF\n")

✓ Heatmap saved as PNG and PDF

6 MANUSCRIPT FIGURE

6.1 MANUSCRIPT FIGURE: Text-Based Heatmap BY CLUSTERS

library(ggplot2)
library(dplyr)
library(tidyr)

# Metadata columns: cluster id plus one label column per annotation method
cols_needed <- c(
  "seurat_clusters",
  "predicted.id",
  "predicted.celltype.l2",
  "singler.immune",
  "scATOMIC_annotation"
)

# Long table with one row per (cluster, method):
#   CellType         - first label after sorting the counts in decreasing order
#   CellType_display - CellType shortened to 17 characters + "..." when it is
#                      longer than 20 characters
# Method levels are listed in reverse so that scPred ends up as the top row.
heatmap_data_clusters <- All_samples_Merged@meta.data[, cols_needed] |>
  group_by(seurat_clusters) |>
  summarise(
    scPred = names(sort(table(predicted.id), decreasing = TRUE))[1],
    Azimuth = names(sort(table(predicted.celltype.l2), decreasing = TRUE))[1],
    SingleR = names(sort(table(singler.immune), decreasing = TRUE))[1],
    scATOMIC = names(sort(table(scATOMIC_annotation), decreasing = TRUE))[1],
    .groups = "drop"
  ) |>
  pivot_longer(
    cols = c(scPred, Azimuth, SingleR, scATOMIC),
    names_to = "Method",
    values_to = "CellType"
  ) |>
  mutate(
    Method = factor(Method, levels = c("scATOMIC", "SingleR", "Azimuth", "scPred")),
    # Cluster range is hard-coded; update it if the clustering changes
    seurat_clusters = factor(seurat_clusters, levels = as.character(0:13)),
    CellType_display = ifelse(
      nchar(CellType) <= 20,
      CellType,
      paste0(substr(CellType, 1, 17), "...")
    )
  )

# Text-only grid: white tiles with a dark border, label printed in each tile
p_manuscript_clusters <- ggplot(
  data = heatmap_data_clusters,
  mapping = aes(x = seurat_clusters, y = Method, label = CellType_display)
) +
  geom_tile(fill = "white", color = "grey20", linewidth = 0.8) +
  geom_text(fontface = "bold", size = 3, hjust = 0.5, vjust = 0.5) +
  theme_minimal(base_size = 16) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 18),
    axis.title = element_text(size = 16, face = "bold"),
    axis.text.x = element_text(face = "bold", size = 14),
    axis.text.y = element_text(face = "bold", size = 14),
    panel.grid = element_blank(),
    plot.margin = margin(10, 10, 10, 10)
  ) +
  labs(
    title = "Discordance in Automated Cell Type Annotation - Cell Lines by Clusters",
    x = "Seurat Cluster",
    y = "Annotation Method"
  )

print(p_manuscript_clusters)


# Write the figure as PNG and TIFF (600 dpi, white background) and as PDF
ggsave(
  filename = "FigureX_CellLines_Annotation_BY_CLUSTERS.png",
  plot = p_manuscript_clusters,
  width = 30,
  height = 5,
  dpi = 600,
  bg = "white"
)
ggsave(
  filename = "FigureX_CellLines_Annotation_BY_CLUSTERS.pdf",
  plot = p_manuscript_clusters,
  width = 30,
  height = 5
)
ggsave(
  filename = "FigureX_CellLines_Annotation_BY_CLUSTERS.tiff",
  plot = p_manuscript_clusters,
  width = 30,
  height = 5,
  dpi = 600,
  bg = "white"
)

cat("\n✓ Manuscript figure BY CLUSTERS saved (PNG, PDF, TIFF at 600 DPI)\n")

✓ Manuscript figure BY CLUSTERS saved (PNG, PDF, TIFF at 600 DPI)

6.2 MANUSCRIPT FIGURE: Text-Based Heatmap BY CELL LINES

# Metadata columns: cell line plus one label column per annotation method
cols_needed_cellline <- c(
  "cell_line",
  "predicted.id",
  "predicted.celltype.l2",
  "singler.immune",
  "scATOMIC_annotation"
)

# Long table with one row per (cell line, method):
#   CellType         - first label after sorting the counts in decreasing order
#   CellType_display - CellType shortened to 17 characters + "..." when it is
#                      longer than 20 characters
# Method levels are listed in reverse so that scPred ends up as the top row.
heatmap_data_celllines <- All_samples_Merged@meta.data[, cols_needed_cellline] |>
  group_by(cell_line) |>
  summarise(
    scPred = names(sort(table(predicted.id), decreasing = TRUE))[1],
    Azimuth = names(sort(table(predicted.celltype.l2), decreasing = TRUE))[1],
    SingleR = names(sort(table(singler.immune), decreasing = TRUE))[1],
    scATOMIC = names(sort(table(scATOMIC_annotation), decreasing = TRUE))[1],
    .groups = "drop"
  ) |>
  pivot_longer(
    cols = c(scPred, Azimuth, SingleR, scATOMIC),
    names_to = "Method",
    values_to = "CellType"
  ) |>
  mutate(
    Method = factor(Method, levels = c("scATOMIC", "SingleR", "Azimuth", "scPred")),
    CellType_display = ifelse(
      nchar(CellType) <= 20,
      CellType,
      paste0(substr(CellType, 1, 17), "...")
    )
  )

# Text-only grid: white tiles with a dark border, label printed in each tile
p_manuscript_celllines <- ggplot(
  data = heatmap_data_celllines,
  mapping = aes(x = cell_line, y = Method, label = CellType_display)
) +
  geom_tile(fill = "white", color = "grey20", linewidth = 0.8) +
  geom_text(fontface = "bold", size = 3, hjust = 0.5, vjust = 0.5) +
  theme_minimal(base_size = 16) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 18),
    axis.title = element_text(size = 16, face = "bold"),
    axis.text.x = element_text(face = "bold", size = 14, angle = 45, hjust = 1),
    axis.text.y = element_text(face = "bold", size = 14),
    panel.grid = element_blank(),
    plot.margin = margin(10, 10, 10, 10)
  ) +
  labs(
    title = "Discordance in Automated Cell Type Annotation - By Cell Line",
    x = "Cell Line",
    y = "Annotation Method"
  )

print(p_manuscript_celllines)


# Write the figure as PNG and TIFF (600 dpi, white background) and as PDF
ggsave(
  filename = "FigureX_CellLines_Annotation_BY_CELLLINE.png",
  plot = p_manuscript_celllines,
  width = 14,
  height = 5,
  dpi = 600,
  bg = "white"
)
ggsave(
  filename = "FigureX_CellLines_Annotation_BY_CELLLINE.pdf",
  plot = p_manuscript_celllines,
  width = 14,
  height = 5
)
ggsave(
  filename = "FigureX_CellLines_Annotation_BY_CELLLINE.tiff",
  plot = p_manuscript_celllines,
  width = 14,
  height = 5,
  dpi = 600,
  bg = "white"
)

cat("\n✓ Manuscript figure BY CELL LINE saved (PNG, PDF, TIFF at 600 DPI)\n")

✓ Manuscript figure BY CELL LINE saved (PNG, PDF, TIFF at 600 DPI)

6.3 COMBINED MANUSCRIPT FIGURE: Clusters × Cell Lines (Optional)

# Prepare data by CLUSTER × CELL LINE
cols_needed_combined <- c("seurat_clusters", "cell_line", "predicted.id", "predicted.celltype.l2", 
                          "singler.immune", "scATOMIC_annotation")

# Long table with one row per (cluster, cell line, method):
#   CellType         - first label after sorting the counts in decreasing order
#   combined_label   - "C<cluster>_<cell line>", used as the x axis
#   CellType_display - CellType shortened to 12 characters + "..." when it is
#                      longer than 15 characters
heatmap_data_combined <- All_samples_Merged@meta.data[, cols_needed_combined] %>%
  group_by(seurat_clusters, cell_line) %>%
  summarise(
   scPred = names(sort(table(predicted.id), decreasing = TRUE))[1],
    Azimuth = names(sort(table(predicted.celltype.l2), decreasing = TRUE))[1],
    SingleR = names(sort(table(singler.immune), decreasing = TRUE))[1],
    scATOMIC = names(sort(table(scATOMIC_annotation), decreasing = TRUE))[1],
    .groups = "drop"
  ) %>%
  pivot_longer(cols = c(scPred, Azimuth, SingleR, scATOMIC),
               names_to = "Method", values_to = "CellType") %>%
  mutate(
    Method = factor(Method, levels = c("scATOMIC", "SingleR", "Azimuth", "scPred")),
    seurat_clusters = factor(seurat_clusters, levels = as.character(0:13)),
    combined_label = paste0("C", seurat_clusters, "_", cell_line),
    # A plain character x variable is sorted alphabetically by ggplot2
    # (C0_, C1_, C10_, C11_, ..., C2_), which ignores the cluster order set
    # above. Fix the level order explicitly: by cluster, then by cell line.
    combined_label = factor(
      combined_label,
      levels = unique(combined_label[order(seurat_clusters, cell_line)])
    ),
    CellType_display = ifelse(nchar(CellType) > 15, 
                              paste0(substr(CellType, 1, 12), "..."), 
                              CellType)
  )

# Create combined figure: white tiles with a dark border, label in each tile
p_manuscript_combined <- ggplot(heatmap_data_combined, 
                       aes(x = combined_label, y = Method, label = CellType_display)) +
  geom_tile(color = "grey20", fill = "white", linewidth = 0.5) +
  geom_text(size = 2, hjust = 0.5, vjust = 0.5, fontface = "bold") +
  labs(
    title = "Cell Type Annotation - By Cluster × Cell Line",
    x = "Cluster_CellLine",
    y = "Annotation Method"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(face = "bold", size = 8, angle = 90, hjust = 1, vjust = 0.5),
    axis.text.y = element_text(face = "bold", size = 12),
    axis.title = element_text(size = 14, face = "bold"),
    plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
    panel.grid = element_blank(),
    plot.margin = margin(10, 10, 10, 10)
  )

print(p_manuscript_combined)


# Save (PNG at 600 dpi on a white background, plus PDF)
ggsave("FigureX_CellLines_Annotation_CLUSTERS_x_CELLLINES.png", 
       plot = p_manuscript_combined, width = 24, height = 6, dpi = 600, bg = "white")
ggsave("FigureX_CellLines_Annotation_CLUSTERS_x_CELLLINES.pdf", 
       plot = p_manuscript_combined, width = 24, height = 6)

cat("\n✓ Combined manuscript figure (Cluster × Cell Line) saved\n")

✓ Combined manuscript figure (Cluster × Cell Line) saved

7 sessionInfo()


# Record the R version, platform, locale and package versions of this run
sessionInfo()
R version 4.5.2 (2025-10-31)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 24.04.3 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.12.0 
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0  LAPACK version 3.12.0

locale:
 [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C               LC_TIME=fr_FR.UTF-8        LC_COLLATE=en_GB.UTF-8    
 [5] LC_MONETARY=fr_FR.UTF-8    LC_MESSAGES=en_GB.UTF-8    LC_PAPER=fr_FR.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C       

time zone: Europe/Paris
tzcode source: system (glibc)

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] tidyr_1.3.2                 RColorBrewer_1.1-3          purrr_1.2.1                 kableExtra_1.4.0           
 [5] knitr_1.51                  Azimuth_0.5.0               shinyBS_0.63.0              pbmcsca.SeuratData_3.0.0   
 [9] pbmcref.SeuratData_1.0.0    SeuratData_0.2.2.9002       SeuratDisk_0.0.0.9021       presto_1.0.0               
[13] data.table_1.18.2.1         Rcpp_1.1.1                  remotes_2.5.0               SingleR_2.12.0             
[17] celldex_1.20.0              SummarizedExperiment_1.40.0 Biobase_2.70.0              GenomicRanges_1.62.1       
[21] Seqinfo_1.0.0               IRanges_2.44.0              S4Vectors_0.48.0            BiocGenerics_0.56.0        
[25] generics_0.1.4              MatrixGenerics_1.22.0       matrixStats_1.5.0           scPred_1.9.2               
[29] pheatmap_1.0.13             ggplot2_4.0.1               patchwork_1.3.2             dplyr_1.1.4                
[33] Seurat_5.4.0                SeuratObject_5.3.0          sp_2.2-0                   

loaded via a namespace (and not attached):
  [1] dichromat_2.0-0.1                 nnet_7.3-20                       goftest_1.2-3                    
  [4] DT_0.34.0                         Biostrings_2.78.0                 HDF5Array_1.38.0                 
  [7] vctrs_0.7.1                       spatstat.random_3.4-4             digest_0.6.39                    
 [10] png_0.1-8                         gypsum_1.6.0                      ggrepel_0.9.6                    
 [13] deldir_2.0-4                      parallelly_1.46.1                 MASS_7.3-65                      
 [16] Signac_1.16.0                     reshape2_1.4.5                    httpuv_1.6.16                    
 [19] foreach_1.5.2                     withr_3.0.2                       xfun_0.56                        
 [22] survival_3.8-3                    EnsDb.Hsapiens.v86_2.99.0         memoise_2.0.1                    
 [25] ggbeeswarm_0.7.3                  systemfonts_1.3.1                 ragg_1.5.0                       
 [28] zoo_1.8-15                        gtools_3.9.5                      pbapply_1.7-4                    
 [31] KEGGREST_1.50.0                   promises_1.5.0                    otel_0.2.0                       
 [34] httr_1.4.7                        restfulr_0.0.16                   globals_0.18.0                   
 [37] fitdistrplus_1.2-6                rhdf5filters_1.22.0               rhdf5_2.54.1                     
 [40] rstudioapi_0.18.0                 UCSC.utils_1.6.1                  miniUI_0.1.2                     
 [43] curl_7.0.0                        h5mread_1.2.1                     polyclip_1.10-7                  
 [46] ExperimentHub_3.0.0               SparseArray_1.10.8                xtable_1.8-4                     
 [49] stringr_1.6.0                     evaluate_1.0.5                    S4Arrays_1.10.1                  
 [52] BiocFileCache_3.0.0               irlba_2.3.5.1                     filelock_1.0.3                   
 [55] hdf5r_1.3.12                      ROCR_1.0-12                       harmony_1.2.4                    
 [58] reticulate_1.44.1                 spatstat.data_3.1-9               magrittr_2.0.4                   
 [61] lmtest_0.9-40                     later_1.4.5                       lattice_0.22-7                   
 [64] spatstat.geom_3.7-0               future.apply_1.20.1               scattermore_1.2                  
 [67] XML_3.99-0.20                     cowplot_1.2.0                     RcppAnnoy_0.0.23                 
 [70] class_7.3-23                      pillar_1.11.1                     nlme_3.1-168                     
 [73] iterators_1.0.14                  pwalign_1.6.0                     caTools_1.18.3                   
 [76] compiler_4.5.2                    beachmat_2.26.0                   RSpectra_0.16-2                  
 [79] stringi_1.8.7                     gower_1.0.2                       tensor_1.5.1                     
 [82] lubridate_1.9.4                   GenomicAlignments_1.46.0          plyr_1.8.9                       
 [85] crayon_1.5.3                      abind_1.4-8                       BiocIO_1.20.0                    
 [88] googledrive_2.1.2                 bit_4.6.0                         fastmatch_1.1-8                  
 [91] textshaping_1.0.4                 codetools_0.2-20                  recipes_1.3.1                    
 [94] bslib_0.10.0                      alabaster.ranges_1.10.0           plotly_4.12.0                    
 [97] mime_0.13                         splines_4.5.2                     fastDummies_1.7.5                
[100] dbplyr_2.5.1                      sparseMatrixStats_1.22.0          cellranger_1.1.0                 
[103] utf8_1.2.6                        blob_1.3.0                        BiocVersion_3.22.0               
[106] seqLogo_1.76.0                    AnnotationFilter_1.34.0           fs_1.6.6                         
[109] listenv_0.10.0                    DelayedMatrixStats_1.32.0         tibble_3.3.1                     
[112] Matrix_1.7-4                      svglite_2.2.2                     pkgconfig_2.0.3                  
[115] tools_4.5.2                       cachem_1.1.0                      cigarillo_1.0.0                  
[118] RSQLite_2.4.5                     viridisLite_0.4.2                 DBI_1.2.3                        
[121] fastmap_1.2.0                     rmarkdown_2.30                    scales_1.4.0                     
[124] grid_4.5.2                        ica_1.0-3                         shinydashboard_0.7.3             
[127] Rsamtools_2.26.0                  sass_0.4.10                       AnnotationHub_4.0.0              
[130] BiocManager_1.30.27               dotCall64_1.2                     RANN_2.6.2                       
[133] alabaster.schemas_1.10.0          rpart_4.1.24                      farver_2.1.2                     
[136] yaml_2.3.12                       rtracklayer_1.70.1                cli_3.6.5                        
[139] lifecycle_1.0.5                   caret_7.0-1                       rsconnect_1.7.0                  
[142] uwot_0.2.4                        lava_1.8.2                        BSgenome.Hsapiens.UCSC.hg38_1.4.5
[145] BiocParallel_1.44.0               timechange_0.3.0                  gtable_0.3.6                     
[148] rjson_0.2.23                      ggridges_0.5.7                    progressr_0.18.0                 
[151] parallel_4.5.2                    pROC_1.19.0.1                     jsonlite_2.0.0                   
[154] RcppHNSW_0.6.0                    TFBSTools_1.48.0                  bitops_1.0-9                     
[157] bit64_4.6.0-1                     Rtsne_0.17                        alabaster.matrix_1.10.0          
[160] spatstat.utils_3.2-1              BiocNeighbors_2.4.0               jquerylib_0.1.4                  
[163] alabaster.se_1.10.0               shinyjs_2.1.1                     spatstat.univar_3.1-6            
[166] timeDate_4052.112                 lazyeval_0.2.2                    alabaster.base_1.10.0            
[169] shiny_1.12.1                      htmltools_0.5.9                   sctransform_0.4.3                
[172] rappdirs_0.3.4                    ensembldb_2.34.0                  glue_1.8.0                       
[175] TFMPvalue_1.0.0                   spam_2.11-3                       googlesheets4_1.1.2              
[178] httr2_1.2.2                       XVector_0.50.0                    RCurl_1.98-1.17                  
[181] BSgenome_1.78.0                   gridExtra_2.3                     JASPAR2020_0.99.10               
[184] igraph_2.2.1                      R6_2.6.1                          RcppRoll_0.3.1                   
[187] GenomicFeatures_1.62.0            cluster_2.1.8.1                   gargle_1.6.0                     
[190] Rhdf5lib_1.32.0                   GenomeInfoDb_1.46.2               ipred_0.9-15                     
[193] DirichletMultinomial_1.52.0       DelayedArray_0.36.0               tidyselect_1.2.1                 
[196] vipor_0.4.7                       ProtGenerics_1.42.0               xml2_1.5.2                       
[199] AnnotationDbi_1.72.0              future_1.69.0                     ModelMetrics_1.2.2.2             
[202] KernSmooth_2.23-26                S7_0.2.1                          htmlwidgets_1.6.4                
[205] rlang_1.1.7                       spatstat.sparse_3.1-0             spatstat.explore_3.7-0           
[208] hardhat_1.4.2                     beeswarm_0.4.0                    prodlim_2025.04.28               
---
title: "Cross-Method Annotation Comparison - Cell lines Sézary Syndrome Dataset"
author: Nasir Mahmood Abbasi
date: "`r Sys.Date()`"
output:
  html_notebook:
    number_sections: true
    toc: true
    toc_float:
      collapsed: true
    theme: journal
---



# Load libraries
```{r setup, include=FALSE}
# Global knitr defaults applied to every chunk unless the chunk header
# overrides them: code is echoed, warnings and messages are hidden, and
# figures default to 14 x 8 in at 300 dpi.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  fig.width = 14,
  fig.height = 8,
  dpi = 300
)
# Attach the packages used by the analysis.
# Later chunks attach knitr, kableExtra, purrr, tidyr and RColorBrewer
# themselves, so they are not listed here.
# NOTE(review): attach order decides which package's function wins on a name
# clash, so do not reorder these lines without checking for masking.
    library(Seurat)
    library(dplyr)
    library(patchwork)
    library(ggplot2)
    library(pheatmap)
    library(scPred)
    library(celldex)
    library(SingleR)
    library(remotes)
    library(presto)
    library(SeuratDisk)
    library(SeuratData)
    library(Azimuth)

```




# Load RDS with all annotations
```{r}
# Load main object with all annotations
All_samples_Merged <- readRDS("/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/2-Benchmarking-Annotation_methods_05-11-2025/All_samples_Merged_Benchmarked_05-11-2025.rds")

# Expected cluster labels in numeric order. Defined once so the factor levels
# and the printed header below cannot drift apart.
cluster_levels <- as.character(0:13)

# factor() silently converts any value outside `levels` to NA, so report
# unexpected cluster labels instead of losing those cells quietly.
observed_clusters <- unique(as.character(All_samples_Merged$seurat_clusters))
unexpected_clusters <- setdiff(
  observed_clusters[!is.na(observed_clusters)],
  cluster_levels
)
if (length(unexpected_clusters) > 0) {
  warning(
    "Cluster labels outside the expected levels will become NA: ",
    paste(unexpected_clusters, collapse = ", "),
    call. = FALSE
  )
}

# Ensure clusters are ordered 0-13
All_samples_Merged$seurat_clusters <- factor(All_samples_Merged$seurat_clusters,
                                              levels = cluster_levels)

# Set the active identities AFTER re-levelling, so they are taken from the
# ordered factor rather than from the column as it was stored in the RDS.
Idents(All_samples_Merged) <- "seurat_clusters"

# Quick sanity report: object size, cluster sizes and cells per sample
cat("Total cells:", ncol(All_samples_Merged), "\n")
cat("Total clusters:", length(unique(All_samples_Merged$seurat_clusters)), "\n\n")
# Header is built from cluster_levels (it previously claimed "0-18")
cat(sprintf("Clusters (ordered %s-%s):\n",
            cluster_levels[1], cluster_levels[length(cluster_levels)]))
print(table(All_samples_Merged$seurat_clusters))

cat("\n\nSamples (ordered):\n")
print(table(All_samples_Merged$cell_line))

```
# Create Annotation Summary Table
## Create Annotation Summary Table by Clusters
```{r }
library(dplyr)
library(knitr)
library(kableExtra)

# Grouping column first, followed by one metadata column per annotation method
cols_needed <- c(
  "seurat_clusters",
  "predicted.id",
  "predicted.celltype.l2",
  "singler.immune",
  "scATOMIC_annotation"
)

# One row per cluster: the most frequent label from each method, the cluster
# size, and a flag that is TRUE only when all four label strings are identical.
annotation_table_cluster <- All_samples_Merged@meta.data %>%
  dplyr::select(all_of(cols_needed)) %>%
  group_by(seurat_clusters) %>%
  summarise(
    across(
      all_of(cols_needed[-1]),
      function(labels) names(sort(table(labels), decreasing = TRUE))[1]
    ),
    n_cells = n(),
    .groups = "drop"
  ) %>%
  dplyr::rename(
    scPred = predicted.id,
    Azimuth = predicted.celltype.l2,
    SingleR = singler.immune,
    scATOMIC = scATOMIC_annotation
  ) %>%
  arrange(seurat_clusters) %>%
  mutate(
    Consensus = (scPred == Azimuth & Azimuth == SingleR & SingleR == scATOMIC)
  )

# Render the table; column 7 (Consensus) is shaded green when TRUE, red otherwise
cat("\n=== ANNOTATION BY CLUSTER ===\n\n")
annotation_table_cluster %>%
  kable(caption = "Dominant Cell Type Annotation per Cluster") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE,
    font_size = 10
  ) %>%
  column_spec(
    7,
    background = ifelse(annotation_table_cluster$Consensus, "#d4edda", "#f8d7da")
  )

# Write the table to the working directory
write.csv(
  annotation_table_cluster,
  file = "CellLines_annotation_BY_CLUSTER_04-02-2026.csv",
  row.names = FALSE
)

cat("\n✓ Annotation table by cluster saved\n")

```


## Create Annotation Summary Table - BY CELL LINE
```{r }

library(dplyr)
library(knitr)
library(kableExtra)

# Grouping column first, followed by one metadata column per annotation method
cols_needed <- c(
  "cell_line",
  "predicted.id",
  "predicted.celltype.l2",
  "singler.immune",
  "scATOMIC_annotation"
)

# One row per cell line: the most frequent label from each method, the number
# of cells, and a flag that is TRUE only when all four label strings match.
annotation_table_cellline <- All_samples_Merged@meta.data %>%
  dplyr::select(all_of(cols_needed)) %>%
  group_by(cell_line) %>%
  summarise(
    across(
      all_of(cols_needed[-1]),
      function(labels) names(sort(table(labels), decreasing = TRUE))[1]
    ),
    n_cells = n(),
    .groups = "drop"
  ) %>%
  dplyr::rename(
    scPred = predicted.id,
    Azimuth = predicted.celltype.l2,
    SingleR = singler.immune,
    scATOMIC = scATOMIC_annotation
  ) %>%
  arrange(cell_line) %>%
  mutate(
    Consensus = (scPred == Azimuth & Azimuth == SingleR & SingleR == scATOMIC)
  )

# Render the table; column 7 (Consensus) is shaded green when TRUE, red otherwise
cat("\n=== ANNOTATION BY CELL LINE ===\n\n")
annotation_table_cellline %>%
  kable(caption = "Dominant Cell Type Annotation per Cell Line") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE,
    font_size = 10
  ) %>%
  column_spec(
    7,
    background = ifelse(annotation_table_cellline$Consensus, "#d4edda", "#f8d7da")
  )

# Write the table to the working directory
write.csv(
  annotation_table_cellline,
  file = "CellLines_annotation_BY_CELLLINE_04-02-2026.csv",
  row.names = FALSE
)

cat("\n✓ Annotation table by cell line saved\n")

```

## Create Annotation Summary Table - Combined Table (Cluster × Cell Line)
```{r }
library(dplyr)
library(knitr)
library(kableExtra)

# Two grouping columns first, followed by one metadata column per method
cols_needed <- c(
  "seurat_clusters",
  "cell_line",
  "predicted.id",
  "predicted.celltype.l2",
  "singler.immune",
  "scATOMIC_annotation"
)

# One row per observed cluster/cell-line pair: the most frequent label from
# each method, the number of cells, and an all-four-labels-identical flag.
annotation_table_combined <- All_samples_Merged@meta.data %>%
  dplyr::select(all_of(cols_needed)) %>%
  group_by(seurat_clusters, cell_line) %>%
  summarise(
    across(
      all_of(cols_needed[-(1:2)]),
      function(labels) names(sort(table(labels), decreasing = TRUE))[1]
    ),
    n_cells = n(),
    .groups = "drop"
  ) %>%
  dplyr::rename(
    scPred = predicted.id,
    Azimuth = predicted.celltype.l2,
    SingleR = singler.immune,
    scATOMIC = scATOMIC_annotation
  ) %>%
  arrange(cell_line, seurat_clusters) %>%
  mutate(
    Consensus = (scPred == Azimuth & Azimuth == SingleR & SingleR == scATOMIC)
  )

# Preview the first 20 rows; column 8 (Consensus) is shaded green/red
cat("\n=== ANNOTATION BY CLUSTER × CELL LINE (First 20 rows) ===\n\n")
annotation_table_combined %>%
  head(20) %>%
  kable(caption = "Dominant Annotation per Cluster × Cell Line") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE,
    font_size = 9
  ) %>%
  column_spec(
    8,
    background = ifelse(
      head(annotation_table_combined$Consensus, 20),
      "#d4edda",
      "#f8d7da"
    )
  )

# Write the full table (not just the preview) to the working directory
write.csv(
  annotation_table_combined,
  file = "CellLines_annotation_BY_CLUSTER_AND_CELLLINE_04-02-2026.csv",
  row.names = FALSE
)

cat("\n✓ Combined annotation table (cluster × cell line) saved\n")
cat("Total rows:", nrow(annotation_table_combined), "\n")

```
# Create Annotation Summary
```{r }
library(purrr)  # Make sure this is loaded

# Define annotation methods
methods <- c(
  "predicted.id",             # scPred
  "predicted.celltype.l2",    # Azimuth (l2 prediction)
  "singler.immune",           # SingleR
  "scATOMIC_annotation"       # scATOMIC
)

# Per-cell metadata, extracted once instead of once per method
cell_meta <- All_samples_Merged@meta.data

# Create summary - most common label per cluster for each method.
# For every method: drop unannotated cells, count cells per (cluster, label),
# keep the single most frequent label per cluster (with_ties = FALSE keeps
# exactly one row on a tie), and tag the rows with the method's column name.
# Each piece is ungrouped before binding, so the combined table carries no
# leftover grouping into later chunks.
annotation_summary <- map(methods, function(m) {
  cell_meta %>%
    filter(!is.na(.data[[m]])) %>%
    count(seurat_clusters, label = .data[[m]], name = "n") %>%
    group_by(seurat_clusters) %>%
    slice_max(n, n = 1, with_ties = FALSE) %>%
    ungroup() %>%
    mutate(method = m)
}) %>%
  bind_rows()

# Rename methods for display, then fix the plotting order of methods and
# clusters by turning both columns into factors with explicit levels.
annotation_summary <- annotation_summary %>%
  mutate(
    method = recode(method,
                    "predicted.id" = "scPred",
                    "predicted.celltype.l2" = "Azimuth.l2",
                    "singler.immune" = "SingleR(Immune)",
                    "scATOMIC_annotation" = "scATOMIC"),
    method = factor(method,
                    levels = c("scPred", "Azimuth.l2",
                               "SingleR(Immune)", "scATOMIC")),
    # Adjust range if the number of clusters changes
    seurat_clusters = factor(seurat_clusters, levels = as.character(0:13))
  )


```

## Summary Statistics
```{r }
# Cell counts per method
cat("\n=== Cells Annotated Per Method ===\n\n")
method_cols <- c("predicted.id", "predicted.celltype.l2", "singler.immune", "scATOMIC_annotation")

# Hoist loop invariants: the metadata table and the total number of cells
cell_meta <- All_samples_Merged@meta.data
total_cells <- ncol(All_samples_Merged)

for (m in method_cols) {
  # A cell counts as annotated when the method's column is not NA
  n_annotated <- sum(!is.na(cell_meta[[m]]))
  cat(sprintf("%-25s: %5d cells (%.1f%%)\n",
              m, n_annotated, n_annotated / total_cells * 100))
}

# Number of unique labels per method, counted over ALL annotated cells.
# (annotation_summary only holds the dominant label of each cluster, so it
# cannot be used to answer this question.)
cat("\n=== Unique Labels Per Method ===\n\n")
for (m in method_cols) {
  method_labels <- cell_meta[[m]]
  n_labels <- length(unique(method_labels[!is.na(method_labels)]))
  cat(sprintf("%-25s: %5d labels\n", m, n_labels))
}

# Number of distinct DOMINANT labels per method, i.e. how many different
# labels appear as a cluster's most frequent label.
cat("\n=== Unique Dominant Labels Per Method (across clusters) ===\n\n")
annotation_summary %>%
  group_by(method) %>%
  summarise(n_dominant_labels = n_distinct(label), .groups = "drop") %>%
  print()

```
# Basic Heatmap Visualization
```{r, fig.height= 5, fig.width= 18}
library(ggplot2)

# Tile plot of the dominant label: clusters on x, annotation methods on y,
# tiles coloured by label with ggplot2's default discrete palette.
ggplot(
  data = annotation_summary,
  mapping = aes(x = seurat_clusters, y = method, fill = label)
) +
  geom_tile(colour = "white", linewidth = 0.5) +
  scale_fill_discrete() +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title = element_text(face = "bold"),
    axis.text.x = element_text(hjust = 1, angle = 45)
  ) +
  labs(
    title = "Cross-Method Comparison of Cell Type Annotations",
    fill = "Assigned Cell Type",
    y = "Annotation Method",
    x = "Seurat Clusters"
  )
```

## Enhanced Heatmap with Custom Colors
```{r, fig.height= 5, fig.width= 18 }
library(RColorBrewer)

# One colour per distinct dominant label, interpolated from the 8-colour
# "Set2" Brewer palette so any number of labels can be covered.
num_colors <- length(unique(annotation_summary$label))
my_colors <- colorRampPalette(brewer.pal(n = 8, name = "Set2"))(num_colors)

# Base layer: clusters on x, methods on y, tiles filled by label
p_heatmap <- ggplot(
  data = annotation_summary,
  mapping = aes(x = seurat_clusters, y = method, fill = label)
)

# Geometry and scales; drop = FALSE keeps every cluster level on the x axis
p_heatmap <- p_heatmap +
  geom_tile(colour = "white", linewidth = 0.5) +
  scale_fill_manual(values = my_colors) +
  scale_x_discrete(drop = FALSE)

# Labels and theme
p_heatmap <- p_heatmap +
  labs(
    title = "Cross-Method Comparison of Cell Type Annotations",
    fill = "Assigned Cell Type",
    y = "Annotation Method",
    x = "Seurat Clusters"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "right",
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title = element_text(face = "bold"),
    axis.text.x = element_text(hjust = 1, angle = 45)
  )

print(p_heatmap)

# Export raster (PNG, 300 dpi, white background) and vector (PDF) copies
ggsave(
  filename = "CellLines_annotation_heatmap_04-02-2026.png",
  plot = p_heatmap,
  width = 14,
  height = 5,
  dpi = 300,
  bg = "white"
)
ggsave(
  filename = "CellLines_annotation_heatmap_04-02-2026.pdf",
  plot = p_heatmap,
  width = 14,
  height = 5
)

cat("\n✓ Heatmap saved as PNG and PDF\n")


```
#  MANUSCRIPT FIGURE
##  MANUSCRIPT FIGURE: Text-Based Heatmap BY CLUSTERS
```{r, fig.height= 5, fig.width= 30 }
library(ggplot2)
library(dplyr)
library(tidyr)

# Grouping column first, followed by one metadata column per annotation method
cols_needed <- c(
  "seurat_clusters",
  "predicted.id",
  "predicted.celltype.l2",
  "singler.immune",
  "scATOMIC_annotation"
)

# Long table with one row per (cluster, method): the most frequent label and a
# display version shortened to 20 characters (17 characters plus "...").
heatmap_data_clusters <- All_samples_Merged@meta.data %>%
  dplyr::select(all_of(cols_needed)) %>%
  group_by(seurat_clusters) %>%
  summarise(
    across(
      all_of(cols_needed[-1]),
      function(labels) names(sort(table(labels), decreasing = TRUE))[1]
    ),
    .groups = "drop"
  ) %>%
  dplyr::rename(
    scPred = predicted.id,
    Azimuth = predicted.celltype.l2,
    SingleR = singler.immune,
    scATOMIC = scATOMIC_annotation
  ) %>%
  pivot_longer(
    cols = all_of(c("scPred", "Azimuth", "SingleR", "scATOMIC")),
    names_to = "Method",
    values_to = "CellType"
  ) %>%
  mutate(
    # Explicit levels fix the order of the rows on the y axis
    Method = factor(Method, levels = c("scATOMIC", "SingleR", "Azimuth", "scPred")),
    # Change the range here if the number of clusters changes
    seurat_clusters = factor(seurat_clusters, levels = as.character(0:13)),
    CellType_display = ifelse(
      nchar(CellType) > 20,
      paste0(substr(CellType, 1, 17), "..."),
      CellType
    )
  )

# Text-only "heatmap": white tiles with the label printed inside each cell
p_manuscript_clusters <- ggplot(
  data = heatmap_data_clusters,
  mapping = aes(x = seurat_clusters, y = Method, label = CellType_display)
) +
  geom_tile(colour = "grey20", fill = "white", linewidth = 0.8) +
  geom_text(size = 3, fontface = "bold", hjust = 0.5, vjust = 0.5) +
  labs(
    title = "Discordance in Automated Cell Type Annotation - Cell Lines by Clusters",
    y = "Annotation Method",
    x = "Seurat Cluster"
  ) +
  theme_minimal(base_size = 16) +
  theme(
    plot.title = element_text(size = 18, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 16, face = "bold"),
    axis.text.x = element_text(size = 14, face = "bold"),
    axis.text.y = element_text(size = 14, face = "bold"),
    panel.grid = element_blank(),
    plot.margin = margin(10, 10, 10, 10)
  )

print(p_manuscript_clusters)

# Export 30 x 5 in copies: PNG and TIFF at 600 dpi on white, plus a PDF
ggsave(
  filename = "FigureX_CellLines_Annotation_BY_CLUSTERS.png",
  plot = p_manuscript_clusters,
  width = 30,
  height = 5,
  dpi = 600,
  bg = "white"
)
ggsave(
  filename = "FigureX_CellLines_Annotation_BY_CLUSTERS.pdf",
  plot = p_manuscript_clusters,
  width = 30,
  height = 5
)
ggsave(
  filename = "FigureX_CellLines_Annotation_BY_CLUSTERS.tiff",
  plot = p_manuscript_clusters,
  width = 30,
  height = 5,
  dpi = 600,
  bg = "white"
)

cat("\n✓ Manuscript figure BY CLUSTERS saved (PNG, PDF, TIFF at 600 DPI)\n")

```

##  MANUSCRIPT FIGURE: Text-Based Heatmap BY CELL LINES
```{r, fig.height= 5, fig.width= 18 }
# Grouping column first, followed by one metadata column per annotation method
cols_needed_cellline <- c(
  "cell_line",
  "predicted.id",
  "predicted.celltype.l2",
  "singler.immune",
  "scATOMIC_annotation"
)

# Long table with one row per (cell line, method): the most frequent label and
# a display version shortened to 20 characters (17 characters plus "...").
heatmap_data_celllines <- All_samples_Merged@meta.data %>%
  dplyr::select(all_of(cols_needed_cellline)) %>%
  group_by(cell_line) %>%
  summarise(
    across(
      all_of(cols_needed_cellline[-1]),
      function(labels) names(sort(table(labels), decreasing = TRUE))[1]
    ),
    .groups = "drop"
  ) %>%
  dplyr::rename(
    scPred = predicted.id,
    Azimuth = predicted.celltype.l2,
    SingleR = singler.immune,
    scATOMIC = scATOMIC_annotation
  ) %>%
  pivot_longer(
    cols = all_of(c("scPred", "Azimuth", "SingleR", "scATOMIC")),
    names_to = "Method",
    values_to = "CellType"
  ) %>%
  mutate(
    # Explicit levels fix the order of the rows on the y axis
    Method = factor(Method, levels = c("scATOMIC", "SingleR", "Azimuth", "scPred")),
    CellType_display = ifelse(
      nchar(CellType) > 20,
      paste0(substr(CellType, 1, 17), "..."),
      CellType
    )
  )

# Text-only "heatmap": white tiles with the label printed inside each cell
p_manuscript_celllines <- ggplot(
  data = heatmap_data_celllines,
  mapping = aes(x = cell_line, y = Method, label = CellType_display)
) +
  geom_tile(colour = "grey20", fill = "white", linewidth = 0.8) +
  geom_text(size = 3, fontface = "bold", hjust = 0.5, vjust = 0.5) +
  labs(
    title = "Discordance in Automated Cell Type Annotation - By Cell Line",
    y = "Annotation Method",
    x = "Cell Line"
  ) +
  theme_minimal(base_size = 16) +
  theme(
    plot.title = element_text(size = 18, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 16, face = "bold"),
    axis.text.x = element_text(size = 14, face = "bold", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 14, face = "bold"),
    panel.grid = element_blank(),
    plot.margin = margin(10, 10, 10, 10)
  )

print(p_manuscript_celllines)

# Export 14 x 5 in copies: PNG and TIFF at 600 dpi on white, plus a PDF
ggsave(
  filename = "FigureX_CellLines_Annotation_BY_CELLLINE.png",
  plot = p_manuscript_celllines,
  width = 14,
  height = 5,
  dpi = 600,
  bg = "white"
)
ggsave(
  filename = "FigureX_CellLines_Annotation_BY_CELLLINE.pdf",
  plot = p_manuscript_celllines,
  width = 14,
  height = 5
)
ggsave(
  filename = "FigureX_CellLines_Annotation_BY_CELLLINE.tiff",
  plot = p_manuscript_celllines,
  width = 14,
  height = 5,
  dpi = 600,
  bg = "white"
)

cat("\n✓ Manuscript figure BY CELL LINE saved (PNG, PDF, TIFF at 600 DPI)\n")

```
##  COMBINED MANUSCRIPT FIGURE: Clusters × Cell Lines (Optional)
```{r, fig.height= 5, fig.width= 48 }
# Prepare data by CLUSTER × CELL LINE
cols_needed_combined <- c("seurat_clusters", "cell_line", "predicted.id", "predicted.celltype.l2", 
                          "singler.immune", "scATOMIC_annotation")

# Long table with one row per (cluster, cell line, method): the most frequent
# label, an x-axis key "C<cluster>_<cell line>", and a display label shortened
# to 15 characters (12 characters plus "...").
heatmap_data_combined <- All_samples_Merged@meta.data[, cols_needed_combined] %>%
  group_by(seurat_clusters, cell_line) %>%
  summarise(
    scPred = names(sort(table(predicted.id), decreasing = TRUE))[1],
    Azimuth = names(sort(table(predicted.celltype.l2), decreasing = TRUE))[1],
    SingleR = names(sort(table(singler.immune), decreasing = TRUE))[1],
    scATOMIC = names(sort(table(scATOMIC_annotation), decreasing = TRUE))[1],
    .groups = "drop"
  ) %>%
  pivot_longer(cols = c(scPred, Azimuth, SingleR, scATOMIC),
               names_to = "Method", values_to = "CellType") %>%
  mutate(
    # Explicit levels fix the order of the rows on the y axis
    Method = factor(Method, levels = c("scATOMIC", "SingleR", "Azimuth", "scPred")),
    seurat_clusters = factor(seurat_clusters, levels = as.character(0:13)),
    combined_label = paste0("C", seurat_clusters, "_", cell_line),
    CellType_display = ifelse(nchar(CellType) > 15, 
                              paste0(substr(CellType, 1, 12), "..."), 
                              CellType)
  ) %>%
  # A plain character x variable is sorted alphabetically by ggplot2, which
  # would place C10-C13 before C2. Sort by cluster (factor order) and cell
  # line, then freeze that order as the factor levels of the axis key.
  arrange(seurat_clusters, cell_line) %>%
  mutate(combined_label = factor(combined_label, levels = unique(combined_label)))

# Create combined figure: white tiles with the label printed inside each cell
p_manuscript_combined <- ggplot(heatmap_data_combined, 
                       aes(x = combined_label, y = Method, label = CellType_display)) +
  geom_tile(color = "grey20", fill = "white", linewidth = 0.5) +
  geom_text(size = 2, hjust = 0.5, vjust = 0.5, fontface = "bold") +
  labs(
    title = "Cell Type Annotation - By Cluster × Cell Line",
    x = "Cluster_CellLine",
    y = "Annotation Method"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(face = "bold", size = 8, angle = 90, hjust = 1, vjust = 0.5),
    axis.text.y = element_text(face = "bold", size = 12),
    axis.title = element_text(size = 14, face = "bold"),
    plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
    panel.grid = element_blank(),
    plot.margin = margin(10, 10, 10, 10)
  )

print(p_manuscript_combined)

# Save 24 x 6 in copies: PNG at 600 dpi on white, plus a PDF
ggsave("FigureX_CellLines_Annotation_CLUSTERS_x_CELLLINES.png", 
       plot = p_manuscript_combined, width = 24, height = 6, dpi = 600, bg = "white")
ggsave("FigureX_CellLines_Annotation_CLUSTERS_x_CELLLINES.pdf", 
       plot = p_manuscript_combined, width = 24, height = 6)

cat("\n✓ Combined manuscript figure (Cluster × Cell Line) saved\n")

```







# sessionInfo()
```{r}

# Print the R version, platform and attached/loaded package versions so the
# environment that produced this notebook is recorded with its output.
sessionInfo()


```


