1. load libraries

2. Load Seurat Object


All_samples_Merged <- readRDS("../../PHD_3rd_YEAR_Analysis/0-Seurat_RDS_OBJECT_FINAL/All_samples_Merged_with_STCAT_Annotation_final-5-09-2025.rds")

DefaultAssay(All_samples_Merged) <- "RNA"

All_samples_Merged <- NormalizeData(
    All_samples_Merged,
    normalization.method = "LogNormalize",
    scale.factor = 1e4,
    verbose = TRUE
  )
Performing log-normalization
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|

3.find Top markers

Idents(All_samples_Merged) <- "seurat_clusters"

# ---------------------------------------------------------
# 2️⃣ Find marker genes per cluster
SS_markers <- FindAllMarkers(
  All_samples_Merged,
  only.pos = TRUE,
  min.pct = 0.25,
  logfc.threshold = 0.25,
  min.pct.diff = 0.2
  
)

library(dplyr)

# Precise blacklist for uninformative genes
blacklist_patterns <- c(
  "^TRAV", "^TRBV", "^TRGV", "^TRDV", "^TRBC", "^TRAC", "^TRDC", "^TRGC", # TCR
  "^IGH", "^IGK", "^IGL", "^IGJ",                                         # Ig genes
  "^RPL", "^RPS",                                                         # ribosomal
  "^MT-",                                                                 # mitochondria
  "^HBA", "^HBB", "^HB[ABZ]",                                             # hemoglobins
  "^NEAT1$", "^MALAT1$",                                                  # optional lncRNAs
  "^XIST$"                              )

blacklist_regex <- paste(blacklist_patterns, collapse = "|")

# Preview which markers will be removed
to_remove <- SS_markers %>%
  filter(grepl(blacklist_regex, gene, ignore.case = TRUE))
message("Rows to remove: ", nrow(to_remove))
head(to_remove$gene)
[1] "TRAV17"  "TRAV9-2" "RPL22L1" "RPL7"    "RPL35A"  "NEAT1"  
# Filter markers (keep important metabolic/proliferation genes)
SS_markers_filtered <- SS_markers %>%
  filter(!grepl(blacklist_regex, gene, ignore.case = TRUE))

4. Top5 Markers


library(dplyr)

# ---------------------------------------------------------
# Save filtered markers
write.csv(SS_markers_filtered, file = "SS_markers_filtered.csv", row.names = FALSE)

# ---------------------------------------------------------
# Extract top 25 markers per cluster
top25_markers <- SS_markers_filtered %>%
  filter(p_val_adj < 0.05) %>%  # ensure statistical significance
  group_by(cluster) %>%
  slice_max(order_by = avg_log2FC, n = 25) %>%
  ungroup()

write.csv(top25_markers, file = "SS_markers_top25.csv", row.names = FALSE)

# ---------------------------------------------------------
# Extract top 5 markers per cluster
top5_markers <- SS_markers_filtered %>%
  filter(p_val_adj < 0.05) %>%  # ensure statistical significance
  group_by(cluster) %>%
  slice_max(order_by = avg_log2FC, n = 5) %>%
  ungroup()

write.csv(top5_markers, file = "SS_markers_top5.csv", row.names = FALSE)

message("Filtered markers, top25, and top5 markers saved successfully.")

4. Top5 Markers


library(dplyr)

# ---------------------------------------------------------
# Save filtered markers
write.csv(SS_markers_filtered, file = "SS_markers_filtered.csv", row.names = FALSE)

# ---------------------------------------------------------
# Extract top 25 markers per cluster
top25_markers <- SS_markers_filtered %>%
  filter(p_val_adj < 0.05) %>%  # ensure statistical significance
  group_by(cluster) %>%
  slice_max(order_by = avg_log2FC, n = 25) %>%
  ungroup()

write.csv(top25_markers, file = "SS_markers_top25.csv", row.names = FALSE)

# ---------------------------------------------------------
# Extract top 5 markers per cluster
top5_markers <- SS_markers_filtered %>%
  filter(p_val_adj < 0.05) %>%  # ensure statistical significance
  group_by(cluster) %>%
  slice_max(order_by = avg_log2FC, n = 5) %>%
  ungroup()

write.csv(top5_markers, file = "SS_markers_top5.csv", row.names = FALSE)

message("Filtered markers, top25, and top5 markers saved successfully.")

5.Rename Clusters

# 4️⃣ Verify order
levels(All_samples_Merged$renamed_clusters)
 [1] "0: Malignant_CD4T (Transcriptionally_Dysregulated_with_APC)"
 [2] "1: Malignant_CD4T (cytotoxic/NK-like)"                      
 [3] "2: Malignant_CD4T (Th2-skewed)"                             
 [4] "3: Healthy_CD4T (Mixed_Population(Tn/TCM))"                 
 [5] "4: Malignant_CD4T (Migratory/Inflammatory)"                 
 [6] "5: Malignant_CD4T (Stem-like/Th2-associated)"               
 [7] "6: Malignant_CD4T (Sézary_like CCL17⁺ IL-13⁺)"              
 [8] "7: Malignant_CD4T (Metabolic_Stress_Active MXD3+ KLHL4+)"   
 [9] "8: Malignant_CD4T (Prolif_Metabolically_active_LHX9+)"      
[10] "9: Malignant_CD4T (Th2/Cytotoxic)"                          
[11] "10: Healthy_CD4T (TCM)"                                     
[12] "11: Malignant_CD4T (stressed/pro-inflammatory)"             
[13] "12: Malignant_CD4T (Cytotoxic/Inflammatory)"                
[14] "13: Malignant_CD4T (Type-I_Interferon_Hyperactivated)"      
table(All_samples_Merged$renamed_clusters)

0: Malignant_CD4T (Transcriptionally_Dysregulated_with_APC) 
                                                       6789 
                      1: Malignant_CD4T (cytotoxic/NK-like) 
                                                       5275 
                             2: Malignant_CD4T (Th2-skewed) 
                                                       4663 
                 3: Healthy_CD4T (Mixed_Population(Tn/TCM)) 
                                                       4661 
                 4: Malignant_CD4T (Migratory/Inflammatory) 
                                                       4086 
               5: Malignant_CD4T (Stem-like/Th2-associated) 
                                                       3634 
              6: Malignant_CD4T (Sézary_like CCL17⁺ IL-13⁺) 
                                                       3536 
   7: Malignant_CD4T (Metabolic_Stress_Active MXD3+ KLHL4+) 
                                                       3409 
      8: Malignant_CD4T (Prolif_Metabolically_active_LHX9+) 
                                                       3338 
                          9: Malignant_CD4T (Th2/Cytotoxic) 
                                                       3273 
                                     10: Healthy_CD4T (TCM) 
                                                       3212 
             11: Malignant_CD4T (stressed/pro-inflammatory) 
                                                       1675 
                12: Malignant_CD4T (Cytotoxic/Inflammatory) 
                                                       1063 
      13: Malignant_CD4T (Type-I_Interferon_Hyperactivated) 
                                                        691 
# 5️⃣ Plot
DimPlot(
  All_samples_Merged,
  group.by = "renamed_clusters",
  label = TRUE,
  repel = TRUE,
  reduction = "umap"
) +
  ggtitle("Cluster Annotations (Ordered 0–13)") +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))

Save the Annotated Seurat Object

# ✅ Save the Seurat object with new cluster annotations
saveRDS(
  All_samples_Merged,
  file = "../../PHD_3rd_YEAR_Analysis/0-Seurat_RDS_OBJECT_FINAL/All_samples_Merged_with_Renamed_Clusters_final-26-10-2025.rds"
)

message("✅ Seurat object saved successfully with renamed clusters and metadata column 'renamed_clusters'.")

dimplot

DimPlot(All_samples_Merged, group.by = "renamed_clusters", reduction = "umap", label = T, label.box = T, repel = T)

DimPlot(All_samples_Merged, group.by = "renamed_clusters", reduction = "umap", label = F, label.box = T, repel = T)


library(SCpubr)
SCpubr::do_DimPlot(sample = All_samples_Merged, group.by = "renamed_clusters", reduction = "umap", label = F, label.box = T, repel = T,
                    legend.position = "right")

 SCpubr::do_DimPlot(sample = All_samples_Merged, group.by = "renamed_clusters", reduction = "umap", label = T, label.box = T, repel = T,
                    legend.position = "none")

SCpubr::do_DimPlot(sample = All_samples_Merged, group.by = "renamed_clusters", reduction = "umap", label = F, label.box = T, repel = T,
                    legend.position = "right")


library(scCustomize)
DimPlot_scCustom(seurat_object = All_samples_Merged, group.by = "renamed_clusters", reduction = "umap", label = F, label.box = T, repel = T)

NA
NA

ScCutomize Dotplot

library(scCustomize)

Clustered_DotPlot(seurat_object = All_samples_Merged, features = top5_markers$gene, k=7, , x_lab_rotate = 90)
[[1]]

[[2]]

ScCutomize heatmap

# Scale the data for the selected features
alldata <- ScaleData(
  All_samples_Merged,
  features = as.character(unique(top5_markers$gene)),
  assay = "RNA"
)

  |                                                                                                                                 
  |                                                                                                                           |   0%
  |                                                                                                                                 
  |===========================================================================================================================| 100%
# Create the heatmap object
p <- DoHeatmap(
  alldata,
  features = as.character(unique(top5_markers$gene)),
  group.by = "renamed_clusters",  # use your annotated clusters
  assay = "RNA",
  size = 4
) +
  ggtitle("Top 5 Marker Genes per Cluster (Sézary Syndrome)") +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
  )

# ✅ Save as PNG
ggsave(
  filename = "Top5_Markers_Heatmap.png",
  plot = p,
  width = 20,
  height = 16,
  dpi = 300
)

# ✅ Save as PDF
ggsave(
  filename = "Top5_Markers_Heatmap.pdf",
  plot = p,
  width = 20,
  height = 16,
  dpi = 300
)

message("✅ Heatmap saved successfully as both PNG and PDF.")

ScCutomize Dotplot

ScCutomize Dotplot

library(scCustomize)

Clustered_DotPlot(seurat_object = All_samples_Merged, features = top5_markers$gene, k=7, , x_lab_rotate = 90)
[[1]]

[[2]]

---
title: "Identify Top5 Markers and rename clusters "
author: "Nasir Mahmood Abbasi"
date: "`r Sys.Date()`"
output:
  html_notebook:
    toc: yes
    toc_float: yes
    toc_collapsed: yes
  word_document:
    toc: yes
  html_document:
    toc: yes
    df_print: paged
  pdf_document:
    toc: yes
---


# 1. load libraries
```{r setup, include=FALSE}
# Load below libraries
library(Seurat)
library(ggplot2)
library(plotly)
library(tidyverse)
library(cowplot)


library(SCpubr)
library(dplyr)
```


# 2. Load Seurat Object 
```{r}

All_samples_Merged <- readRDS("../../PHD_3rd_YEAR_Analysis/0-Seurat_RDS_OBJECT_FINAL/All_samples_Merged_with_STCAT_Annotation_final-5-09-2025.rds")

DefaultAssay(All_samples_Merged) <- "RNA"

All_samples_Merged <- NormalizeData(
    All_samples_Merged,
    normalization.method = "LogNormalize",
    scale.factor = 1e4,
    verbose = TRUE
  )


```

# 3.find Top markers
```{r}
Idents(All_samples_Merged) <- "seurat_clusters"

# ---------------------------------------------------------
# 2️⃣ Find marker genes per cluster
SS_markers <- FindAllMarkers(
  All_samples_Merged,
  only.pos = TRUE,
  min.pct = 0.25,
  logfc.threshold = 0.25,
  min.pct.diff = 0.2
  
)

library(dplyr)

# Precise blacklist for uninformative genes
blacklist_patterns <- c(
  "^TRAV", "^TRBV", "^TRGV", "^TRDV", "^TRBC", "^TRAC", "^TRDC", "^TRGC", # TCR
  "^IGH", "^IGK", "^IGL", "^IGJ",                                         # Ig genes
  "^RPL", "^RPS",                                                         # ribosomal
  "^MT-",                                                                 # mitochondria
  "^HBA", "^HBB", "^HB[ABZ]",                                             # hemoglobins
  "^NEAT1$", "^MALAT1$",                                                  # optional lncRNAs
  "^XIST$"                              )

blacklist_regex <- paste(blacklist_patterns, collapse = "|")

# Preview which markers will be removed
to_remove <- SS_markers %>%
  filter(grepl(blacklist_regex, gene, ignore.case = TRUE))
message("Rows to remove: ", nrow(to_remove))
head(to_remove$gene)

# Filter markers (keep important metabolic/proliferation genes)
SS_markers_filtered <- SS_markers %>%
  filter(!grepl(blacklist_regex, gene, ignore.case = TRUE))


```


# 4. Top5 Markers
```{r, fig.width=12, fig.height=6}

library(dplyr)

# ---------------------------------------------------------
# Save filtered markers
write.csv(SS_markers_filtered, file = "SS_markers_filtered.csv", row.names = FALSE)

# ---------------------------------------------------------
# Extract top 25 markers per cluster
top25_markers <- SS_markers_filtered %>%
  filter(p_val_adj < 0.05) %>%  # ensure statistical significance
  group_by(cluster) %>%
  slice_max(order_by = avg_log2FC, n = 25) %>%
  ungroup()

write.csv(top25_markers, file = "SS_markers_top25.csv", row.names = FALSE)

# ---------------------------------------------------------
# Extract top 5 markers per cluster
top5_markers <- SS_markers_filtered %>%
  filter(p_val_adj < 0.05) %>%  # ensure statistical significance
  group_by(cluster) %>%
  slice_max(order_by = avg_log2FC, n = 5) %>%
  ungroup()

write.csv(top5_markers, file = "SS_markers_top5.csv", row.names = FALSE)

message("Filtered markers, top25, and top5 markers saved successfully.")

```

# 4. Top5 Markers
```{r, fig.width=12, fig.height=6}

library(dplyr)

# ---------------------------------------------------------
# Save filtered markers
write.csv(SS_markers_filtered, file = "SS_markers_filtered.csv", row.names = FALSE)

# ---------------------------------------------------------
# Extract top 25 markers per cluster
top25_markers <- SS_markers_filtered %>%
  filter(p_val_adj < 0.05) %>%  # ensure statistical significance
  group_by(cluster) %>%
  slice_max(order_by = avg_log2FC, n = 25) %>%
  ungroup()

write.csv(top25_markers, file = "SS_markers_top25.csv", row.names = FALSE)

# ---------------------------------------------------------
# Extract top 5 markers per cluster
top5_markers <- SS_markers_filtered %>%
  filter(p_val_adj < 0.05) %>%  # ensure statistical significance
  group_by(cluster) %>%
  slice_max(order_by = avg_log2FC, n = 5) %>%
  ungroup()

write.csv(top5_markers, file = "SS_markers_top5.csv", row.names = FALSE)

message("Filtered markers, top25, and top5 markers saved successfully.")

```


# 5.Rename Clusters
```{r, fig.width=12, fig.height=6}

library(dplyr)

# 1️⃣ Define new cluster names with numeric prefix
cluster_names <- c(
  "0"  = "0: Malignant_CD4T (Transcriptionally_Dysregulated_with_APC)",                        
  "1"  = "1: Malignant_CD4T (cytotoxic/NK-like)",                                                   
  "2"  = "2: Malignant_CD4T (Th2-skewed)",                                         
  "3"  = "3: Healthy_CD4T (Mixed_Population(Tn/TCM))",                                                             
  "4"  = "4: Malignant_CD4T (Migratory/Inflammatory)",                                         
  "5"  = "5: Malignant_CD4T (Stem-like/Th2-associated)",                                                     
  "6"  = "6: Malignant_CD4T (Sézary_like CCL17⁺ IL-13⁺)",                                                      
  "7"  = "7: Malignant_CD4T (Metabolic_Stress_Active MXD3+ KLHL4+)",                                               
  "8"  = "8: Malignant_CD4T (Prolif_Metabolically_active_LHX9+)",                                     
  "9"  = "9: Malignant_CD4T (Th2/Cytotoxic)",                                                 
  "10" = "10: Healthy_CD4T (TCM)",                                                          
  "11" = "11: Malignant_CD4T (stressed/pro-inflammatory)",                                            
  "12" = "12: Malignant_CD4T (Cytotoxic/Inflammatory)",                                                      
  "13" = "13: Malignant_CD4T (Type-I_Interferon_Hyperactivated)"                                           
)

# 2️⃣ Apply renaming
All_samples_Merged <- RenameIdents(All_samples_Merged, cluster_names)

# 3️⃣ Add metadata column
All_samples_Merged$renamed_clusters <- factor(
  Idents(All_samples_Merged),
  levels = cluster_names  # ensures order 0→13 in plots and legends
)

# 4️⃣ Verify order
levels(All_samples_Merged$renamed_clusters)

table(All_samples_Merged$renamed_clusters)

# 5️⃣ Plot
DimPlot(
  All_samples_Merged,
  group.by = "renamed_clusters",
  label = TRUE,
  repel = TRUE,
  reduction = "umap"
) +
  ggtitle("Cluster Annotations (Ordered 0–13)") +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))


```

## Save the Annotated Seurat Object
```{r, fig.width=12, fig.height=6}
# # ✅ Save the Seurat object with new cluster annotations
# saveRDS(
#   All_samples_Merged,
#   file = "../../PHD_3rd_YEAR_Analysis/0-Seurat_RDS_OBJECT_FINAL/All_samples_Merged_with_Renamed_Clusters_final-26-10-2025.rds"
# )
# 
# message("✅ Seurat object saved successfully with renamed clusters and metadata column 'renamed_clusters'.")



```
## dimplot
```{r, fig.width=12, fig.height=6}
DimPlot(All_samples_Merged, group.by = "renamed_clusters", reduction = "umap", label = T, label.box = T, repel = T)
DimPlot(All_samples_Merged, group.by = "renamed_clusters", reduction = "umap", label = F, label.box = T, repel = T)

library(SCpubr)
SCpubr::do_DimPlot(sample = All_samples_Merged, group.by = "renamed_clusters", reduction = "umap", label = F, label.box = T, repel = T,
                    legend.position = "right")
 SCpubr::do_DimPlot(sample = All_samples_Merged, group.by = "renamed_clusters", reduction = "umap", label = T, label.box = T, repel = T,
                    legend.position = "none")
SCpubr::do_DimPlot(sample = All_samples_Merged, group.by = "renamed_clusters", reduction = "umap", label = F, label.box = T, repel = T,
                    legend.position = "right")

library(scCustomize)
DimPlot_scCustom(seurat_object = All_samples_Merged, group.by = "renamed_clusters", reduction = "umap", label = F, label.box = T, repel = T)


```



## ScCutomize Dotplot
```{r, fig.width=16, fig.height=16}
library(scCustomize)

Clustered_DotPlot(seurat_object = All_samples_Merged, features = top5_markers$gene, k=7, , x_lab_rotate = 90)

```







## ScCutomize heatmap
```{r, fig.width=16, fig.height=16}
# Scale the data for the selected features
alldata <- ScaleData(
  All_samples_Merged,
  features = as.character(unique(top5_markers$gene)),
  assay = "RNA"
)

# Create the heatmap object
p <- DoHeatmap(
  alldata,
  features = as.character(unique(top5_markers$gene)),
  group.by = "renamed_clusters",  # use your annotated clusters
  assay = "RNA",
  size = 4
) +
  ggtitle("Top 5 Marker Genes per Cluster (Sézary Syndrome)") +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
  )

# ✅ Save as PNG
ggsave(
  filename = "Top5_Markers_Heatmap.png",
  plot = p,
  width = 20,
  height = 16,
  dpi = 300
)

# ✅ Save as PDF
ggsave(
  filename = "Top5_Markers_Heatmap.pdf",
  plot = p,
  width = 20,
  height = 16,
  dpi = 300
)

message("✅ Heatmap saved successfully as both PNG and PDF.")

```








## ScCutomize Dotplot
```{r, fig.width=16, fig.height=16}
library(SCpubr)

library(dplyr)
library(SCpubr)

# Ensure your top5_markers table has a column with renamed clusters
# Here, we assume you already added it as All_samples_Merged$renamed_clusters
top5_markers <- top5_markers %>%
  mutate(renamed_cluster = paste0(cluster, ": ", Idents(All_samples_Merged)[match(cluster, All_samples_Merged$seurat_clusters)]))

# Create a named list: names = cluster annotations, values = top 5 genes
top5_list <- top5_markers %>%
  group_by(renamed_cluster) %>%
  summarise(genes = list(unique(gene))) %>%
  deframe()  # converts tibble to named list

# Set identities to renamed clusters
Idents(All_samples_Merged) <- All_samples_Merged$renamed_clusters

# DotPlot using the named list
p <- SCpubr::do_DotPlot(
  sample = All_samples_Merged,
  features = top5_list,
  group.by = "renamed_clusters",
  dot.scale = 4,
  font.size = 10
)

p

ggsave("Top5_Markers_DotPlot_grouped.png", plot = p, width = 14, height = 8, dpi = 300)
ggsave("Top5_Markers_DotPlot_grouped.pdf", plot = p, width = 14, height = 8)

```









## ScCutomize Dotplot
```{r, fig.width=16, fig.height=16}
library(scCustomize)

Clustered_DotPlot(seurat_object = All_samples_Merged, features = top5_markers$gene, k=7, , x_lab_rotate = 90)

```