load libraries————————————

1. Load the merged Seurat object (all samples, with STCAT annotation)


# Read the merged, STCAT-annotated Seurat object from its serialized RDS file.
merged_rds_path <- "../0-Seurat_RDS_OBJECT_FINAL/Seurat_object_Final_changes/All_samples_Merged_with_STCAT_Annotation_final-5-09-2025.rds"
All_samples_Merged <- readRDS(file = merged_rds_path)

2. Run FindAllMarkers on RNA Assay

# Make RNA the active assay for the steps that follow.
DefaultAssay(All_samples_Merged) <- "RNA"

# Log-normalize the RNA assay only when it does not already appear to hold
# normalized values: the assay must expose a slot named "data" and that slot
# must have at least one column.
# NOTE(review): this test relies on the assay class having a slot literally
# named "data" and on `GetAssayData(slot = ...)`; confirm it still detects
# un-normalized input for the Seurat / SeuratObject version in use.
has_normalized_data <-
  "data" %in% slotNames(All_samples_Merged[["RNA"]]) &&
  ncol(GetAssayData(All_samples_Merged[["RNA"]], slot = "data")) != 0

if (has_normalized_data) {
  message("RNA assay already contains normalized data. Skipping normalization.")
} else {
  message("RNA assay does not contain normalized data. Running NormalizeData...")
  All_samples_Merged <- NormalizeData(
    object = All_samples_Merged,
    assay = "RNA",
    normalization.method = "LogNormalize",
    scale.factor = 10000
  )
}
Performing log-normalization
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
# Use the `seurat_clusters` labels as the active cell identities.
Idents(All_samples_Merged) <- "seurat_clusters"

# Marker detection for every cluster on the active assay: positive markers
# only, with `min.pct` and `logfc.threshold` both set to 0.25.
markers_csv <- "AllMarkers_clusters-17-09-2025.csv"
all_markers <- FindAllMarkers(
  object = All_samples_Merged,
  logfc.threshold = 0.25,
  min.pct = 0.25,
  only.pos = TRUE
)

# Persist the complete marker table; row names are dropped from the CSV.
write.csv(x = all_markers, file = markers_csv, row.names = FALSE)

3. Top 25 and Top 5 markers per cluster

# Keep the `n` strongest markers of every cluster.
#
# markers: a FindAllMarkers()-style data frame with a `cluster` column and a
#          fold-change column (`avg_log2FC`, or `avg_logFC` in older output).
# n:       maximum number of rows to keep per cluster.
# Returns a data frame with the same columns, at most `n` rows per cluster,
# ordered by cluster and then by decreasing fold change.
#
# NOTE(review): ranking by fold change is an assumption — the original
# selection code is not visible in this file; confirm the intended criterion.
top_markers_per_cluster <- function(markers, n) {
  if (nrow(markers) == 0) {
    return(markers)
  }
  fc_col <- intersect(c("avg_log2FC", "avg_logFC"), names(markers))
  if (length(fc_col) == 0) {
    stop("No fold-change column found in the marker table.", call. = FALSE)
  }
  ranked <- markers[
    order(markers$cluster, -markers[[fc_col[1]]]), ,
    drop = FALSE
  ]
  # drop = TRUE skips clusters that have no marker rows at all.
  by_cluster <- split(ranked, ranked$cluster, drop = TRUE)
  top <- do.call(rbind, lapply(by_cluster, head, n = n))
  rownames(top) <- NULL
  top
}

# `top25` is kept in memory for manual inspection; only the top-5 table is
# written, because that is the only output file named in this script.
top25 <- top_markers_per_cluster(all_markers, 25)
top5 <- top_markers_per_cluster(all_markers, 5)

# Write the file before announcing it, so the message below is truthful.
write.csv(top5, "Top5Markers_clusters.csv", row.names = FALSE)
cat("Top 5 marker list saved to Top5Markers_clusters.csv\n")
Top 5 marker list saved to Top5Markers_clusters.csv

4. Visualize Markers


# Work on the RNA assay for the marker visualisation.
DefaultAssay(All_samples_Merged) <- "RNA"

# Gene symbols of the top-5 markers; repeats are kept here because the
# duplicate check further down counts them.
all_genes <- top5$gene

# Split the distinct marker genes into those present in, and those absent
# from, the row names of the RNA assay.
marker_symbols <- unique(all_genes)
rna_features <- rownames(All_samples_Merged[["RNA"]])
valid_genes <- intersect(marker_symbols, rna_features)
invalid_genes <- setdiff(marker_symbols, rna_features)

cat("Valid genes:", length(valid_genes), "\n")
Valid genes: 69 
# Print the marker genes that are missing from the RNA assay; when
# `invalid_genes` is empty, only the "Missing genes:" label is printed.
cat("Missing genes:", invalid_genes, "\n\n")
Missing genes:  
# Report marker genes that occur more than once in the top-5 list, i.e. that
# were selected for more than one cluster.
gene_counts <- table(all_genes)
duplicate_genes <- names(which(gene_counts > 1))

if (length(duplicate_genes) == 0) {
  cat("No duplicates found in top5 list.\n")
} else {
  cat("Genes appearing more than once in top5 list:\n")
  print(duplicate_genes)
}
Genes appearing more than once in top5 list:
[1] "TRAV17"
# Run ScaleData on the top-5 marker genes only, each gene listed once.
# NOTE(review): the DotPlot that follows is understood to work from the
# normalized expression values rather than the scaled data — confirm whether
# this step is needed by a later plot (e.g. a heatmap) before keeping it.
top5_features <- unique(top5$gene)
All_samples_Merged <- ScaleData(
  object = All_samples_Merged,
  features = top5_features
)

  |                                                                                                            
  |                                                                                                      |   0%
  |                                                                                                            
  |======================================================================================================| 100%
 # DotPlot for top 5 markers
# One column per distinct top-5 marker gene, grey-to-firebrick colour ramp,
# with rotated x-axis labels.
dotplot_genes <- unique(top5$gene)
top5_dotplot <- DotPlot(
  object = All_samples_Merged,
  assay = "RNA",
  features = dotplot_genes,
  cols = c("grey", "firebrick"),
  dot.scale = 8
)
top5_dotplot + RotatedAxis()

5. Rename clusters (manual placeholder)


# 🔹 Cluster Annotation Table (0–13)
# Cluster | Top 5 Markers | Suggested Name | Reasoning
# 0 | CTAG2, MACROD2, IMPG1, TUBA3C, AGMO | Cancer/testis antigen–expressing malignant T cells | CTAG2 is a cancer/testis antigen; other markers (MACROD2, IMPG1) are associated with tumor-specific expression. Likely represents a malignant cluster with immunogenic features.
# 1 | XCL1, XCL2, KIR2DL4, KRT81, IGFBP2 | Cytotoxic/NK-like T cells | XCL1/2 are chemokines expressed by cytotoxic T cells; KIR2DL4 is an NK receptor; suggests cytotoxic, immune-interacting malignant subset.
# 2 | CA2, IL13, HS3ST1, AC100801.1, C9orf135 | Type 2 cytokine–expressing T cells / Th2-like malignant cells | IL13 indicates Th2-like phenotype; CA2 may mark metabolic adaptation; cluster likely represents Th2-skewed malignant cells.
# 3 | RPS4Y1, LINC00861, LINC01550, TMIGD2, ARMH1 | Ribosomal/translationally active T cells | RPS4Y1 is ribosomal; TMIGD2 is involved in T-cell costimulation; suggests highly active, proliferative subset.
# 4 | AC096577.1, AC004160.1, PTPRD, MGST1, CLIC2 | Malignant T cells with signaling/metabolic adaptation | PTPRD is a tumor suppressor often mutated in cancers; MGST1 and CLIC2 suggest oxidative stress or metabolic adaptation; likely malignant subset.
# 5 | TTC29, VIPR2, HMGA2, ZNF521, ARHGEF28 | Stem-like / progenitor-like malignant T cells | HMGA2 and ZNF521 are stemness-associated genes; TTC29 may indicate proliferative potential; cluster likely retains progenitor-like features.
# 6 | THY1, CFI, RANBP17, AL023574.1, ALKAL2 | Memory-like T cells / survival-oriented cluster | THY1 is CD90 (memory marker); CFI is involved in complement regulation; suggests memory-like or long-lived malignant T cells.
# 7 | TRAV17, MXD3, KLHL4, SLC2A2, PSRC1 | TCR-enriched / variable T-cell subset | TRAV17 is a TCR alpha variable gene; cluster likely reflects clonally expanded T cells; possibly malignant TCR-dominant population.
# 8 | GPAT2, PTCHD1-AS, CTDSPL, LHX9, PRSS57 | Developmental / early progenitor-like cluster | GPAT2 and PTCHD1-AS are testis or early developmental genes; may represent early differentiation or stem-like malignant cells.
# 9 | TBX4, AC069410.1, PODXL2, NPTX1, LINC00469 | Migratory / adhesion-oriented T cells | PODXL2 and TBX4 suggest migratory potential; possibly tissue-homing or trafficking T-cell subset.
# 10 | IL7R, SESN3, PASK, AC006369.1, DUSP16 | Naïve / central memory T cells | IL7R is a canonical naïve/central memory marker; SESN3 and DUSP16 indicate survival and stress response; likely normal-like or less malignant T cells.
# 11 | TRAV17, FUCA1, PTGDR2, TNFRSF18, S100P | Activated / regulatory-like T cells | TNFRSF18 (GITR) indicates activation; S100P is linked to stress response; TRAV17 suggests clonal expansion; cluster may have regulatory/activated features.
# 12 | CCL3, CSF2, CCL1, SERPINE1, GZMB | Pro-inflammatory / cytotoxic T cells | CCL3/CCL1/CSF2 are cytokines; GZMB is cytotoxic; cluster likely represents inflammatory and cytotoxic subset.
# 13 | OASL, IFIT3, IFIT2, CXCL10, CSAG3 | Interferon-response / antiviral-like T cells | IFIT and OASL genes indicate strong interferon signaling; CXCL10 recruits T cells; suggests antiviral or inflammatory response in malignant T cells.
