1. Load libraries

2. Load Seurat Object


# Relative path to the serialized merged Seurat object.
seurat_rds_path <- "../../../0-Seurat_RDS_OBJECT_FINAL/Seurat_object_Final_changes/All_samples_Merged_with_STCAT_Annotation_final-5-09-2025.rds"
All_samples_Merged <- readRDS(seurat_rds_path)

# Make RNA the active assay before normalizing.
DefaultAssay(All_samples_Merged) <- "RNA"

# Log-normalize the active assay using a scale factor of 10,000.
All_samples_Merged <- NormalizeData(
  object = All_samples_Merged,
  normalization.method = "LogNormalize",
  scale.factor = 10000,
  verbose = TRUE
)

3. Find top markers

# Use the "seurat_clusters" metadata column as the active identity so that
# markers are computed per cluster.
Idents(All_samples_Merged) <- "seurat_clusters"

# ---------------------------------------------------------
# Find marker genes per cluster
#   only.pos        = TRUE : report only up-regulated (positive) markers
#   min.pct         = 0.25 : minimum detection fraction threshold
#   logfc.threshold = 0.25 : minimum log fold-change threshold
#   min.diff.pct    = 0.2  : minimum difference in detection fraction
# The last argument was previously spelled `min.pct.diff`, which is not a
# formal argument of FindAllMarkers(); it fell into `...` and the intended
# detection-difference filter was not applied.
SS_markers <- FindAllMarkers(
  All_samples_Merged,
  only.pos = TRUE,
  min.pct = 0.25,
  logfc.threshold = 0.25,
  min.diff.pct = 0.2
)

library(dplyr)

# Gene-name patterns for markers treated as uninformative. They are joined
# into a single alternation and matched case-insensitively below.
# NOTE(review): most entries are prefix matches, so e.g. "^RPS" also matches
# RPS6KA1-style kinase genes and "^IGH" also matches IGHMBP2 -- confirm that
# dropping those genes is intended.
blacklist_patterns <- c(
  # T-cell receptor variable and constant segments
  "^TRAV", "^TRBV", "^TRGV", "^TRDV",
  "^TRBC", "^TRAC", "^TRDC", "^TRGC",
  # Immunoglobulin genes
  "^IGH", "^IGK", "^IGL", "^IGJ",
  # Ribosomal genes
  "^RPL", "^RPS",
  # Mitochondrial genes
  "^MT-",
  # Hemoglobins
  "^HBA", "^HBB", "^HB[ABZ]",
  # Optional lncRNAs
  "^NEAT1$", "^MALAT1$",
  "^XIST$"
)

blacklist_regex <- paste0(blacklist_patterns, collapse = "|")

# Returns TRUE for each gene name that hits any blacklist pattern
# (case-insensitive).
is_blacklisted <- function(genes) {
  grepl(blacklist_regex, genes, ignore.case = TRUE)
}

# Preview the marker rows that the blacklist will drop.
to_remove <- filter(SS_markers, is_blacklisted(gene))
message("Rows to remove: ", nrow(to_remove))
head(to_remove$gene)

# Keep every marker row whose gene does not hit the blacklist.
SS_markers_filtered <- filter(SS_markers, !is_blacklisted(gene))

4. Top 25 and Top 5 Markers


library(dplyr)

# ---------------------------------------------------------
# Export the full filtered marker table.
write.csv(SS_markers_filtered, file = "SS_markers_filtered.csv", row.names = FALSE)

# ---------------------------------------------------------
# Markers with adjusted p-value below 0.05, grouped by cluster. Both top-N
# tables below are drawn from this shared intermediate.
significant_by_cluster <- group_by(
  filter(SS_markers_filtered, p_val_adj < 0.05),
  cluster
)

# ---------------------------------------------------------
# Top 25 markers per cluster, ranked by average log2 fold change.
top25_markers <- ungroup(
  slice_max(significant_by_cluster, order_by = avg_log2FC, n = 25)
)
write.csv(top25_markers, file = "SS_markers_top25.csv", row.names = FALSE)

# ---------------------------------------------------------
# Top 5 markers per cluster, ranked by average log2 fold change.
top5_markers <- ungroup(
  slice_max(significant_by_cluster, order_by = avg_log2FC, n = 5)
)
write.csv(top5_markers, file = "SS_markers_top5.csv", row.names = FALSE)

message("Filtered markers, top25, and top5 markers saved successfully.")
LS0tCnRpdGxlOiAiSWRlbnRpZnkgVG9wNSBNYXJrZXJzICIKYXV0aG9yOiAiTmFzaXIgTWFobW9vZCBBYmJhc2kiCmRhdGU6ICJgciBTeXMuRGF0ZSgpYCIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6CiAgICB0b2M6IHllcwogICAgdG9jX2Zsb2F0OiB5ZXMKICAgIHRvY19jb2xsYXBzZWQ6IHllcwogIHdvcmRfZG9jdW1lbnQ6CiAgICB0b2M6IHllcwogIGh0bWxfZG9jdW1lbnQ6CiAgICB0b2M6IHllcwogICAgZGZfcHJpbnQ6IHBhZ2VkCiAgcGRmX2RvY3VtZW50OgogICAgdG9jOiB5ZXMKLS0tCgoKIyAxLiBsb2FkIGxpYnJhcmllcwpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0KIyBMb2FkIGJlbG93IGxpYnJhcmllcwpsaWJyYXJ5KFNldXJhdCkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KHBsb3RseSkKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoY293cGxvdCkKCgpsaWJyYXJ5KFNDcHVicikKbGlicmFyeShkcGx5cikKYGBgCgoKIyAyLiBMb2FkIFNldXJhdCBPYmplY3QgCmBgYHtyfQoKQWxsX3NhbXBsZXNfTWVyZ2VkIDwtIHJlYWRSRFMoIi4uLy4uLy4uLzAtU2V1cmF0X1JEU19PQkpFQ1RfRklOQUwvU2V1cmF0X29iamVjdF9GaW5hbF9jaGFuZ2VzL0FsbF9zYW1wbGVzX01lcmdlZF93aXRoX1NUQ0FUX0Fubm90YXRpb25fZmluYWwtNS0wOS0yMDI1LnJkcyIpCgpEZWZhdWx0QXNzYXkoQWxsX3NhbXBsZXNfTWVyZ2VkKSA8LSAiUk5BIgoKQWxsX3NhbXBsZXNfTWVyZ2VkIDwtIE5vcm1hbGl6ZURhdGEoCiAgICBBbGxfc2FtcGxlc19NZXJnZWQsCiAgICBub3JtYWxpemF0aW9uLm1ldGhvZCA9ICJMb2dOb3JtYWxpemUiLAogICAgc2NhbGUuZmFjdG9yID0gMWU0LAogICAgdmVyYm9zZSA9IFRSVUUKICApCgoKYGBgCgojIDMuZmluZCBUb3AgbWFya2VycwpgYGB7cn0KSWRlbnRzKEFsbF9zYW1wbGVzX01lcmdlZCkgPC0gInNldXJhdF9jbHVzdGVycyIKCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCiMgMu+4j+KDoyBGaW5kIG1hcmtlciBnZW5lcyBwZXIgY2x1c3RlcgpTU19tYXJrZXJzIDwtIEZpbmRBbGxNYXJrZXJzKAogIEFsbF9zYW1wbGVzX01lcmdlZCwKICBvbmx5LnBvcyA9IFRSVUUsCiAgbWluLnBjdCA9IDAuMjUsCiAgbG9nZmMudGhyZXNob2xkID0gMC4yNSwKICBtaW4ucGN0LmRpZmYgPSAwLjIKICAKKQoKbGlicmFyeShkcGx5cikKCiMgUHJlY2lzZSBibGFja2xpc3QgZm9yIHVuaW5mb3JtYXRpdmUgZ2VuZXMKYmxhY2tsaXN0X3BhdHRlcm5zIDwtIGMoCiAgIl5UUkFWIiwgIl5UUkJWIiwgIl5UUkdWIiwgIl5UUkRWIiwgIl5UUkJDIiwgIl5UUkFDIiwgIl5UUkRDIiwgIl5UUkdDIiwgIyBUQ1IKICAiXklHSCIsICJeSUdLIiwgIl5JR0wiLCAiXklHSiIsICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIElnIGdlbmVzCiAgIl5SUEwiLCAiXlJQUyIsICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgIyByaWJvc29tYWwKICAiXk1ULSIsICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIG1pdG9jaG9uZHJpYQogICJeSEJBIiwgIl5IQkIiLCAiXkhCW0FCWl0iLCAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgaGVtb2dsb2JpbnMKICAiXk5FQVQxJCIsICJeTUFMQVQxJCIsICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIG9wdGlvbmFsIGxuY1JOQXMKICAiXlhJU1QkIiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICkKCmJsYWNrbGlzdF9yZWdleCA8LSBwYXN0ZShibGFja2xpc3RfcGF0dGVybnMsIGNvbGxhcHNlID0gInwiKQoKIyBQcmV2aWV3IHdoaWNoIG1hcmtlcnMgd2lsbCBiZSByZW1vdmVkCnRvX3JlbW92ZSA8LSBTU19tYXJrZXJzICU+JQogIGZpbHRlcihncmVwbChibGFja2xpc3RfcmVnZXgsIGdlbmUsIGlnbm9yZS5jYXNlID0gVFJVRSkpCm1lc3NhZ2UoIlJvd3MgdG8gcmVtb3ZlOiAiLCBucm93KHRvX3JlbW92ZSkpCmhlYWQodG9fcmVtb3ZlJGdlbmUpCgojIEZpbHRlciBtYXJrZXJzIChrZWVwIGltcG9ydGFudCBtZXRhYm9saWMvcHJvbGlmZXJhdGlvbiBnZW5lcykKU1NfbWFya2Vyc19maWx0ZXJlZCA8LSBTU19tYXJrZXJzICU+JQogIGZpbHRlcighZ3JlcGwoYmxhY2tsaXN0X3JlZ2V4LCBnZW5lLCBpZ25vcmUuY2FzZSA9IFRSVUUpKQoKCmBgYAoKCiMgNC4gVG9wNSBNYXJrZXJzCmBgYHtyLCBmaWcud2lkdGg9MTIsIGZpZy5oZWlnaHQ9Nn0KCmxpYnJhcnkoZHBseXIpCgojIC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQojIFNhdmUgZmlsdGVyZWQgbWFya2Vycwp3cml0ZS5jc3YoU1NfbWFya2Vyc19maWx0ZXJlZCwgZmlsZSA9ICJTU19tYXJrZXJzX2ZpbHRlcmVkLmNzdiIsIHJvdy5uYW1lcyA9IEZBTFNFKQoKIyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KIyBFeHRyYWN0IHRvcCAyNSBtYXJrZXJzIHBlciBjbHVzdGVyCnRvcDI1X21hcmtlcnMgPC0gU1NfbWFya2Vyc19maWx0ZXJlZCAlPiUKICBmaWx0ZXIocF92YWxfYWRqIDwgMC4wNSkgJT4lICAjIGVuc3VyZSBzdGF0aXN0aWNhbCBzaWduaWZpY2FuY2UKICBncm91cF9ieShjbHVzdGVyKSAlPiUKICBzbGljZV9tYXgob3JkZXJfYnkgPSBhdmdfbG9nMkZDLCBuID0gMjUpICU+JQogIHVuZ3JvdXAoKQoKd3JpdGUuY3N2KHRvcDI1X21hcmtlcnMsIGZpbGUgPSAiU1NfbWFya2Vyc190b3AyNS5jc3YiLCByb3cubmFtZXMgPSBGQUxTRSkKCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCiMgRXh0cmFjdCB0b3AgNSBtYXJrZXJzIHBlciBjbHVzdGVyCnRvcDVfbWFya2VycyA8LSBTU19tYXJrZXJzX2ZpbHRlcmVk
ICU+JQogIGZpbHRlcihwX3ZhbF9hZGogPCAwLjA1KSAlPiUgICMgZW5zdXJlIHN0YXRpc3RpY2FsIHNpZ25pZmljYW5jZQogIGdyb3VwX2J5KGNsdXN0ZXIpICU+JQogIHNsaWNlX21heChvcmRlcl9ieSA9IGF2Z19sb2cyRkMsIG4gPSA1KSAlPiUKICB1bmdyb3VwKCkKCndyaXRlLmNzdih0b3A1X21hcmtlcnMsIGZpbGUgPSAiU1NfbWFya2Vyc190b3A1LmNzdiIsIHJvdy5uYW1lcyA9IEZBTFNFKQoKbWVzc2FnZSgiRmlsdGVyZWQgbWFya2VycywgdG9wMjUsIGFuZCB0b3A1IG1hcmtlcnMgc2F2ZWQgc3VjY2Vzc2Z1bGx5LiIpCgpgYGAKCgo=