1. Load libraries

2. Load Seurat Object


# Path (relative to the notebook) of the saved merged Seurat object; the file
# name indicates it already carries the STCAT annotation.
merged_rds_path <- "../../../../0-Seurat_RDS_OBJECT_FINAL/Seurat_object_Final_changes/All_samples_Merged_with_STCAT_Annotation_final-5-09-2025.rds"
All_samples_Merged <- readRDS(merged_rds_path)

# All downstream steps operate on the RNA assay.
DefaultAssay(All_samples_Merged) <- "RNA"

# Log-normalize the RNA assay with a scale factor of 10,000. This is run
# unconditionally in case the stored object was not normalized; the original
# author notes that repeating the step is safe.
All_samples_Merged <- NormalizeData(
  All_samples_Merged,
  normalization.method = "LogNormalize",
  scale.factor = 10000,
  verbose = TRUE
)

3. Run CyteTypeR for cluster annotation

# Make the Seurat clustering the active identity so that markers are computed
# once per cluster.
Idents(All_samples_Merged) <- "seurat_clusters"

# ---------------------------------------------------------
# 2️⃣ Find marker genes per cluster
# Only positive markers are kept, subject to three thresholds:
#   min.pct         - minimum fraction of cells detecting the gene
#   logfc.threshold - minimum log fold-change
#   min.diff.pct    - minimum difference in detection fraction between groups
# NOTE: the Seurat parameter is spelled `min.diff.pct`. A misspelled name is
# not matched to that parameter (it is swallowed by `...`), which would leave
# the detection-difference filter at its default and silently unapplied.
SS_markers <- FindAllMarkers(
  All_samples_Merged,
  only.pos = TRUE,
  min.pct = 0.25,
  logfc.threshold = 0.25,
  min.diff.pct = 0.2
)

library(dplyr)

# Regular-expression fragments for gene families treated as uninformative
# markers. Each fragment is anchored at the start of the gene symbol; the
# lncRNA and XIST entries are anchored at both ends (exact symbol match).
blacklist_patterns <- c(
  # T-cell receptor variable and constant segments
  "^TRAV", "^TRBV", "^TRGV", "^TRDV",
  "^TRBC", "^TRAC", "^TRDC", "^TRGC",
  # Immunoglobulin genes
  "^IGH", "^IGK", "^IGL", "^IGJ",
  # Ribosomal proteins
  "^RPL", "^RPS",
  # Mitochondrial genes
  "^MT-",
  # Hemoglobins
  "^HBA", "^HBB", "^HB[ABZ]",
  # Optional lncRNAs
  "^NEAT1$", "^MALAT1$",
  "^XIST$"
)

# Combine the fragments into one alternation so a single match is enough.
blacklist_regex <- paste(blacklist_patterns, collapse = "|")

# Predicate: TRUE for every gene symbol hitting the blacklist
# (matching is case-insensitive).
matches_blacklist <- function(genes) {
  grepl(blacklist_regex, genes, ignore.case = TRUE)
}

# Preview the marker rows that are about to be dropped.
to_remove <- filter(SS_markers, matches_blacklist(gene))
message("Rows to remove: ", nrow(to_remove))
head(to_remove[["gene"]])

# Keep every marker row whose gene does not hit the blacklist.
SS_markers_filtered <- filter(SS_markers, !matches_blacklist(gene))

4. Run CyteTypeR for cluster annotation

LS0tCnRpdGxlOiAiQ3l0eXBlUiBBbm5vdGF0aW9uIHVzaW5nIEFJLUZpbmFsaXplZCIKYXV0aG9yOiAiTmFzaXIgTWFobW9vZCBBYmJhc2kiCmRhdGU6ICJgciBTeXMuRGF0ZSgpYCIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6CiAgICB0b2M6IHllcwogICAgdG9jX2Zsb2F0OiB5ZXMKICAgIHRvY19jb2xsYXBzZWQ6IHllcwogIHdvcmRfZG9jdW1lbnQ6CiAgICB0b2M6IHllcwogIGh0bWxfZG9jdW1lbnQ6CiAgICB0b2M6IHllcwogICAgZGZfcHJpbnQ6IHBhZ2VkCiAgcGRmX2RvY3VtZW50OgogICAgdG9jOiB5ZXMKLS0tCgoKIyAxLiBsb2FkIGxpYnJhcmllcwpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0KIyBMb2FkIGJlbG93IGxpYnJhcmllcwpsaWJyYXJ5KFNldXJhdCkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KHBsb3RseSkKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoY293cGxvdCkKCgpsaWJyYXJ5KFNDcHVicikKbGlicmFyeShkcGx5cikKYGBgCgoKIyAyLiBMb2FkIFNldXJhdCBPYmplY3QgCmBgYHtyfQoKQWxsX3NhbXBsZXNfTWVyZ2VkIDwtIHJlYWRSRFMoIi4uLy4uLy4uLy4uLzAtU2V1cmF0X1JEU19PQkpFQ1RfRklOQUwvU2V1cmF0X29iamVjdF9GaW5hbF9jaGFuZ2VzL0FsbF9zYW1wbGVzX01lcmdlZF93aXRoX1NUQ0FUX0Fubm90YXRpb25fZmluYWwtNS0wOS0yMDI1LnJkcyIpCgpEZWZhdWx0QXNzYXkoQWxsX3NhbXBsZXNfTWVyZ2VkKSA8LSAiUk5BIgoKIyBJZiBSTkEgaGFzIG5vdCBiZWVuIG5vcm1hbGl6ZWQgeWV0LCBydW4gTm9ybWFsaXplRGF0YSAoc2FmZSB0byBydW4gYWdhaW4pCkFsbF9zYW1wbGVzX01lcmdlZCA8LSBOb3JtYWxpemVEYXRhKEFsbF9zYW1wbGVzX01lcmdlZCwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbm9ybWFsaXphdGlvbi5tZXRob2QgPSAiTG9nTm9ybWFsaXplIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgc2NhbGUuZmFjdG9yID0gMWU0LAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB2ZXJib3NlID0gVFJVRSkKCmBgYAoKIyAzLiBSdW4gQ3l0eXBlUiBmb3IgY2x1c3RlciBhbm5vdGF0aW9uCmBgYHtyfQpJZGVudHMoQWxsX3NhbXBsZXNfTWVyZ2VkKSA8LSAic2V1cmF0X2NsdXN0ZXJzIgoKIyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KIyAy77iP4oOjIEZpbmQgbWFya2VyIGdlbmVzIHBlciBjbHVzdGVyClNTX21hcmtlcnMgPC0gRmluZEFsbE1hcmtlcnMoCiAgQWxsX3NhbXBsZXNfTWVyZ2VkLAogIG9ubHkucG9zID0gVFJVRSwKICBtaW4ucGN0ID0gMC4yNSwKICBsb2dmYy50aHJlc2hvbGQgPSAwLjI1LAogIG1pbi5wY3QuZGlmZiA9IDAuMgogIAopCgpsaWJyYXJ5KGRwbHlyKQoKIyBQcmVjaXNlIGJsYWNrbGlzdCBmb3IgdW5pbmZvcm1hdGl2ZSBnZW5lcwpibGFja2xpc3RfcGF0dGVybnMgPC0gYygKICAiXlRSQVYiLCAiXlRSQlYiLCAiXlRS
R1YiLCAiXlRSRFYiLCAiXlRSQkMiLCAiXlRSQUMiLCAiXlRSREMiLCAiXlRSR0MiLCAjIFRDUgogICJeSUdIIiwgIl5JR0siLCAiXklHTCIsICJeSUdKIiwgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgSWcgZ2VuZXMKICAiXlJQTCIsICJeUlBTIiwgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIHJpYm9zb21hbAogICJeTVQtIiwgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgbWl0b2Nob25kcmlhCiAgIl5IQkEiLCAiXkhCQiIsICJeSEJbQUJaXSIsICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIyBoZW1vZ2xvYmlucwogICJeTkVBVDEkIiwgIl5NQUxBVDEkIiwgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgb3B0aW9uYWwgbG5jUk5BcwogICJeWElTVCQiICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgKQoKYmxhY2tsaXN0X3JlZ2V4IDwtIHBhc3RlKGJsYWNrbGlzdF9wYXR0ZXJucywgY29sbGFwc2UgPSAifCIpCgojIFByZXZpZXcgd2hpY2ggbWFya2VycyB3aWxsIGJlIHJlbW92ZWQKdG9fcmVtb3ZlIDwtIFNTX21hcmtlcnMgJT4lCiAgZmlsdGVyKGdyZXBsKGJsYWNrbGlzdF9yZWdleCwgZ2VuZSwgaWdub3JlLmNhc2UgPSBUUlVFKSkKbWVzc2FnZSgiUm93cyB0byByZW1vdmU6ICIsIG5yb3codG9fcmVtb3ZlKSkKaGVhZCh0b19yZW1vdmUkZ2VuZSkKCiMgRmlsdGVyIG1hcmtlcnMgKGtlZXAgaW1wb3J0YW50IG1ldGFib2xpYy9wcm9saWZlcmF0aW9uIGdlbmVzKQpTU19tYXJrZXJzX2ZpbHRlcmVkIDwtIFNTX21hcmtlcnMgJT4lCiAgZmlsdGVyKCFncmVwbChibGFja2xpc3RfcmVnZXgsIGdlbmUsIGlnbm9yZS5jYXNlID0gVFJVRSkpCgoKYGBgCgojIDMuIFJ1biBDeXR5cGVSIGZvciBjbHVzdGVyIGFubm90YXRpb24KYGBge3IsIGZpZy53aWR0aD0xMiwgZmlnLmhlaWdodD02fQpsaWJyYXJ5KEN5dGVUeXBlUikKbGlicmFyeShkcGx5cikKIyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCiMgUHJlcGFyZSBkYXRhIGZvciBDeXRlVHlwZVIKIyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCnByZXBwZWRfZGF0YSA8LSBQcmVwYXJlQ3l0ZVR5cGVSKAogIG9iaiA9IEFsbF9zYW1wbGVzX01lcmdlZCwKICBTU19tYXJrZXJzX2ZpbHRlcmVkLAogIG5fdG9wX2dlbmVzID0gMjUsICAgICAgICAgICAgICAgIyB0b3AgNSBtYXJrZXIgZ2VuZXMgcGVyIGNsdXN0ZXIKICBncm91cF9rZXkgPSAnc2V1cmF0X2NsdXN0ZXJzJywgICMgbXVzdCBtYXRjaCBTZXVyYXQgb2JqZWN0CiAgYWdncmVnYXRlX21ldGFkYXRhID0gVFJVRSwKICBjb29yZGluYXRlc19rZXkgPSAidW1hcCIKKQoKCgojIC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KIyBNZXRhZGF0YSBmb3IgQ3l0ZVR5
cGVSCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQptZXRhZGF0YSA8LSBsaXN0KAogIHRpdGxlID0gJ1PDqXphcnkgc3luZHJvbWUgc2luZ2xlLWNlbGwgYW5hbHlzaXMnLAogIHJ1bl9sYWJlbCA9ICdTU19jZWxsX2xpbmVzX2FuYWx5c2lzJywKICBleHBlcmltZW50X25hbWUgPSAnNyBTUyBjZWxsIGxpbmVzICsgMiBoZWFsdGh5IENENCBUIGNlbGxzJwopCgoKIyBSZW1vdmUgYm90aCBsaXRlcmFsICJOQSIgYW5kIHRydWUgTkEgbmFtZXMKcHJlcHBlZF9kYXRhJGNsdXN0ZXJNZXRhZGF0YSA8LSBwcmVwcGVkX2RhdGEkY2x1c3Rlck1ldGFkYXRhWwogICFpcy5uYShuYW1lcyhwcmVwcGVkX2RhdGEkY2x1c3Rlck1ldGFkYXRhKSkgJiAKICBuYW1lcyhwcmVwcGVkX2RhdGEkY2x1c3Rlck1ldGFkYXRhKSAhPSAiTkEiCl0KCiMgQ29uZmlybSBjbGVhbnVwCm5hbWVzKHByZXBwZWRfZGF0YSRjbHVzdGVyTWV0YWRhdGEpCgoKCgoKIyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCiMgUnVuIEN5dGVUeXBlUgojIC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KcmVzdWx0cyA8LSBDeXRlVHlwZVIoCiAgb2JqID0gQWxsX3NhbXBsZXNfTWVyZ2VkLAogIHByZXBwZWRfZGF0YSA9IHByZXBwZWRfZGF0YSwKICBzdHVkeV9jb250ZXh0ID0gIlNlemFyeSBzeW5kcm9tZSA3IGNlbGwgbGluZXMgQ0Q0IFQgY2VsbHMgbWFsaWduYW50IGRlcml2ZWQgZnJvbSBibG9vZCBzYW1wbGVzIGFuZCAyIGhlYWx0aHkgZG9ub3IgQ0Q0IFQgY2VsbHMgZnJvbSBQQk1DLiBDTFVTVEVSIDQgQU5EIENMVVNURVIgMTEgQVJFIGhSQUxUSFkgRE9OT1IgQ0VMTCBDTFVTVEVSUyIsCiAgbWV0YWRhdGEgPSBtZXRhZGF0YQopCgojIC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KIyBDaGVjayByZXN1bHRzCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQojIElmIGpvYiBzdWNjZWVkcywgcmV0cmlldmUgcmVzdWx0czoKIyBHZXRSZXN1bHRzKHJlc3VsdHMpCgpzYXZlUkRTKHJlc3VsdHMsIGZpbGUgPSAiQ3l0ZVR5cGVSX3Jlc3VsdHNfU1NfY2VsbF9saW5lc19maW5hbGl6ZWQucmRzIikKCkRpbVBsb3QocmVzdWx0cywgZ3JvdXAuYnkgPSAiY3l0ZXR5cGVfY2VsbF9vbnRvbG9neV9zZXVyYXRfY2x1c3RlcnMiLCBsYWJlbCA9IFQsIGxhYmVsLmJveCA9IFQscmVwZWwgPSBULCByZWR1Y3Rpb24gPSAidW1hcCIpCkRpbVBsb3QocmVzdWx0cywgZ3JvdXAuYnkgPSAiY3l0ZXR5cGVfc2V1cmF0X2NsdXN0ZXJzIiwgbGFiZWwgPSBGLCBsYWJlbC5ib3ggPSBGLHJlcGVsID0gRiwgcmVkdWN0aW9uID0gInVtYXAiKQoKYGBg