1. Load libraries

2. Load Seurat Object


# Read the merged Seurat object from disk (per its file name, the version
# that already carries the STCAT annotation).
All_samples_Merged <- readRDS(
  "../0-Seurat_RDS_OBJECT_FINAL/Seurat_object_Final_changes/All_samples_Merged_with_STCAT_Annotation_final-5-09-2025.rds"
)

# Make RNA the active assay for everything that follows.
DefaultAssay(All_samples_Merged) <- "RNA"

# Log-normalize the RNA assay. This is run unconditionally: it covers the case
# where the assay was never normalized, and repeating it on an already
# normalized object is considered safe.
All_samples_Merged <- NormalizeData(
  object = All_samples_Merged,
  normalization.method = "LogNormalize",
  scale.factor = 10000,
  verbose = TRUE
)
Performing log-normalization
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|

3. Run CyteTypeR for cluster annotation

library(dplyr)

# Blacklist of gene-symbol patterns considered uninformative for cluster
# annotation. Every pattern is anchored at the start of the symbol and written
# in default (extended) regex syntax, because the grepl() calls that consume
# `blacklist_regex` do not set `perl = TRUE` (so no look-ahead is available).
#
# Patterns are deliberately narrower than a bare prefix where a bare prefix
# would also hit unrelated genes:
#   * "^IGH([ADEGJV]|M$)" keeps IGHA/IGHD/IGHE/IGHG/IGHJ/IGHV/IGHM but spares
#     IGHMBP2 (an Ig-mu *binding* protein, not an immunoglobulin gene).
#   * "^IGL[CJLV]" keeps IGLC/IGLJ/IGLV/IGLL but spares IGLON5.
#   * "^RPS([0-9]+([^0-9Kk]|$)|A)" keeps RPS<number>[suffix] and RPSA but
#     spares the RPS6K* ribosomal-S6 *kinase* family (RPS6KA1, RPS6KB1, ...),
#     which are signalling genes rather than ribosomal proteins. Both "K" and
#     "k" are listed so the exclusion does not depend on how case-insensitive
#     matching treats negated bracket expressions.
#   * "^HB[ABZ]" alone covers HBA*, HBB* and HBZ*; separate "^HBA"/"^HBB"
#     entries would be redundant.
blacklist_patterns <- c(
  "^TRAV", "^TRBV", "^TRGV", "^TRDV", "^TRBC", "^TRAC", "^TRDC", "^TRGC", # TCR
  "^IGH([ADEGJV]|M$)", "^IGK", "^IGL[CJLV]", "^IGJ",                      # Ig genes
  "^RPL", "^RPS([0-9]+([^0-9Kk]|$)|A)",                                   # ribosomal
  "^MT-",                                                                 # mitochondria
  "^HB[ABZ]",                                                             # hemoglobins
  "^NEAT1$", "^MALAT1$"                                                   # optional lncRNAs
)

# Single alternation so one grepl() call tests all patterns at once.
blacklist_regex <- paste(blacklist_patterns, collapse = "|")

# Preview which markers will be removed (matching is case-insensitive).
to_remove <- SS_markers %>%
  filter(grepl(blacklist_regex, gene, ignore.case = TRUE))
message("Rows to remove: ", nrow(to_remove))
head(to_remove$gene)
[1] "TRAV17"  "TRAV9-2" "RPL22L1" "RPL7"    "RPL35A"  "NEAT1"  
# Drop every marker row whose gene symbol matches the blacklist
# (case-insensitive); all remaining markers are kept unchanged.
SS_markers_filtered <- dplyr::filter(
  SS_markers,
  !grepl(blacklist_regex, gene, ignore.case = TRUE)
)

4. Run CyteTypeR for cluster annotation

LS0tCnRpdGxlOiAiQ3l0eXBlUiBBbm5vdGF0aW9uIHVzaW5nIEFJIgphdXRob3I6ICJOYXNpciBNYWhtb29kIEFiYmFzaSIKZGF0ZTogImByIFN5cy5EYXRlKClgIgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazoKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwogICAgdG9jX2NvbGxhcHNlZDogeWVzCiAgd29yZF9kb2N1bWVudDoKICAgIHRvYzogeWVzCiAgaHRtbF9kb2N1bWVudDoKICAgIHRvYzogeWVzCiAgICBkZl9wcmludDogcGFnZWQKICBwZGZfZG9jdW1lbnQ6CiAgICB0b2M6IHllcwotLS0KCgojIDEuIGxvYWQgbGlicmFyaWVzCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQojIExvYWQgYmVsb3cgbGlicmFyaWVzCmxpYnJhcnkoU2V1cmF0KQpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkocGxvdGx5KQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShjb3dwbG90KQoKCmxpYnJhcnkoU0NwdWJyKQpsaWJyYXJ5KGRwbHlyKQpgYGAKCgojIDIuIExvYWQgU2V1cmF0IE9iamVjdCAKYGBge3J9CgpBbGxfc2FtcGxlc19NZXJnZWQgPC0gcmVhZFJEUygiLi4vMC1TZXVyYXRfUkRTX09CSkVDVF9GSU5BTC9TZXVyYXRfb2JqZWN0X0ZpbmFsX2NoYW5nZXMvQWxsX3NhbXBsZXNfTWVyZ2VkX3dpdGhfU1RDQVRfQW5ub3RhdGlvbl9maW5hbC01LTA5LTIwMjUucmRzIikKCkRlZmF1bHRBc3NheShBbGxfc2FtcGxlc19NZXJnZWQpIDwtICJSTkEiCgojIElmIFJOQSBoYXMgbm90IGJlZW4gbm9ybWFsaXplZCB5ZXQsIHJ1biBOb3JtYWxpemVEYXRhIChzYWZlIHRvIHJ1biBhZ2FpbikKQWxsX3NhbXBsZXNfTWVyZ2VkIDwtIE5vcm1hbGl6ZURhdGEoQWxsX3NhbXBsZXNfTWVyZ2VkLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBub3JtYWxpemF0aW9uLm1ldGhvZCA9ICJMb2dOb3JtYWxpemUiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBzY2FsZS5mYWN0b3IgPSAxZTQsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHZlcmJvc2UgPSBUUlVFKQoKYGBgCgojIDMuIFJ1biBDeXR5cGVSIGZvciBjbHVzdGVyIGFubm90YXRpb24KYGBge3J9CklkZW50cyhBbGxfc2FtcGxlc19NZXJnZWQpIDwtICJzZXVyYXRfY2x1c3RlcnMiCgojIC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQojIDHvuI/ig6MgTG9hZCBsaWJyYXJ5CmxpYnJhcnkoQ3l0ZVR5cGVSKQpsaWJyYXJ5KGRwbHlyKQoKIyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KIyAy77iP4oOjIEZpbmQgbWFya2VyIGdlbmVzIHBlciBjbHVzdGVyClNTX21hcmtlcnMgPC0gRmluZEFsbE1hcmtlcnMoCiAgQWxsX3NhbXBsZXNfTWVyZ2VkLAogIG9ubHkucG9zID0gVFJVRSwKICBtaW4ucGN0ID0gMC4yNSwKICBsb2dmYy50aHJlc2hvbGQgPSAwLjI1LAogIG1pbi5wY3QuZGlmZiA9IDAuMgopCgpsaWJyYXJ5KGRwbHly
KQoKIyBQcmVjaXNlIGJsYWNrbGlzdCBmb3IgdW5pbmZvcm1hdGl2ZSBnZW5lcwpibGFja2xpc3RfcGF0dGVybnMgPC0gYygKICAiXlRSQVYiLCAiXlRSQlYiLCAiXlRSR1YiLCAiXlRSRFYiLCAiXlRSQkMiLCAiXlRSQUMiLCAiXlRSREMiLCAiXlRSR0MiLCAjIFRDUgogICJeSUdIIiwgIl5JR0siLCAiXklHTCIsICJeSUdKIiwgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgSWcgZ2VuZXMKICAiXlJQTCIsICJeUlBTIiwgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIHJpYm9zb21hbAogICJeTVQtIiwgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgbWl0b2Nob25kcmlhCiAgIl5IQkEiLCAiXkhCQiIsICJeSEJbQUJaXSIsICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIyBoZW1vZ2xvYmlucwogICJeTkVBVDEkIiwgIl5NQUxBVDEkIiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIyBvcHRpb25hbCBsbmNSTkFzCikKCmJsYWNrbGlzdF9yZWdleCA8LSBwYXN0ZShibGFja2xpc3RfcGF0dGVybnMsIGNvbGxhcHNlID0gInwiKQoKIyBQcmV2aWV3IHdoaWNoIG1hcmtlcnMgd2lsbCBiZSByZW1vdmVkCnRvX3JlbW92ZSA8LSBTU19tYXJrZXJzICU+JQogIGZpbHRlcihncmVwbChibGFja2xpc3RfcmVnZXgsIGdlbmUsIGlnbm9yZS5jYXNlID0gVFJVRSkpCm1lc3NhZ2UoIlJvd3MgdG8gcmVtb3ZlOiAiLCBucm93KHRvX3JlbW92ZSkpCmhlYWQodG9fcmVtb3ZlJGdlbmUpCgojIEZpbHRlciBtYXJrZXJzIChrZWVwIGltcG9ydGFudCBtZXRhYm9saWMvcHJvbGlmZXJhdGlvbiBnZW5lcykKU1NfbWFya2Vyc19maWx0ZXJlZCA8LSBTU19tYXJrZXJzICU+JQogIGZpbHRlcighZ3JlcGwoYmxhY2tsaXN0X3JlZ2V4LCBnZW5lLCBpZ25vcmUuY2FzZSA9IFRSVUUpKQoKCmBgYAoKIyAzLiBSdW4gQ3l0eXBlUiBmb3IgY2x1c3RlciBhbm5vdGF0aW9uCmBgYHtyLCBmaWcud2lkdGg9MTIsIGZpZy5oZWlnaHQ9Nn0KCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQojIFByZXBhcmUgZGF0YSBmb3IgQ3l0ZVR5cGVSCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQpwcmVwcGVkX2RhdGEgPC0gUHJlcGFyZUN5dGVUeXBlUigKICBvYmogPSBBbGxfc2FtcGxlc19NZXJnZWQsCiAgU1NfbWFya2Vyc19maWx0ZXJlZCwKICBuX3RvcF9nZW5lcyA9IDEwLCAgICAgICAgICAgICAgICMgdG9wIDEwIG1hcmtlciBnZW5lcyBwZXIgY2x1c3RlcgogIGdyb3VwX2tleSA9ICdzZXVyYXRfY2x1c3RlcnMnLCAgIyBtdXN0IG1hdGNoIFNldXJhdCBvYmplY3QKICBhZ2dyZWdhdGVfbWV0YWRhdGEgPSBUUlVFLAogIGNvb3JkaW5hdGVzX2tleSA9ICJ1bWFwIgopCgoKCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0t
LQojIE1ldGFkYXRhIGZvciBDeXRlVHlwZVIKIyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCm1ldGFkYXRhIDwtIGxpc3QoCiAgdGl0bGUgPSAnU8OpemFyeSBzeW5kcm9tZSBzaW5nbGUtY2VsbCBhbmFseXNpcycsCiAgcnVuX2xhYmVsID0gJ1NTX2NlbGxfbGluZXNfYW5hbHlzaXMnLAogIGV4cGVyaW1lbnRfbmFtZSA9ICc3IFNTIGNlbGwgbGluZXMgKyAyIGhlYWx0aHkgQ0Q0IFQgY2VsbHMnCikKCgojIFJlbW92ZSBjbHVzdGVyTWV0YWRhdGEgZW50cmllcyBsaXRlcmFsbHkgbmFtZWQgIk5BIgpwcmVwcGVkX2RhdGEkY2x1c3Rlck1ldGFkYXRhIDwtIHByZXBwZWRfZGF0YSRjbHVzdGVyTWV0YWRhdGFbIW5hbWVzKHByZXBwZWRfZGF0YSRjbHVzdGVyTWV0YWRhdGEpICVpbiUgYygiTkEiKV0KCiMgQ2hlY2sgcmVtYWluaW5nIGNsdXN0ZXIgbmFtZXMKbmFtZXMocHJlcHBlZF9kYXRhJGNsdXN0ZXJNZXRhZGF0YSkKCgoKIyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCiMgUnVuIEN5dGVUeXBlUgojIC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KcmVzdWx0cyA8LSBDeXRlVHlwZVIoCiAgb2JqID0gQWxsX3NhbXBsZXNfTWVyZ2VkLAogIHByZXBwZWRfZGF0YSA9IHByZXBwZWRfZGF0YSwKICBzdHVkeV9jb250ZXh0ID0gIlNlemFyeSBzeW5kcm9tZSA3IGNlbGwgbGluZXMgQ0Q0IFQgY2VsbHMgbWFsaWduYW50IGRlcml2ZWQgZnJvbSBibG9vZCBzYW1wbGVzIGFuZCAyIGhlYWx0aHkgZG9ub3IgQ0Q0IFQgY2VsbHMgZnJvbSBQQk1DIiwKICBtZXRhZGF0YSA9IG1ldGFkYXRhCikKCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQojIENoZWNrIHJlc3VsdHMKIyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCiMgSWYgam9iIHN1Y2NlZWRzLCByZXRyaWV2ZSByZXN1bHRzOgojIEdldFJlc3VsdHMocmVzdWx0cykKCnNhdmVSRFMocmVzdWx0cywgZmlsZSA9ICJDeXRlVHlwZVJfcmVzdWx0c19TU19jZWxsX2xpbmVzLnJkcyIpCgpEaW1QbG90KHJlc3VsdHMsIGdyb3VwLmJ5ID0gImN5dGV0eXBlX2NlbGxfb250b2xvZ3lfc2V1cmF0X2NsdXN0ZXJzIiwgbGFiZWwgPSBULCBsYWJlbC5ib3ggPSBULHJlcGVsID0gVCwgcmVkdWN0aW9uID0gInVtYXAiKQpEaW1QbG90KHJlc3VsdHMsIGdyb3VwLmJ5ID0gImN5dGV0eXBlX3NldXJhdF9jbHVzdGVycyIsIGxhYmVsID0gRiwgbGFiZWwuYm94ID0gRixyZXBlbCA9IEYsIHJlZHVjdGlvbiA9ICJ1bWFwIikKCmBgYA==