Load libraries
Load Seurat Object
# Read the serialized Seurat object from disk (path is relative to the
# notebook's working directory).
All_samples_Merged <- readRDS(
  file = "../../CD4_reference_RPCA_SCT_integrated_doublets_removed.rds"
)

# Make "RNA" the active assay so the steps below operate on it.
DefaultAssay(All_samples_Merged) <- "RNA"

# Log-normalize the RNA assay. The RNA assay may not have been normalized
# yet; running the normalization again is safe.
All_samples_Merged <- NormalizeData(
  object = All_samples_Merged,
  normalization.method = "LogNormalize",
  scale.factor = 10000,
  verbose = TRUE
)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Run CyteTypeR for cluster
annotation
# Use the stored clustering as the active identity so that markers are
# computed once per Seurat cluster.
Idents(All_samples_Merged) <- "seurat_clusters"

# ---------------------------------------------------------
# Find marker genes per cluster.
#   only.pos        = TRUE : keep only positive (up-regulated) markers
#   min.pct         = 0.25 : minimum fraction of cells detecting the gene
#   logfc.threshold = 0.25 : minimum average log fold change
#   min.diff.pct    = 0.2  : minimum difference in detection fraction
#                            between the cluster and the remaining cells
# NOTE: the Seurat argument is spelled `min.diff.pct`. The spelling
# `min.pct.diff` is not a FindAllMarkers()/FindMarkers() parameter; it is
# absorbed by `...` without an error, so the filter is never applied.
SS_markers <- FindAllMarkers(
  All_samples_Merged,
  only.pos = TRUE,
  min.pct = 0.25,
  logfc.threshold = 0.25,
  min.diff.pct = 0.2
)
library(dplyr)
# Blacklist of gene-symbol patterns treated as uninformative for cluster
# annotation. Each entry is a POSIX extended regular expression (the default
# grepl() engine); the entries are joined with "|" into one pattern that is
# matched case-insensitively against the `gene` column further down.
blacklist_patterns <- c(
  "^TRAV", "^TRBV", "^TRGV", "^TRDV", "^TRBC", "^TRAC", "^TRDC", "^TRGC", # TCR
  "^IGH", "^IGK", "^IGL", "^IGJ",       # Ig genes
  "^RPL",                               # ribosomal, large subunit
  # Ribosomal, small subunit. A bare "^RPS" would also remove the RPS6K*
  # ribosomal S6 kinase genes (e.g. RPS6KA3), which are kinases rather than
  # ribosomal proteins. "RPS6" is therefore matched only when it ends the
  # symbol or is followed by a character other than "K". Positive character
  # classes are used so the pattern needs neither perl = TRUE nor lookahead.
  "^RPS([0-57-9A-Z]|6$|6[0-9A-JL-Z])",
  "^MT-",                               # mitochondria
  "^HBA", "^HBB", "^HB[ABZ]",           # hemoglobins
  "^NEAT1$", "^MALAT1$",                # optional lncRNAs
  "^XIST$"
)

# Single alternation pattern used by the preview and filter steps.
blacklist_regex <- paste(blacklist_patterns, collapse = "|")
# Dry run: collect the marker rows whose gene symbol hits the blacklist,
# report how many there are, and show the first few symbols.
to_remove <- filter(
  SS_markers,
  grepl(blacklist_regex, gene, ignore.case = TRUE)
)
message("Rows to remove: ", nrow(to_remove))
head(to_remove[["gene"]])
[1] "NEAT1" "RPS16" "RPS6KA3" "RPS3A" "MALAT1" "RPS23"
# Keep only the marker rows whose gene symbol does not match the blacklist.
SS_markers_filtered <- filter(
  SS_markers,
  !grepl(blacklist_regex, gene, ignore.case = TRUE)
)
Run CyteTypeR for cluster
annotation


LS0tCnRpdGxlOiAiQ3l0ZVR5cGVSIEFubm90YXRpb24gZm9yIE5vcm1hbCBJbnRlZ3JhdGVkIENENCBUIGNlbGxzIgphdXRob3I6ICJOYXNpciBNYWhtb29kIEFiYmFzaSIKZGF0ZTogImByIGZvcm1hdChTeXMudGltZSgpLCAnJUIgJWQsICVZJylgIgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazoKICAgIG51bWJlcl9zZWN0aW9uczogdHJ1ZQogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6CiAgICAgIGNvbGxhcHNlZDogdHJ1ZQogICAgdGhlbWU6IGpvdXJuYWwKLS0tCgoKIyBsb2FkIGxpYnJhcmllcwpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0KIyBMb2FkIGxpYnJhcmllcwpsaWJyYXJ5KFNldXJhdCkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShjb3dwbG90KQpsaWJyYXJ5KFNDcHVicikKbGlicmFyeShDeXRlVHlwZVIpCmxpYnJhcnkoQ29tcGxleEhlYXRtYXApCmxpYnJhcnkoY2lyY2xpemUpCmxpYnJhcnkodmlyaWRpcykKCiMgU2V0IHJhbmRvbSBzZWVkIGZvciByZXByb2R1Y2liaWxpdHkKc2V0LnNlZWQoMTIzKQoKIyBJbml0aWFsIGdhcmJhZ2UgY29sbGVjdGlvbgpnYygpCmBgYAoKIyBMb2FkIFNldXJhdCBPYmplY3QgCmBgYHtyfQoKQWxsX3NhbXBsZXNfTWVyZ2VkIDwtIHJlYWRSRFMoIi4uLy4uL0NENF9yZWZlcmVuY2VfUlBDQV9TQ1RfaW50ZWdyYXRlZF9kb3VibGV0c19yZW1vdmVkLnJkcyIpCgpEZWZhdWx0QXNzYXkoQWxsX3NhbXBsZXNfTWVyZ2VkKSA8LSAiUk5BIgoKIyBJZiBSTkEgaGFzIG5vdCBiZWVuIG5vcm1hbGl6ZWQgeWV0LCBydW4gTm9ybWFsaXplRGF0YSAoc2FmZSB0byBydW4gYWdhaW4pCkFsbF9zYW1wbGVzX01lcmdlZCA8LSBOb3JtYWxpemVEYXRhKEFsbF9zYW1wbGVzX01lcmdlZCwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbm9ybWFsaXphdGlvbi5tZXRob2QgPSAiTG9nTm9ybWFsaXplIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgc2NhbGUuZmFjdG9yID0gMWU0LAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB2ZXJib3NlID0gVFJVRSkKCmBgYAoKIyBSdW4gQ3l0eXBlUiBmb3IgY2x1c3RlciBhbm5vdGF0aW9uCmBgYHtyfQpJZGVudHMoQWxsX3NhbXBsZXNfTWVyZ2VkKSA8LSAic2V1cmF0X2NsdXN0ZXJzIgoKIyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KIyAy77iP4oOjIEZpbmQgbWFya2VyIGdlbmVzIHBlciBjbHVzdGVyClNTX21hcmtlcnMgPC0gRmluZEFsbE1hcmtlcnMoCiAgQWxsX3NhbXBsZXNfTWVyZ2VkLAogIG9ubHkucG9zID0gVFJVRSwKICBtaW4ucGN0ID0gMC4yNSwKICBsb2dmYy50aHJlc2hvbGQgPSAwLjI1LAogIG1pbi5wY3QuZGlmZiA9IDAuMgogIAopCgpsaWJyYXJ5KGRwbHlyKQoKIyBQcmVjaXNlIGJsYWNrbGlzdCBmb3IgdW5pbmZvcm1hdGl2ZSBnZW5lcwpibGFja2xpc3RfcGF0dGVybnMgPC0gYygKICAi
XlRSQVYiLCAiXlRSQlYiLCAiXlRSR1YiLCAiXlRSRFYiLCAiXlRSQkMiLCAiXlRSQUMiLCAiXlRSREMiLCAiXlRSR0MiLCAjIFRDUgogICJeSUdIIiwgIl5JR0siLCAiXklHTCIsICJeSUdKIiwgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgSWcgZ2VuZXMKICAiXlJQTCIsICJeUlBTIiwgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIHJpYm9zb21hbAogICJeTVQtIiwgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgbWl0b2Nob25kcmlhCiAgIl5IQkEiLCAiXkhCQiIsICJeSEJbQUJaXSIsICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIyBoZW1vZ2xvYmlucwogICJeTkVBVDEkIiwgIl5NQUxBVDEkIiwgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgb3B0aW9uYWwgbG5jUk5BcwogICJeWElTVCQiICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgKQoKYmxhY2tsaXN0X3JlZ2V4IDwtIHBhc3RlKGJsYWNrbGlzdF9wYXR0ZXJucywgY29sbGFwc2UgPSAifCIpCgojIFByZXZpZXcgd2hpY2ggbWFya2VycyB3aWxsIGJlIHJlbW92ZWQKdG9fcmVtb3ZlIDwtIFNTX21hcmtlcnMgJT4lCiAgZmlsdGVyKGdyZXBsKGJsYWNrbGlzdF9yZWdleCwgZ2VuZSwgaWdub3JlLmNhc2UgPSBUUlVFKSkKbWVzc2FnZSgiUm93cyB0byByZW1vdmU6ICIsIG5yb3codG9fcmVtb3ZlKSkKaGVhZCh0b19yZW1vdmUkZ2VuZSkKCiMgRmlsdGVyIG1hcmtlcnMgKGtlZXAgaW1wb3J0YW50IG1ldGFib2xpYy9wcm9saWZlcmF0aW9uIGdlbmVzKQpTU19tYXJrZXJzX2ZpbHRlcmVkIDwtIFNTX21hcmtlcnMgJT4lCiAgZmlsdGVyKCFncmVwbChibGFja2xpc3RfcmVnZXgsIGdlbmUsIGlnbm9yZS5jYXNlID0gVFJVRSkpCgoKYGBgCgojIFJ1biBDeXR5cGVSIGZvciBjbHVzdGVyIGFubm90YXRpb24KYGBge3IsIGZpZy53aWR0aD0xMiwgZmlnLmhlaWdodD02fQpsaWJyYXJ5KEN5dGVUeXBlUikKbGlicmFyeShkcGx5cikKIyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCiMgUHJlcGFyZSBkYXRhIGZvciBDeXRlVHlwZVIKIyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCnByZXBwZWRfZGF0YSA8LSBQcmVwYXJlQ3l0ZVR5cGVSKAogIG9iaiA9IEFsbF9zYW1wbGVzX01lcmdlZCwKICBTU19tYXJrZXJzX2ZpbHRlcmVkLAogIG5fdG9wX2dlbmVzID0gMjUsICAgICAgICAgICAgICAgIyB0b3AgNSBtYXJrZXIgZ2VuZXMgcGVyIGNsdXN0ZXIKICBncm91cF9rZXkgPSAnc2V1cmF0X2NsdXN0ZXJzJywgICMgbXVzdCBtYXRjaCBTZXVyYXQgb2JqZWN0CiAgYWdncmVnYXRlX21ldGFkYXRhID0gVFJVRSwKICBjb29yZGluYXRlc19rZXkgPSAidW1hcCIKKQoKCgojIC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KIyBN
ZXRhZGF0YSBmb3IgQ3l0ZVR5cGVSCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQptZXRhZGF0YSA8LSBsaXN0KAogIHRpdGxlID0gJ05vcm1hbCBQQk1DIHNpbmdsZS1jZWxsIGFuYWx5c2lzJywKICBydW5fbGFiZWwgPSAnU1NfY2VsbF9saW5lc19hbmFseXNpcycsCiAgZXhwZXJpbWVudF9uYW1lID0gJ0FsbCBIZWFsdGh5Q0Q0IFQgY2VsbHMnCikKCgojIFJlbW92ZSBib3RoIGxpdGVyYWwgIk5BIiBhbmQgdHJ1ZSBOQSBuYW1lcwpwcmVwcGVkX2RhdGEkY2x1c3Rlck1ldGFkYXRhIDwtIHByZXBwZWRfZGF0YSRjbHVzdGVyTWV0YWRhdGFbCiAgIWlzLm5hKG5hbWVzKHByZXBwZWRfZGF0YSRjbHVzdGVyTWV0YWRhdGEpKSAmIAogIG5hbWVzKHByZXBwZWRfZGF0YSRjbHVzdGVyTWV0YWRhdGEpICE9ICJOQSIKXQoKIyBDb25maXJtIGNsZWFudXAKbmFtZXMocHJlcHBlZF9kYXRhJGNsdXN0ZXJNZXRhZGF0YSkKCgoKCgojIC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KIyBSdW4gQ3l0ZVR5cGVSCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQpyZXN1bHRzIDwtIEN5dGVUeXBlUigKICBvYmogPSBBbGxfc2FtcGxlc19NZXJnZWQsCiAgcHJlcHBlZF9kYXRhID0gcHJlcHBlZF9kYXRhLAogIHN0dWR5X2NvbnRleHQgPSAiQWxsIGNsdXN0ZXJzIGFyZSBmcm9tIDMgUEJNQyBzYW1wbGVzIG5vcm1hbCBidXQgb25seSBDRDQgVCBjZWxscyIsCiAgbWV0YWRhdGEgPSBtZXRhZGF0YQopCgojIC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KIyBDaGVjayByZXN1bHRzCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQojIElmIGpvYiBzdWNjZWVkcywgcmV0cmlldmUgcmVzdWx0czoKIyBHZXRSZXN1bHRzKHJlc3VsdHMpCgpzYXZlUkRTKHJlc3VsdHMsIGZpbGUgPSAiQ3l0ZVR5cGVSX3Jlc3VsdHNfU1NfTm9ybWFsQ0Q0VF9maW5hbGl6ZWQucmRzIikKCkRpbVBsb3QocmVzdWx0cywgZ3JvdXAuYnkgPSAiY3l0ZXR5cGVfY2VsbF9vbnRvbG9neV9zZXVyYXRfY2x1c3RlcnMiLCBsYWJlbCA9IFQsIGxhYmVsLmJveCA9IFQscmVwZWwgPSBULCByZWR1Y3Rpb24gPSAidW1hcCIpCkRpbVBsb3QocmVzdWx0cywgZ3JvdXAuYnkgPSAiY3l0ZXR5cGVfc2V1cmF0X2NsdXN0ZXJzIiwgbGFiZWwgPSBGLCBsYWJlbC5ib3ggPSBGLHJlcGVsID0gRiwgcmVkdWN0aW9uID0gInVtYXAiKQoKIzxodHRwczovL255Z2VuLWxhYnMtcHJvZC0tY3l0ZXR5cGUtYXBpLm1vZGFsLnJ1bi9yZXBvcnQvMDczNjIzYzYtOTczZS00NmZkLTlhY2EtMDBmNGZjNTQ5NzFkPgoKYGBg