1 Load Libraries

2 Load Seurat Object


# Load the reference Seurat object that already carries clusters and the
# marker tables saved in its @misc slot.
ref_obj <- readRDS("../CD4_reference_annotated_with_markers.rds")  # ← your saved object

# Verify the marker tables are present in @misc
print("Available markers:")
misc_names <- names(ref_obj@misc)
print(misc_names[grepl("markers", misc_names)])

# The marker tables are stored in @misc (their names are printed above);
# `ref_obj$...` would look for a per-cell metadata column instead.
head(ref_obj@misc$markers_top25)  # top-25 markers per cluster

3 Extract Pre-Computed Markers for CyteTypeR

library(dplyr)

# Choose which pre-computed marker table is handed to CyteTypeR.
#   FALSE -> every significant, filtered marker (the table this notebook uses)
#   TRUE  -> only the pre-filtered top 25 markers per cluster
# Previously both tables were assigned in sequence, so the top-25 assignment
# was silently overwritten; the switch makes the choice explicit.
use_top25_only <- FALSE

marker_slot <- if (use_top25_only) "markers_top25" else "markers_all_filtered"
SS_markers_filtered <- ref_obj@misc[[marker_slot]]

# Fail early with a clear message if the expected table was never saved.
if (is.null(SS_markers_filtered)) {
  stop("Marker table '", marker_slot, "' not found in ref_obj@misc",
       call. = FALSE)
}

cat("Markers loaded:", nrow(SS_markers_filtered), "\n")
Markers loaded: 5249 
# Preview cluster, gene and log2 fold-change for the first rows.
preview_cols <- c("cluster", "gene", "avg_log2FC")
print(head(SS_markers_filtered[, preview_cols]))
       cluster   gene avg_log2FC
EEF1B2       0 EEF1B2  0.2696944
CCR7         0   CCR7  0.6658896
LEF1         0   LEF1  0.6232084
TCF7         0   TCF7  0.4312715
FHIT         0   FHIT  1.3244055
TMIGD2       0 TMIGD2  2.2133935

4 Run CyteTypeR with Pre-Saved Data

# Build the CyteTypeR input from the marker table loaded earlier, so that
# FindAllMarkers does not need to be re-run.
prepped_data <- PrepareCyteTypeR(
  obj = ref_obj,
  SS_markers_filtered,
  group_key = "seurat_clusters",
  n_top_genes = 25,
  coordinates_key = "umap",
  aggregate_metadata = TRUE
)

# ---------------- Run-level metadata passed to CyteTypeR ----------------
metadata <- list(
  title = "Human CD4+ T Cell Subset Atlas - Integrated Reference",
  run_label = "CD4T_10xGenomics_Lab_Integrated_v1",
  experiment_name = "Primary CD4 T cells from healthy donors (10x + lab CITE-seq)"
)

# Drop cluster-metadata entries whose name is missing or the literal "NA"
cluster_names <- names(prepped_data$clusterMetadata)
drop_entry <- is.na(cluster_names) | cluster_names == "NA"
prepped_data$clusterMetadata <- prepped_data$clusterMetadata[!drop_entry]
print("Cleaned cluster metadata:")
print(names(prepped_data$clusterMetadata))

# ---------------- Free-text study context for the annotation -----------
study_context_text <- paste0(
  "Unsupervised clustering (0-6) of 11,482 high-quality CD4+ T cells from 3 healthy donors. ",
  "Integrated analysis combining 10x Genomics CITE-seq (CD4T_10x_S1/S2) + lab-generated ",
  "ADT data (CD4T_lab). RPCA+SCT integration with doublet removal. ",
  "Markers derived from RNA assay (top 25/cluster, p_adj<0.05, logFC>0.25, ",
  "min.pct.diff>0.20). Blacklist-filtered (TCR/Ig/ribo/MT/HB). ",
  "Objective: Automated cell-type annotation of naive/memory/effector/Treg subsets."
)

results <- CyteTypeR(
  obj = ref_obj,
  prepped_data = prepped_data,
  study_context = study_context_text,
  metadata = metadata
)

# Persist the CyteTypeR output so the annotation does not have to be re-run
saveRDS(results, "CyteTypeR_CD4T_integrated_reference.rds")
cat("✅ CyteTypeR results saved with professional metadata\n")

5 Visualize Results

# Annotated UMAPs: the cell-ontology column (boxed, repelled labels) next to
# the CyteTypeR cluster annotation column.
ontology_col <- "cytetype_cell_ontology_seurat_clusters"
annotation_col <- "cytetype_seurat_clusters"

p1 <- DimPlot(
  results,
  reduction = "umap",
  group.by = ontology_col,
  label = TRUE,
  label.box = TRUE,
  repel = TRUE
)
p2 <- DimPlot(results, reduction = "umap", group.by = annotation_col)

p1 | p2


# Original clusters versus the CyteTypeR annotation
p3 <- DimPlot(
  ref_obj,
  group.by = "seurat_clusters",
  label.box = TRUE,
  label = TRUE
)
p4 <- DimPlot(results, label = TRUE, group.by = annotation_col)

(p3 | p4) / (p1 | p2)

6 Validation Plots (Optional)

## Use top markers from your saved object for validation
# head() returns at most 12 genes and, unlike `[1:12]`, does not pad the
# vector with NA when the table holds fewer than 12 rows.
top_markers <- head(ref_obj@misc$markers_top5$gene, 12L)

# Keep only genes that are actually features of the object (also de-duplicates)
available <- intersect(top_markers, rownames(ref_obj))

# Fail with a clear message instead of passing an empty feature set on
if (length(available) == 0L) {
  stop("None of the top markers are present in rownames(ref_obj)",
       call. = FALSE)
}

FeaturePlot(ref_obj, features = available, ncol = 4, pt.size = 0.3)

7 Summary Table

# Cross-tabulate the original cluster IDs against the CyteTypeR labels
original_clusters <- ref_obj$seurat_clusters
cytetype_labels <- results$cytetype_seurat_clusters

table(original_clusters, cytetype_labels, dnn = c("Original", "CytypeR"))
        CytypeR
Original Activated CD4+ T helper cells Activated effector regulatory T cell
       0                             0                                    0
       1                          3998                                    0
       2                             0                                    0
       3                             0                                    0
       4                             0                                    0
       5                             0                                  341
       6                             0                                    0
        CytypeR
Original CD4+ cytotoxic T lymphocytes CD4+ Th2-polarized effector T cell
       0                            0                                  0
       1                            0                                  0
       2                            0                                522
       3                          491                                  0
       4                            0                                  0
       5                            0                                  0
       6                            0                                  0
        CytypeR
Original Naive CD4+ T cell (IFITM3+) Naive thymic CD4+ T cell (RFX3+)
       0                        5485                                0
       1                           0                                0
       2                           0                                0
       3                           0                                0
       4                           0                                0
       5                           0                                0
       6                           0                              233
        CytypeR
Original Stressed/dying CD4+ T lymphocyte
       0                                0
       1                                0
       2                                0
       3                                0
       4                              412
       5                                0
       6                                0
# Copy the CyteTypeR cluster annotation from `results` onto the reference
# object under the name `cytetype_annotation`.
ref_obj$cytetype_annotation <- results$cytetype_seurat_clusters


# Writing the annotated object to disk is currently disabled; uncomment to save.
#saveRDS(ref_obj, "CD4_reference_cytetype_annotated.rds")

# NOTE(review): presumably the CyteType report URL for this run — confirm.
#<https://nygen-labs-prod--cytetype-api.modal.run/report/9d726cf3-bdce-4f96-9610-c3a45a77a611>
LS0tCnRpdGxlOiAiQ3l0ZVR5cGVSIEFubm90YXRpb24gLSBQUkUtU0FWRUQgQ2x1c3RlcnMtTm9ybWFsIEludGVncmF0ZWQgQ0Q0IFQgY2VsbHMiCmF1dGhvcjogIk5hc2lyIE1haG1vb2QgQWJiYXNpIgpkYXRlOiAiYHIgZm9ybWF0KFN5cy50aW1lKCksICclQiAlZCwgJVknKWAiCm91dHB1dDoKICBodG1sX25vdGVib29rOgogICAgbnVtYmVyX3NlY3Rpb25zOiB0cnVlCiAgICB0b2M6IHRydWUKICAgIHRvY19mbG9hdDoKICAgICAgY29sbGFwc2VkOiB0cnVlCiAgICB0aGVtZTogam91cm5hbAotLS0KCgojIGxvYWQgbGlicmFyaWVzCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQpsaWJyYXJ5KFNldXJhdCkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShjb3dwbG90KQpsaWJyYXJ5KFNDcHVicikKbGlicmFyeShDeXRlVHlwZVIpCmxpYnJhcnkoQ29tcGxleEhlYXRtYXApCmxpYnJhcnkoY2lyY2xpemUpCmxpYnJhcnkodmlyaWRpcykKc2V0LnNlZWQoMTIzKQpnYygpCmBgYAoKIyBMb2FkIFNldXJhdCBPYmplY3QgCmBgYHtyfQoKIyBMb2FkIG9iamVjdCBXSVRIIHByZS1zYXZlZCBjbHVzdGVycyAmIG1hcmtlcnMKcmVmX29iaiA8LSByZWFkUkRTKCIuLi9DRDRfcmVmZXJlbmNlX2Fubm90YXRlZF93aXRoX21hcmtlcnMucmRzIikgICMg4oaQIHlvdXIgc2F2ZWQgb2JqZWN0CgojIFZlcmlmeSBtYXJrZXJzIGFyZSB0aGVyZQpwcmludCgiQXZhaWxhYmxlIG1hcmtlcnM6IikKcHJpbnQobmFtZXMocmVmX29iakBtaXNjKVtncmVwbCgibWFya2VycyIsIG5hbWVzKHJlZl9vYmpAbWlzYykpXSkKaGVhZChyZWZfb2JqJG1hcmtlcnNfdG9wMjUpICAjIHBlci1jZWxsIHRvcCBtYXJrZXJzCmBgYAoKIyBFeHRyYWN0IFByZS1Db21wdXRlZCBNYXJrZXJzIGZvciBDeXRlVHlwZVIKYGBge3J9CmxpYnJhcnkoZHBseXIpCgojIFVzZSBUT1AyNSBtYXJrZXJzIChyZWNvbW1lbmRlZCBmb3IgY3l0eXBlUiByb2J1c3RuZXNzKQpTU19tYXJrZXJzX2ZpbHRlcmVkIDwtIHJlZl9vYmpAbWlzYyRtYXJrZXJzX3RvcDI1ICAjIGFscmVhZHkgZmlsdGVyZWQgKyB0b3AyNS9jbHVzdGVyCgojIE9yIEFMTCBzaWduaWZpY2FudCBpZiB5b3UgcHJlZmVyOgpTU19tYXJrZXJzX2ZpbHRlcmVkIDwtIHJlZl9vYmpAbWlzYyRtYXJrZXJzX2FsbF9maWx0ZXJlZAoKY2F0KCJNYXJrZXJzIGxvYWRlZDoiLCBucm93KFNTX21hcmtlcnNfZmlsdGVyZWQpLCAiXG4iKQpwcmludChoZWFkKFNTX21hcmtlcnNfZmlsdGVyZWRbLCBjKCJjbHVzdGVyIiwgImdlbmUiLCAiYXZnX2xvZzJGQyIpXSkpCmBgYAoKIyBSdW4gQ3l0ZVR5cGVSIHdpdGggUHJlLVNhdmVkIERhdGEKYGBge3IsIGZpZy53aWR0aD0xMiwgZmlnLmhlaWdodD02fQojIFByZXBhcmUgdXNpbmcgUFJFLVNBVkVEIG1hcmtlcnMgKG5vIEZpbmRBbGxNYXJrZXJzIG5lZWRlZCEpCnByZXBwZWRfZGF0YSA8LSBQcmVwYXJlQ3l0ZVR5cGVSKAogIG9iaiA9IHJlZl9vYmosCiAgU1NfbWFya2Vy
c19maWx0ZXJlZCwKICBuX3RvcF9nZW5lcyA9IDI1LAogIGdyb3VwX2tleSA9ICdzZXVyYXRfY2x1c3RlcnMnLAogIGFnZ3JlZ2F0ZV9tZXRhZGF0YSA9IFRSVUUsCiAgY29vcmRpbmF0ZXNfa2V5ID0gInVtYXAiCikKCiMgLS0tLS0tLS0tLS0tLS0tLSBQcm9mZXNzaW9uYWwgQ3l0ZVR5cGVSIE1ldGFkYXRhIC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCm1ldGFkYXRhIDwtIGxpc3QoCiAgdGl0bGUgPSAnSHVtYW4gQ0Q0KyBUIENlbGwgU3Vic2V0IEF0bGFzIC0gSW50ZWdyYXRlZCBSZWZlcmVuY2UnLAogIHJ1bl9sYWJlbCA9ICdDRDRUXzEweEdlbm9taWNzX0xhYl9JbnRlZ3JhdGVkX3YxJywKICBleHBlcmltZW50X25hbWUgPSAnUHJpbWFyeSBDRDQgVCBjZWxscyBmcm9tIGhlYWx0aHkgZG9ub3JzICgxMHggKyBsYWIgQ0lURS1zZXEpJwopCgojIENsZWFuIGNsdXN0ZXIgbWV0YWRhdGEKcHJlcHBlZF9kYXRhJGNsdXN0ZXJNZXRhZGF0YSA8LSBwcmVwcGVkX2RhdGEkY2x1c3Rlck1ldGFkYXRhWwogICFpcy5uYShuYW1lcyhwcmVwcGVkX2RhdGEkY2x1c3Rlck1ldGFkYXRhKSkgJiAKICBuYW1lcyhwcmVwcGVkX2RhdGEkY2x1c3Rlck1ldGFkYXRhKSAhPSAiTkEiCl0KcHJpbnQoIkNsZWFuZWQgY2x1c3RlciBtZXRhZGF0YToiKQpwcmludChuYW1lcyhwcmVwcGVkX2RhdGEkY2x1c3Rlck1ldGFkYXRhKSkKCiMgLS0tLS0tLS0tLS0tLS0tLSBEZXRhaWxlZCBTdHVkeSBDb250ZXh0IC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KcmVzdWx0cyA8LSBDeXRlVHlwZVIoCiAgb2JqID0gcmVmX29iaiwKICBwcmVwcGVkX2RhdGEgPSBwcmVwcGVkX2RhdGEsCiAgc3R1ZHlfY29udGV4dCA9IHBhc3RlMCgKICAgICJVbnN1cGVydmlzZWQgY2x1c3RlcmluZyAoMC02KSBvZiAxMSw0ODIgaGlnaC1xdWFsaXR5IENENCsgVCBjZWxscyBmcm9tIDMgaGVhbHRoeSBkb25vcnMuICIsCiAgICAiSW50ZWdyYXRlZCBhbmFseXNpcyBjb21iaW5pbmcgMTB4IEdlbm9taWNzIENJVEUtc2VxIChDRDRUXzEweF9TMS9TMikgKyBsYWItZ2VuZXJhdGVkICIsCiAgICAiQURUIGRhdGEgKENENFRfbGFiKS4gUlBDQStTQ1QgaW50ZWdyYXRpb24gd2l0aCBkb3VibGV0IHJlbW92YWwuICIsCiAgICAiTWFya2VycyBkZXJpdmVkIGZyb20gUk5BIGFzc2F5ICh0b3AgMjUvY2x1c3RlciwgcF9hZGo8MC4wNSwgbG9nRkM+MC4yNSwgIiwKICAgICJtaW4ucGN0LmRpZmY+MC4yMCkuIEJsYWNrbGlzdC1maWx0ZXJlZCAoVENSL0lnL3JpYm8vTVQvSEIpLiAiLAogICAgIk9iamVjdGl2ZTogQXV0b21hdGVkIGNlbGwtdHlwZSBhbm5vdGF0aW9uIG9mIG5haXZlL21lbW9yeS9lZmZlY3Rvci9UcmVnIHN1YnNldHMuIgogICksCiAgbWV0YWRhdGEgPSBtZXRhZGF0YQopCgpzYXZlUkRTKHJlc3VsdHMsICJDeXRlVHlwZVJfQ0Q0VF9pbnRlZ3JhdGVkX3JlZmVyZW5jZS5yZHMiKQpjYXQoIuKchSBDeXRlVHlwZVIgcmVzdWx0cyBzYXZlZCB3aXRoIHByb2Zlc3Npb25hbCBtZXRhZGF0
YVxuIikKYGBgCgoKCiMgVmlzdWFsaXplIFJlc3VsdHMKYGBge3IsIGZpZy53aWR0aD0xMiwgZmlnLmhlaWdodD02fQojIFVNQVBzIHdpdGggYW5ub3RhdGlvbnMKcDEgPC0gRGltUGxvdChyZXN1bHRzLCBncm91cC5ieSA9ICJjeXRldHlwZV9jZWxsX29udG9sb2d5X3NldXJhdF9jbHVzdGVycyIsIAogICAgICAgICAgICAgIGxhYmVsID0gVFJVRSwgbGFiZWwuYm94ID0gVFJVRSwgcmVwZWwgPSBUUlVFLCByZWR1Y3Rpb24gPSAidW1hcCIpCnAyIDwtIERpbVBsb3QocmVzdWx0cywgZ3JvdXAuYnkgPSAiY3l0ZXR5cGVfc2V1cmF0X2NsdXN0ZXJzIiwgcmVkdWN0aW9uID0gInVtYXAiKQoKcDEgfCBwMgoKIyBDb21wYXJlIG9yaWdpbmFsIHZzIGN5dHlwZVIgYW5ub3RhdGlvbgpwMyA8LSBEaW1QbG90KHJlZl9vYmosIGdyb3VwLmJ5ID0gInNldXJhdF9jbHVzdGVycyIsIGxhYmVsID0gVFJVRSwgbGFiZWwuYm94ID0gVFJVRSkKcDQgPC0gRGltUGxvdChyZXN1bHRzLCBncm91cC5ieSA9ICJjeXRldHlwZV9zZXVyYXRfY2x1c3RlcnMiLCBsYWJlbCA9IFRSVUUpCgoocDMgfCBwNCkgLyAocDEgfCBwMikKYGBgCgoKCiMgVmFsaWRhdGlvbiBQbG90cyAoT3B0aW9uYWwpCmBgYHtyLCBmaWcud2lkdGg9MTIsIGZpZy5oZWlnaHQ9Nn0KIyMgVXNlIHRvcCBtYXJrZXJzIGZyb20geW91ciBzYXZlZCBvYmplY3QgZm9yIHZhbGlkYXRpb24KdG9wX21hcmtlcnMgPC0gcmVmX29iakBtaXNjJG1hcmtlcnNfdG9wNSRnZW5lWzE6MTJdICAjIGZpcnN0IDEyIHRvcCBtYXJrZXJzCmF2YWlsYWJsZSA8LSBpbnRlcnNlY3QodG9wX21hcmtlcnMsIHJvd25hbWVzKHJlZl9vYmopKQoKRmVhdHVyZVBsb3QocmVmX29iaiwgZmVhdHVyZXMgPSBhdmFpbGFibGUsIG5jb2wgPSA0LCBwdC5zaXplID0gMC4zKQpgYGAKCgoKCgojIFN1bW1hcnkgVGFibGUKYGBge3IsIGZpZy53aWR0aD0xMiwgZmlnLmhlaWdodD02fQojIENvbXBhcmUgb3JpZ2luYWwgY2x1c3RlcnMgdnMgY3l0eXBlUiBwcmVkaWN0aW9ucwp0YWJsZSgKICBPcmlnaW5hbCA9IHJlZl9vYmokc2V1cmF0X2NsdXN0ZXJzLAogIEN5dHlwZVIgPSByZXN1bHRzJGN5dGV0eXBlX3NldXJhdF9jbHVzdGVycwopCgojIFNhdmUgZmluYWwgYW5ub3RhdGVkIG9iamVjdApyZWZfb2JqJGN5dGV0eXBlX2Fubm90YXRpb24gPC0gcmVzdWx0cyRjeXRldHlwZV9zZXVyYXRfY2x1c3RlcnMKCgojc2F2ZVJEUyhyZWZfb2JqLCAiQ0Q0X3JlZmVyZW5jZV9jeXRldHlwZV9hbm5vdGF0ZWQucmRzIikKCiM8aHR0cHM6Ly9ueWdlbi1sYWJzLXByb2QtLWN5dGV0eXBlLWFwaS5tb2RhbC5ydW4vcmVwb3J0LzlkNzI2Y2YzLWJkY2UtNGY5Ni05NjEwLWMzYTQ1YTc3YTYxMT4KYGBgCgoKCgoK