1. Load libraries

2. Read object with all samples


# Load the merged Seurat object that holds every sample.
All_samples_Merged <- readRDS(
  "../../0-Seurat_RDS_OBJECT_FINAL/All_samples_Merged_with_STCAT_and_renamed_FINAL.rds"
)

# Derive the "Group" metadata column from cell_line:
#   L1 ... L7                      -> "MalignantCD4T"
#   CD4Tcells_lab / CD4Tcells_10x  -> "NormalCD4T"
#   anything else                  -> "Other"
# local() keeps the helper vectors out of the global workspace.
All_samples_Merged$Group <- local({
  cell_line_ids <- All_samples_Merged$cell_line
  group_labels <- rep("Other", length(cell_line_ids))
  group_labels[cell_line_ids %in% c("CD4Tcells_lab", "CD4Tcells_10x")] <- "NormalCD4T"
  # Assigned last so the malignant label takes precedence, as in a nested ifelse().
  group_labels[cell_line_ids %in% paste0("L", 1:7)] <- "MalignantCD4T"
  group_labels
})

# Number of cells per group.
table(All_samples_Merged$Group)

MalignantCD4T    NormalCD4T 
        40695          8610 

3. Read object with all Reference_CD4Tcells


 # Load the Seurat object used below as the CD4 T-cell reference; later steps
 # read its `pseudotime` metadata column and its "pca" / "umap" reductions.
 reference_integrated <- readRDS("Step1_sezary_cell_lines_mapped_to_cd4_reference_integrated_before_Query_Projection_03-09-2025.rds")

Trajectory and Pseudotime with Monocle3

# Largest finite pseudotime observed in the reference.
max_finite <- max(
  reference_integrated$pseudotime[is.finite(reference_integrated$pseudotime)]
)

# Every non-finite entry (NA, NaN, +/-Inf) is capped at that maximum so that
# only finite pseudotime values remain in the metadata.
reference_integrated$pseudotime[
  !is.finite(reference_integrated$pseudotime)
] <- max_finite

# Distribution of the cleaned pseudotime values.
summary(reference_integrated$pseudotime)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.00000 0.01969 0.19290 0.93865 0.90736 5.63665 
# Sanity check: count finite vs. non-finite pseudotime values in the reference.
table(is.finite(reference_integrated$pseudotime))

TRUE 
8610 
# Reference UMAP with cells shaded by pseudotime on a light-blue -> red
# gradient; the title is centred above the panel.
FeaturePlot(
  object = reference_integrated,
  features = "pseudotime",
  reduction = "umap",
  label = TRUE,
  cols = c("lightblue", "red")
) +
  labs(title = "UMAP of Integrated CD4⁺ T Cells by Pseudotime") +
  theme(plot.title = element_text(hjust = 0.5))

NA
NA

Trajectory and Pseudotime with Monocle3



# Reference UMAP colored by the "Prediction" metadata column.
# The title names the grouping column so this figure can be told apart from
# the one below, and is centred like the other plots in this notebook.
DimPlot(reference_integrated, group.by = "Prediction", reduction = "umap") +
  ggtitle("UMAP of Integrated CD4⁺ T Cells by Prediction") +
  theme(plot.title = element_text(hjust = 0.5))


# Reference UMAP colored by the "predicted.celltype.l2" metadata column.
DimPlot(reference_integrated, group.by = "predicted.celltype.l2", reduction = "umap") +
  ggtitle("UMAP of Integrated CD4⁺ T Cells by predicted.celltype.l2") +
  theme(plot.title = element_text(hjust = 0.5))

4. Subset MalignantCD4T cells


# Subset MalignantCD4T cells: keep only the cells whose Group metadata equals
# "MalignantCD4T"; this object is the query that gets mapped onto the reference.
MalignantCD4T <- subset(All_samples_Merged, subset = Group == "MalignantCD4T")


# Use SCT assay for both, so reference and query are read from the same assay
DefaultAssay(reference_integrated) <- "SCT"
DefaultAssay(MalignantCD4T) <- "SCT"


# Make sure both have variable features set.
# FindVariableFeatures() is only a fallback here: it runs solely when an object
# currently reports zero variable features (3000 features are requested then).
# NOTE(review): whether selection.method = "vst" behaves as intended on an SCT
# assay cannot be verified from this file — confirm before relying on this path.
if (length(VariableFeatures(reference_integrated)) == 0) {
  reference_integrated <- FindVariableFeatures(reference_integrated, assay = "SCT", selection.method = "vst", nfeatures = 3000)
}
if (length(VariableFeatures(MalignantCD4T)) == 0) {
  MalignantCD4T <- FindVariableFeatures(MalignantCD4T, assay = "SCT", selection.method = "vst", nfeatures = 3000)
}

# Find anchors between the reference and the MalignantCD4T query.
# Both objects are declared SCT-normalized, and the reference's existing "pca"
# reduction (dimensions 1-18) is supplied as reference.reduction; the same 18
# dimensions are reused by RunUMAP() and TransferData() further down.
# When this chunk was run, Seurat reported that the reference assay holds
# multiple SCT models and that it selected the model with the most cells.
anchors <- FindTransferAnchors(
  reference = reference_integrated,
  query = MalignantCD4T,
  normalization.method = "SCT",
  reference.reduction = "pca",
  dims = 1:18
)
[1] "Given reference assay has multiple sct models, selecting model with most cells for finding transfer anchors"
# Run garbage collection and print the memory-usage table.
gc()
             used    (Mb) gc trigger    (Mb)   max used    (Mb)
Ncells    8828727   471.6   15971052   853.0   15971052   853.0
Vcells 2508768160 19140.4 4750680560 36244.9 3958833800 30203.6
# Recompute the reference UMAP from PCA dimensions 1-18 and keep the fitted
# UMAP model (return.model = TRUE) so that MapQuery() can project query cells
# into this same embedding through reduction.model = "umap".
# The argument list ends without a trailing comma: a dangling comma would pass
# an empty argument into RunUMAP(), which fails as soon as any method
# evaluates list(...).
reference_integrated <- RunUMAP(
  reference_integrated,
  reduction = "pca",
  dims = 1:18,
  assay = "SCT",
  return.model = TRUE
)
Using method 'umap'
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
# Map query onto reference and transfer the categorical annotations.
# Each refdata entry is a per-cell metadata vector taken from the reference.
# The query is also projected into the reference UMAP; the result is read back
# later as the "ref.umap" reduction.
# NOTE(review): MapQuery() normally stores transferred labels in the query
# metadata as predicted.<name> (e.g. predicted.seurat_clusters) — confirm
# against the returned object.
# Pseudotime is deliberately not listed here: label transfer expects
# character/factor vectors, so the continuous pseudotime values are transferred
# separately with TransferData() on a one-row matrix (see the next step).
mapped_MalignantCD4T <- MapQuery(
  anchorset = anchors,
  query = MalignantCD4T,
  reference = reference_integrated,
  refdata = list(
    seurat_clusters = reference_integrated$seurat_clusters,
    Prediction = reference_integrated$Prediction,
    predicted.celltype.l2 = reference_integrated$predicted.celltype.l2
  ),
  reference.reduction = "pca",
  reduction.model = "umap"
)
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|

  |                                                  | 0 % ~calculating  
  |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=03s  
Using method 'umap'
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
# Wrap the reference pseudotime in a 1 x n_cells matrix: a single row named
# "pseudotime" and one column per reference cell, so that TransferData() can
# handle it as a feature matrix.
pseudo_mat <- matrix(
  reference_integrated$pseudotime,
  nrow = 1,
  dimnames = list("pseudotime", colnames(reference_integrated))
)

# Transfer the numeric pseudotime to the query cells through the anchors,
# weighting by the query's own "pca" reduction over dimensions 1-18.
pseudotime_transfer <- TransferData(
  anchorset = anchors,
  refdata = pseudo_mat,
  dims = seq_len(18),
  weight.reduction = MalignantCD4T[["pca"]]
)
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
# Extract the "pseudotime" row from the transferred assay's data layer.
# `layer` is the Seurat v5 argument; `slot` is deprecated there.
pseudotime_vector <- GetAssayData(pseudotime_transfer, layer = "data")["pseudotime", ]

# Add to metadata of the mapped query object
mapped_MalignantCD4T$pseudotime <- pseudotime_vector

#  Visualize pseudotime
# Query cells drawn in the projected reference UMAP ("ref.umap"), shaded by the
# `pseudotime` column stored in the mapped object's metadata.
FeaturePlot(mapped_MalignantCD4T,
            features = "pseudotime",
            reduction = "ref.umap",
            cols = c("lightblue","red"),
            label = TRUE) +
  ggtitle("Pseudotime Mapping of MalignantCD4T onto Reference") +
  theme(plot.title = element_text(hjust = 0.5))


#  Visualize cell type annotations on the projected reference UMAP
# NOTE(review): labels transferred by MapQuery() through refdata are normally
# stored as predicted.<name> (e.g. predicted.Prediction). "Prediction" and
# "predicted.celltype.l2" below would then be columns the query already carried
# before mapping — confirm which set is meant to be plotted.
DimPlot(mapped_MalignantCD4T, group.by = "Prediction", reduction = "ref.umap") +
  ggtitle("Mapped MalignantCD4T: Prediction Annotations") +
  theme(plot.title = element_text(hjust = 0.5))


DimPlot(mapped_MalignantCD4T, group.by = "predicted.celltype.l2", reduction = "ref.umap") +
  ggtitle("Mapped MalignantCD4T: predicted.celltype.l2 Annotations") +
  theme(plot.title = element_text(hjust = 0.5))

Mapping of MalignantCD4T on Reference



library(ggplot2)

# Reference cells: UMAP coordinates plus a constant dataset tag.
ref_df <- data.frame(
  Embeddings(reference_integrated, "umap"),
  dataset = "Reference"
)

# Query cells: coordinates in the projected reference UMAP ("ref.umap"),
# tagged with their dataset and carrying the transferred pseudotime.
query_df <- data.frame(
  Embeddings(mapped_MalignantCD4T, "ref.umap"),
  dataset = "MalignantCD4T"
)
query_df$pseudotime <- mapped_MalignantCD4T$pseudotime

# Grey reference cloud in the background; MalignantCD4T cells drawn on top and
# shaded from light blue (low pseudotime) to red (high pseudotime).
ggplot() +
  geom_point(
    data = ref_df,
    mapping = aes(x = umap_1, y = umap_2),
    colour = "grey80",
    size = 0.5
  ) +
  geom_point(
    data = query_df,
    mapping = aes(x = refUMAP_1, y = refUMAP_2, colour = pseudotime),
    size = 1
  ) +
  scale_colour_gradient(low = "lightblue", high = "red") +
  theme_classic() +
  labs(title = "MalignantCD4T cells projected onto reference UMAP") +
  theme(plot.title = element_text(hjust = 0.5))

NA
NA

Mapping of MalignantCD4T on Reference (Azimuth)


library(ggplot2)
library(RColorBrewer)

# Reference coordinates (grey background)
ref_df <- data.frame(Embeddings(reference_integrated, "umap"))
ref_df$dataset <- "Reference"

# Query (MalignantCD4T) coordinates in the projected reference UMAP
query_df <- data.frame(Embeddings(mapped_MalignantCD4T, "ref.umap"))
query_df$dataset <- "MalignantCD4T"

# Colour the query cells by their predicted.celltype.l2 metadata column
# (the annotation this section's heading labels as Azimuth).
query_df$celltype <- mapped_MalignantCD4T$predicted.celltype.l2

# Interpolate the 12 "Paired" colours to at least 20 entries. The palette grows
# when more than 20 distinct cell types are present, because
# scale_color_manual() stops with an error if it is given fewer colours than
# there are categories. With 20 or fewer types the colours are unchanged.
n_palette_colors <- max(20L, length(unique(query_df$celltype)))
big_palette <- colorRampPalette(brewer.pal(12, "Paired"))(n_palette_colors)

# Grey reference cloud with the annotated query cells drawn on top.
ggplot() +
  geom_point(data = ref_df, aes(x = umap_1, y = umap_2),
             color = "grey80", size = 0.5) +
  geom_point(data = query_df, aes(x = refUMAP_1, y = refUMAP_2, color = celltype),
             size = 1) +
  scale_color_manual(values = big_palette) +
  theme_classic() +
  ggtitle("MalignantCD4T cells projected onto reference UMAP with annotations") +
  theme(plot.title = element_text(hjust = 0.5))

NA
NA

Mapping of MalignantCD4T on Reference (STCAT)


library(ggplot2)
library(RColorBrewer)

# Reference coordinates (grey background)
ref_df <- data.frame(Embeddings(reference_integrated, "umap"))
ref_df$dataset <- "Reference"

# Query (MalignantCD4T) coordinates in the projected reference UMAP
query_df <- data.frame(Embeddings(mapped_MalignantCD4T, "ref.umap"))
query_df$dataset <- "MalignantCD4T"

# Colour the query cells by their Prediction metadata column
# (the annotation this section's heading labels as STCAT).
query_df$celltype <- mapped_MalignantCD4T$Prediction

# Interpolate the 12 "Paired" colours to at least 20 entries. The palette grows
# when more than 20 distinct cell types are present, because
# scale_color_manual() stops with an error if it is given fewer colours than
# there are categories. With 20 or fewer types the colours are unchanged.
n_palette_colors <- max(20L, length(unique(query_df$celltype)))
big_palette <- colorRampPalette(brewer.pal(12, "Paired"))(n_palette_colors)

# Grey reference cloud with the annotated query cells drawn on top.
ggplot() +
  geom_point(data = ref_df, aes(x = umap_1, y = umap_2),
             color = "grey80", size = 0.5) +
  geom_point(data = query_df, aes(x = refUMAP_1, y = refUMAP_2, color = celltype),
             size = 1) +
  scale_color_manual(values = big_palette) +
  theme_classic() +
  ggtitle("MalignantCD4T cells projected onto reference UMAP with annotations") +
  theme(plot.title = element_text(hjust = 0.5))

NA
NA

5. Save MalignantCD4T Projection


# Destination folder for the projection results; created (with any missing
# parent folders) the first time this chunk runs.
out_dir <- "results/MalignantCD4T_projection"
if (!dir.exists(out_dir)) {
  dir.create(out_dir, recursive = TRUE)
}

# Write the mapped query object, including its transferred pseudotime, to disk.
save(
  list = "mapped_MalignantCD4T",
  file = file.path(
    out_dir,
    "MalignantCD4T_mapped_on_reference_with_pseudotime-3-8-2025.Robj"
  )
)
