Introduction
This R Markdown script outlines a comprehensive, corrected workflow
for the analysis of Antibody-Derived Tag (ADT) data from a CITE-seq
experiment, with a specific focus on T cell
immunophenotyping in your L1-L7 cell lines. The primary goal is
to use the 28 surface protein markers to accurately define T cell
subsets.
Prerequisites: The analysis assumes a Seurat object
named All_samples_Merged is loaded, containing both RNA
(SCT assay) and ADT (ADT assay) data.
# Replace with the actual path to your Seurat object
All_samples_Merged <- readRDS("../../../PHD_3rd_YEAR_Analysis/0-Seurat_RDS_OBJECT_FINAL/All_samples_Merged_with_Renamed_Clusters_final-26-10-2025.rds")
# Verify the object structure
print(All_samples_Merged)
# Set default assay to SCT for initial RNA-based context
DefaultAssay(All_samples_Merged) <- "SCT"
DimPlot(All_samples_Merged, group.by = "orig.ident", label = T)
QC & Normalization
of ADT (Corrected)
CRITICAL CORRECTION: Subset Object
for ADT-Positive Cells This step is necessary because
the ‘CD4T_10x’ sample does not have ADT data. We will create a subsetted
object for ADT-only analysis. The full object will be used for RNA/ADT
integration.
# Identify cells with ADT data (i.e., cells where the total ADT count is > 0)
# This assumes that cells without ADT data have 0 total ADT counts after re-initialization.
adt_cells <- WhichCells(All_samples_Merged, expression = nCount_ADT > 0)
# Create a subsetted object for ADT-only analysis
All_samples_Merged_ADT <- subset(All_samples_Merged, cells = adt_cells)
# Verify the subsetting
print(All_samples_Merged_ADT)
DimPlot(All_samples_Merged_ADT, group.by = "orig.ident", label = T)
Examine Raw Counts
Distribution and Protein-to-RNA Correlation
This step is crucial for initial quality assessment and identifying
potential issues like non-specific binding or technical artifacts.
Idents(All_samples_Merged) <- "orig.ident"
# **CORRECTION:** Re-initialize the ADT assay from raw counts to ensure a clean start for normalization.
# This is necessary if the ADT assay was previously incorrectly normalized.
# We assume the raw counts are still in the 'counts' slot of the 'ADT' assay.
raw_counts <- GetAssayData(All_samples_Merged, assay = "ADT", layer = "counts")
All_samples_Merged[["ADT"]] <- CreateAssayObject(counts = raw_counts)
# Set the default assay to ADT for protein-specific QC
DefaultAssay(All_samples_Merged) <- "ADT"
# Calculate the total number of ADT counts per cell
All_samples_Merged$nFeature_ADT <- colSums(All_samples_Merged@assays$ADT@counts > 0)
All_samples_Merged$nCount_ADT <- colSums(All_samples_Merged@assays$ADT@counts)
# Visualize ADT count distribution
p1 <- VlnPlot(All_samples_Merged, features = c("nFeature_ADT", "nCount_ADT"), ncol = 2, pt.size = 0.1)
print(p1)

# Identify potential outlier antibodies (e.g., highly expressed across all cells)
# Plot raw counts for all 28 proteins
adt_features <- rownames(All_samples_Merged[["ADT"]])
p2 <- VlnPlot(All_samples_Merged, features = adt_features[1:min(28, length(adt_features))], ncol = 4, pt.size = 0)
print(p2)

Apply Centered
Log-Ratio (CLR) Normalization (CRITICAL
CORRECTION)
CLR normalization is the standard method in Seurat for ADT data.
The critical correction here is the use of
margin = 2.
# **CORRECTION:** Use margin = 2 for CLR normalization.
# margin = 2 normalizes across features (proteins) for each cell (standard for compositional data).
All_samples_Merged <- NormalizeData(All_samples_Merged, assay = "ADT", normalization.method = "CLR", margin = 2)
Quality control after
normalization
# **CORRECTION:** Plot the *normalized* data (default layer for VlnPlot after NormalizeData)
# to assess the effect of CLR normalization.
adt_features <- rownames(All_samples_Merged[["ADT"]])
p3 <- VlnPlot(All_samples_Merged, features = adt_features[1:min(28, length(adt_features))], ncol = 4, pt.size = 0)
print(p3)

T Cell
Immunophenotyping and Visualization
Visualize multiple
modalities side-by-side (CORRECTION)
The previous code used the adt_ prefix which is
unnecessary and can be confusing. The standard way is to set the default
assay and use the feature name directly.
# Set default assay to ADT for protein plots
DefaultAssay(All_samples_Merged_ADT) <- "ADT"
p4 <- FeaturePlot(All_samples_Merged_ADT, "CD4", cols = c("lightgrey", "darkgreen"), reduction = "umap") + ggtitle("CD4 Protein (ADT UMAP)")
# Set default assay to SCT for RNA plots
DefaultAssay(All_samples_Merged) <- "SCT"
p5 <- FeaturePlot(All_samples_Merged, "CD4", reduction = "umap") + ggtitle("CD4 RNA (RNA UMAP)")
# place plots side-by-side
p4 | p5

# Example for PD1/PDCD1
DefaultAssay(All_samples_Merged_ADT) <- "ADT"
p6 <- FeaturePlot(All_samples_Merged_ADT, "PD1", cols = c("lightgrey", "darkgreen"), reduction = "umap") + ggtitle("PD1 Protein")
DefaultAssay(All_samples_Merged) <- "SCT"
p7 <- FeaturePlot(All_samples_Merged, "PDCD1", reduction = "umap") + ggtitle("PDCD1 RNA (SCT)")
p6 | p7

Biaxial ADT Plots for
Phenotypic Gating (T Cell Focus)
This is the core of T cell immunophenotyping. We use
FeatureScatter on the normalized ADT data to define
subsets, similar to flow cytometry.
# Set ADT as the active assay
DefaultAssay(All_samples_Merged_ADT)
[1] "ADT"
Idents(All_samples_Merged_ADT) <- "orig.ident"
DefaultAssay(All_samples_Merged_ADT) <- "ADT"
# Define gating marker pairs
gating_pairs <- list(
c("CD4", "CD19"),
c("CD45RA", "CD45RO"),
c("CD4", "CD25"),
c("PD1", "CD274"),
c("CCR7", "CD62L")
)
# Generate FeatureScatter plots
gating_plots <- list()
for (i in seq_along(gating_pairs)) {
f1 <- gating_pairs[[i]][1]
f2 <- gating_pairs[[i]][2]
# Ensure both markers exist in ADT
if (f1 %in% rownames(All_samples_Merged_ADT[["ADT"]]) &&
f2 %in% rownames(All_samples_Merged_ADT[["ADT"]])) {
gating_plots[[i]] <- FeatureScatter(
All_samples_Merged_ADT,
feature1 = f1,
feature2 = f2,
group.by = "orig.ident"
) + ggtitle(paste("T Cell Gating:", f1, "vs", f2))
}
}
library(patchwork)
wrap_plots(gating_plots)

Heatmap of Protein
Expression by Cell Line (L1-L7 Focus)
This visualization directly addresses the goal of comparing
immunophenotypes across the L1-L7 cell lines.
DefaultAssay(All_samples_Merged_ADT) <- "ADT"
# -----------------------
# 1. Extract metadata
# -----------------------
cell_line_info <- All_samples_Merged_ADT$orig.ident # L1–L7
cell_lines <- sort(unique(cell_line_info))
# -----------------------
# 2. Extract ADT data
# -----------------------
adt_matrix <- GetAssayData(All_samples_Merged_ADT, slot = "data")
adt_features <- rownames(adt_matrix) # actual protein names
# -----------------------
# 3. Compute mean protein expression per cell line
# -----------------------
mean_expression_cell_line <- sapply(cell_lines, function(cl) {
cells <- names(cell_line_info)[cell_line_info == cl]
rowMeans(adt_matrix[, cells, drop = FALSE])
})
# reorder rows to ADT feature order
mean_expression_cell_line <- mean_expression_cell_line[adt_features, ]
# -----------------------
# 4. Heatmap with labels
# -----------------------
pheatmap(
mean_expression_cell_line,
main = "Protein Expression Heatmap by Cell Line",
scale = "row",
cluster_rows = TRUE,
cluster_cols = TRUE,
show_rownames = TRUE,
show_colnames = TRUE,
fontsize_col = 9,
angle_col = 45,
labels_col = cell_lines # <- YOUR COLUMN LABELS
)

# -----------------------
# 5. NJ tree (labels = orig.ident)
# -----------------------
dist_matrix <- dist(t(mean_expression_cell_line))
Found more than one class "dist" in cache; using the first, from namespace 'spam'
Also defined by ‘BiocGenerics’
tree <- nj(dist_matrix)
plot(
tree,
main = "Cell Line Similarity Tree (ADT Expression)",
tip.label = cell_lines # <- LABEL THE TREE
)

Marker Discovery and
Visualization by ADT Cluster
Generate
Protein-level Differential Markers
# Find markers for ADT clusters
adt_markers <- FindAllMarkers(All_samples_Merged_ADT, assay = "ADT", only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, group.by = "seurat_clusters")
# Display top 5 markers per cluster
top5_adt_markers <- adt_markers %>%
group_by(cluster) %>%
slice_max(n = 5, order_by = avg_log2FC)
print(top5_adt_markers)
Heatmap of Protein
Expression by ADT Cluster
DefaultAssay(All_samples_Merged_ADT) <- "ADT"
adt_feats <- unique(top5_adt_markers$gene) # remove duplicates
p12 <- DotPlot(
All_samples_Merged_ADT,
features = adt_feats,
assay = "ADT",
group.by = "seurat_clusters"
) +
ggtitle("Protein Expression by ADT Cluster")
p12

p12_2 <- DotPlot(
All_samples_Merged_ADT,
features = adt_feats,
assay = "ADT",
group.by = "orig.ident"
) +
ggtitle("Protein Expression by ADT Cluster")
p12_2

Ridgeplots: Protein
Expression across RNA Clusters
DefaultAssay(All_samples_Merged) <- "ADT"
# Ensure you only use existing features
adt_feats <- adt_features[adt_features %in% rownames(All_samples_Merged[["ADT"]])]
for (f in adt_feats) {
p <- RidgePlot(
All_samples_Merged,
features = f,
assay = "ADT",
group.by = "orig.ident"
) + ggtitle(f)
print(p)
}




























Fwatureplots: Protein
Expression across RNA Clusters
DefaultAssay(All_samples_Merged) <- "ADT"
adt_feats <- adt_features[adt_features %in% rownames(All_samples_Merged[["ADT"]])]
for (f in adt_feats) {
p <- FeaturePlot(
All_samples_Merged,
features = f,
reduction = "umap", # UMAP calculated from SCT RNA
cols = c("lightgrey", "darkred") # optional
) + ggtitle(f)
print(p)
}




























NA
NA
---
title: "Corrected ADT (CITE-seq) Analysis Workflow for T Cell Immunophenotyping"
author: "NASIR MAHMOOD ABBASI"
date: "`r format(Sys.Date(), '%B %d, %Y')`"
output:
  html_notebook:
    number_sections: true
    toc: true
    toc_float:
      collapsed: true
    theme: journal
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE, fig.width = 10, fig.height = 8)
# Load necessary libraries
library(Seurat)
library(ggplot2)
library(patchwork)
library(dplyr)
library(pheatmap) # For heatmap visualization
library(ape) # For tree visualization
library(dittoSeq) # For dittoHeatmap
library(RColorBrewer)
```

# Introduction
This R Markdown script outlines a comprehensive, corrected workflow for the analysis of Antibody-Derived Tag (ADT) data from a CITE-seq experiment, with a specific focus on **T cell immunophenotyping** in your L1-L7 cell lines. The primary goal is to use the 28 surface protein markers to accurately define T cell subsets.

**Prerequisites:** The analysis assumes a Seurat object named `All_samples_Merged` is loaded, containing both RNA (SCT assay) and ADT (ADT assay) data.

```{r load_data, eval=FALSE}
# Replace with the actual path to your Seurat object
All_samples_Merged <- readRDS("../../../PHD_3rd_YEAR_Analysis/0-Seurat_RDS_OBJECT_FINAL/All_samples_Merged_with_Renamed_Clusters_final-26-10-2025.rds") 

# Verify the object structure
print(All_samples_Merged)
# Set default assay to SCT for initial RNA-based context
DefaultAssay(All_samples_Merged) <- "SCT"

DimPlot(All_samples_Merged, group.by = "orig.ident", label = T)
```

#  QC & Normalization of ADT (Corrected)

 **<span style="color:red;">CRITICAL CORRECTION: Subset Object for ADT-Positive Cells</span>**
 This step is necessary because the 'CD4T_10x' sample does not have ADT data.
 We will create a subsetted object for ADT-only analysis.
 The full object will be used for RNA/ADT integration.

```{r subset_adt_cells, eval=FALSE}
# Identify cells with ADT data (i.e., cells where the total ADT count is > 0)
# This assumes that cells without ADT data have 0 total ADT counts after re-initialization.
adt_cells <- WhichCells(All_samples_Merged, expression = nCount_ADT > 0)

# Create a subsetted object for ADT-only analysis
All_samples_Merged_ADT <- subset(All_samples_Merged, cells = adt_cells)

# Verify the subsetting
print(All_samples_Merged_ADT)
DimPlot(All_samples_Merged_ADT, group.by = "orig.ident", label = T)
```

## Examine Raw Counts Distribution and Protein-to-RNA Correlation

This step is crucial for initial quality assessment and identifying potential issues like non-specific binding or technical artifacts.

```{r adt_qc1, fig.height= 4, fig.width= 12}

Scientific review by The Impulsion NEWMOON Research Network (New Models in Oncology)
Friday, October 17, 2025
Place: amphitheater of the Bordeaux Biology Health (BBS) building, Carreire Campus

Its not Thesis draft its Manuscript on Disecting Heterogeneity and immune evasion Mechanisms in Sezary syndrome ModelsIdents(All_samples_Merged) <- "orig.ident"

# **CORRECTION:** Re-initialize the ADT assay from raw counts to ensure a clean start for normalization.
# This is necessary if the ADT assay was previously incorrectly normalized.
# We assume the raw counts are still in the 'counts' slot of the 'ADT' assay.
raw_counts <- GetAssayData(All_samples_Merged, assay = "ADT", layer = "counts")
All_samples_Merged[["ADT"]] <- CreateAssayObject(counts = raw_counts)

# Set the default assay to ADT for protein-specific QC
DefaultAssay(All_samples_Merged) <- "ADT"

# Calculate the total number of ADT counts per cell
All_samples_Merged$nFeature_ADT <- colSums(All_samples_Merged@assays$ADT@counts > 0)
All_samples_Merged$nCount_ADT <- colSums(All_samples_Merged@assays$ADT@counts)

# Visualize ADT count distribution
p1 <- VlnPlot(All_samples_Merged, features = c("nFeature_ADT", "nCount_ADT"), ncol = 2, pt.size = 0.1)
print(p1)
```

```{r adt_qc2, fig.height= 24, fig.width= 30}
# Identify potential outlier antibodies (e.g., highly expressed across all cells)
# Plot raw counts for all 28 proteins
adt_features <- rownames(All_samples_Merged[["ADT"]])
p2 <- VlnPlot(All_samples_Merged, features = adt_features[1:min(28, length(adt_features))], ncol = 4, pt.size = 0)
print(p2)
```

## Apply Centered Log-Ratio (CLR) Normalization (**CRITICAL CORRECTION**)

CLR normalization is the standard method in Seurat for ADT data. **The critical correction here is the use of `margin = 2`**.

```{r adt_normalization, eval=FALSE}
# **CORRECTION:** Use margin = 2 for CLR normalization. 
# margin = 2 normalizes across features (proteins) for each cell (standard for compositional data).
All_samples_Merged <- NormalizeData(All_samples_Merged, assay = "ADT", normalization.method = "CLR", margin = 2)
```

## Quality control after normalization

```{r adt_qc3, fig.height= 24, fig.width= 30}
# **CORRECTION:** Plot the *normalized* data (default layer for VlnPlot after NormalizeData)
# to assess the effect of CLR normalization.
adt_features <- rownames(All_samples_Merged[["ADT"]])
p3 <- VlnPlot(All_samples_Merged, features = adt_features[1:min(28, length(adt_features))], ncol = 4, pt.size = 0)
print(p3)
```






# T Cell Immunophenotyping and Visualization

## Visualize multiple modalities side-by-side (**CORRECTION**)

The previous code used the `adt_` prefix which is unnecessary and can be confusing. The standard way is to set the default assay and use the feature name directly.

```{r rna_adt_side_by_side, fig.height=4, fig.width=10}
# Set default assay to ADT for protein plots
DefaultAssay(All_samples_Merged_ADT) <- "ADT"
p4 <- FeaturePlot(All_samples_Merged_ADT, "CD4", cols = c("lightgrey", "darkgreen"), reduction = "umap") + ggtitle("CD4 Protein (ADT UMAP)")

# Set default assay to SCT for RNA plots
DefaultAssay(All_samples_Merged) <- "SCT"
p5 <- FeaturePlot(All_samples_Merged, "CD4", reduction = "umap") + ggtitle("CD4 RNA (RNA UMAP)")

# place plots side-by-side
p4 | p5

# Example for PD1/PDCD1
DefaultAssay(All_samples_Merged_ADT) <- "ADT"
p6 <- FeaturePlot(All_samples_Merged_ADT, "PD1", cols = c("lightgrey", "darkgreen"), reduction = "umap") + ggtitle("PD1 Protein")
DefaultAssay(All_samples_Merged) <- "SCT"
p7 <- FeaturePlot(All_samples_Merged, "PDCD1", reduction = "umap") + ggtitle("PDCD1 RNA (SCT)")
p6 | p7
```

##  Biaxial ADT Plots for Phenotypic Gating (T Cell Focus)

This is the core of T cell immunophenotyping. We use `FeatureScatter` on the normalized ADT data to define subsets, similar to flow cytometry.

```{r adt_gating_tcell, fig.height=10, fig.width=20}
# Set ADT as the active assay 
DefaultAssay(All_samples_Merged_ADT)

Idents(All_samples_Merged_ADT) <- "orig.ident"
DefaultAssay(All_samples_Merged_ADT) <- "ADT"

# Define gating marker pairs
gating_pairs <- list(
  c("CD4", "CD19"),
  c("CD45RA", "CD45RO"),
  c("CD4", "CD25"),
  c("PD1", "CD274"),
  c("CCR7", "CD62L")
)

# Generate FeatureScatter plots
gating_plots <- list()

for (i in seq_along(gating_pairs)) {
  f1 <- gating_pairs[[i]][1]
  f2 <- gating_pairs[[i]][2]

  # Ensure both markers exist in ADT
  if (f1 %in% rownames(All_samples_Merged_ADT[["ADT"]]) &&
      f2 %in% rownames(All_samples_Merged_ADT[["ADT"]])) {

    gating_plots[[i]] <- FeatureScatter(
      All_samples_Merged_ADT,
      feature1 = f1,
      feature2 = f2,
      group.by = "orig.ident"
    ) + ggtitle(paste("T Cell Gating:", f1, "vs", f2))
  }
}

library(patchwork)
wrap_plots(gating_plots)
```

## Heatmap of Protein Expression by Cell Line (L1-L7 Focus)

This visualization directly addresses the goal of comparing immunophenotypes across the L1-L7 cell lines.

```{r adt_heatmap_cellline, fig.height=6, fig.width=12}
DefaultAssay(All_samples_Merged_ADT) <- "ADT"

# -----------------------
# 1. Extract metadata
# -----------------------
cell_line_info <- All_samples_Merged_ADT$orig.ident    # L1–L7
cell_lines <- sort(unique(cell_line_info))

# -----------------------
# 2. Extract ADT data
# -----------------------
adt_matrix <- GetAssayData(All_samples_Merged_ADT, slot = "data")  
adt_features <- rownames(adt_matrix)     # actual protein names

# -----------------------
# 3. Compute mean protein expression per cell line
# -----------------------
mean_expression_cell_line <- sapply(cell_lines, function(cl) {
  cells <- names(cell_line_info)[cell_line_info == cl]
  rowMeans(adt_matrix[, cells, drop = FALSE])
})

# reorder rows to ADT feature order
mean_expression_cell_line <- mean_expression_cell_line[adt_features, ]

# -----------------------
# 4. Heatmap with labels
# -----------------------
pheatmap(
  mean_expression_cell_line,
  main = "Protein Expression Heatmap by Cell Line",
  scale = "row",
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  show_rownames = TRUE,
  show_colnames = TRUE,
  fontsize_col = 9,
  angle_col = 45,
  labels_col = cell_lines         # <- YOUR COLUMN LABELS
)

# -----------------------
# 5. NJ tree (labels = orig.ident)
# -----------------------
dist_matrix <- dist(t(mean_expression_cell_line))
tree <- nj(dist_matrix)

plot(
  tree,
  main = "Cell Line Similarity Tree (ADT Expression)",
  tip.label = cell_lines           # <- LABEL THE TREE
)

```

# Marker Discovery and Visualization by ADT Cluster

## Generate Protein-level Differential Markers

```{r adt_markers, eval=FALSE}
# Find markers for ADT clusters
adt_markers <- FindAllMarkers(All_samples_Merged_ADT, assay = "ADT", only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, group.by = "seurat_clusters")

# Display top 5 markers per cluster
top5_adt_markers <- adt_markers %>%
    group_by(cluster) %>%
    slice_max(n = 5, order_by = avg_log2FC)

print(top5_adt_markers)
```

## Heatmap of Protein Expression by ADT Cluster

```{r adt_heatmap_cluster, fig.height=6, fig.width=20}
DefaultAssay(All_samples_Merged_ADT) <- "ADT"

adt_feats <- unique(top5_adt_markers$gene)   # remove duplicates

p12 <- DotPlot(
  All_samples_Merged_ADT,
  features = adt_feats,
  assay = "ADT",
  group.by = "seurat_clusters"
) +
  ggtitle("Protein Expression by ADT Cluster")

p12


p12_2 <- DotPlot(
  All_samples_Merged_ADT,
  features = adt_feats,
  assay = "ADT",
  group.by = "orig.ident"
) +
  ggtitle("Protein Expression by ADT Cluster")

p12_2
```



## Ridgeplots: Protein Expression across RNA Clusters

```{r rna_adt_ridge,  fig.height=6, fig.width=20}
DefaultAssay(All_samples_Merged) <- "ADT"

# Ensure you only use existing features
adt_feats <- adt_features[adt_features %in% rownames(All_samples_Merged[["ADT"]])]

for (f in adt_feats) {
    p <- RidgePlot(
        All_samples_Merged,
        features = f,
        assay = "ADT",
        group.by = "orig.ident"
    ) + ggtitle(f)
    print(p)
}

```


## Fwatureplots: Protein Expression across RNA Clusters
```{r rna_adt_feature,  fig.height=6, fig.width=8}
DefaultAssay(All_samples_Merged) <- "ADT"

adt_feats <- adt_features[adt_features %in% rownames(All_samples_Merged[["ADT"]])]

for (f in adt_feats) {
    p <- FeaturePlot(
        All_samples_Merged,
        features = f,
        reduction = "umap",  # UMAP calculated from SCT RNA
        cols = c("lightgrey", "darkred")  # optional
    ) + ggtitle(f)
    print(p)
}


```






