This R Markdown script outlines a comprehensive workflow for the analysis of Antibody-Derived Tag (ADT) data from a CITE-seq experiment, integrated with the corresponding single-cell RNA sequencing (scRNA-seq) data, using the Seurat package. The analysis follows the user-specified steps for Quality Control (QC), normalization, dimensionality reduction, marker discovery, and visualization.
Prerequisites: The analysis assumes a Seurat object
named All_samples_Merged is loaded, containing both RNA
(SCT assay) and ADT (ADT assay) data.
# Replace with the actual path to your Seurat object
All_samples_Merged <- readRDS("../../../PHD_3rd_YEAR_Analysis/0-Seurat_RDS_OBJECT_FINAL/All_samples_Merged_with_Renamed_Clusters_final-26-10-2025.rds")
# Verify the object structure
print(All_samples_Merged)
DefaultAssay(All_samples_Merged) <- "SCT"
DimPlot(All_samples_Merged, group.by = "orig.ident", label = T)
This step is crucial for initial quality assessment and identifying potential issues like non-specific binding or technical artifacts.
Idents(All_samples_Merged) <- "orig.ident"
# Extract raw counts from somewhere safe (e.g. backup or original data)
raw_counts <- GetAssayData(All_samples_Merged, assay = "ADT", layer = "counts")
# Create a new ADT assay with the raw counts, resetting normalized/scaled data
All_samples_Merged[["ADT"]] <- CreateAssayObject(counts = raw_counts)
# Set the default assay to ADT
DefaultAssay(All_samples_Merged) <- "ADT"
# Calculate the total number of ADT counts per cell
All_samples_Merged$nFeature_ADT <- colSums(All_samples_Merged@assays$ADT@counts > 0)
All_samples_Merged$nCount_ADT <- colSums(All_samples_Merged@assays$ADT@counts)
# Visualize ADT count distribution
p1 <- VlnPlot(All_samples_Merged, features = c("nFeature_ADT", "nCount_ADT"), ncol = 2, pt.size = 0.1)
print(p1)
# Identify potential outlier antibodies (e.g., highly expressed across all cells)
# Plot raw counts for all 28 proteins
adt_features <- rownames(All_samples_Merged[["ADT"]])
p2 <- VlnPlot(All_samples_Merged, features = adt_features[1:min(28, length(adt_features))], ncol = 4, pt.size = 0)
print(p2)
CLR normalization is the standard method in Seurat for ADT data, treating the protein counts as compositional data.
# margin=1 means “perform CLR normalization for each feature independently” # margin=2 means “perform CLR normalization within a cell”
# Apply CLR normalization (margin = 2 normalizes across features for each cell)
All_samples_Merged <- NormalizeData(All_samples_Merged, assay = "ADT", normalization.method = "CLR", margin = 2)
# Identify potential outlier antibodies (e.g., highly expressed across all cells)
# Plot raw counts for all 28 proteins
adt_features <- rownames(All_samples_Merged[["ADT"]])
p3 <- VlnPlot(All_samples_Merged, features = adt_features[1:min(28, length(adt_features))], ncol = 4, pt.size = 0)
print(p3)
Idents(All_samples_Merged) <- "cell_line"
# Now, we will visualize CD14 levels for RNA and protein By setting the default assay, we can
# visualize one or the other
DefaultAssay(All_samples_Merged) <- "SCT"
p1 <- FeaturePlot(All_samples_Merged, "CD4", cols = c("lightgrey", "darkgreen")) + ggtitle("CD4 protein")
DefaultAssay(All_samples_Merged) <- "SCT"
p2 <- FeaturePlot(All_samples_Merged, "CD4") + ggtitle("CD4 RNA")
# place plots side-by-side
p1 | p2
Key(All_samples_Merged[["RNA"]])
[1] "rna_"
Key(All_samples_Merged[["SCT"]])
[1] "sct_"
Key(All_samples_Merged[["ADT"]])
[1] "adt_"
# Now, we can include the key in the feature name, which overrides the default assay
p1 <- FeaturePlot(All_samples_Merged, "adt_PD1", cols = c("lightgrey", "darkgreen")) + ggtitle("PD1 protein")
p2 <- FeaturePlot(All_samples_Merged, "rna_PDCD1") + ggtitle("PD1 RNA")
p3 <- FeaturePlot(All_samples_Merged, "sct_PDCD1") + ggtitle("PD1 SCT")
p1 | p2 | p3
# Now, we can include the key in the feature name, which overrides the default assay
p1 <- FeaturePlot(All_samples_Merged, "adt_CD274", cols = c("lightgrey", "darkgreen")) + ggtitle("CD274 protein")
p2 <- FeaturePlot(All_samples_Merged, "rna_CD274") + ggtitle("CD274 RNA")
p3 <- FeaturePlot(All_samples_Merged, "sct_CD274") + ggtitle("CD274 RNA_SCT")
p1 | p2 | p3
# Now, we can include the key in the feature name, which overrides the default assay
p1 <- FeaturePlot(All_samples_Merged, "adt_CD7", cols = c("lightgrey", "darkgreen")) + ggtitle("CD7 protein")
p2 <- FeaturePlot(All_samples_Merged, "rna_CD7") + ggtitle("CD7 RNA")
p3 <- FeaturePlot(All_samples_Merged, "sct_CD7") + ggtitle("CD7 RNA_SCT")
p1 | p2 | p3
DefaultAssay -> "ADT"
Idents(object=All_samples_Merged) <- "orig.ident"
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD2")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD3")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD5")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD28")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD127")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD45")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD19")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD7")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD25")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD26")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD45RA")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD45RO")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD62L")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CCR7")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CCR4")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CCR6")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CCR8")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CCR10")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CXCR3")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CXCR4")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD95")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD30")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD40")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD44")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD45")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD274")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_PD1")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "TCRab")
library(Seurat)
library(dittoSeq)
library(RColorBrewer)
# Make sure default assay is ADT
DefaultAssay(All_samples_Merged) <- "ADT"
# Keep only cells that have ADT data
adt_cells <- colnames(All_samples_Merged[["ADT"]]@data)
All_samples_ADT <- subset(All_samples_Merged, cells = adt_cells)
# Choose a reasonable set of proteins to plot
genes_to_plot <- rownames(All_samples_ADT[["ADT"]])
# Optional: remove proteins with 0 expression across all cells
genes_to_plot <- genes_to_plot[rowSums(All_samples_ADT[["ADT"]]@data) > 0]
# Custom color palette
my_colors <- colorRampPalette(brewer.pal(9, "YlGnBu"))(100)
# Run dittoHeatmap
dittoHeatmap(
All_samples_ADT,
genes = genes_to_plot,
annot.by = "Patient_origin", # or "orig.ident"
heatmap.colors = my_colors,
scaled.to.max = TRUE
)
rownames(All_samples_Merged[["ADT"]])
DefaultAssay(All_samples_Merged) <- "ADT"
library(dittoSeq)
dittoHeatmap(All_samples_Merged, genes,
annot.by = c("orig.ident"))
# Load additional libraries for color palettes
library(RColorBrewer)
# Define a custom color palette for the heatmap
my_colors <- colorRampPalette(brewer.pal(9, "YlGnBu"))(100) # Example: Yellow to Blue
# Generate the heatmap with custom colors
dittoHeatmap(
All_samples_Merged,
genes,
annot.by = c("orig.ident"),
heatmap.colors = my_colors, # Apply custom heatmap colors
scaled.to.max = TRUE # Optionally, scale data for better contrast
)
DefaultAssay(All_samples_Merged) <- 'ADT'
# Get cluster information
cluster_info <- All_samples_Merged$seurat_clusters
print(table(cluster_info)) # This will show the distribution of cells across clusters
# Get unique cluster IDs
clusters <- sort(unique(cluster_info))
print(clusters) # This will show you what cluster IDs are actually present
# Calculate mean expression for each protein in each cluster
mean_expression_clusters <- sapply(clusters, function(cl) {
cells <- names(cluster_info)[cluster_info == cl]
rowMeans(LayerData(All_samples_Merged, assay = "ADT", layer = "data")[, cells, drop = FALSE])
})
# Check the dimensions of the resulting matrix
print(dim(mean_expression_clusters))
# Create cluster labels starting from 0
cluster_labels <- paste0("Cluster ", seq(0, length(clusters) - 1))
# Set column names
colnames(mean_expression_clusters) <- cluster_labels
# Create the heatmap
pheatmap(mean_expression_clusters,
main = "Protein Expression Heatmap by Cluster",
scale = "row",
cluster_rows = TRUE,
cluster_cols = TRUE,
show_rownames = TRUE,
show_colnames = TRUE,
fontsize_col = 8,
angle_col = 45)
# For the tree visualization
dist_matrix_clusters <- dist(t(mean_expression_clusters))
tree_clusters <- nj(dist_matrix_clusters)
plot(tree_clusters, main = "Cluster Similarity Tree Based on Protein Expression")