1 Introduction

This R Markdown script outlines a comprehensive workflow for the analysis of Antibody-Derived Tag (ADT) data from a CITE-seq experiment, integrated with the corresponding single-cell RNA sequencing (scRNA-seq) data, using the Seurat package. The analysis follows the user-specified steps for Quality Control (QC), normalization, dimensionality reduction, marker discovery, and visualization.

Prerequisites: The analysis assumes a Seurat object named All_samples_Merged is loaded, containing both RNA (SCT assay) and ADT (ADT assay) data.

# Replace with the actual path to your Seurat object
All_samples_Merged <- readRDS("../../../PHD_3rd_YEAR_Analysis/0-Seurat_RDS_OBJECT_FINAL/All_samples_Merged_with_Renamed_Clusters_final-26-10-2025.rds") 

# Verify the object structure
print(All_samples_Merged)
DefaultAssay(All_samples_Merged) <- "SCT"

DimPlot(All_samples_Merged, group.by = "orig.ident", label = T)

2 QC & Normalization of ADT

2.1 Examine Raw Counts Distribution and Protein-to-RNA Correlation

This step is crucial for initial quality assessment and identifying potential issues like non-specific binding or technical artifacts.

Idents(All_samples_Merged) <- "orig.ident"


# Extract raw counts from somewhere safe (e.g. backup or original data)
raw_counts <- GetAssayData(All_samples_Merged, assay = "ADT", layer = "counts")

# Create a new ADT assay with the raw counts, resetting normalized/scaled data
All_samples_Merged[["ADT"]] <- CreateAssayObject(counts = raw_counts)

# Set the default assay to ADT
DefaultAssay(All_samples_Merged) <- "ADT"


# Calculate the total number of ADT counts per cell
All_samples_Merged$nFeature_ADT <- colSums(All_samples_Merged@assays$ADT@counts > 0)
All_samples_Merged$nCount_ADT <- colSums(All_samples_Merged@assays$ADT@counts)

# Visualize ADT count distribution
p1 <- VlnPlot(All_samples_Merged, features = c("nFeature_ADT", "nCount_ADT"), ncol = 2, pt.size = 0.1)
print(p1)

# Identify potential outlier antibodies (e.g., highly expressed across all cells)
# Plot raw counts for all 28 proteins
adt_features <- rownames(All_samples_Merged[["ADT"]])
p2 <- VlnPlot(All_samples_Merged, features = adt_features[1:min(28, length(adt_features))], ncol = 4, pt.size = 0)
print(p2)

2.2 Apply Centered Log-Ratio (CLR) Normalization

CLR normalization is the standard method in Seurat for ADT data, treating the protein counts as compositional data.

# margin=1 means “perform CLR normalization for each feature independently” # margin=2 means “perform CLR normalization within a cell”

Alt text

# Apply CLR normalization (margin = 2 normalizes across features for each cell)
All_samples_Merged <- NormalizeData(All_samples_Merged, assay = "ADT", normalization.method = "CLR", margin = 1)

2.3 Quality control after nomrlization

# Identify potential outlier antibodies (e.g., highly expressed across all cells)
# Plot raw counts for all 28 proteins
adt_features <- rownames(All_samples_Merged[["ADT"]])
p3 <- VlnPlot(All_samples_Merged, features = adt_features[1:min(28, length(adt_features))], ncol = 4, pt.size = 0)
print(p3)

2.4 Visualize multiple modalities side-by-side

Idents(All_samples_Merged) <- "cell_line"
# Now, we will visualize CD14 levels for RNA and protein By setting the default assay, we can
# visualize one or the other
DefaultAssay(All_samples_Merged) <- "SCT"
p1 <- FeaturePlot(All_samples_Merged, "CD4", cols = c("lightgrey", "darkgreen")) + ggtitle("CD4 protein")
DefaultAssay(All_samples_Merged) <- "SCT"
p2 <- FeaturePlot(All_samples_Merged, "CD4") + ggtitle("CD4 RNA")

# place plots side-by-side
p1 | p2


Key(All_samples_Merged[["RNA"]])
[1] "rna_"
Key(All_samples_Merged[["SCT"]])
[1] "sct_"
Key(All_samples_Merged[["ADT"]])
[1] "adt_"
# Now, we can include the key in the feature name, which overrides the default assay
p1 <- FeaturePlot(All_samples_Merged, "adt_PD1", cols = c("lightgrey", "darkgreen")) + ggtitle("PD1 protein")
p2 <- FeaturePlot(All_samples_Merged, "rna_PDCD1") + ggtitle("PD1 RNA")
p3 <- FeaturePlot(All_samples_Merged, "sct_PDCD1") + ggtitle("PD1 SCT")
p1 | p2 | p3


# Now, we can include the key in the feature name, which overrides the default assay
p1 <- FeaturePlot(All_samples_Merged, "adt_CD274", cols = c("lightgrey", "darkgreen")) + ggtitle("CD274 protein")
p2 <- FeaturePlot(All_samples_Merged, "rna_CD274") + ggtitle("CD274 RNA")
p3 <- FeaturePlot(All_samples_Merged, "sct_CD274") + ggtitle("CD274 RNA_SCT")
p1 | p2 | p3


# Now, we can include the key in the feature name, which overrides the default assay
p1 <- FeaturePlot(All_samples_Merged, "adt_CD7", cols = c("lightgrey", "darkgreen")) + ggtitle("CD7 protein")
p2 <- FeaturePlot(All_samples_Merged, "rna_CD7") + ggtitle("CD7 RNA")
p3 <- FeaturePlot(All_samples_Merged, "sct_CD7") + ggtitle("CD7 RNA_SCT")
p1 | p2 | p3

DefaultAssay -> "ADT"
Idents(object=All_samples_Merged) <- "orig.ident"
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD2")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD3")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD5")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD28")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD127")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD45")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD19")


FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD7")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD25")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD26")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD45RA")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD45RO")


FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD62L")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CCR7")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CCR4")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CCR6")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CCR8")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CCR10")


FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CXCR3")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CXCR4")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD95")


FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD30")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD40")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD44")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD45")


FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD274")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_PD1")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "TCRab")


library(Seurat)
library(dittoSeq)
library(RColorBrewer)

# Make sure default assay is ADT
DefaultAssay(All_samples_Merged) <- "ADT"

# Keep only cells that have ADT data
adt_cells <- colnames(All_samples_Merged[["ADT"]]@data)
All_samples_ADT <- subset(All_samples_Merged, cells = adt_cells)

# Choose a reasonable set of proteins to plot
genes_to_plot <- rownames(All_samples_ADT[["ADT"]])
# Optional: remove proteins with 0 expression across all cells
genes_to_plot <- genes_to_plot[rowSums(All_samples_ADT[["ADT"]]@data) > 0]

# Custom color palette
my_colors <- colorRampPalette(brewer.pal(9, "YlGnBu"))(100)

# Run dittoHeatmap
dittoHeatmap(
  All_samples_ADT,
  genes = genes_to_plot,
  annot.by = "Patient_origin",      # or "orig.ident"
  heatmap.colors = my_colors,
  scaled.to.max = TRUE
)

NA
NA


rownames(All_samples_Merged[["ADT"]])
 [1] "CD274"  "CD30"   "CD40"   "CD3"    "CD45RA" "CD7"    "CCR4"   "CD4"    "CD25"   "CD45RO" "PD1"    "CD44"   "CD5"    "CXCR3"  "CCR6"   "CD62L"  "CCR7"   "CD95"  
[19] "TCRab"  "CXCR4"  "CD2"    "CD28"   "CD127"  "CD45"   "CD26"   "CCR10"  "CCR8"   "CD19"  
DefaultAssay(All_samples_Merged) <- "ADT"



library(dittoSeq)

dittoHeatmap(All_samples_Merged, genes,
    annot.by = c("orig.ident"))


# Load additional libraries for color palettes
library(RColorBrewer)

# Define a custom color palette for the heatmap
my_colors <- colorRampPalette(brewer.pal(9, "YlGnBu"))(100)  # Example: Yellow to Blue

# Generate the heatmap with custom colors
dittoHeatmap(
  All_samples_Merged, 
  genes,
  annot.by = c("orig.ident"),
  heatmap.colors = my_colors,  # Apply custom heatmap colors
  scaled.to.max = TRUE         # Optionally, scale data for better contrast
)


DefaultAssay(All_samples_Merged) <- 'ADT'

# Get cluster information
cluster_info <- All_samples_Merged$seurat_clusters
print(table(cluster_info))  # This will show the distribution of cells across clusters
cluster_info
   0    1    2    3    4    5    6    7    8    9   10   11   12   13 
6789 5275 4663 4661 4086 3634 3536 3409 3338 3273 3212 1675 1063  691 
# Get unique cluster IDs
clusters <- sort(unique(cluster_info))
print(clusters)  # This will show you what cluster IDs are actually present
 [1] 0  1  2  3  4  5  6  7  8  9  10 11 12 13
Levels: 0 1 2 3 4 5 6 7 8 9 10 11 12 13
# Calculate mean expression for each protein in each cluster
mean_expression_clusters <- sapply(clusters, function(cl) {
  cells <- names(cluster_info)[cluster_info == cl]
  rowMeans(LayerData(All_samples_Merged, assay = "ADT", layer = "data")[, cells, drop = FALSE])
})

# Check the dimensions of the resulting matrix
print(dim(mean_expression_clusters))
[1] 28 14
# Create cluster labels starting from 0
cluster_labels <- paste0("Cluster ", seq(0, length(clusters) - 1))

# Set column names
colnames(mean_expression_clusters) <- cluster_labels

# Create the heatmap
pheatmap(mean_expression_clusters, 
         main = "Protein Expression Heatmap by Cluster",
         scale = "row",
         cluster_rows = TRUE, 
         cluster_cols = TRUE,
         show_rownames = TRUE,
         show_colnames = TRUE,
         fontsize_col = 8,
         angle_col = 45)


# For the tree visualization
dist_matrix_clusters <- dist(t(mean_expression_clusters))
Found more than one class "dist" in cache; using the first, from namespace 'spam'
Also defined by ‘BiocGenerics’
tree_clusters <- nj(dist_matrix_clusters)

plot(tree_clusters, main = "Cluster Similarity Tree Based on Protein Expression")

---
title: "Full ADT (CITE-seq) Analysis Workflow-17-11-2025-Margin1"
author: "NASIR MAHMOOD ABBASI"
date: "`r format(Sys.Date(), '%B %d, %Y')`"
output:
  html_notebook:
    number_sections: true
    toc: true
    toc_depth: 3
    toc_float: true
    code_folding: hide
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE, fig.width = 10, fig.height = 8)
# Load necessary libraries
library(Seurat)
library(ggplot2)
library(patchwork)
library(dplyr)

```

# Introduction
This R Markdown script outlines a comprehensive workflow for the analysis of Antibody-Derived Tag (ADT) data from a CITE-seq experiment, integrated with the corresponding single-cell RNA sequencing (scRNA-seq) data, using the Seurat package. The analysis follows the user-specified steps for Quality Control (QC), normalization, dimensionality reduction, marker discovery, and visualization.

**Prerequisites:** The analysis assumes a Seurat object named `All_samples_Merged` is loaded, containing both RNA (SCT assay) and ADT (ADT assay) data.

```{r load_data, eval=FALSE}
# Replace with the actual path to your Seurat object
All_samples_Merged <- readRDS("../../../PHD_3rd_YEAR_Analysis/0-Seurat_RDS_OBJECT_FINAL/All_samples_Merged_with_Renamed_Clusters_final-26-10-2025.rds") 

# Verify the object structure
print(All_samples_Merged)
DefaultAssay(All_samples_Merged) <- "SCT"

DimPlot(All_samples_Merged, group.by = "orig.ident", label = T)
```

#  QC & Normalization of ADT

## Examine Raw Counts Distribution and Protein-to-RNA Correlation

This step is crucial for initial quality assessment and identifying potential issues like non-specific binding or technical artifacts.

```{r adt_qc1, fig.height= 4, fig.width= 12}
Idents(All_samples_Merged) <- "orig.ident"


# Extract raw counts from somewhere safe (e.g. backup or original data)
raw_counts <- GetAssayData(All_samples_Merged, assay = "ADT", layer = "counts")

# Create a new ADT assay with the raw counts, resetting normalized/scaled data
All_samples_Merged[["ADT"]] <- CreateAssayObject(counts = raw_counts)

# Set the default assay to ADT
DefaultAssay(All_samples_Merged) <- "ADT"


# Calculate the total number of ADT counts per cell
All_samples_Merged$nFeature_ADT <- colSums(All_samples_Merged@assays$ADT@counts > 0)
All_samples_Merged$nCount_ADT <- colSums(All_samples_Merged@assays$ADT@counts)

# Visualize ADT count distribution
p1 <- VlnPlot(All_samples_Merged, features = c("nFeature_ADT", "nCount_ADT"), ncol = 2, pt.size = 0.1)
print(p1)
```

```{r adt_qc2, fig.height= 24, fig.width= 30}
# Identify potential outlier antibodies (e.g., highly expressed across all cells)
# Plot raw counts for all 28 proteins
adt_features <- rownames(All_samples_Merged[["ADT"]])
p2 <- VlnPlot(All_samples_Merged, features = adt_features[1:min(28, length(adt_features))], ncol = 4, pt.size = 0)
print(p2)

```

##  Apply Centered Log-Ratio (CLR) Normalization

CLR normalization is the standard method in Seurat for ADT data, treating the protein counts as compositional data.

 # margin=1 means "perform CLR normalization for each feature independently"
    # margin=2 means "perform CLR normalization within a cell"

<img src="adt_margin.png" alt="Alt text" width="500" height="100">


```{r adt_normalization, eval=FALSE}
# Apply CLR normalization (margin = 2 normalizes across features for each cell)
All_samples_Merged <- NormalizeData(All_samples_Merged, assay = "ADT", normalization.method = "CLR", margin = 1)
```

## Quality control after nomrlization
```{r adt_qc3, fig.height= 24, fig.width= 30}
# Identify potential outlier antibodies (e.g., highly expressed across all cells)
# Plot raw counts for all 28 proteins
adt_features <- rownames(All_samples_Merged[["ADT"]])
p3 <- VlnPlot(All_samples_Merged, features = adt_features[1:min(28, length(adt_features))], ncol = 4, pt.size = 0)
print(p3)
```

## Visualize multiple modalities side-by-side
```{r , fig.height=4, fig.width=10}
Idents(All_samples_Merged) <- "cell_line"
# Now, we will visualize CD14 levels for RNA and protein By setting the default assay, we can
# visualize one or the other
DefaultAssay(All_samples_Merged) <- "SCT"
p1 <- FeaturePlot(All_samples_Merged, "CD4", cols = c("lightgrey", "darkgreen")) + ggtitle("CD4 protein")
DefaultAssay(All_samples_Merged) <- "SCT"
p2 <- FeaturePlot(All_samples_Merged, "CD4") + ggtitle("CD4 RNA")

# place plots side-by-side
p1 | p2

Key(All_samples_Merged[["RNA"]])

Key(All_samples_Merged[["SCT"]])

Key(All_samples_Merged[["ADT"]])


# Now, we can include the key in the feature name, which overrides the default assay
p1 <- FeaturePlot(All_samples_Merged, "adt_PD1", cols = c("lightgrey", "darkgreen")) + ggtitle("PD1 protein")
p2 <- FeaturePlot(All_samples_Merged, "rna_PDCD1") + ggtitle("PD1 RNA")
p3 <- FeaturePlot(All_samples_Merged, "sct_PDCD1") + ggtitle("PD1 SCT")
p1 | p2 | p3

# Now, we can include the key in the feature name, which overrides the default assay
p1 <- FeaturePlot(All_samples_Merged, "adt_CD274", cols = c("lightgrey", "darkgreen")) + ggtitle("CD274 protein")
p2 <- FeaturePlot(All_samples_Merged, "rna_CD274") + ggtitle("CD274 RNA")
p3 <- FeaturePlot(All_samples_Merged, "sct_CD274") + ggtitle("CD274 RNA_SCT")
p1 | p2 | p3

# Now, we can include the key in the feature name, which overrides the default assay
p1 <- FeaturePlot(All_samples_Merged, "adt_CD7", cols = c("lightgrey", "darkgreen")) + ggtitle("CD7 protein")
p2 <- FeaturePlot(All_samples_Merged, "rna_CD7") + ggtitle("CD7 RNA")
p3 <- FeaturePlot(All_samples_Merged, "sct_CD7") + ggtitle("CD7 RNA_SCT")
p1 | p2 | p3
```

```{r , fig.height=6, fig.width=10}
DefaultAssay -> "ADT"
Idents(object=All_samples_Merged) <- "orig.ident"
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD2")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD3")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD5")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD28")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD127")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD45")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD19")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD7")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD25")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD26")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD45RA")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD45RO")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD62L")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CCR7")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CCR4")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CCR6")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CCR8")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CCR10")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CXCR3")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CXCR4")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD95")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD30")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD40")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD44")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD45")

FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_CD274")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "adt_PD1")
FeatureScatter(All_samples_Merged, feature1 = "adt_CD4", feature2 = "TCRab")
```

```{r ADT, fig.height=6, fig.width=12}

library(Seurat)
library(dittoSeq)
library(RColorBrewer)

# Make sure default assay is ADT
DefaultAssay(All_samples_Merged) <- "ADT"

# Keep only cells that have ADT data
adt_cells <- colnames(All_samples_Merged[["ADT"]]@data)
All_samples_ADT <- subset(All_samples_Merged, cells = adt_cells)

# Choose a reasonable set of proteins to plot
genes_to_plot <- rownames(All_samples_ADT[["ADT"]])
# Optional: remove proteins with 0 expression across all cells
genes_to_plot <- genes_to_plot[rowSums(All_samples_ADT[["ADT"]]@data) > 0]

# Custom color palette
my_colors <- colorRampPalette(brewer.pal(9, "YlGnBu"))(100)

# Run dittoHeatmap
dittoHeatmap(
  All_samples_ADT,
  genes = genes_to_plot,
  annot.by = "Patient_origin",      # or "orig.ident"
  heatmap.colors = my_colors,
  scaled.to.max = TRUE
)


```


```{r ADT2, fig.height=6, fig.width=12}


rownames(All_samples_Merged[["ADT"]])

DefaultAssay(All_samples_Merged) <- "ADT"



library(dittoSeq)

dittoHeatmap(All_samples_Merged, genes,
    annot.by = c("orig.ident"))

# Load additional libraries for color palettes
library(RColorBrewer)

# Define a custom color palette for the heatmap
my_colors <- colorRampPalette(brewer.pal(9, "YlGnBu"))(100)  # Example: Yellow to Blue

# Generate the heatmap with custom colors
dittoHeatmap(
  All_samples_Merged, 
  genes,
  annot.by = c("orig.ident"),
  heatmap.colors = my_colors,  # Apply custom heatmap colors
  scaled.to.max = TRUE         # Optionally, scale data for better contrast
)

```




```{r data3, fig.height=6, fig.width=10}

DefaultAssay(All_samples_Merged) <- 'ADT'

# Get cluster information
cluster_info <- All_samples_Merged$seurat_clusters
print(table(cluster_info))  # This will show the distribution of cells across clusters

# Get unique cluster IDs
clusters <- sort(unique(cluster_info))
print(clusters)  # This will show you what cluster IDs are actually present

# Calculate mean expression for each protein in each cluster
mean_expression_clusters <- sapply(clusters, function(cl) {
  cells <- names(cluster_info)[cluster_info == cl]
  rowMeans(LayerData(All_samples_Merged, assay = "ADT", layer = "data")[, cells, drop = FALSE])
})

# Check the dimensions of the resulting matrix
print(dim(mean_expression_clusters))

# Create cluster labels starting from 0
cluster_labels <- paste0("Cluster ", seq(0, length(clusters) - 1))

# Set column names
colnames(mean_expression_clusters) <- cluster_labels

# Create the heatmap
pheatmap(mean_expression_clusters, 
         main = "Protein Expression Heatmap by Cluster",
         scale = "row",
         cluster_rows = TRUE, 
         cluster_cols = TRUE,
         show_rownames = TRUE,
         show_colnames = TRUE,
         fontsize_col = 8,
         angle_col = 45)

# For the tree visualization
dist_matrix_clusters <- dist(t(mean_expression_clusters))
tree_clusters <- nj(dist_matrix_clusters)

plot(tree_clusters, main = "Cluster Similarity Tree Based on Protein Expression")

```
