1. Load Libraries
2. load seurat object
#Load Seurat Object
load("../../To_Transfer_between_computers/23-Harmony_Integration/0-robj/5-Harmony_Integrated_All_samples_Merged_CD4Tcells_final_Resolution_Selected_0.8_ADT_Normalized_cleaned_mt.robj")
All_samples_Merged
3. Set Up Identifiers for Clustering
# Assign cluster identities to the Seurat object
Idents(All_samples_Merged) <- "seurat_clusters"
DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line",label = T, label.box = T)

DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters",label = T, label.box = T)

4. Differential Expression Analysis
4.1 Using Wilcox with SCT
# Load necessary libraries
library(Seurat)
# Define control clusters
control_clusters <- c("3", "10")
# Define all clusters except control
malignant_clusters <- setdiff(as.character(unique(Idents(All_samples_Merged))), control_clusters)
# **Set identities to cluster**
Idents(All_samples_Merged) <- "seurat_clusters"
# List to store DE results
de_results <- list()
for (cluster in malignant_clusters) {
de_results[[cluster]] <- FindMarkers(
object = All_samples_Merged,
ident.1 = cluster,
ident.2 = control_clusters,
assay = "RNA",
test.use = "wilcox",
logfc.threshold = 0,
min.pct = 0
)
write.csv(de_results[[cluster]], paste0("DE_RNA_Wilcox_Cluster_", cluster, "_vs_Control.csv"))
}
Avis : The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
Please use the `layer` argument instead.Avis : `PackageCheck()` was deprecated in SeuratObject 5.0.0.
Please use `rlang::check_installed()` instead.
5. Compute Mean Expression for Groups
# Load necessary libraries
library(Seurat)
library(dplyr)
library(tibble)
expression_data_RNA <- GetAssayData(All_samples_Merged, assay = "RNA", slot = "data")
calculate_mean_expression <- function(markers, group1_cells, group2_cells, expression_data) {
group1_mean <- rowMeans(expression_data[, group1_cells, drop = FALSE], na.rm = TRUE)
group2_mean <- rowMeans(expression_data[, group2_cells, drop = FALSE], na.rm = TRUE)
markers <- markers %>%
rownames_to_column("gene") %>%
mutate(mean_expr_group1 = group1_mean[gene],
mean_expr_group2 = group2_mean[gene])
return(markers)
}
for (cluster in malignant_clusters) {
existing_results <- de_results[[cluster]]
group1_cells <- WhichCells(All_samples_Merged, idents = cluster)
group2_cells <- WhichCells(All_samples_Merged, idents = control_clusters)
updated_results <- calculate_mean_expression(existing_results, group1_cells, group2_cells, expression_data_RNA)
de_results[[cluster]] <- updated_results
write.csv(updated_results, paste0("Updated_DE_Cluster_", cluster, "_with_MeanExpr.csv"), row.names = TRUE)
}
6. Summarize Results
6.1 Summary Function
summarize_markers <- function(markers) {
num_pval0 <- sum(markers$p_val_adj == 0)
num_pval1 <- sum(markers$p_val_adj == 1)
num_significant <- sum(markers$p_val_adj < 0.05)
num_upregulated <- sum(markers$avg_log2FC > 1)
num_downregulated <- sum(markers$avg_log2FC < -1)
cat("Number of genes with p_val_adj = 0:", num_pval0, "\n")
cat("Number of genes with p_val_adj = 1:", num_pval1, "\n")
cat("Number of significant genes (p_val_adj < 0.05):", num_significant, "\n")
cat("Number of upregulated genes (avg_log2FC > 1):", num_upregulated, "\n")
cat("Number of downregulated genes (avg_log2FC < -1):", num_downregulated, "\n")
}
Run Summary on Each Cluster
for (cluster in malignant_clusters) {
cat(paste0("Cluster ", cluster, " Summary:\n"))
summarize_markers(de_results[[cluster]])
cat("\n")
}
Cluster 9 Summary:
Number of genes with p_val_adj = 0: 500
Number of genes with p_val_adj = 1: 26386
Number of significant genes (p_val_adj < 0.05): 9410
Number of upregulated genes (avg_log2FC > 1): 17178
Number of downregulated genes (avg_log2FC < -1): 6455
Cluster 5 Summary:
Number of genes with p_val_adj = 0: 1013
Number of genes with p_val_adj = 1: 24035
Number of significant genes (p_val_adj < 0.05): 11892
Number of upregulated genes (avg_log2FC > 1): 18370
Number of downregulated genes (avg_log2FC < -1): 3336
Cluster 7 Summary:
Number of genes with p_val_adj = 0: 1320
Number of genes with p_val_adj = 1: 24112
Number of significant genes (p_val_adj < 0.05): 11704
Number of upregulated genes (avg_log2FC > 1): 17815
Number of downregulated genes (avg_log2FC < -1): 6276
Cluster 11 Summary:
Number of genes with p_val_adj = 0: 269
Number of genes with p_val_adj = 1: 27004
Number of significant genes (p_val_adj < 0.05): 8701
Number of upregulated genes (avg_log2FC > 1): 18670
Number of downregulated genes (avg_log2FC < -1): 4461
Cluster 12 Summary:
Number of genes with p_val_adj = 0: 1014
Number of genes with p_val_adj = 1: 22829
Number of significant genes (p_val_adj < 0.05): 12978
Number of upregulated genes (avg_log2FC > 1): 20875
Number of downregulated genes (avg_log2FC < -1): 4079
Cluster 1 Summary:
Number of genes with p_val_adj = 0: 3147
Number of genes with p_val_adj = 1: 22626
Number of significant genes (p_val_adj < 0.05): 13309
Number of upregulated genes (avg_log2FC > 1): 7128
Number of downregulated genes (avg_log2FC < -1): 7269
Cluster 2 Summary:
Number of genes with p_val_adj = 0: 2072
Number of genes with p_val_adj = 1: 23430
Number of significant genes (p_val_adj < 0.05): 12441
Number of upregulated genes (avg_log2FC > 1): 8316
Number of downregulated genes (avg_log2FC < -1): 7111
Cluster 6 Summary:
Number of genes with p_val_adj = 0: 2091
Number of genes with p_val_adj = 1: 23577
Number of significant genes (p_val_adj < 0.05): 12296
Number of upregulated genes (avg_log2FC > 1): 18481
Number of downregulated genes (avg_log2FC < -1): 6953
Cluster 0 Summary:
Number of genes with p_val_adj = 0: 2686
Number of genes with p_val_adj = 1: 22526
Number of significant genes (p_val_adj < 0.05): 13354
Number of upregulated genes (avg_log2FC > 1): 7075
Number of downregulated genes (avg_log2FC < -1): 8555
Cluster 4 Summary:
Number of genes with p_val_adj = 0: 2533
Number of genes with p_val_adj = 1: 23208
Number of significant genes (p_val_adj < 0.05): 12772
Number of upregulated genes (avg_log2FC > 1): 7574
Number of downregulated genes (avg_log2FC < -1): 7990
Cluster 13 Summary:
Number of genes with p_val_adj = 0: 683
Number of genes with p_val_adj = 1: 24477
Number of significant genes (p_val_adj < 0.05): 11180
Number of upregulated genes (avg_log2FC > 1): 21984
Number of downregulated genes (avg_log2FC < -1): 3909
Cluster 8 Summary:
Number of genes with p_val_adj = 0: 3198
Number of genes with p_val_adj = 1: 21600
Number of significant genes (p_val_adj < 0.05): 14357
Number of upregulated genes (avg_log2FC > 1): 18556
Number of downregulated genes (avg_log2FC < -1): 6225
7. Visualization
7.1 Volcano Plot for Cell lines with Normal Control
library(EnhancedVolcano)
library(ggplot2)
for (cluster in malignant_clusters) {
df <- de_results[[cluster]]
# Check if gene names are in a "gene" column
if ("gene" %in% colnames(df)) {
labels <- df$gene
rownames(df) <- df$gene # optional: set for consistent handling
} else {
labels <- rownames(df)
}
volcano <- EnhancedVolcano(df,
lab = labels,
x = "avg_log2FC",
y = "p_val_adj",
title = paste0("Cluster ", cluster, " vs Control"),
pCutoff = 0.05,
FCcutoff = 1.0)
print(volcano)
}
Avis : One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...







