1. Load Libraries

2. load seurat object

#Load Seurat Object
load("../../To_Transfer_between_computers/23-Harmony_Integration/0-robj/5-Harmony_Integrated_All_samples_Merged_CD4Tcells_final_Resolution_Selected_0.8_ADT_Normalized_cleaned_mt.robj")


All_samples_Merged

3. Set Up Identifiers for Clustering

# Assign cluster identities to the Seurat object
Idents(All_samples_Merged) <- "seurat_clusters"

DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line",label = T, label.box = T)

DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters",label = T, label.box = T)

4. Differential Expression Analysis

4.1 Using Wilcox with SCT

# Load necessary libraries
library(Seurat)



# Define control clusters
control_clusters <- c("3", "10")

# Define all clusters except control
malignant_clusters <- setdiff(as.character(unique(Idents(All_samples_Merged))), control_clusters)


# **Set identities to cluster**
Idents(All_samples_Merged) <- "seurat_clusters"

# List to store DE results
de_results <- list()

for (cluster in malignant_clusters) {
  de_results[[cluster]] <- FindMarkers(
    object = All_samples_Merged,
    ident.1 = cluster,
    ident.2 = control_clusters,
    assay = "RNA",
    test.use = "wilcox",
    logfc.threshold = 0,
    min.pct = 0
  )
  write.csv(de_results[[cluster]], paste0("DE_RNA_Wilcox_Cluster_", cluster, "_vs_Control.csv"))
}
Avis : The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
Please use the `layer` argument instead.Avis : `PackageCheck()` was deprecated in SeuratObject 5.0.0.
Please use `rlang::check_installed()` instead.

5. Compute Mean Expression for Groups

# Load necessary libraries
library(Seurat)
library(dplyr)
library(tibble)

expression_data_RNA <- GetAssayData(All_samples_Merged, assay = "RNA", slot = "data")

calculate_mean_expression <- function(markers, group1_cells, group2_cells, expression_data) {
  group1_mean <- rowMeans(expression_data[, group1_cells, drop = FALSE], na.rm = TRUE)
  group2_mean <- rowMeans(expression_data[, group2_cells, drop = FALSE], na.rm = TRUE)
  markers <- markers %>%
    rownames_to_column("gene") %>%
    mutate(mean_expr_group1 = group1_mean[gene],
           mean_expr_group2 = group2_mean[gene])
  return(markers)
}

for (cluster in malignant_clusters) {
  existing_results <- de_results[[cluster]]
  group1_cells <- WhichCells(All_samples_Merged, idents = cluster)
  group2_cells <- WhichCells(All_samples_Merged, idents = control_clusters)
  updated_results <- calculate_mean_expression(existing_results, group1_cells, group2_cells, expression_data_RNA)
  de_results[[cluster]] <- updated_results
  write.csv(updated_results, paste0("Updated_DE_Cluster_", cluster, "_with_MeanExpr.csv"), row.names = TRUE)
}

6. Summarize Results

6.1 Summary Function

summarize_markers <- function(markers) {
  num_pval0 <- sum(markers$p_val_adj == 0)
  num_pval1 <- sum(markers$p_val_adj == 1)
  num_significant <- sum(markers$p_val_adj < 0.05)
  num_upregulated <- sum(markers$avg_log2FC > 1)
  num_downregulated <- sum(markers$avg_log2FC < -1)
  
  cat("Number of genes with p_val_adj = 0:", num_pval0, "\n")
  cat("Number of genes with p_val_adj = 1:", num_pval1, "\n")
  cat("Number of significant genes (p_val_adj < 0.05):", num_significant, "\n")
  cat("Number of upregulated genes (avg_log2FC > 1):", num_upregulated, "\n")
  cat("Number of downregulated genes (avg_log2FC < -1):", num_downregulated, "\n")
}

Run Summary on Each Cluster

for (cluster in malignant_clusters) {
  cat(paste0("Cluster ", cluster, " Summary:\n"))
  summarize_markers(de_results[[cluster]])
  cat("\n")
}
Cluster 9 Summary:
Number of genes with p_val_adj = 0: 500 
Number of genes with p_val_adj = 1: 26386 
Number of significant genes (p_val_adj < 0.05): 9410 
Number of upregulated genes (avg_log2FC > 1): 17178 
Number of downregulated genes (avg_log2FC < -1): 6455 

Cluster 5 Summary:
Number of genes with p_val_adj = 0: 1013 
Number of genes with p_val_adj = 1: 24035 
Number of significant genes (p_val_adj < 0.05): 11892 
Number of upregulated genes (avg_log2FC > 1): 18370 
Number of downregulated genes (avg_log2FC < -1): 3336 

Cluster 7 Summary:
Number of genes with p_val_adj = 0: 1320 
Number of genes with p_val_adj = 1: 24112 
Number of significant genes (p_val_adj < 0.05): 11704 
Number of upregulated genes (avg_log2FC > 1): 17815 
Number of downregulated genes (avg_log2FC < -1): 6276 

Cluster 11 Summary:
Number of genes with p_val_adj = 0: 269 
Number of genes with p_val_adj = 1: 27004 
Number of significant genes (p_val_adj < 0.05): 8701 
Number of upregulated genes (avg_log2FC > 1): 18670 
Number of downregulated genes (avg_log2FC < -1): 4461 

Cluster 12 Summary:
Number of genes with p_val_adj = 0: 1014 
Number of genes with p_val_adj = 1: 22829 
Number of significant genes (p_val_adj < 0.05): 12978 
Number of upregulated genes (avg_log2FC > 1): 20875 
Number of downregulated genes (avg_log2FC < -1): 4079 

Cluster 1 Summary:
Number of genes with p_val_adj = 0: 3147 
Number of genes with p_val_adj = 1: 22626 
Number of significant genes (p_val_adj < 0.05): 13309 
Number of upregulated genes (avg_log2FC > 1): 7128 
Number of downregulated genes (avg_log2FC < -1): 7269 

Cluster 2 Summary:
Number of genes with p_val_adj = 0: 2072 
Number of genes with p_val_adj = 1: 23430 
Number of significant genes (p_val_adj < 0.05): 12441 
Number of upregulated genes (avg_log2FC > 1): 8316 
Number of downregulated genes (avg_log2FC < -1): 7111 

Cluster 6 Summary:
Number of genes with p_val_adj = 0: 2091 
Number of genes with p_val_adj = 1: 23577 
Number of significant genes (p_val_adj < 0.05): 12296 
Number of upregulated genes (avg_log2FC > 1): 18481 
Number of downregulated genes (avg_log2FC < -1): 6953 

Cluster 0 Summary:
Number of genes with p_val_adj = 0: 2686 
Number of genes with p_val_adj = 1: 22526 
Number of significant genes (p_val_adj < 0.05): 13354 
Number of upregulated genes (avg_log2FC > 1): 7075 
Number of downregulated genes (avg_log2FC < -1): 8555 

Cluster 4 Summary:
Number of genes with p_val_adj = 0: 2533 
Number of genes with p_val_adj = 1: 23208 
Number of significant genes (p_val_adj < 0.05): 12772 
Number of upregulated genes (avg_log2FC > 1): 7574 
Number of downregulated genes (avg_log2FC < -1): 7990 

Cluster 13 Summary:
Number of genes with p_val_adj = 0: 683 
Number of genes with p_val_adj = 1: 24477 
Number of significant genes (p_val_adj < 0.05): 11180 
Number of upregulated genes (avg_log2FC > 1): 21984 
Number of downregulated genes (avg_log2FC < -1): 3909 

Cluster 8 Summary:
Number of genes with p_val_adj = 0: 3198 
Number of genes with p_val_adj = 1: 21600 
Number of significant genes (p_val_adj < 0.05): 14357 
Number of upregulated genes (avg_log2FC > 1): 18556 
Number of downregulated genes (avg_log2FC < -1): 6225 

7. Visualization

7.1 Volcano Plot for Cell lines with Normal Control


library(EnhancedVolcano)
library(ggplot2)

for (cluster in malignant_clusters) {
  df <- de_results[[cluster]]

  # Check if gene names are in a "gene" column
  if ("gene" %in% colnames(df)) {
    labels <- df$gene
    rownames(df) <- df$gene  # optional: set for consistent handling
  } else {
    labels <- rownames(df)
  }

  volcano <- EnhancedVolcano(df,
                             lab = labels,
                             x = "avg_log2FC",
                             y = "p_val_adj",
                             title = paste0("Cluster ", cluster, " vs Control"),
                             pCutoff = 0.05,
                             FCcutoff = 1.0)

  print(volcano)
}
Avis : One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...

LS0tCnRpdGxlOiAiRWFjaCBDbHVzdGVyIHZzIE5vcm1hbCBDb250cm9sLURFLU5ld1VNQVAiCmF1dGhvcjogIk5hc2lyIE1haG1vb2QgQWJiYXNpIgpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiCm91dHB1dDoKICBodG1sX25vdGVib29rOgogICAgdG9jOiB5ZXMKICAgIHRvY19mbG9hdDogeWVzCiAgICB0b2NfY29sbGFwc2VkOiB5ZXMKICBwZGZfZG9jdW1lbnQ6CiAgICB0b2M6IHllcwotLS0KCiMgMS4gTG9hZCBMaWJyYXJpZXMKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmxpYnJhcnkoU2V1cmF0KQpsaWJyYXJ5KGhhcm1vbnkpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoRW5oYW5jZWRWb2xjYW5vKQpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkocGhlYXRtYXApCmxpYnJhcnkodGliYmxlKQpgYGAKCgojIDIuIGxvYWQgc2V1cmF0IG9iamVjdApgYGB7ciBsb2FkX3NldXJhdH0KI0xvYWQgU2V1cmF0IE9iamVjdApsb2FkKCIuLi8uLi9Ub19UcmFuc2Zlcl9iZXR3ZWVuX2NvbXB1dGVycy8yMy1IYXJtb255X0ludGVncmF0aW9uLzAtcm9iai81LUhhcm1vbnlfSW50ZWdyYXRlZF9BbGxfc2FtcGxlc19NZXJnZWRfQ0Q0VGNlbGxzX2ZpbmFsX1Jlc29sdXRpb25fU2VsZWN0ZWRfMC44X0FEVF9Ob3JtYWxpemVkX2NsZWFuZWRfbXQucm9iaiIpCgoKQWxsX3NhbXBsZXNfTWVyZ2VkCmBgYAoKIyAzLiBTZXQgVXAgSWRlbnRpZmllcnMgZm9yIENsdXN0ZXJpbmcKYGBge3J9CiMgQXNzaWduIGNsdXN0ZXIgaWRlbnRpdGllcyB0byB0aGUgU2V1cmF0IG9iamVjdApJZGVudHMoQWxsX3NhbXBsZXNfTWVyZ2VkKSA8LSAic2V1cmF0X2NsdXN0ZXJzIgoKRGltUGxvdChBbGxfc2FtcGxlc19NZXJnZWQsIHJlZHVjdGlvbiA9ICJ1bWFwIiwgZ3JvdXAuYnkgPSAiY2VsbF9saW5lIixsYWJlbCA9IFQsIGxhYmVsLmJveCA9IFQpCkRpbVBsb3QoQWxsX3NhbXBsZXNfTWVyZ2VkLCByZWR1Y3Rpb24gPSAidW1hcCIsIGdyb3VwLmJ5ID0gInNldXJhdF9jbHVzdGVycyIsbGFiZWwgPSBULCBsYWJlbC5ib3ggPSBUKQoKYGBgCgojIDQuICBEaWZmZXJlbnRpYWwgRXhwcmVzc2lvbiBBbmFseXNpcwoKIyMgNC4xIFVzaW5nIFdpbGNveCB3aXRoIFNDVApgYGB7cn0KIyBMb2FkIG5lY2Vzc2FyeSBsaWJyYXJpZXMKbGlicmFyeShTZXVyYXQpCgpEZWZhdWx0QXNzYXkoc2V1cmF0X29iaikgPC0gIlJOQSIKCkFsbF9zYW1wbGVzX01lcmdlZCA8LSBOb3JtYWxpemVEYXRhKAogIEFsbF9zYW1wbGVzX01lcmdlZCwgCiAgYXNzYXkgPSAiUk5BIiwgCiAgbm9ybWFsaXphdGlvbi5tZXRob2QgPSAiTG9nTm9ybWFsaXplIiwgCiAgc2NhbGUuZmFjdG9yID0gMTAwMDAKKQoKCgoKCiMgRGVmaW5lIGNvbnRyb2wgY2x1c3RlcnMKY29udHJvbF9jbHVzdGVycyA8LSBjKCIzIiwgIjEwIikKCiMgRGVmaW5lIGFsbCBjbHVzdGVycyBleGNlcHQgY29udHJvbAptYWxpZ25hbnRfY2x1c3RlcnMgPC0gc2V0ZGlmZihhcy5jaGFyYWN0ZXIodW5pcXVlKElkZW50cyhBbGxfc2FtcGxlc19NZXJnZWQpKSksIGNvbnRyb2xfY2x1c3RlcnMpCgoKIyAqKlNldCBpZGVudGl0aWVzIHRvIGNsdXN0ZXIqKgpJZGVudHMoQWxsX3NhbXBsZXNfTWVyZ2VkKSA8LSAic2V1cmF0X2NsdXN0ZXJzIgoKIyBMaXN0IHRvIHN0b3JlIERFIHJlc3VsdHMKZGVfcmVzdWx0cyA8LSBsaXN0KCkKCmZvciAoY2x1c3RlciBpbiBtYWxpZ25hbnRfY2x1c3RlcnMpIHsKICBkZV9yZXN1bHRzW1tjbHVzdGVyXV0gPC0gRmluZE1hcmtlcnMoCiAgICBvYmplY3QgPSBBbGxfc2FtcGxlc19NZXJnZWQsCiAgICBpZGVudC4xID0gY2x1c3RlciwKICAgIGlkZW50LjIgPSBjb250cm9sX2NsdXN0ZXJzLAogICAgYXNzYXkgPSAiUk5BIiwKICAgIHRlc3QudXNlID0gIndpbGNveCIsCiAgICBsb2dmYy50aHJlc2hvbGQgPSAwLAogICAgbWluLnBjdCA9IDAKICApCiAgd3JpdGUuY3N2KGRlX3Jlc3VsdHNbW2NsdXN0ZXJdXSwgcGFzdGUwKCJERV9STkFfV2lsY294X0NsdXN0ZXJfIiwgY2x1c3RlciwgIl92c19Db250cm9sLmNzdiIpKQp9CgpgYGAKCiMgNS4gQ29tcHV0ZSBNZWFuIEV4cHJlc3Npb24gZm9yIEdyb3VwcwpgYGB7cn0KIyBMb2FkIG5lY2Vzc2FyeSBsaWJyYXJpZXMKbGlicmFyeShTZXVyYXQpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkodGliYmxlKQoKZXhwcmVzc2lvbl9kYXRhX1JOQSA8LSBHZXRBc3NheURhdGEoQWxsX3NhbXBsZXNfTWVyZ2VkLCBhc3NheSA9ICJSTkEiLCBzbG90ID0gImRhdGEiKQoKY2FsY3VsYXRlX21lYW5fZXhwcmVzc2lvbiA8LSBmdW5jdGlvbihtYXJrZXJzLCBncm91cDFfY2VsbHMsIGdyb3VwMl9jZWxscywgZXhwcmVzc2lvbl9kYXRhKSB7CiAgZ3JvdXAxX21lYW4gPC0gcm93TWVhbnMoZXhwcmVzc2lvbl9kYXRhWywgZ3JvdXAxX2NlbGxzLCBkcm9wID0gRkFMU0VdLCBuYS5ybSA9IFRSVUUpCiAgZ3JvdXAyX21lYW4gPC0gcm93TWVhbnMoZXhwcmVzc2lvbl9kYXRhWywgZ3JvdXAyX2NlbGxzLCBkcm9wID0gRkFMU0VdLCBuYS5ybSA9IFRSVUUpCiAgbWFya2VycyA8LSBtYXJrZXJzICU+JQogICAgcm93bmFtZXNfdG9fY29sdW1uKCJnZW5lIikgJT4lCiAgICBtdXRhdGUobWVhbl9leHByX2dyb3VwMSA9IGdyb3VwMV9tZWFuW2dlbmVdLAogICAgICAgICAgIG1lYW5fZXhwcl9ncm91cDIgPSBncm91cDJfbWVhbltnZW5lXSkKICByZXR1cm4obWFya2VycykKfQoKZm9yIChjbHVzdGVyIGluIG1hbGlnbmFudF9jbHVzdGVycykgewogIGV4aXN0aW5nX3Jlc3VsdHMgPC0gZGVfcmVzdWx0c1tbY2x1c3Rlcl1dCiAgZ3JvdXAxX2NlbGxzIDwtIFdoaWNoQ2VsbHMoQWxsX3NhbXBsZXNfTWVyZ2VkLCBpZGVudHMgPSBjbHVzdGVyKQogIGdyb3VwMl9jZWxscyA8LSBXaGljaENlbGxzKEFsbF9zYW1wbGVzX01lcmdlZCwgaWRlbnRzID0gY29udHJvbF9jbHVzdGVycykKICB1cGRhdGVkX3Jlc3VsdHMgPC0gY2FsY3VsYXRlX21lYW5fZXhwcmVzc2lvbihleGlzdGluZ19yZXN1bHRzLCBncm91cDFfY2VsbHMsIGdyb3VwMl9jZWxscywgZXhwcmVzc2lvbl9kYXRhX1JOQSkKICBkZV9yZXN1bHRzW1tjbHVzdGVyXV0gPC0gdXBkYXRlZF9yZXN1bHRzCiAgd3JpdGUuY3N2KHVwZGF0ZWRfcmVzdWx0cywgcGFzdGUwKCJVcGRhdGVkX0RFX0NsdXN0ZXJfIiwgY2x1c3RlciwgIl93aXRoX01lYW5FeHByLmNzdiIpLCByb3cubmFtZXMgPSBUUlVFKQp9CgpgYGAKCiMgNi4gU3VtbWFyaXplIFJlc3VsdHMKCiMjIDYuMSBTdW1tYXJ5IEZ1bmN0aW9uCmBgYHtyfQpzdW1tYXJpemVfbWFya2VycyA8LSBmdW5jdGlvbihtYXJrZXJzKSB7CiAgbnVtX3B2YWwwIDwtIHN1bShtYXJrZXJzJHBfdmFsX2FkaiA9PSAwKQogIG51bV9wdmFsMSA8LSBzdW0obWFya2VycyRwX3ZhbF9hZGogPT0gMSkKICBudW1fc2lnbmlmaWNhbnQgPC0gc3VtKG1hcmtlcnMkcF92YWxfYWRqIDwgMC4wNSkKICBudW1fdXByZWd1bGF0ZWQgPC0gc3VtKG1hcmtlcnMkYXZnX2xvZzJGQyA+IDEpCiAgbnVtX2Rvd25yZWd1bGF0ZWQgPC0gc3VtKG1hcmtlcnMkYXZnX2xvZzJGQyA8IC0xKQogIAogIGNhdCgiTnVtYmVyIG9mIGdlbmVzIHdpdGggcF92YWxfYWRqID0gMDoiLCBudW1fcHZhbDAsICJcbiIpCiAgY2F0KCJOdW1iZXIgb2YgZ2VuZXMgd2l0aCBwX3ZhbF9hZGogPSAxOiIsIG51bV9wdmFsMSwgIlxuIikKICBjYXQoIk51bWJlciBvZiBzaWduaWZpY2FudCBnZW5lcyAocF92YWxfYWRqIDwgMC4wNSk6IiwgbnVtX3NpZ25pZmljYW50LCAiXG4iKQogIGNhdCgiTnVtYmVyIG9mIHVwcmVndWxhdGVkIGdlbmVzIChhdmdfbG9nMkZDID4gMSk6IiwgbnVtX3VwcmVndWxhdGVkLCAiXG4iKQogIGNhdCgiTnVtYmVyIG9mIGRvd25yZWd1bGF0ZWQgZ2VuZXMgKGF2Z19sb2cyRkMgPCAtMSk6IiwgbnVtX2Rvd25yZWd1bGF0ZWQsICJcbiIpCn0KCmBgYAoKIyMgIFJ1biBTdW1tYXJ5IG9uIEVhY2ggQ2x1c3RlcgpgYGB7cn0KZm9yIChjbHVzdGVyIGluIG1hbGlnbmFudF9jbHVzdGVycykgewogIGNhdChwYXN0ZTAoIkNsdXN0ZXIgIiwgY2x1c3RlciwgIiBTdW1tYXJ5OlxuIikpCiAgc3VtbWFyaXplX21hcmtlcnMoZGVfcmVzdWx0c1tbY2x1c3Rlcl1dKQogIGNhdCgiXG4iKQp9CgpgYGAKCgojIDcuIFZpc3VhbGl6YXRpb24KCiMjIDcuMSBWb2xjYW5vIFBsb3QgZm9yIENlbGwgbGluZXMgd2l0aCBOb3JtYWwgQ29udHJvbApgYGB7ciwgZmlnLmhlaWdodD0xMiwgZmlnLndpZHRoPTEyfQoKbGlicmFyeShFbmhhbmNlZFZvbGNhbm8pCmxpYnJhcnkoZ2dwbG90MikKCmZvciAoY2x1c3RlciBpbiBtYWxpZ25hbnRfY2x1c3RlcnMpIHsKICBkZiA8LSBkZV9yZXN1bHRzW1tjbHVzdGVyXV0KCiAgIyBDaGVjayBpZiBnZW5lIG5hbWVzIGFyZSBpbiBhICJnZW5lIiBjb2x1bW4KICBpZiAoImdlbmUiICVpbiUgY29sbmFtZXMoZGYpKSB7CiAgICBsYWJlbHMgPC0gZGYkZ2VuZQogICAgcm93bmFtZXMoZGYpIDwtIGRmJGdlbmUgICMgb3B0aW9uYWw6IHNldCBmb3IgY29uc2lzdGVudCBoYW5kbGluZwogIH0gZWxzZSB7CiAgICBsYWJlbHMgPC0gcm93bmFtZXMoZGYpCiAgfQoKICB2b2xjYW5vIDwtIEVuaGFuY2VkVm9sY2FubyhkZiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICBsYWIgPSBsYWJlbHMsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgeCA9ICJhdmdfbG9nMkZDIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICB5ID0gInBfdmFsX2FkaiIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdGl0bGUgPSBwYXN0ZTAoIkNsdXN0ZXIgIiwgY2x1c3RlciwgIiB2cyBDb250cm9sIiksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcEN1dG9mZiA9IDAuMDUsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgRkNjdXRvZmYgPSAxLjApCgogIHByaW50KHZvbGNhbm8pCn0KCgpgYGAKCg==