1. Load libraries
3. Summarize Markers
# Print summary counts for a differential-expression marker table.
#
# Args:
#   markers: data frame with numeric columns `p_val_adj` and `avg_log2FC`.
#
# Returns:
#   NULL, invisibly; the function is called for its printed output.
summarize_markers <- function(markers) {
  # na.rm = TRUE keeps one missing value from turning a whole count into NA.
  num_pval0 <- sum(markers$p_val_adj == 0, na.rm = TRUE)
  num_pval1 <- sum(markers$p_val_adj == 1, na.rm = TRUE)
  num_significant <- sum(markers$p_val_adj < 0.05, na.rm = TRUE)
  num_upregulated <- sum(markers$avg_log2FC > 1, na.rm = TRUE)
  num_downregulated <- sum(markers$avg_log2FC < -1, na.rm = TRUE)

  cat("Number of genes with p_val_adj = 0:", num_pval0, "\n")
  cat("Number of genes with p_val_adj = 1:", num_pval1, "\n")
  cat(
    "Number of significant genes (p_val_adj < 0.05):",
    num_significant, "\n"
  )
  cat(
    "Number of upregulated genes (avg_log2FC > 1):",
    num_upregulated, "\n"
  )
  # The label states the threshold that is actually applied (-1).
  cat(
    "Number of downregulated genes (avg_log2FC < -1):",
    num_downregulated, "\n"
  )
  invisible(NULL)
}
# Print a heading, then the counts for the unfiltered `markers` table.
cat("Markers1 Summary (MAST with Batch Correction):\n")
summarize_markers(markers = markers)
4. EnhancedVolcano plot
library(dplyr)
library(EnhancedVolcano)

# Expects `markers` to provide the columns used below: `p_val_adj`,
# `avg_log2FC` and `gene`.

# Cutoffs shared by the threshold lines and the gene labels, defined once so
# they cannot drift apart.
p_cutoff <- 0.0000905
fc_cutoff <- 1.0

# Sort only (no rows are dropped): smallest adjusted p-value first, ties
# broken by the largest absolute log2 fold change.
filtered_genes <- markers %>%
  arrange(p_val_adj, desc(abs(avg_log2FC)))

# TRUE for genes that pass both cutoffs; these are the only genes labelled.
passes_cutoffs <- filtered_genes$p_val_adj <= p_cutoff &
  abs(filtered_genes$avg_log2FC) >= fc_cutoff

# Volcano plot of all genes. `lab` is NA outside the cutoffs, and `selectLab`
# is built from the same mask so the two arguments agree.
EnhancedVolcano(
  filtered_genes,
  lab = ifelse(passes_cutoffs, filtered_genes$gene, NA),
  x = "avg_log2FC",
  y = "p_val_adj",
  title = "Malignant CD4 T cells(cell lines) vs normal CD4 T cells",
  pCutoff = p_cutoff,
  FCcutoff = fc_cutoff,
  legendPosition = "right",
  labCol = "black",
  labFace = "bold",
  boxedLabels = FALSE, # plain text labels, no boxes
  pointSize = 3.0,
  labSize = 5.0,
  col = c("grey70", "black", "blue", "red"), # custom point colours
  # which() drops NA comparisons instead of yielding NA entries.
  selectLab = filtered_genes$gene[which(passes_cutoffs)]
)
Warning: One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...

5. Filter and Summarize Results
# Expression filter: keep a gene when its mean expression reaches 0.2 in at
# least one of the two groups (i.e. drop genes below 0.2 in both).
markers_filtered <- markers %>%
  filter(mean_expr_group1 >= 0.2 | mean_expr_group2 >= 0.2)

# Save the filtered table to CSV without row names.
write.csv(
  markers_filtered,
  file = "3-MAST_with_SCT_batch_patient_cellline_as_Covariate_with_Filtered_on_meanExpression.csv",
  row.names = FALSE
)
5.2 Summary Function
# Print summary counts for a differential-expression marker table.
#
# Args:
#   markers: data frame with numeric columns `p_val_adj` and `avg_log2FC`.
#
# Returns:
#   NULL, invisibly (the value of the final cat() call).
summarize_markers <- function(markers) {
  # Count TRUE entries, skipping NA so a missing value cannot turn the whole
  # count into NA.
  count_true <- function(flags) sum(flags, na.rm = TRUE)

  adj_p <- markers$p_val_adj
  log2_fc <- markers$avg_log2FC

  cat("Number of genes with p_val_adj = 0:", count_true(adj_p == 0), "\n")
  cat("Number of genes with p_val_adj = 1:", count_true(adj_p == 1), "\n")
  cat(
    "Number of significant genes (p_val_adj < 0.05):",
    count_true(adj_p < 0.05), "\n"
  )
  cat(
    "Number of upregulated genes (avg_log2FC > 1):",
    count_true(log2_fc > 1), "\n"
  )
  cat(
    "Number of downregulated genes (avg_log2FC < -1):",
    count_true(log2_fc < -1), "\n"
  )
}
# Print a heading, then the counts for the expression-filtered table.
# NOTE(review): the line directly after cat() looks like echoed console
# output from the rendered notebook rather than code -- confirm.
cat("Markers1 Summary (markers_mast_SCT):\n")
Markers1 Summary (markers_mast_SCT):
summarize_markers(markers_filtered)
Number of genes with p_val_adj = 0: 74
Number of genes with p_val_adj = 1: 2209
Number of significant genes (p_val_adj < 0.05): 3042
Number of upregulated genes (avg_log2FC > 1): 4514
Number of downregulated genes (avg_log2FC < -1): 125
5.3 EnhancedVolcano plot
library(dplyr)
library(EnhancedVolcano)

# Expects `markers_filtered` to provide the columns used below: `p_val_adj`,
# `avg_log2FC` and `gene`.

# Cutoffs shared by the threshold lines and the gene labels, defined once so
# they cannot drift apart.
p_cutoff <- 0.0000905
fc_cutoff <- 1.0

# Sort only (no rows are dropped): smallest adjusted p-value first, ties
# broken by the largest absolute log2 fold change.
filtered_genes <- markers_filtered %>%
  arrange(p_val_adj, desc(abs(avg_log2FC)))

# TRUE for genes that pass both cutoffs; these are the only genes labelled.
passes_cutoffs <- filtered_genes$p_val_adj <= p_cutoff &
  abs(filtered_genes$avg_log2FC) >= fc_cutoff

# Volcano plot of the expression-filtered genes. `lab` is NA outside the
# cutoffs, and `selectLab` is built from the same mask so the two agree.
EnhancedVolcano(
  filtered_genes,
  lab = ifelse(passes_cutoffs, filtered_genes$gene, NA),
  x = "avg_log2FC",
  y = "p_val_adj",
  title = "Malignant CD4 T cells(cell lines) vs normal CD4 T cells",
  pCutoff = p_cutoff,
  FCcutoff = fc_cutoff,
  legendPosition = "right",
  labCol = "black",
  labFace = "bold",
  boxedLabels = FALSE, # plain text labels, no boxes
  pointSize = 3.0,
  labSize = 5.0,
  col = c("grey70", "black", "blue", "red"), # custom point colours
  # which() drops NA comparisons instead of yielding NA entries.
  selectLab = filtered_genes$gene[which(passes_cutoffs)]
)
Warning: One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...

LS0tCnRpdGxlOiAiRGlmZmVyZW50aWFsIEV4cHJlc3Npb24gQW5hbHlzaXMgb2YgTWFsaWduYW50IENENFRjZWxscyB2cyBDb250cm9sKE5vcm1hbCBDRDQgVGNlbGxzKS1HU0VBLWFmdGVyX2ZpbHRlcmluZ19vbl9NZWFuRXhwcmVzc2lvbiIKYXV0aG9yOiBOYXNpciBNYWhtb29kIEFiYmFzaQpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiCm91dHB1dDoKICAjcm1kZm9ybWF0czo6cmVhZHRoZWRvd24KICBodG1sX25vdGVib29rOgogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIHRvY19jb2xsYXBzZWQ6IHRydWUKLS0tCgojIDEuIGxvYWQgbGlicmFyaWVzCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQpzdXBwcmVzc1BhY2thZ2VTdGFydHVwTWVzc2FnZXMoewpsaWJyYXJ5KFNldXJhdCkKbGlicmFyeShTZXVyYXRPYmplY3QpCmxpYnJhcnkoU2V1cmF0RGF0YSkKbGlicmFyeShwYXRjaHdvcmspCmxpYnJhcnkoaGFybW9ueSkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGNvd3Bsb3QpCmxpYnJhcnkocmV0aWN1bGF0ZSkKbGlicmFyeShBemltdXRoKQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KFJ0c25lKQpsaWJyYXJ5KGhhcm1vbnkpCmxpYnJhcnkoZ3JpZEV4dHJhKQpsaWJyYXJ5KEVuaGFuY2VkVm9sY2FubykKICAKfSkKYGBgCgojIDIuIFBlcmZvcm0gREUgYW5hbHlzaXMgdXNpbmcgTWFsaWduYW50X0NENFRjZWxsc192c19Ob3JtYWxfQ0Q0VGNlbGxzIGdlbmVzCmBgYHtyICwgZmlnLmhlaWdodD04LCBmaWcud2lkdGg9MTJ9CgptYXJrZXJzIDwtIHJlYWQuY3N2KCIuLi8xLU1BU1Rfd2l0aF9TQ1RfYmF0Y2hfcGF0aWVudF9jZWxsbGluZV9hc19Db3ZhcmlhdGVfd2l0aF9tZWFuRXhwcmVzc2lvbi5jc3YiLCBoZWFkZXIgPSBUKQoKCgpgYGAKCiMgMy4gU3VtbWFyaXplIE1hcmtlcnMKYGBge3IgLCBmaWcuaGVpZ2h0PTEyLCBmaWcud2lkdGg9MTR9CgpzdW1tYXJpemVfbWFya2VycyA8LSBmdW5jdGlvbihtYXJrZXJzKSB7CiAgbnVtX3B2YWwwIDwtIHN1bShtYXJrZXJzJHBfdmFsX2FkaiA9PSAwKQogIG51bV9wdmFsMSA8LSBzdW0obWFya2VycyRwX3ZhbF9hZGogPT0gMSkKICBudW1fc2lnbmlmaWNhbnQgPC0gc3VtKG1hcmtlcnMkcF92YWxfYWRqIDwgMC4wNSkKICBudW1fdXByZWd1bGF0ZWQgPC0gc3VtKG1hcmtlcnMkYXZnX2xvZzJGQyA+IDEpCiAgbnVtX2Rvd25yZWd1bGF0ZWQgPC0gc3VtKG1hcmtlcnMkYXZnX2xvZzJGQyA8IC0xKQogIAogIGNhdCgiTnVtYmVyIG9mIGdlbmVzIHdpdGggcF92YWxfYWRqID0gMDoiLCBudW1fcHZhbDAsICJcbiIpCiAgY2F0KCJOdW1iZXIgb2YgZ2VuZXMgd2l0aCBwX3ZhbF9hZGogPSAxOiIsIG51bV9wdmFsMSwgIlxuIikKICBjYXQoIk51bWJlciBvZiBzaWduaWZpY2FudCBnZW5lcyAocF92YWxfYWRqIDwgMC4wNSk6IiwgbnVtX3NpZ25pZmljYW50LCAiXG4iKQogIGNhdCgiTnVtYmVyIG9mIHVwcmVndWxhdGVkIGdlbmVzIChhdmdfbG9nMkZDID4gMSk6IiwgbnVtX3VwcmVn
dWxhdGVkLCAiXG4iKQogIGNhdCgiTnVtYmVyIG9mIGRvd25yZWd1bGF0ZWQgZ2VuZXMgKGF2Z19sb2cyRkMgPCAxKToiLCBudW1fZG93bnJlZ3VsYXRlZCwgIlxuIikKfQoKY2F0KCJNYXJrZXJzMSBTdW1tYXJ5IChNQVNUIHdpdGggQmF0Y2ggQ29ycmVjdGlvbik6XG4iKQoKc3VtbWFyaXplX21hcmtlcnMobWFya2VycykKYGBgCiMgNC4gRW5oYW5jZWRWb2xjYW5vIHBsb3QKYGBge3IgLCBmaWcuaGVpZ2h0PTgsIGZpZy53aWR0aD0xMn0KCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoRW5oYW5jZWRWb2xjYW5vKQoKIyBBc3N1bWluZyB5b3UgaGF2ZSBhIGRhdGEgZnJhbWUgbmFtZWQgTWFsaWduYW50X0NENFRjZWxsc192c19Ob3JtYWxfQ0Q0VGNlbGxzCiMgRmlsdGVyIGdlbmVzIGJhc2VkIG9uIGxvd2VzdCBwLXZhbHVlcyBidXQgaW5jbHVkZSBhbGwgZ2VuZXMKZmlsdGVyZWRfZ2VuZXMgPC0gbWFya2VycyAlPiUKICBhcnJhbmdlKHBfdmFsX2FkaiwgZGVzYyhhYnMoYXZnX2xvZzJGQykpKQoKIyBDcmVhdGUgdGhlIEVuaGFuY2VkVm9sY2FubyBwbG90IHdpdGggdGhlIGZpbHRlcmVkIGRhdGEKRW5oYW5jZWRWb2xjYW5vKAogIGZpbHRlcmVkX2dlbmVzLCAKICBsYWIgPSBpZmVsc2UoZmlsdGVyZWRfZ2VuZXMkcF92YWxfYWRqIDw9IDAuMDAwMDkwNSAmIGFicyhmaWx0ZXJlZF9nZW5lcyRhdmdfbG9nMkZDKSA+PSAxLjAsIGZpbHRlcmVkX2dlbmVzJGdlbmUsIE5BKSwKICB4ID0gImF2Z19sb2cyRkMiLCAKICB5ID0gInBfdmFsX2FkaiIsCiAgdGl0bGUgPSAiTWFsaWduYW50IENENCBUIGNlbGxzKGNlbGwgbGluZXMpIHZzIG5vcm1hbCBDRDQgVCBjZWxscyIsCiAgcEN1dG9mZiA9IDAuMDAwMDkwNSwKICBGQ2N1dG9mZiA9IDEuMCwKICBsZWdlbmRQb3NpdGlvbiA9ICdyaWdodCcsIAogIGxhYkNvbCA9ICdibGFjaycsCiAgbGFiRmFjZSA9ICdib2xkJywKICBib3hlZExhYmVscyA9IEZBTFNFLCAgIyBTZXQgdG8gRkFMU0UgdG8gcmVtb3ZlIGJveGVkIGxhYmVscwogIHBvaW50U2l6ZSA9IDMuMCwKICBsYWJTaXplID0gNS4wLAogIGNvbCA9IGMoJ2dyZXk3MCcsICdibGFjaycsICdibHVlJywgJ3JlZCcpLCAgIyBDdXN0b21pemUgcG9pbnQgY29sb3JzCiAgc2VsZWN0TGFiID0gZmlsdGVyZWRfZ2VuZXMkZ2VuZVtmaWx0ZXJlZF9nZW5lcyRwX3ZhbF9hZGogPD0gMC4wNSAmIGFicyhmaWx0ZXJlZF9nZW5lcyRhdmdfbG9nMkZDKSA+PSAxLjBdICAjIE9ubHkgbGFiZWwgc2lnbmlmaWNhbnQgZ2VuZXMKKQoKYGBgCiMgNS4gRmlsdGVyIGFuZCBTdW1tYXJpemUgUmVzdWx0cwpgYGB7ciAsIGZpZy5oZWlnaHQ9OCwgZmlnLndpZHRoPTEyfQoKIyBBcHBseSB0aGUgZXhwcmVzc2lvbiBmaWx0ZXIgZmlyc3QKbWFya2Vyc19maWx0ZXJlZCA8LSBtYXJrZXJzICU+JQogIGZpbHRlcighKG1lYW5fZXhwcl9ncm91cDEgPCAwLjIgJiBtZWFuX2V4cHJfZ3JvdXAyIDwgMC4yKSkKCgp3cml0ZS5jc3YobWFya2Vyc19maWx0ZXJlZCwgIjMtTUFTVF93aXRoX1ND
VF9iYXRjaF9wYXRpZW50X2NlbGxsaW5lX2FzX0NvdmFyaWF0ZV93aXRoX0ZpbHRlcmVkX29uX21lYW5FeHByZXNzaW9uLmNzdiIsIHJvdy5uYW1lcyA9IEZBTFNFKQpgYGAKCiMgNS4yIFN1bW1hcnkgRnVuY3Rpb24KYGBge3IgLCBmaWcuaGVpZ2h0PTgsIGZpZy53aWR0aD0xMn0KCnN1bW1hcml6ZV9tYXJrZXJzIDwtIGZ1bmN0aW9uKG1hcmtlcnMpIHsKICBudW1fcHZhbDAgPC0gc3VtKG1hcmtlcnMkcF92YWxfYWRqID09IDApCiAgbnVtX3B2YWwxIDwtIHN1bShtYXJrZXJzJHBfdmFsX2FkaiA9PSAxKQogIG51bV9zaWduaWZpY2FudCA8LSBzdW0obWFya2VycyRwX3ZhbF9hZGogPCAwLjA1KQogIG51bV91cHJlZ3VsYXRlZCA8LSBzdW0obWFya2VycyRhdmdfbG9nMkZDID4gMSkKICBudW1fZG93bnJlZ3VsYXRlZCA8LSBzdW0obWFya2VycyRhdmdfbG9nMkZDIDwgLTEpCiAgCiAgY2F0KCJOdW1iZXIgb2YgZ2VuZXMgd2l0aCBwX3ZhbF9hZGogPSAwOiIsIG51bV9wdmFsMCwgIlxuIikKICBjYXQoIk51bWJlciBvZiBnZW5lcyB3aXRoIHBfdmFsX2FkaiA9IDE6IiwgbnVtX3B2YWwxLCAiXG4iKQogIGNhdCgiTnVtYmVyIG9mIHNpZ25pZmljYW50IGdlbmVzIChwX3ZhbF9hZGogPCAwLjA1KToiLCBudW1fc2lnbmlmaWNhbnQsICJcbiIpCiAgY2F0KCJOdW1iZXIgb2YgdXByZWd1bGF0ZWQgZ2VuZXMgKGF2Z19sb2cyRkMgPiAxKToiLCBudW1fdXByZWd1bGF0ZWQsICJcbiIpCiAgY2F0KCJOdW1iZXIgb2YgZG93bnJlZ3VsYXRlZCBnZW5lcyAoYXZnX2xvZzJGQyA8IC0xKToiLCBudW1fZG93bnJlZ3VsYXRlZCwgIlxuIikKfQoKY2F0KCJNYXJrZXJzMSBTdW1tYXJ5IChtYXJrZXJzX21hc3RfU0NUKTpcbiIpCgpzdW1tYXJpemVfbWFya2VycyhtYXJrZXJzX2ZpbHRlcmVkKQoKYGBgCgojIDUuMyBFbmhhbmNlZFZvbGNhbm8gcGxvdApgYGB7ciAsIGZpZy5oZWlnaHQ9OCwgZmlnLndpZHRoPTEyfQoKbGlicmFyeShkcGx5cikKbGlicmFyeShFbmhhbmNlZFZvbGNhbm8pCgojIEFzc3VtaW5nIHlvdSBoYXZlIGEgZGF0YSBmcmFtZSBuYW1lZCBNYWxpZ25hbnRfQ0Q0VGNlbGxzX3ZzX05vcm1hbF9DRDRUY2VsbHMKIyBGaWx0ZXIgZ2VuZXMgYmFzZWQgb24gbG93ZXN0IHAtdmFsdWVzIGJ1dCBpbmNsdWRlIGFsbCBnZW5lcwpmaWx0ZXJlZF9nZW5lcyA8LSBtYXJrZXJzX2ZpbHRlcmVkICU+JQogIGFycmFuZ2UocF92YWxfYWRqLCBkZXNjKGFicyhhdmdfbG9nMkZDKSkpCgojIENyZWF0ZSB0aGUgRW5oYW5jZWRWb2xjYW5vIHBsb3Qgd2l0aCB0aGUgZmlsdGVyZWQgZGF0YQpFbmhhbmNlZFZvbGNhbm8oCiAgZmlsdGVyZWRfZ2VuZXMsIAogIGxhYiA9IGlmZWxzZShmaWx0ZXJlZF9nZW5lcyRwX3ZhbF9hZGogPD0gMC4wMDAwOTA1ICYgYWJzKGZpbHRlcmVkX2dlbmVzJGF2Z19sb2cyRkMpID49IDEuMCwgZmlsdGVyZWRfZ2VuZXMkZ2VuZSwgTkEpLAogIHggPSAiYXZnX2xvZzJGQyIsIAogIHkgPSAicF92YWxfYWRqIiwKICB0aXRsZSA9ICJNYWxpZ25h
bnQgQ0Q0IFQgY2VsbHMoY2VsbCBsaW5lcykgdnMgbm9ybWFsIENENCBUIGNlbGxzIiwKICBwQ3V0b2ZmID0gMC4wMDAwOTA1LAogIEZDY3V0b2ZmID0gMS4wLAogIGxlZ2VuZFBvc2l0aW9uID0gJ3JpZ2h0JywgCiAgbGFiQ29sID0gJ2JsYWNrJywKICBsYWJGYWNlID0gJ2JvbGQnLAogIGJveGVkTGFiZWxzID0gRkFMU0UsICAjIFNldCB0byBGQUxTRSB0byByZW1vdmUgYm94ZWQgbGFiZWxzCiAgcG9pbnRTaXplID0gMy4wLAogIGxhYlNpemUgPSA1LjAsCiAgY29sID0gYygnZ3JleTcwJywgJ2JsYWNrJywgJ2JsdWUnLCAncmVkJyksICAjIEN1c3RvbWl6ZSBwb2ludCBjb2xvcnMKICBzZWxlY3RMYWIgPSBmaWx0ZXJlZF9nZW5lcyRnZW5lW2ZpbHRlcmVkX2dlbmVzJHBfdmFsX2FkaiA8PSAwLjA1ICYgYWJzKGZpbHRlcmVkX2dlbmVzJGF2Z19sb2cyRkMpID49IDEuMF0gICMgT25seSBsYWJlbCBzaWduaWZpY2FudCBnZW5lcwopCgoKCmBgYAoK