1. Load libraries
3. Create the EnhancedVolcano plot
# Baseline volcano plot of the malignant-vs-normal CD4 T cell comparison:
# every gene is eligible for a label, adjusted p-value cutoff 0.05,
# |log2 fold change| cutoff 1.
EnhancedVolcano(
  toptable = Malignant_CD4Tcells_vs_Normal_CD4Tcells,
  lab = Malignant_CD4Tcells_vs_Normal_CD4Tcells$gene,
  x = "avg_log2FC",
  y = "p_val_adj",
  FCcutoff = 1.0,
  pCutoff = 0.05,
  title = "MAST with Batch Correction (All Genes)"
)
Avis : One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...

# Volcano plot that labels only a hand-picked panel of genes (boxed labels
# joined to their points by connector lines).
EnhancedVolcano(
  toptable = Malignant_CD4Tcells_vs_Normal_CD4Tcells,
  lab = Malignant_CD4Tcells_vs_Normal_CD4Tcells$gene,
  x = "avg_log2FC",
  y = "p_val_adj",
  # Genes to label; all other points stay unlabelled.
  selectLab = c(
    "EPCAM", "BCAT1", "KIR3DL2", "FOXM1", "TWIST1", "TNFSF9",
    "CD80", "IL1B", "RPS4Y1",
    "IL7R", "TCF7", "MKI67", "CD70",
    "IL2RA", "TRBV6-2", "TRBV10-3", "TRBV4-2", "TRBV9", "TRBV7-9",
    "TRAV12-1", "CD8B", "FCGR3A", "GNLY", "FOXP3", "SELL",
    "GIMAP1", "RIPOR2", "LEF1", "HOXC9", "SP5",
    "CCL17", "ETV4", "THY1", "FOXA2", "ITGAD", "S100P", "TBX4",
    "ID1", "XCL1", "SOX2", "CD27", "CD28", "PLS3", "CD70", "RAB25",
    "TRBV27", "TRBV2"
  ),
  # Titles and axis text
  title = "Malignant CD4 T cells(cell lines) vs normal CD4 T cells",
  xlab = bquote(~Log[2]~ "fold change"),
  # Significance thresholds
  pCutoff = 0.05,
  FCcutoff = 1.5,
  # Points and labels
  pointSize = 3.0,
  colAlpha = 0.5,
  labSize = 5.0,
  boxedLabels = TRUE,
  # Legend
  legendPosition = "right",
  legendLabSize = 10,
  legendIconSize = 4.0,
  # Connectors between labels and points
  drawConnectors = TRUE,
  widthConnectors = 0.5,
  colConnectors = "grey50",
  arrowheads = FALSE,
  max.overlaps = 30
)
Avis : One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...

library(dplyr)
library(EnhancedVolcano)
# Expects a data frame named Malignant_CD4Tcells_vs_Normal_CD4Tcells.
# Sort it by ascending adjusted p-value, breaking ties by descending absolute
# log2 fold change. No rows are removed: every gene is kept.
filtered_genes <- arrange(
  Malignant_CD4Tcells_vs_Normal_CD4Tcells,
  p_val_adj,
  desc(abs(avg_log2FC))
)
# Volcano plot of the sorted table. Only genes with p_val_adj <= 0.05 and
# |avg_log2FC| >= 1 carry a label; every other gene gets an NA label.
EnhancedVolcano(
  toptable = filtered_genes,
  lab = ifelse(
    with(filtered_genes, p_val_adj <= 0.05 & abs(avg_log2FC) >= 1.0),
    filtered_genes$gene,
    NA
  ),
  # Restrict labelling to the significant genes
  selectLab = filtered_genes$gene[
    with(filtered_genes, p_val_adj <= 0.05 & abs(avg_log2FC) >= 1.0)
  ],
  x = "avg_log2FC",
  y = "p_val_adj",
  title = "Malignant CD4 T cells(cell lines) vs normal CD4 T cells",
  pCutoff = 0.05,
  FCcutoff = 1.0,
  # Custom point colours
  col = c("grey70", "black", "blue", "red"),
  pointSize = 3.0,
  # Plain (unboxed) bold black labels
  boxedLabels = FALSE,
  labSize = 5.0,
  labCol = "black",
  labFace = "bold",
  legendPosition = "right"
)
Avis : One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...

# Variant of the significant-gene volcano plot with a subtitle, a custom
# colour scheme at 0.8 point alpha, and major gridlines only.
EnhancedVolcano(
  toptable = filtered_genes,
  # Label text is the gene name for significant genes and NA otherwise
  lab = ifelse(
    with(filtered_genes, p_val_adj <= 0.05 & abs(avg_log2FC) >= 1.0),
    filtered_genes$gene,
    NA
  ),
  selectLab = filtered_genes$gene[
    with(filtered_genes, p_val_adj <= 0.05 & abs(avg_log2FC) >= 1.0)
  ],
  x = "avg_log2FC",
  y = "p_val_adj",
  title = "Malignant CD4 T cells (cell lines) vs Normal CD4 T cells",
  subtitle = "Highlighting differentially expressed genes",
  pCutoff = 0.05,
  FCcutoff = 1.0,
  col = c("grey70", "black", "blue", "red"),
  colAlpha = 0.8,
  gridlines.major = TRUE,
  gridlines.minor = FALSE,
  legendPosition = "right"
)
Avis : One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...

5. Create the Heatmap of fgsea results
library(pheatmap)
# Select the 50 pathways with the smallest adjusted p-values
top_pathways <- fgsea_result %>%
  arrange(padj) %>%
  head(50)
# One-column matrix for the heatmap: pathways as rows, NES as the value
heatmap_data <- matrix(
  top_pathways$NES,
  ncol = 1,
  dimnames = list(top_pathways$pathway, "NES")
)
# Diverging palette with breaks symmetric around zero, so that white marks
# NES = 0, blue marks negative NES and red marks positive NES. Without explicit
# breaks the colours are spread over min(NES)..max(NES), and white lands on the
# midpoint of that range rather than on zero.
nes_palette <- colorRampPalette(c("blue", "white", "red"))(50)
nes_limit <- max(abs(heatmap_data), na.rm = TRUE)
# pheatmap expects one more break than there are colours
nes_breaks <- seq(-nes_limit, nes_limit, length.out = length(nes_palette) + 1)
# Plot the heatmap for the top 50 pathways
pheatmap(
  heatmap_data,
  cluster_rows = TRUE,
  cluster_cols = FALSE,
  show_rownames = TRUE,
  show_colnames = TRUE,
  main = "Hallmark Pathways: Malignant CD4 T Cells compared to normal CD4 T cells",
  color = nes_palette,
  breaks = nes_breaks
)

6. Obtain KEGG Gene Sets and Perform Fast GSEA Using KEGG Pathways
library(fgsea)
library(msigdbr)
library(dplyr)
library(pheatmap)
# Obtain KEGG gene sets from msigdbr
kegg_genes <- msigdbr(species = "Homo sapiens", category = "C2", subcategory = "CP:KEGG")
# Convert the gene sets to the named-list format fgsea expects:
# one character vector of gene symbols per gene-set name
kegg_list <- split(x = kegg_genes$gene_symbol, f = kegg_genes$gs_name)
# Expects a data frame named Malignant_CD4Tcells_vs_Normal_CD4Tcells with
# columns gene, avg_log2FC and p_val_adj.
# Ranking metric: signed significance, avg_log2FC * -log10(p_val_adj).
# Adjusted p-values of exactly 0 would give -log10(0) = Inf, and those genes
# (the most significant ones) would then be discarded by the finite-value
# filter below. To keep them, p-values are floored at one tenth of the smallest
# non-zero adjusted p-value before taking the logarithm.
positive_p <- Malignant_CD4Tcells_vs_Normal_CD4Tcells$p_val_adj
positive_p <- positive_p[!is.na(positive_p) & positive_p > 0]
p_val_floor <- if (length(positive_p) > 0) {
  max(min(positive_p) / 10, .Machine$double.xmin)
} else {
  .Machine$double.xmin
}
# Add the metric and drop rows where it cannot be computed (NA inputs)
Malignant_CD4Tcells_vs_Normal_CD4Tcells <- Malignant_CD4Tcells_vs_Normal_CD4Tcells %>%
  mutate(rank_metric = avg_log2FC * -log10(pmax(p_val_adj, p_val_floor))) %>%
  filter(!is.na(rank_metric))
# Named numeric vector (names = gene symbols), finite values only,
# sorted in decreasing order
gene_list <- setNames(
  Malignant_CD4Tcells_vs_Normal_CD4Tcells$rank_metric,
  Malignant_CD4Tcells_vs_Normal_CD4Tcells$gene
)
gene_list <- sort(gene_list[is.finite(gene_list)], decreasing = TRUE)
# Perform fast GSEA using KEGG pathways.
# NOTE(review): passing nperm selects fgseaSimple (see the warning fgsea emits).
# It is kept here because code further down drops an `nMoreExtreme` column,
# which presumably exists only in fgseaSimple output; confirm before removing.
fgsea_result_kegg <- fgsea(
  pathways = kegg_list,
  stats = gene_list,
  minSize = 10,
  maxSize = 500,
  nperm = 10000 # Number of permutations
)
Avis : You are trying to run fgseaSimple. It is recommended to use fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument in the fgsea function call.
# View the fgsea results
head(fgsea_result_kegg)
# Names of the 10 most strongly up-regulated KEGG pathways (largest positive
# NES). The result column is `pathway`; indexing a non-existent `pathways`
# column yields NULL and therefore an empty table.
top10_UP_kegg <- fgsea_result_kegg %>%
  filter(NES > 0) %>%
  arrange(desc(NES)) %>%
  head(10) %>%
  pull(pathway)
# Summary table for the selected pathways
plotGseaTable(kegg_list[top10_UP_kegg], gene_list, fgsea_result_kegg, gseaParam = 0.5)

# Top 20 upregulated (positive NES) and top 20 downregulated (negative NES)
# pathways, each ranked by adjusted p-value
upregulated_pathways <- fgsea_result_kegg %>%
  filter(NES > 0) %>%
  arrange(padj) %>%
  head(20)
downregulated_pathways <- fgsea_result_kegg %>%
  filter(NES < 0) %>%
  arrange(padj) %>%
  head(20)
# Combine both sets into one table
combined_pathways <- bind_rows(upregulated_pathways, downregulated_pathways)
# One-column matrix for the heatmap: pathways as rows, NES as the value
heatmap_data_combined <- matrix(
  combined_pathways$NES,
  ncol = 1,
  dimnames = list(combined_pathways$pathway, "NES")
)
# Diverging palette with breaks symmetric around zero, so that white marks
# NES = 0, blue marks negative NES and red marks positive NES. Without explicit
# breaks the colours are spread over min(NES)..max(NES), and white lands on the
# midpoint of that range rather than on zero.
nes_palette <- colorRampPalette(c("blue", "white", "red"))(50)
nes_limit <- max(abs(heatmap_data_combined), na.rm = TRUE)
# pheatmap expects one more break than there are colours
nes_breaks <- seq(-nes_limit, nes_limit, length.out = length(nes_palette) + 1)
# Plot the combined heatmap for the top 20 upregulated and top 20 downregulated pathways
pheatmap(
  heatmap_data_combined,
  cluster_rows = TRUE,
  cluster_cols = FALSE,
  show_rownames = TRUE,
  show_colnames = TRUE,
  main = "KEGG Pathways: Malignant CD4 T Cells compared to normal CD4 T Cells",
  color = nes_palette,
  breaks = nes_breaks
)

Visualization: Hallmark
# Hallmark fgsea results as a tibble, ordered from highest to lowest NES
fgseaResTidy <- fgsea_result %>%
  as_tibble() %>%
  arrange(desc(NES))
# Show in a nice table, sorted by adjusted p-value.
# any_of() drops the listed columns only when they are present, so the table
# still renders if the fgsea output has no `nMoreExtreme` column.
fgseaResTidy %>%
  dplyr::select(-dplyr::any_of(c("leadingEdge", "ES", "nMoreExtreme"))) %>%
  arrange(padj) %>%
  DT::datatable()
# Horizontal bar chart of NES per pathway; bars with padj < 0.05 are red,
# the others grey
ggplot(fgseaResTidy, aes(reorder(pathway, NES), NES)) +
  geom_col(aes(fill = padj < 0.05)) +
  coord_flip() +
  labs(
    x = "Pathway", y = "Normalized Enrichment Score",
    title = "Hallmark pathways NES from GSEA"
  ) +
  theme_minimal() +
  scale_fill_manual(values = c("TRUE" = "red", "FALSE" = "grey"))

NA
NA
NA
Visualization: KEGG 1
# KEGG fgsea results as a tibble, ordered from highest to lowest NES
fgseaResTidy <- fgsea_result_kegg %>%
  as_tibble() %>%
  arrange(desc(NES))
# Show in a nice table, sorted by adjusted p-value.
# any_of() drops the listed columns only when they are present, so the table
# still renders if the fgsea output has no `nMoreExtreme` column.
fgseaResTidy %>%
  dplyr::select(-dplyr::any_of(c("leadingEdge", "ES", "nMoreExtreme"))) %>%
  arrange(padj) %>%
  DT::datatable()
# Horizontal bar chart of NES per pathway; bars with padj < 0.05 are red,
# the others grey (same fill scale as the other NES bar charts in this file)
ggplot(fgseaResTidy, aes(reorder(pathway, NES), NES)) +
  geom_col(aes(fill = padj < 0.05)) +
  coord_flip() +
  labs(
    x = "Pathway", y = "Normalized Enrichment Score",
    title = "KEGG pathways NES from GSEA"
  ) +
  theme_minimal() +
  scale_fill_manual(values = c("TRUE" = "red", "FALSE" = "grey"))

NA
NA
NA
Visualization: KEGG 2
# The 20 pathways with the largest positive NES ...
topUp <- head(
  dplyr::arrange(dplyr::filter(fgseaResTidy, NES > 0), desc(NES)),
  n = 20
)
# ... and the 20 pathways with the most negative NES
topDown <- head(
  dplyr::arrange(dplyr::filter(fgseaResTidy, NES < 0), NES),
  n = 20
)
# Stack both selections into a single table for plotting
topPathways <- dplyr::bind_rows(topUp, topDown)
# Horizontal bar chart of NES per pathway; bars with padj < 0.05 are red,
# the others grey
ggplot(
  data = topPathways,
  mapping = aes(x = reorder(pathway, NES), y = NES)
) +
  geom_col(mapping = aes(fill = padj < 0.05)) +
  scale_fill_manual(values = c("TRUE" = "red", "FALSE" = "grey")) +
  coord_flip() +
  labs(
    title = "Top 20 Up and Down KEGG Pathways NES from GSEA",
    x = "Pathway",
    y = "Normalized Enrichment Score"
  ) +
  theme_minimal()

NA
NA
NA
NA
7. Save Hallmark and KEGG results to CSV
# Expects the Hallmark results in `fgsea_result` and the KEGG results in
# `fgsea_result_kegg`.
# List columns are collapsed to one comma-separated string per row with
# toString() so that every column is a plain vector before export.
# vapply() guarantees a character result even when the table has zero rows.
fgsea_result_hallmark_flattened <- fgsea_result %>%
  mutate(across(where(is.list), ~ vapply(.x, toString, character(1))))
# Write the flattened Hallmark results to a CSV file
write.csv(fgsea_result_hallmark_flattened, "fgsea_results_hallmark.csv", row.names = FALSE)
# Flatten the list columns of the KEGG results the same way
fgsea_result_kegg_flattened <- fgsea_result_kegg %>%
  mutate(across(where(is.list), ~ vapply(.x, toString, character(1))))
# Write the flattened KEGG results to a CSV file
write.csv(fgsea_result_kegg_flattened, "fgsea_results_kegg.csv", row.names = FALSE)
