4. Enrichment Analysis-All_Pathways
# Load necessary libraries
library(clusterProfiler)
library(org.Hs.eg.db)
library(enrichplot)
library(ReactomePA)
library(DOSE) # For GSEA analysis
library(ggplot2) # Ensure ggplot2 is available for plotting
library(dplyr)
# Define the output folder where the results will be saved
output_folder <- "NK_Proliferating_vs_Control/"
Malignant_CD4Tcells_vs_Normal_CD4Tcells <- df
# Create the output folder if it doesn't exist
if (!dir.exists(output_folder)) {
dir.create(output_folder)
}
# Define the number of upregulated and downregulated genes to select
UP_genes <- 100
Down_genes <- 100
# Define threshold for differential expression selection (modified thresholds)
logFC_up_threshold <- 4 # Upregulated logFC threshold
logFC_down_threshold <- -4 # Downregulated logFC threshold
# Load your differential expression results (modify based on actual data structure)
# Malignant_CD4Tcells_vs_Normal_CD4Tcells <- read.csv("Your_DE_Results_File.csv")
# Filter the genes based on avg_log2FC and arrange by p_val_adj
filtered_genes <- Malignant_CD4Tcells_vs_Normal_CD4Tcells %>%
filter(avg_log2FC > logFC_up_threshold | avg_log2FC < logFC_down_threshold) %>%
arrange(p_val_adj)
# Separate upregulated and downregulated genes
upregulated_genes <- filtered_genes %>%
filter(avg_log2FC > logFC_up_threshold)
downregulated_genes <- filtered_genes %>%
filter(avg_log2FC < logFC_down_threshold)
# Check if there are fewer than the specified number of upregulated genes
if (nrow(upregulated_genes) < UP_genes) {
top_upregulated_genes <- upregulated_genes
cat("Number of upregulated genes selected:", nrow(top_upregulated_genes), "\n")
cat("p_val_adj value for the last selected upregulated gene:", tail(top_upregulated_genes$p_val_adj, 1), "\n")
} else {
# Select the specified number of upregulated genes
top_upregulated_genes <- upregulated_genes %>%
head(UP_genes)
cat("Number of upregulated genes selected:", nrow(top_upregulated_genes), "\n")
cat("p_val_adj value for the last selected upregulated gene:", tail(top_upregulated_genes$p_val_adj, 1), "\n")
}
Number of upregulated genes selected: 100
p_val_adj value for the last selected upregulated gene: 0
# Check if there are fewer than the specified number of downregulated genes
if (nrow(downregulated_genes) < Down_genes) {
top_downregulated_genes <- downregulated_genes
cat("Number of downregulated genes selected:", nrow(top_downregulated_genes), "\n")
cat("p_val_adj value for the last selected downregulated gene:", tail(top_downregulated_genes$p_val_adj, 1), "\n")
} else {
# Select the specified number of downregulated genes
top_downregulated_genes <- downregulated_genes %>%
head(Down_genes)
cat("Number of downregulated genes selected:", nrow(top_downregulated_genes), "\n")
cat("p_val_adj value for the last selected downregulated gene:", tail(top_downregulated_genes$p_val_adj, 1), "\n")
}
Number of downregulated genes selected: 81
p_val_adj value for the last selected downregulated gene: 5.664039e-83
# Combine the top upregulated and downregulated genes
top_genes <- bind_rows(top_upregulated_genes, top_downregulated_genes)
# Check for missing genes (NAs) in the gene column and remove them
top_genes <- na.omit(top_genes)
# Save upregulated and downregulated gene results to CSV
write.csv(top_upregulated_genes, paste0(output_folder, "upregulated_genes.csv"), row.names = FALSE)
write.csv(top_downregulated_genes, paste0(output_folder, "downregulated_genes.csv"), row.names = FALSE)
# Convert gene symbols to Entrez IDs for enrichment analysis, with checks for missing values
upregulated_entrez <- bitr(top_upregulated_genes$gene, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)
'select()' returned 1:1 mapping between keys and columns
Warning: 4% of input gene IDs are fail to map...
downregulated_entrez <- bitr(top_downregulated_genes$gene, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)
'select()' returned 1:1 mapping between keys and columns
Warning: 3.7% of input gene IDs are fail to map...
# Check for missing Entrez IDs and retain gene names
missing_upregulated <- top_upregulated_genes$gene[!top_upregulated_genes$gene %in% upregulated_entrez$SYMBOL]
missing_downregulated <- top_downregulated_genes$gene[!top_downregulated_genes$gene %in% downregulated_entrez$SYMBOL]
# Print out the missing gene symbols for debugging
cat("Missing upregulated genes:\n", missing_upregulated, "\n")
Missing upregulated genes:
WDR34 HIST1H1B HIST1H1A HIST1H3B
cat("Missing downregulated genes:\n", missing_downregulated, "\n")
Missing downregulated genes:
AC139720.1 AC245407.2 C5orf17
# Merge the Entrez IDs back with the original data frames to retain gene names
top_upregulated_genes <- merge(top_upregulated_genes, upregulated_entrez, by.x = "gene", by.y = "SYMBOL", all.x = TRUE)
top_downregulated_genes <- merge(top_downregulated_genes, downregulated_entrez, by.x = "gene", by.y = "SYMBOL", all.x = TRUE)
# Remove genes that couldn't be mapped to Entrez IDs
top_upregulated_genes <- top_upregulated_genes[!is.na(top_upregulated_genes$ENTREZID), ]
top_downregulated_genes <- top_downregulated_genes[!is.na(top_downregulated_genes$ENTREZID), ]
# Extract Entrez IDs for enrichment analysis
upregulated_entrez <- top_upregulated_genes$ENTREZID
downregulated_entrez <- top_downregulated_genes$ENTREZID
# Define a function to safely run enrichment, plot results, and save them
safe_enrichGO <- function(gene_list, title, filename) {
if (length(gene_list) > 0) {
result <- enrichGO(gene = gene_list, OrgDb = org.Hs.eg.db, keyType = "SYMBOL",
ont = "BP", pAdjustMethod = "BH", pvalueCutoff = 0.05, readable = TRUE)
if (!is.null(result) && nrow(as.data.frame(result)) > 0) {
p <- dotplot(result, showCategory = 10, title = title)
print(p)
ggsave(paste0(output_folder, gsub(".csv", "_dotplot.png", filename)), plot = p, width = 8, height = 6)
write.csv(as.data.frame(result), file = paste0(output_folder, filename), row.names = FALSE)
} else {
message(paste("No significant enrichment found for:", title))
}
} else {
message(paste("No genes found for:", title))
}
}
safe_enrichKEGG <- function(entrez_list, title, filename) {
if (length(entrez_list) > 0) {
result <- enrichKEGG(gene = entrez_list, organism = "hsa", pvalueCutoff = 0.05)
if (!is.null(result) && nrow(as.data.frame(result)) > 0) {
result <- setReadable(result, OrgDb = org.Hs.eg.db, keyType = "ENTREZID")
p <- dotplot(result, showCategory = 10, title = title)
print(p)
ggsave(paste0(output_folder, gsub(".csv", "_dotplot.png", filename)), plot = p, width = 8, height = 6)
write.csv(as.data.frame(result), file = paste0(output_folder, filename), row.names = FALSE)
} else {
message(paste("No significant KEGG pathways found for:", title))
}
} else {
message(paste("No genes found for:", title))
}
}
safe_enrichReactome <- function(entrez_list, title, filename) {
if (length(entrez_list) > 0) {
result <- enrichPathway(gene = entrez_list, organism = "human", pvalueCutoff = 0.05)
if (!is.null(result) && nrow(as.data.frame(result)) > 0) {
result <- setReadable(result, OrgDb = org.Hs.eg.db, keyType = "ENTREZID")
p <- dotplot(result, showCategory = 10, title = title)
print(p)
ggsave(paste0(output_folder, gsub(".csv", "_dotplot.png", filename)), plot = p, width = 8, height = 6)
write.csv(as.data.frame(result), file = paste0(output_folder, filename), row.names = FALSE)
} else {
message(paste("No significant Reactome pathways found for:", title))
}
} else {
message(paste("No genes found for:", title))
}
}
# Perform enrichment analyses, generate plots, and save results
safe_enrichGO(top_upregulated_genes$gene, "GO Enrichment for Upregulated Genes", "upregulated_GO_results.csv")

safe_enrichGO(top_downregulated_genes$gene, "GO Enrichment for Downregulated Genes", "downregulated_GO_results.csv")

safe_enrichKEGG(upregulated_entrez, "KEGG Pathway Enrichment for Upregulated Genes", "upregulated_KEGG_results.csv")
Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...
Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...

safe_enrichKEGG(downregulated_entrez, "KEGG Pathway Enrichment for Downregulated Genes", "downregulated_KEGG_results.csv")

safe_enrichReactome(upregulated_entrez, "Reactome Pathway Enrichment for Upregulated Genes", "upregulated_Reactome_results.csv")

safe_enrichReactome(downregulated_entrez, "Reactome Pathway Enrichment for Downregulated Genes", "downregulated_Reactome_results.csv")
No significant Reactome pathways found for: Reactome Pathway Enrichment for Downregulated Genes
Enrichment Analysis_Hallmark
# Load necessary libraries
library(clusterProfiler)
library(org.Hs.eg.db)
library(msigdbr)
library(enrichplot)
library(ggplot2)
library(dplyr)
# Define the output folder where the results will be saved
output_folder <- "NK_Proliferating_vs_Control/"
# Create the output folder if it doesn't exist
if (!dir.exists(output_folder)) {
dir.create(output_folder)
}
# Load Hallmark gene sets from msigdbr
hallmark_sets <- msigdbr(species = "Homo sapiens", collection = "H") # "H" is for Hallmark gene sets
# Convert gene symbols to uppercase for consistency
top_upregulated_genes$gene <- toupper(top_upregulated_genes$gene)
top_downregulated_genes$gene <- toupper(top_downregulated_genes$gene)
# Check for overlap between your upregulated/downregulated genes and Hallmark gene sets
upregulated_in_hallmark <- intersect(top_upregulated_genes$gene, hallmark_sets$gene_symbol)
downregulated_in_hallmark <- intersect(top_downregulated_genes$gene, hallmark_sets$gene_symbol)
# Print the number of overlapping genes for both upregulated and downregulated genes
cat("Number of upregulated genes in Hallmark gene sets:", length(upregulated_in_hallmark), "\n")
Number of upregulated genes in Hallmark gene sets: 54
cat("Number of downregulated genes in Hallmark gene sets:", length(downregulated_in_hallmark), "\n")
Number of downregulated genes in Hallmark gene sets: 28
# If there are genes to analyze, proceed with enrichment analysis
if (length(upregulated_in_hallmark) > 0) {
# Perform enrichment analysis for upregulated genes using Hallmark gene sets
hallmark_up <- enricher(gene = upregulated_in_hallmark,
TERM2GENE = hallmark_sets[, c("gs_name", "gene_symbol")], # Ensure TERM2GENE uses correct columns
pvalueCutoff = 0.05)
# Check if results exist
if (!is.null(hallmark_up) && nrow(hallmark_up) > 0) {
# Visualize results if available
up_dotplot <- dotplot(hallmark_up, showCategory = 20, title = "Hallmark Pathway Enrichment for Upregulated Genes")
# Display the plot in the notebook
print(up_dotplot)
# Save the dotplot to a PNG file
ggsave(paste0(output_folder, "hallmark_upregulated_dotplot.png"), plot = up_dotplot, width = 10, height = 8)
# Optionally, save the results as CSV
write.csv(as.data.frame(hallmark_up), file = paste0(output_folder, "hallmark_upregulated_enrichment.csv"), row.names = FALSE)
} else {
cat("No significant enrichment found for upregulated genes.\n")
}
} else {
cat("No upregulated genes overlap with Hallmark gene sets.\n")
}

if (length(downregulated_in_hallmark) > 0) {
# Perform enrichment analysis for downregulated genes using Hallmark gene sets
hallmark_down <- enricher(gene = downregulated_in_hallmark,
TERM2GENE = hallmark_sets[, c("gs_name", "gene_symbol")], # Ensure TERM2GENE uses correct columns
pvalueCutoff = 0.05)
# Check if results exist
if (!is.null(hallmark_down) && nrow(hallmark_down) > 0) {
# Visualize results if available
down_dotplot <- dotplot(hallmark_down, showCategory = 20, title = "Hallmark Pathway Enrichment for Downregulated Genes")
# Display the plot in the notebook
print(down_dotplot)
# Save the dotplot to a PNG file
ggsave(paste0(output_folder, "hallmark_downregulated_dotplot.png"), plot = down_dotplot, width = 10, height = 8)
# Optionally, save the results as CSV
write.csv(as.data.frame(hallmark_down), file = paste0(output_folder, "hallmark_downregulated_enrichment.csv"), row.names = FALSE)
} else {
cat("No significant enrichment found for downregulated genes.\n")
}
} else {
cat("No downregulated genes overlap with Hallmark gene sets.\n")
}

NA
NA
---
title: "NK Proliferating vs Control_filtred_on_mean"
author: Nasir Mahmood Abbasi
date: "`r Sys.Date()`"
output:
  # pdf_document: default
  # word_document: default
  # html_document: default
  #rmdformats::readthedown
  html_notebook:
    toc: true
    toc_float: true
    toc_collapsed: true
---

# 1. load libraries
```{r setup, include=FALSE}

# Load libraries
library(Seurat)
library(Matrix)
library(ggplot2)
library(dplyr)
library(ggrepel)
library(tibble)


```

# 2. Load the filtered list on mean expression
```{r , fig.height=8, fig.width=10}

# Load the DE results from CSV
df <- read.csv("3-RNA_NK_Prolif_vs_Control_Filtered_by_MeanExp.csv", stringsAsFactors = FALSE)


DE_results_df <- df


markers <- DE_results_df
```

# 3. Volcano Plots
```{r , fig.height=12, fig.width=16}

library(dplyr)
library(EnhancedVolcano)

# Assuming you have a data frame named Malignant_CD4Tcells_vs_Normal_CD4Tcells
# Filter genes based on lowest p-values but include all genes
filtered_genes <- markers %>%
  arrange(p_val_adj, desc(abs(avg_log2FC)))

# Create the EnhancedVolcano plot with the filtered data
EnhancedVolcano(
  filtered_genes, 
  lab = ifelse(filtered_genes$p_val_adj <= 1e-6 & abs(filtered_genes$avg_log2FC) >= 1.5, filtered_genes$gene, NA),
  x = "avg_log2FC", 
  y = "p_val_adj",
  title = "Malignant CD4 T cells(cell lines) vs normal CD4 T cells",
  pCutoff = 1e-6,
  FCcutoff = 1.0,
  legendPosition = 'right', 
  labCol = 'black',
  labFace = 'bold',
  boxedLabels = FALSE,  # Set to FALSE to remove boxed labels
  pointSize = 3.0,
  labSize = 5.0,
  col = c('grey70', 'black', 'blue', 'red'),  # Customize point colors
  selectLab = filtered_genes$gene[filtered_genes$p_val_adj <= 0.05 & abs(filtered_genes$avg_log2FC) >= 1.0]  # Only label significant genes
)



```


# 4. Enrichment Analysis-All_Pathways
```{r , fig.height=8, fig.width=12}
# Load necessary libraries
library(clusterProfiler)
library(org.Hs.eg.db)
library(enrichplot)
library(ReactomePA)
library(DOSE) # For GSEA analysis
library(ggplot2) # Ensure ggplot2 is available for plotting
library(dplyr)

# Define the output folder where the results will be saved
output_folder <- "NK_Proliferating_vs_Control/"

Malignant_CD4Tcells_vs_Normal_CD4Tcells <- df

# Create the output folder if it doesn't exist
if (!dir.exists(output_folder)) {
  dir.create(output_folder)
}

# Define the number of upregulated and downregulated genes to select
UP_genes <- 100
Down_genes <- 100

# Define threshold for differential expression selection (modified thresholds)
logFC_up_threshold <- 4          # Upregulated logFC threshold
logFC_down_threshold <- -4         # Downregulated logFC threshold

# Load your differential expression results (modify based on actual data structure)
# Malignant_CD4Tcells_vs_Normal_CD4Tcells <- read.csv("Your_DE_Results_File.csv")

# Filter the genes based on avg_log2FC and arrange by p_val_adj
filtered_genes <- Malignant_CD4Tcells_vs_Normal_CD4Tcells %>%
  filter(avg_log2FC > logFC_up_threshold | avg_log2FC < logFC_down_threshold) %>%
  arrange(p_val_adj)

# Separate upregulated and downregulated genes
upregulated_genes <- filtered_genes %>%
  filter(avg_log2FC > logFC_up_threshold)

downregulated_genes <- filtered_genes %>%
  filter(avg_log2FC < logFC_down_threshold)

# Check if there are fewer than the specified number of upregulated genes
if (nrow(upregulated_genes) < UP_genes) {
  top_upregulated_genes <- upregulated_genes
  cat("Number of upregulated genes selected:", nrow(top_upregulated_genes), "\n")
  cat("p_val_adj value for the last selected upregulated gene:", tail(top_upregulated_genes$p_val_adj, 1), "\n")
} else {
  # Select the specified number of upregulated genes
  top_upregulated_genes <- upregulated_genes %>%
    head(UP_genes)
  cat("Number of upregulated genes selected:", nrow(top_upregulated_genes), "\n")
  cat("p_val_adj value for the last selected upregulated gene:", tail(top_upregulated_genes$p_val_adj, 1), "\n")
}

# Check if there are fewer than the specified number of downregulated genes
if (nrow(downregulated_genes) < Down_genes) {
  top_downregulated_genes <- downregulated_genes
  cat("Number of downregulated genes selected:", nrow(top_downregulated_genes), "\n")
  cat("p_val_adj value for the last selected downregulated gene:", tail(top_downregulated_genes$p_val_adj, 1), "\n")
} else {
  # Select the specified number of downregulated genes
  top_downregulated_genes <- downregulated_genes %>%
    head(Down_genes)
  cat("Number of downregulated genes selected:", nrow(top_downregulated_genes), "\n")
  cat("p_val_adj value for the last selected downregulated gene:", tail(top_downregulated_genes$p_val_adj, 1), "\n")
}

# Combine the top upregulated and downregulated genes
top_genes <- bind_rows(top_upregulated_genes, top_downregulated_genes)

# Check for missing genes (NAs) in the gene column and remove them
top_genes <- na.omit(top_genes)

# Save upregulated and downregulated gene results to CSV
write.csv(top_upregulated_genes, paste0(output_folder, "upregulated_genes.csv"), row.names = FALSE)
write.csv(top_downregulated_genes, paste0(output_folder, "downregulated_genes.csv"), row.names = FALSE)

# Convert gene symbols to Entrez IDs for enrichment analysis, with checks for missing values
upregulated_entrez <- bitr(top_upregulated_genes$gene, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)
downregulated_entrez <- bitr(top_downregulated_genes$gene, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)

# Check for missing Entrez IDs and retain gene names
missing_upregulated <- top_upregulated_genes$gene[!top_upregulated_genes$gene %in% upregulated_entrez$SYMBOL]
missing_downregulated <- top_downregulated_genes$gene[!top_downregulated_genes$gene %in% downregulated_entrez$SYMBOL]

# Print out the missing gene symbols for debugging
cat("Missing upregulated genes:\n", missing_upregulated, "\n")
cat("Missing downregulated genes:\n", missing_downregulated, "\n")

# Merge the Entrez IDs back with the original data frames to retain gene names
top_upregulated_genes <- merge(top_upregulated_genes, upregulated_entrez, by.x = "gene", by.y = "SYMBOL", all.x = TRUE)
top_downregulated_genes <- merge(top_downregulated_genes, downregulated_entrez, by.x = "gene", by.y = "SYMBOL", all.x = TRUE)

# Remove genes that couldn't be mapped to Entrez IDs
top_upregulated_genes <- top_upregulated_genes[!is.na(top_upregulated_genes$ENTREZID), ]
top_downregulated_genes <- top_downregulated_genes[!is.na(top_downregulated_genes$ENTREZID), ]

# Extract Entrez IDs for enrichment analysis
upregulated_entrez <- top_upregulated_genes$ENTREZID
downregulated_entrez <- top_downregulated_genes$ENTREZID

# Define a function to safely run enrichment, plot results, and save them
safe_enrichGO <- function(gene_list, title, filename) {
  if (length(gene_list) > 0) {
    result <- enrichGO(gene = gene_list, OrgDb = org.Hs.eg.db, keyType = "SYMBOL",
                       ont = "BP", pAdjustMethod = "BH", pvalueCutoff = 0.05, readable = TRUE)
    if (!is.null(result) && nrow(as.data.frame(result)) > 0) {
      p <- dotplot(result, showCategory = 10, title = title)
      print(p)  
      ggsave(paste0(output_folder, gsub(".csv", "_dotplot.png", filename)), plot = p, width = 8, height = 6)
      write.csv(as.data.frame(result), file = paste0(output_folder, filename), row.names = FALSE)
    } else {
      message(paste("No significant enrichment found for:", title))
    }
  } else {
    message(paste("No genes found for:", title))
  }
}

safe_enrichKEGG <- function(entrez_list, title, filename) {
  if (length(entrez_list) > 0) {
    result <- enrichKEGG(gene = entrez_list, organism = "hsa", pvalueCutoff = 0.05)
    if (!is.null(result) && nrow(as.data.frame(result)) > 0) {
      result <- setReadable(result, OrgDb = org.Hs.eg.db, keyType = "ENTREZID")
      p <- dotplot(result, showCategory = 10, title = title)
      print(p)
      ggsave(paste0(output_folder, gsub(".csv", "_dotplot.png", filename)), plot = p, width = 8, height = 6)
      write.csv(as.data.frame(result), file = paste0(output_folder, filename), row.names = FALSE)
    } else {
      message(paste("No significant KEGG pathways found for:", title))
    }
  } else {
    message(paste("No genes found for:", title))
  }
}

safe_enrichReactome <- function(entrez_list, title, filename) {
  if (length(entrez_list) > 0) {
    result <- enrichPathway(gene = entrez_list, organism = "human", pvalueCutoff = 0.05)
    if (!is.null(result) && nrow(as.data.frame(result)) > 0) {
      result <- setReadable(result, OrgDb = org.Hs.eg.db, keyType = "ENTREZID")
      p <- dotplot(result, showCategory = 10, title = title)
      print(p)
      ggsave(paste0(output_folder, gsub(".csv", "_dotplot.png", filename)), plot = p, width = 8, height = 6)
      write.csv(as.data.frame(result), file = paste0(output_folder, filename), row.names = FALSE)
    } else {
      message(paste("No significant Reactome pathways found for:", title))
    }
  } else {
    message(paste("No genes found for:", title))
  }
}

# Perform enrichment analyses, generate plots, and save results
safe_enrichGO(top_upregulated_genes$gene, "GO Enrichment for Upregulated Genes", "upregulated_GO_results.csv")
safe_enrichGO(top_downregulated_genes$gene, "GO Enrichment for Downregulated Genes", "downregulated_GO_results.csv")

safe_enrichKEGG(upregulated_entrez, "KEGG Pathway Enrichment for Upregulated Genes", "upregulated_KEGG_results.csv")
safe_enrichKEGG(downregulated_entrez, "KEGG Pathway Enrichment for Downregulated Genes", "downregulated_KEGG_results.csv")

safe_enrichReactome(upregulated_entrez, "Reactome Pathway Enrichment for Upregulated Genes", "upregulated_Reactome_results.csv")
safe_enrichReactome(downregulated_entrez, "Reactome Pathway Enrichment for Downregulated Genes", "downregulated_Reactome_results.csv")

```

## Enrichment Analysis_Hallmark
```{r , fig.height=8, fig.width=12}

# Load necessary libraries
library(clusterProfiler)
library(org.Hs.eg.db)
library(msigdbr)
library(enrichplot)
library(ggplot2)
library(dplyr)

# Define the output folder where the results will be saved
output_folder <- "NK_Proliferating_vs_Control/"

# Create the output folder if it doesn't exist
if (!dir.exists(output_folder)) {
  dir.create(output_folder)
}

# Load Hallmark gene sets from msigdbr
hallmark_sets <- msigdbr(species = "Homo sapiens", collection = "H")  # "H" is for Hallmark gene sets

# Convert gene symbols to uppercase for consistency
top_upregulated_genes$gene <- toupper(top_upregulated_genes$gene)
top_downregulated_genes$gene <- toupper(top_downregulated_genes$gene)

# Check for overlap between your upregulated/downregulated genes and Hallmark gene sets
upregulated_in_hallmark <- intersect(top_upregulated_genes$gene, hallmark_sets$gene_symbol)
downregulated_in_hallmark <- intersect(top_downregulated_genes$gene, hallmark_sets$gene_symbol)

# Print the number of overlapping genes for both upregulated and downregulated genes
cat("Number of upregulated genes in Hallmark gene sets:", length(upregulated_in_hallmark), "\n")
cat("Number of downregulated genes in Hallmark gene sets:", length(downregulated_in_hallmark), "\n")

# If there are genes to analyze, proceed with enrichment analysis
if (length(upregulated_in_hallmark) > 0) {
  # Perform enrichment analysis for upregulated genes using Hallmark gene sets
  hallmark_up <- enricher(gene = upregulated_in_hallmark, 
                          TERM2GENE = hallmark_sets[, c("gs_name", "gene_symbol")],  # Ensure TERM2GENE uses correct columns
                          pvalueCutoff = 0.05)
  # Check if results exist
  if (!is.null(hallmark_up) && nrow(hallmark_up) > 0) {
    # Visualize results if available
    up_dotplot <- dotplot(hallmark_up, showCategory = 20, title = "Hallmark Pathway Enrichment for Upregulated Genes")
    
    # Display the plot in the notebook
    print(up_dotplot)
    
    # Save the dotplot to a PNG file
    ggsave(paste0(output_folder, "hallmark_upregulated_dotplot.png"), plot = up_dotplot, width = 10, height = 8)
    
    # Optionally, save the results as CSV
    write.csv(as.data.frame(hallmark_up), file = paste0(output_folder, "hallmark_upregulated_enrichment.csv"), row.names = FALSE)
  } else {
    cat("No significant enrichment found for upregulated genes.\n")
  }
} else {
  cat("No upregulated genes overlap with Hallmark gene sets.\n")
}

if (length(downregulated_in_hallmark) > 0) {
  # Perform enrichment analysis for downregulated genes using Hallmark gene sets
  hallmark_down <- enricher(gene = downregulated_in_hallmark, 
                            TERM2GENE = hallmark_sets[, c("gs_name", "gene_symbol")],  # Ensure TERM2GENE uses correct columns
                            pvalueCutoff = 0.05)
  # Check if results exist
  if (!is.null(hallmark_down) && nrow(hallmark_down) > 0) {
    # Visualize results if available
    down_dotplot <- dotplot(hallmark_down, showCategory = 20, title = "Hallmark Pathway Enrichment for Downregulated Genes")
    
    # Display the plot in the notebook
    print(down_dotplot)
    
    # Save the dotplot to a PNG file
    ggsave(paste0(output_folder, "hallmark_downregulated_dotplot.png"), plot = down_dotplot, width = 10, height = 8)
    
    # Optionally, save the results as CSV
    write.csv(as.data.frame(hallmark_down), file = paste0(output_folder, "hallmark_downregulated_enrichment.csv"), row.names = FALSE)
  } else {
    cat("No significant enrichment found for downregulated genes.\n")
  }
} else {
  cat("No downregulated genes overlap with Hallmark gene sets.\n")
}


```


