# Load required packages
# BiocManager::install(c("clusterProfiler","org.Hs.eg.db","enrichplot","DOSE"))
library(clusterProfiler)  # Package for enrichment analysis
library(org.Hs.eg.db)     # Human genome annotation database
library(enrichplot)       # For visualization of enrichment results
library(DOSE)             # Disease Ontology Semantic and Enrichment analysis
library(ggplot2)          # For creating customized graphics

# =========================================
# 1. Create a list of cancer-related genes using Entrez IDs
# =========================================
gene_list <- c(
  "7157",  # TP53 - Tumor suppressor gene, often mutated in many cancers
  "1956",  # EGFR - Epidermal Growth Factor Receptor, target for cancer therapy
  "5290",  # PTEN - Tumor suppressor, frequently inactivated in many tumors
  "5970",  # RB1 - Retinoblastoma protein, tumor suppressor 
  "4609",  # MYC - Proto-oncogene, transcription factor
  "2064",  # FAS - Cell surface receptor involved in apoptosis signaling
  "2475",  # FLT3 - Receptor tyrosine kinase, often mutated in leukemia
  "7422",  # VEGFA - Vascular Endothelial Growth Factor A, promotes angiogenesis
  "1021",  # BCL2 - Anti-apoptotic protein, prevents cell death
  "3741",  # JUN - Transcription factor, component of AP-1
  "4221",  # KDR - VEGF Receptor 2, mediates vascular development
  "3845",  # KRAS - Proto-oncogene, involved in signal transduction
  "5293",  # PIK3CA - PI3K signaling pathway, frequently mutated in cancer
  "5837",  # SMAD4 - Signal transducer in TGF-β pathway, tumor suppressor
  "8318",  # TGFBR2 - TGF-β receptor, involved in cell growth regulation
  "207",   # AR - Androgen Receptor, important in prostate cancer
  "4287",  # MAPK1 - Mitogen-Activated Protein Kinase 1, key signaling molecule
  "5925",  # RET - Proto-oncogene, receptor tyrosine kinase
  "2073",  # ERBB2 - HER2, target for breast cancer therapy
  "5241"   # PLK1 - Polo-Like Kinase 1, regulator of cell cycle
)

# =========================================
# 2. Gene Ontology (GO) Enrichment Analysis - Biological Process (BP)
# =========================================
# Perform GO enrichment analysis using the Biological Process ontology
ego_bp <- enrichGO(
  gene          = gene_list,    # Input gene list (Entrez IDs)
  OrgDb         = org.Hs.eg.db, # Organism database for human
  keyType       = "ENTREZID",   # Type of gene identifiers provided
  ont           = "BP",         # Ontology: Biological Process (alternatives: MF, CC)
  pAdjustMethod = "BH",         # Benjamini-Hochberg adjustment for multiple testing
  pvalueCutoff  = 0.001,        # Only keep terms with p-value < 0.001
  qvalueCutoff  = 0.001,        # Only keep terms with q-value < 0.001
  readable      = TRUE          # Convert gene IDs to gene symbols for readability
)

# Display the top results from enrichment analysis
head(ego_bp)  # Shows the first few rows of the enrichment results table
##                    ID                                          Description
## GO:0050678 GO:0050678          regulation of epithelial cell proliferation
## GO:0050673 GO:0050673                        epithelial cell proliferation
## GO:0009416 GO:0009416                           response to light stimulus
## GO:0009411 GO:0009411                                       response to UV
## GO:0009314 GO:0009314                                response to radiation
## GO:0050679 GO:0050679 positive regulation of epithelial cell proliferation
##            GeneRatio   BgRatio       pvalue     p.adjust       qvalue
## GO:0050678     11/20 381/18723 3.060385e-14 5.808610e-11 2.061733e-11
## GO:0050673     11/20 437/18723 1.374505e-13 1.304405e-10 4.629911e-11
## GO:0009416      9/20 320/18723 1.584026e-11 1.002160e-08 3.557111e-09
## GO:0009411      7/20 149/18723 1.246996e-10 5.916995e-08 2.100204e-08
## GO:0009314      9/20 456/18723 3.690787e-10 1.401023e-07 4.972849e-08
## GO:0050679      7/20 207/18723 1.251793e-09 3.959839e-07 1.405522e-07
##                                                             geneID Count
## GO:0050678 EGFR/MYC/ERBB2/MTOR/VEGFA/CDK6/MEN1/PIK3CD/AKT1/RB1/PGR    11
## GO:0050673 EGFR/MYC/ERBB2/MTOR/VEGFA/CDK6/MEN1/PIK3CD/AKT1/RB1/PGR    11
## GO:0009416            TP53/EGFR/RELA/MYC/MTOR/MEN1/KRAS/AKT1/ERCC5     9
## GO:0009411                      TP53/EGFR/RELA/MYC/MEN1/AKT1/ERCC5     7
## GO:0009314            TP53/EGFR/RELA/MYC/MTOR/MEN1/KRAS/AKT1/ERCC5     9
## GO:0050679                   EGFR/MYC/ERBB2/MTOR/VEGFA/PIK3CD/AKT1     7
# Create a dotplot visualization of the enriched GO terms
# Size of dots represents gene count, color represents p-value
dotplot(ego_bp, showCategory=10) + ggtitle("GO BP Enrichment (Dotplot)")

# Create a gene-concept network plot showing relationship between genes and GO terms
# Shows how genes are connected to the enriched biological processes
cnetplot(ego_bp, showCategory = 5, foldChange=NULL)