# Load required packages
# BiocManager::install(c("clusterProfiler","org.Hs.eg.db","enrichplot","DOSE"))
library(clusterProfiler) # Package for enrichment analysis
library(org.Hs.eg.db) # Human genome annotation database
library(enrichplot) # For visualization of enrichment results
library(DOSE) # Disease Ontology Semantic and Enrichment analysis
library(ggplot2) # For creating customized graphics
# =========================================
# 1. Create a list of cancer-related genes using Entrez IDs
# =========================================
gene_list <- c(
"7157", # TP53 - Tumor suppressor gene, often mutated in many cancers
"1956", # EGFR - Epidermal Growth Factor Receptor, target for cancer therapy
"5290", # PTEN - Tumor suppressor, frequently inactivated in many tumors
"5970", # RB1 - Retinoblastoma protein, tumor suppressor
"4609", # MYC - Proto-oncogene, transcription factor
"2064", # FAS - Cell surface receptor involved in apoptosis signaling
"2475", # FLT3 - Receptor tyrosine kinase, often mutated in leukemia
"7422", # VEGFA - Vascular Endothelial Growth Factor A, promotes angiogenesis
"1021", # BCL2 - Anti-apoptotic protein, prevents cell death
"3741", # JUN - Transcription factor, component of AP-1
"4221", # KDR - VEGF Receptor 2, mediates vascular development
"3845", # KRAS - Proto-oncogene, involved in signal transduction
"5293", # PIK3CA - PI3K signaling pathway, frequently mutated in cancer
"5837", # SMAD4 - Signal transducer in TGF-β pathway, tumor suppressor
"8318", # TGFBR2 - TGF-β receptor, involved in cell growth regulation
"207", # AR - Androgen Receptor, important in prostate cancer
"4287", # MAPK1 - Mitogen-Activated Protein Kinase 1, key signaling molecule
"5925", # RET - Proto-oncogene, receptor tyrosine kinase
"2073", # ERBB2 - HER2, target for breast cancer therapy
"5241" # PLK1 - Polo-Like Kinase 1, regulator of cell cycle
)
# =========================================
# 2. Gene Ontology (GO) Enrichment Analysis - Biological Process (BP)
# =========================================
# Perform GO enrichment analysis using the Biological Process ontology
ego_bp <- enrichGO(
gene = gene_list, # Input gene list (Entrez IDs)
OrgDb = org.Hs.eg.db, # Organism database for human
keyType = "ENTREZID", # Type of gene identifiers provided
ont = "BP", # Ontology: Biological Process (alternatives: MF, CC)
pAdjustMethod = "BH", # Benjamini-Hochberg adjustment for multiple testing
pvalueCutoff = 0.001, # Only keep terms with p-value < 0.001
qvalueCutoff = 0.001, # Only keep terms with q-value < 0.001
readable = TRUE # Convert gene IDs to gene symbols for readability
)
# Display the top results from enrichment analysis
head(ego_bp) # Shows the first few rows of the enrichment results table
## ID Description
## GO:0050678 GO:0050678 regulation of epithelial cell proliferation
## GO:0050673 GO:0050673 epithelial cell proliferation
## GO:0009416 GO:0009416 response to light stimulus
## GO:0009411 GO:0009411 response to UV
## GO:0009314 GO:0009314 response to radiation
## GO:0050679 GO:0050679 positive regulation of epithelial cell proliferation
## GeneRatio BgRatio pvalue p.adjust qvalue
## GO:0050678 11/20 381/18723 3.060385e-14 5.808610e-11 2.061733e-11
## GO:0050673 11/20 437/18723 1.374505e-13 1.304405e-10 4.629911e-11
## GO:0009416 9/20 320/18723 1.584026e-11 1.002160e-08 3.557111e-09
## GO:0009411 7/20 149/18723 1.246996e-10 5.916995e-08 2.100204e-08
## GO:0009314 9/20 456/18723 3.690787e-10 1.401023e-07 4.972849e-08
## GO:0050679 7/20 207/18723 1.251793e-09 3.959839e-07 1.405522e-07
## geneID Count
## GO:0050678 EGFR/MYC/ERBB2/MTOR/VEGFA/CDK6/MEN1/PIK3CD/AKT1/RB1/PGR 11
## GO:0050673 EGFR/MYC/ERBB2/MTOR/VEGFA/CDK6/MEN1/PIK3CD/AKT1/RB1/PGR 11
## GO:0009416 TP53/EGFR/RELA/MYC/MTOR/MEN1/KRAS/AKT1/ERCC5 9
## GO:0009411 TP53/EGFR/RELA/MYC/MEN1/AKT1/ERCC5 7
## GO:0009314 TP53/EGFR/RELA/MYC/MTOR/MEN1/KRAS/AKT1/ERCC5 9
## GO:0050679 EGFR/MYC/ERBB2/MTOR/VEGFA/PIK3CD/AKT1 7
# Create a dotplot visualization of the enriched GO terms
# Size of dots represents gene count, color represents p-value
dotplot(ego_bp, showCategory=10) + ggtitle("GO BP Enrichment (Dotplot)")

# Create a gene-concept network plot showing relationship between genes and GO terms
# Shows how genes are connected to the enriched biological processes
cnetplot(ego_bp, showCategory = 5, foldChange=NULL)
