setwd("C:/Users/石源方/Desktop/数据搬家/华理工/班级-华/各科课程作业/高等生物信息学-注意PDF格式/24-12-19-practice6/6")

# 已经完成了 Stage4 vs Normal 的差异基因分析
deg_stage4 <- read.csv("Final_DEG_Stage4_vs_Normal.csv", row.names = 1)

# 提取 logFC 列,并去除 NA 值
geneList <- deg_stage4$logFC

# 将基因名称作为 geneList 的名字
names(geneList) <- rownames(deg_stage4)

# 去除 NA 值
geneList <- na.omit(geneList)

# 根据 logFC 从大到小排序
geneList <- sort(geneList, decreasing = TRUE)

# 加载 MSigDB 等数据库(需要安装 msigdbr 和 clusterProfiler 包)
library(msigdbr)
library(clusterProfiler)
## 
## clusterProfiler v4.14.4 Learn more at https://yulab-smu.top/contribution-knowledge-mining/
## 
## Please cite:
## 
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024,
## 5(6):100722
## 
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:stats':
## 
##     filter
# 获取 MSigDB Hallmark 基因集(例如 HALLMARK 基因集)
msigdbr_df <- msigdbr(species="Homo sapiens", category="H")

# 创建从基因集名称到基因符号的映射
msigdbr_t2g <- msigdbr_df %>%
  dplyr::distinct(gs_name, gene_symbol) %>%
  as.data.frame()

# 进行 GSEA 分析
gsea_result <- GSEA(geneList = geneList,
                    minGSSize = 1, 
                    maxGSSize = 1000,
                    pvalueCutoff = 0.2,
                    TERM2GENE = msigdbr_t2g)
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
# 导出结果
write.csv(gsea_result, file="gsea_result_N4.csv")
gsea_result_N4<- read.csv("gsea_result_N4.csv", header = TRUE)

head(gsea_result_N4)
##                                    X                                 ID
## 1               HALLMARK_E2F_TARGETS               HALLMARK_E2F_TARGETS
## 2            HALLMARK_G2M_CHECKPOINT            HALLMARK_G2M_CHECKPOINT
## 3   HALLMARK_TNFA_SIGNALING_VIA_NFKB   HALLMARK_TNFA_SIGNALING_VIA_NFKB
## 4 HALLMARK_INTERFERON_GAMMA_RESPONSE HALLMARK_INTERFERON_GAMMA_RESPONSE
## 5     HALLMARK_FATTY_ACID_METABOLISM     HALLMARK_FATTY_ACID_METABOLISM
## 6     HALLMARK_XENOBIOTIC_METABOLISM     HALLMARK_XENOBIOTIC_METABOLISM
##                          Description setSize enrichmentScore       NES
## 1               HALLMARK_E2F_TARGETS      46       0.4701923  2.694023
## 2            HALLMARK_G2M_CHECKPOINT      44       0.4752417  2.661733
## 3   HALLMARK_TNFA_SIGNALING_VIA_NFKB      32      -0.5407841 -2.615474
## 4 HALLMARK_INTERFERON_GAMMA_RESPONSE      22      -0.6017602 -2.515406
## 5     HALLMARK_FATTY_ACID_METABOLISM      21      -0.5718637 -2.348903
## 6     HALLMARK_XENOBIOTIC_METABOLISM      28      -0.5124435 -2.342257
##         pvalue     p.adjust       qvalue rank                    leading_edge
## 1 6.175927e-07 3.087964e-05 2.210332e-05  441  tags=89%, list=47%, signal=49%
## 2 1.694533e-06 4.236333e-05 3.032323e-05  441  tags=91%, list=47%, signal=50%
## 3 3.154977e-06 5.258295e-05 3.763832e-05  376  tags=91%, list=40%, signal=56%
## 4 1.327700e-05 1.659625e-04 1.187942e-04  385 tags=100%, list=41%, signal=60%
## 5 6.756784e-05 6.756784e-04 4.836435e-04  284  tags=81%, list=31%, signal=58%
## 6 1.466081e-04 1.221734e-03 8.745045e-04  356  tags=86%, list=38%, signal=55%
##                                                                                                                                                                                                                                          core_enrichment
## 1 MKI67/BIRC5/CDKN3/UBE2T/CDC20/MYBL2/SPC24/PTTG1/CDK1/SPC25/KIF4A/MELK/CCNB2/DEPDC1/TOP2A/DLGAP5/PLK1/KIF18B/CDCA3/KIF2C/E2F8/AURKA/CDKN2A/CENPM/BUB1B/RRM2/ASF1B/CDCA8/TRIP13/AURKB/HMMR/CENPE/ORC6/MXD3/TCF19/STMN1/CDKN2C/CCNE1/RACGAP1/DDX39A/HMGA1
## 2                  MKI67/BIRC5/UBE2C/CDKN3/NEK2/CENPF/CENPA/CDC20/MYBL2/PTTG1/CDK1/KIF4A/CCNB2/TROAP/TOP2A/CDC6/E2F1/EXO1/PLK1/BUB1/CCNA2/KIF2C/AURKA/TTK/KIF15/NDC80/PBK/CDC45/TRAIP/AURKB/HMMR/CENPE/ORC6/KIF23/STMN1/CDKN2C/RACGAP1/UCK2/DDX39A/HMGA1
## 3                                                                                   KDM6B/KLF6/RHOB/ETS2/PNRC1/CD69/NFIL3/GADD45B/PLPP3/NAMPT/SDC4/ZFP36/SOCS3/CCL4/JUN/SERPINB8/ATF3/CCN1/EGR1/TRIB1/NR4A2/PHLDA1/NR4A1/CXCL2/EGR2/RCAN1/FOS/NR4A3/FOSB
## 4                                                                                                                                 DDX60/USP18/PLSCR1/IRF8/STAT4/OASL/CD69/HERC6/EPSTI1/FGL2/APOL6/NAMPT/IFI44L/SOCS3/CD274/RSAD2/C1S/C1R/XAF1/IL2RB/FPR1
## 5                                                                                                                                                 ACADM/ACADS/ETFDH/ACSL5/ACAA1/BCKDHB/ACAA2/CD1D/EHHADH/ACSL1/ENO3/GSTZ1/CYP4A11/ACSM3/RDH16/AADAT/HAO2
## 6                                                                                                               ETS2/IRF8/PEMT/ACOX1/MAN1A1/SLC35D1/LPIN2/ACOX2/EPHA2/NDRG2/DHRS1/CYB5A/MTHFD1/CAT/ETFDH/ALDH2/CSAD/PLG/ALAS1/LCAT/MBL2/FMO3/ESR1/CYP1A2
# 提取 NES > 0 的 Top 3 上调基因集
top_upregulated <- gsea_result_N4 %>%
  filter(NES > 0) %>%                  # 过滤 NES > 0
  arrange(desc(NES)) %>%              # 按 NES 从大到小排序
  head(3)                             # 取前 3 条记录

# 提取 NES < 0 的 Top 3 下调基因集
top_downregulated <- gsea_result_N4 %>%
  filter(NES < 0) %>%                 # 过滤 NES < 0
  arrange(NES) %>%                    # 按 NES 从小到大排序
  head(3)                             # 取前 3 条记录

# 合并两组数据
top_gene_sets <- rbind(top_upregulated, top_downregulated)

# 添加组别信息(上调/下调)
top_gene_sets$Group <- ifelse(top_gene_sets$NES > 0, "Upregulated", "Downregulated")

# 提取 NES 和 FDR 阈值
nes_threshold <- sort(gsea_result_N4$NES, decreasing = TRUE)[3]
fdr_threshold <- sort(gsea_result_N4$qvalue)[3]

# 文本指示 NES 和 FDR 阈值
cat("NES Threshold:", round(nes_threshold, 2), "\n")
## NES Threshold: 2.19
cat("FDR Threshold:", round(fdr_threshold, 2), "\n")
## FDR Threshold: 0
# 设置 NES 和 FDR 阈值
nes_threshold_up <- 2.15   # NES 上调阈值
nes_threshold_down <- -2.15 # NES 下调阈值
fdr_threshold <- 0.001       # FDR 阈值

# 绘制条形图
library(ggplot2)
ggplot(top_gene_sets, aes(x = reorder(Description, NES), y = NES, fill = Group)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  scale_fill_manual(values = c("Upregulated" = "red", "Downregulated" = "blue")) +
  labs(title = "Top 3 Upregulated and Downregulated Gene Sets in Stage4 vs Normal",
       x = "Gene Set Name",
       y = "Normalized Enrichment Score (NES)",
       fill = "Group") +
  theme_minimal() +
  theme(axis.text.x = element_text(size = 10, angle = 0, hjust = 1),
        plot.title = element_text(hjust = 0.5), # 居中标题
        panel.grid.major.x = element_blank(), # 去掉x轴的主要网格线
        panel.grid.minor.x = element_blank()) + # 去掉x轴的次要网格线
  geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.8) + # 修改 linewidth 替代 size
  annotate("text", x = 3.5, y = nes_threshold_up + 0.2, label = "NES > 2.13,NES < -2.13", hjust = 0.7) +
  annotate("text", x = 3.5, y = 0.5, label = paste("FDR < 0.001"))

# ##绘制20个条形码图
# 对20个基因集可视化生成二维码图并保存为PDF
library(enrichplot)
## enrichplot v1.26.3 Learn more at https://yulab-smu.top/contribution-knowledge-mining/
## 
## Please cite:
## 
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024,
## 5(6):100722
# 提取前20个基因集
top20_gene_sets <- head(gsea_result_N4[order(gsea_result_N4$NES, decreasing = TRUE), ], 20)

# 循环生成并保存每个基因集的二维码图
for (i in 1:20) {
  # 获取当前基因集的名称
  gene_set_name <- top20_gene_sets$Description[i]
  
  # 创建二维码图并保存为 PDF 文件
  gsea_plot <- gseaplot2(gsea_result, 
                         geneSetID = i,            # 选择当前基因集
                         color = "green",          # 设置颜色
                         pvalue_table = TRUE)      # 显示 p-value 表格
  
  # 保存每个基因集的二维码图为单独的 PDF 文件
  file_name <- paste0("gsea_result_", i, ".png")
  ggsave(gsea_plot, file = file_name, width = 11.69, height = 8.27, units = 'in')
  
  cat("Saved:", file_name, "\n")
}
## Saved: gsea_result_1.png
## Saved: gsea_result_2.png
## Saved: gsea_result_3.png
## Saved: gsea_result_4.png
## Saved: gsea_result_5.png
## Saved: gsea_result_6.png
## Saved: gsea_result_7.png
## Saved: gsea_result_8.png
## Saved: gsea_result_9.png
## Saved: gsea_result_10.png
## Saved: gsea_result_11.png
## Saved: gsea_result_12.png
## Saved: gsea_result_13.png
## Saved: gsea_result_14.png
## Saved: gsea_result_15.png
## Saved: gsea_result_16.png
## Saved: gsea_result_17.png
## Saved: gsea_result_18.png
## Saved: gsea_result_19.png
## Saved: gsea_result_20.png