数据准备

setwd("D:/R/R-4.0.5/bin/project_fuxian/4-TCGA")
suppressMessages(library(clusterProfiler)) 
suppressMessages(library("org.Hs.eg.db")) 
load("nrDEG.Rdata")
head(nrDEG)
##           ENSEMBL  baseMean       logFC     lfcSE       stat      P.Value
## 1 ENSG00000000003 4052.6463 -0.04072695 0.2764914 -0.1472992 8.828959e-01
## 2 ENSG00000000005  357.6960 -5.36970681 0.6658144 -8.0648703 7.331365e-16
## 3 ENSG00000000419 2017.2067  0.56738319 0.1361133  4.1684626 3.066611e-05
## 4 ENSG00000000457 1678.1434  0.63459942 0.2955487  2.1471907 3.177811e-02
## 5 ENSG00000000460  627.2378  1.57124661 0.2455491  6.3989108 1.564892e-10
## 6 ENSG00000000938  654.3861  0.31068003 0.4309338  0.7209461 4.709427e-01
##           padj          SYMBOL ENTREZID
## 1 9.350763e-01 ENSG00000000003     7105
## 2 1.045653e-13 ENSG00000000005    64102
## 3 3.140474e-04 ENSG00000000419     8813
## 4 9.085836e-02 ENSG00000000457    57147
## 5 7.577651e-09 ENSG00000000460    55732
## 6 6.348910e-01 ENSG00000000938     2268
logFC_cutoff <- with(nrDEG,mean(abs(logFC)+2*sd(abs(logFC))))
nrDEG$change <- as.factor(with(nrDEG,ifelse(P.Value < 0.01 & abs(logFC) > logFC_cutoff,ifelse(logFC > logFC_cutoff,"up","down"),"not")))
table(nrDEG$change)
## 
##  down   not    up 
##   613 21192   564
{
  gene_up <-nrDEG[nrDEG$change == "up","ENTREZID"]
  gene_down <- nrDEG[nrDEG$change == "down","ENTREZID"]
  gene_dif <- c(gene_up,gene_down)
  gene_all <- as.character(nrDEG[,"ENTREZID"])#背景基因:全部基因
}

{
  genelist <- nrDEG$logFC
  names(genelist) <- nrDEG$ENTREZID
  genelist <- sort(genelist,decreasing = T)
}
head(gene_up)
## [1] "30812" "1750"  "1870"  "54443" "2556"  "10460"
head(gene_all)
## [1] "7105"  "64102" "8813"  "57147" "55732" "2268"

KEGG 富集分析

{kk.up <- enrichKEGG( gene =gene_up,
                       organism = "hsa",
                       universe = gene_all,
                       pvalueCutoff = 0.8,
                       qvalueCutoff = 0.8    )
}
## Reading KEGG annotation online:
## 
## Reading KEGG annotation online:
{kk.down <- enrichKEGG(gene = gene_down,
                       organism = "hsa",
                       universe = gene_all,
                       pvalueCutoff = 0.8,
                       qvalueCutoff = 0.8 )
  
}
head(kk.up)[,1:6]
##                ID                             Description GeneRatio  BgRatio
## hsa04110 hsa04110                              Cell cycle    24/175 126/7101
## hsa05034 hsa05034                              Alcoholism    19/175 151/7101
## hsa04114 hsa04114                          Oocyte meiosis    17/175 121/7101
## hsa05322 hsa05322            Systemic lupus erythematosus    14/175 101/7101
## hsa04613 hsa04613 Neutrophil extracellular trap formation    15/175 157/7101
## hsa04914 hsa04914 Progesterone-mediated oocyte maturation    11/175  93/7101
##                pvalue     p.adjust
## hsa04110 2.389521e-15 5.233052e-13
## hsa05034 3.951410e-09 3.646515e-07
## hsa04114 4.995227e-09 3.646515e-07
## hsa05322 1.435976e-07 7.861970e-06
## hsa04613 6.507597e-06 2.850328e-04
## hsa04914 1.570115e-05 5.730918e-04
head(kk.down)[,1:6]
##                ID                                  Description GeneRatio
## hsa04080 hsa04080      Neuroactive ligand-receptor interaction    30/236
## hsa03320 hsa03320                       PPAR signaling pathway    15/236
## hsa04923 hsa04923        Regulation of lipolysis in adipocytes    13/236
## hsa00980 hsa00980 Metabolism of xenobiotics by cytochrome P450    13/236
## hsa00830 hsa00830                           Retinol metabolism    12/236
## hsa00982 hsa00982            Drug metabolism - cytochrome P450    12/236
##           BgRatio       pvalue     p.adjust
## hsa04080 273/7101 5.310459e-09 1.174724e-06
## hsa03320  72/7101 9.141820e-09 1.174724e-06
## hsa04923  54/7101 1.433296e-08 1.227857e-06
## hsa00980  60/7101 5.595264e-08 3.594957e-06
## hsa00830  53/7101 1.094888e-07 5.627723e-06
## hsa00982  55/7101 1.697332e-07 7.270237e-06

结果可视化

suppressMessages(library(tidyverse))
suppressMessages(library(ggplot2)) 
## 数据准备
{ up_kegg <- as.data.frame(kk.up) %>% filter(pvalue < 0.05) %>% mutate(group=-1)
  down_kegg <- as.data.frame(kk.down) %>% filter(pvalue < 0.05) %>% mutate(group=1)
  dat <- rbind(up_kegg,down_kegg)
  dat$pvalue <- -log10(dat$pvalue)
  dat$pvalue <- dat$pvalue*dat$group
  
  dat <- dat %>% arrange(pvalue)
  head(dat)
}
##                ID                             Description GeneRatio  BgRatio
## hsa04110 hsa04110                              Cell cycle    24/175 126/7101
## hsa05034 hsa05034                              Alcoholism    19/175 151/7101
## hsa04114 hsa04114                          Oocyte meiosis    17/175 121/7101
## hsa05322 hsa05322            Systemic lupus erythematosus    14/175 101/7101
## hsa04613 hsa04613 Neutrophil extracellular trap formation    15/175 157/7101
## hsa04914 hsa04914 Progesterone-mediated oocyte maturation    11/175  93/7101
##              pvalue     p.adjust       qvalue
## hsa04110 -14.621689 5.233052e-13 4.980266e-13
## hsa05034  -8.403248 3.646515e-07 3.470368e-07
## hsa04114  -8.301445 3.646515e-07 3.470368e-07
## hsa05322  -6.842853 7.861970e-06 7.482192e-06
## hsa04613  -5.186579 2.850328e-04 2.712641e-04
## hsa04914  -4.804069 5.730918e-04 5.454082e-04
##                                                                                                                   geneID
## hsa04110 1870/27127/4998/23594/8318/990/1869/898/7272/991/9088/891/9700/890/1029/701/9133/995/993/9232/5347/699/983/9134
## hsa05034                2906/8970/2792/2786/51806/7054/8329/3013/8356/8351/8355/8344/8348/85235/8367/3012/8343/8336/8357
## hsa04114                            27127/6790/898/991/9088/151648/891/9700/9133/995/9232/5347/699/983/9134/51806/387778
## hsa05322                                          8970/8329/3013/8356/8351/8355/8344/8348/85235/8367/3012/8343/8336/8357
## hsa04613                                      8970/820/8329/3013/8356/8351/8355/8344/8348/85235/8367/3012/8343/8336/8357
## hsa04914                                                              6790/9088/891/890/9133/995/993/5347/699/983/387778
##          Count group
## hsa04110    24    -1
## hsa05034    19    -1
## hsa04114    17    -1
## hsa05322    14    -1
## hsa04613    15    -1
## hsa04914    11    -1
## 绘图
{
  g_kegg <- ggplot(dat,
                   aes(x=reorder(Description,order(pvalue,decreasing = F)),#将x按pvalue排序
                       y=pvalue,fill = group))+
    geom_bar(stat = "identity")+ #x与y的对应方式
    scale_fill_gradient(low="blue",high = "red",guide = FALSE)+
    scale_x_discrete(name="Pathway names")+
    scale_y_continuous(name = "log10P-value")+
    coord_flip()+#将x和y翻转
    theme_bw()+theme(plot.title = element_text(hjust = 0.5))+
    ggtitle("Pathway Enrichment")
  g_kegg
  }
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

参数说明

  • identity :“绘图函数里的stat参数表示对样本点做统计的方式,默认为identity,表示一个x对应一个y,同时还可以是bin,表示一个x对应落到该x的样本数。”说白了就是,identity提取横坐标x对应的y值,bin提取横坐标x的频数。