数据准备
setwd("D:/R/R-4.0.5/bin/project_fuxian/4-TCGA")
suppressMessages(library(clusterProfiler))
suppressMessages(library("org.Hs.eg.db"))
load("nrDEG.Rdata")
head(nrDEG)
## ENSEMBL baseMean logFC lfcSE stat P.Value
## 1 ENSG00000000003 4052.6463 -0.04072695 0.2764914 -0.1472992 8.828959e-01
## 2 ENSG00000000005 357.6960 -5.36970681 0.6658144 -8.0648703 7.331365e-16
## 3 ENSG00000000419 2017.2067 0.56738319 0.1361133 4.1684626 3.066611e-05
## 4 ENSG00000000457 1678.1434 0.63459942 0.2955487 2.1471907 3.177811e-02
## 5 ENSG00000000460 627.2378 1.57124661 0.2455491 6.3989108 1.564892e-10
## 6 ENSG00000000938 654.3861 0.31068003 0.4309338 0.7209461 4.709427e-01
## padj SYMBOL ENTREZID
## 1 9.350763e-01 ENSG00000000003 7105
## 2 1.045653e-13 ENSG00000000005 64102
## 3 3.140474e-04 ENSG00000000419 8813
## 4 9.085836e-02 ENSG00000000457 57147
## 5 7.577651e-09 ENSG00000000460 55732
## 6 6.348910e-01 ENSG00000000938 2268
logFC_cutoff <- with(nrDEG,mean(abs(logFC)+2*sd(abs(logFC))))
nrDEG$change <- as.factor(with(nrDEG,ifelse(P.Value < 0.01 & abs(logFC) > logFC_cutoff,ifelse(logFC > logFC_cutoff,"up","down"),"not")))
table(nrDEG$change)
##
## down not up
## 613 21192 564
{
gene_up <-nrDEG[nrDEG$change == "up","ENTREZID"]
gene_down <- nrDEG[nrDEG$change == "down","ENTREZID"]
gene_dif <- c(gene_up,gene_down)
gene_all <- as.character(nrDEG[,"ENTREZID"])#背景基因:全部基因
}
{
genelist <- nrDEG$logFC
names(genelist) <- nrDEG$ENTREZID
genelist <- sort(genelist,decreasing = T)
}
head(gene_up)
## [1] "30812" "1750" "1870" "54443" "2556" "10460"
head(gene_all)
## [1] "7105" "64102" "8813" "57147" "55732" "2268"
KEGG 富集分析
{kk.up <- enrichKEGG( gene =gene_up,
organism = "hsa",
universe = gene_all,
pvalueCutoff = 0.8,
qvalueCutoff = 0.8 )
}
## Reading KEGG annotation online:
##
## Reading KEGG annotation online:
{kk.down <- enrichKEGG(gene = gene_down,
organism = "hsa",
universe = gene_all,
pvalueCutoff = 0.8,
qvalueCutoff = 0.8 )
}
head(kk.up)[,1:6]
## ID Description GeneRatio BgRatio
## hsa04110 hsa04110 Cell cycle 24/175 126/7101
## hsa05034 hsa05034 Alcoholism 19/175 151/7101
## hsa04114 hsa04114 Oocyte meiosis 17/175 121/7101
## hsa05322 hsa05322 Systemic lupus erythematosus 14/175 101/7101
## hsa04613 hsa04613 Neutrophil extracellular trap formation 15/175 157/7101
## hsa04914 hsa04914 Progesterone-mediated oocyte maturation 11/175 93/7101
## pvalue p.adjust
## hsa04110 2.389521e-15 5.233052e-13
## hsa05034 3.951410e-09 3.646515e-07
## hsa04114 4.995227e-09 3.646515e-07
## hsa05322 1.435976e-07 7.861970e-06
## hsa04613 6.507597e-06 2.850328e-04
## hsa04914 1.570115e-05 5.730918e-04
head(kk.down)[,1:6]
## ID Description GeneRatio
## hsa04080 hsa04080 Neuroactive ligand-receptor interaction 30/236
## hsa03320 hsa03320 PPAR signaling pathway 15/236
## hsa04923 hsa04923 Regulation of lipolysis in adipocytes 13/236
## hsa00980 hsa00980 Metabolism of xenobiotics by cytochrome P450 13/236
## hsa00830 hsa00830 Retinol metabolism 12/236
## hsa00982 hsa00982 Drug metabolism - cytochrome P450 12/236
## BgRatio pvalue p.adjust
## hsa04080 273/7101 5.310459e-09 1.174724e-06
## hsa03320 72/7101 9.141820e-09 1.174724e-06
## hsa04923 54/7101 1.433296e-08 1.227857e-06
## hsa00980 60/7101 5.595264e-08 3.594957e-06
## hsa00830 53/7101 1.094888e-07 5.627723e-06
## hsa00982 55/7101 1.697332e-07 7.270237e-06
结果可视化
suppressMessages(library(tidyverse))
suppressMessages(library(ggplot2))
## 数据准备
{ up_kegg <- as.data.frame(kk.up) %>% filter(pvalue < 0.05) %>% mutate(group=-1)
down_kegg <- as.data.frame(kk.down) %>% filter(pvalue < 0.05) %>% mutate(group=1)
dat <- rbind(up_kegg,down_kegg)
dat$pvalue <- -log10(dat$pvalue)
dat$pvalue <- dat$pvalue*dat$group
dat <- dat %>% arrange(pvalue)
head(dat)
}
## ID Description GeneRatio BgRatio
## hsa04110 hsa04110 Cell cycle 24/175 126/7101
## hsa05034 hsa05034 Alcoholism 19/175 151/7101
## hsa04114 hsa04114 Oocyte meiosis 17/175 121/7101
## hsa05322 hsa05322 Systemic lupus erythematosus 14/175 101/7101
## hsa04613 hsa04613 Neutrophil extracellular trap formation 15/175 157/7101
## hsa04914 hsa04914 Progesterone-mediated oocyte maturation 11/175 93/7101
## pvalue p.adjust qvalue
## hsa04110 -14.621689 5.233052e-13 4.980266e-13
## hsa05034 -8.403248 3.646515e-07 3.470368e-07
## hsa04114 -8.301445 3.646515e-07 3.470368e-07
## hsa05322 -6.842853 7.861970e-06 7.482192e-06
## hsa04613 -5.186579 2.850328e-04 2.712641e-04
## hsa04914 -4.804069 5.730918e-04 5.454082e-04
## geneID
## hsa04110 1870/27127/4998/23594/8318/990/1869/898/7272/991/9088/891/9700/890/1029/701/9133/995/993/9232/5347/699/983/9134
## hsa05034 2906/8970/2792/2786/51806/7054/8329/3013/8356/8351/8355/8344/8348/85235/8367/3012/8343/8336/8357
## hsa04114 27127/6790/898/991/9088/151648/891/9700/9133/995/9232/5347/699/983/9134/51806/387778
## hsa05322 8970/8329/3013/8356/8351/8355/8344/8348/85235/8367/3012/8343/8336/8357
## hsa04613 8970/820/8329/3013/8356/8351/8355/8344/8348/85235/8367/3012/8343/8336/8357
## hsa04914 6790/9088/891/890/9133/995/993/5347/699/983/387778
## Count group
## hsa04110 24 -1
## hsa05034 19 -1
## hsa04114 17 -1
## hsa05322 14 -1
## hsa04613 15 -1
## hsa04914 11 -1
## 绘图
{
g_kegg <- ggplot(dat,
aes(x=reorder(Description,order(pvalue,decreasing = F)),#将x按pvalue排序
y=pvalue,fill = group))+
geom_bar(stat = "identity")+ #x与y的对应方式
scale_fill_gradient(low="blue",high = "red",guide = FALSE)+
scale_x_discrete(name="Pathway names")+
scale_y_continuous(name = "log10P-value")+
coord_flip()+#将x和y翻转
theme_bw()+theme(plot.title = element_text(hjust = 0.5))+
ggtitle("Pathway Enrichment")
g_kegg
}
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

参数说明
- identity :“绘图函数里的stat参数表示对样本点做统计的方式,默认为identity,表示一个x对应一个y,同时还可以是bin,表示一个x对应落到该x的样本数。”说白了就是,identity提取横坐标x对应的y值,bin提取横坐标x的频数。