setwd("D:/R/R-4.0.5/bin/project_fuxian/4-TCGA")
rm(list=ls())
数据准备
Rdata_dir <- '.'
load(file=
file.path(Rdata_dir,"tnbc_paired_exprSet.Rdata")
)
dim(exprSet)
## [1] 31378 20
#group_list
head(substr(colnames(exprSet),14,15))
## [1] "01" "11" "11" "11" "11" "11"
## "01"代表肿瘤,“11”代表正常组织
group_list <- factor(ifelse(as.numeric(substr(colnames(exprSet),14,15))<10,"tumor","normal"))
exprSet <- ceiling(exprSet)
#准备好2个数据
exprSet[1:5,1:5]#原始数据,不需要log2(count+1)处理
## TCGA-E2-A1LS-01A TCGA-BH-A0E0-11A TCGA-E2-A1L7-11A
## ENSG00000000003.13 6735 3757 4226
## ENSG00000000005.5 6 254 71
## ENSG00000000419.11 2255 1586 1612
## ENSG00000000457.12 4363 1260 1450
## ENSG00000000460.15 518 198 376
## TCGA-E2-A1LS-11A TCGA-E2-A158-11A
## ENSG00000000003.13 4021 1822
## ENSG00000000005.5 1518 684
## ENSG00000000419.11 1970 882
## ENSG00000000457.12 1082 579
## ENSG00000000460.15 275 168
table(group_list)
## group_list
## normal tumor
## 10 10
group_list
## [1] tumor normal normal normal normal normal normal tumor tumor tumor
## [11] normal tumor normal tumor tumor tumor tumor tumor normal normal
## Levels: normal tumor
DESeq差异分析
if(T){
suppressMessages(library(DESeq2))
(colData <- data.frame(row.names=colnames(exprSet),
group_list=group_list) )
dds <- DESeqDataSetFromMatrix(countData = exprSet,
colData = colData,
design = ~ group_list)
tmp_f <- file.path(Rdata_dir,'TCGA-KIRC-miRNA-DESeq2-dds.Rdata')
if(!file.exists(tmp_f)){
dds <- DESeq(dds)
save(dds,file = tmp_f)
}
load(file = tmp_f)
res <- results(dds,
contrast=c("group_list","tumor","normal"))
resOrdered <- res[order(res$padj),]
head(resOrdered)
DEG =as.data.frame(resOrdered)
DESeq2_DEG = na.omit(DEG)
nrDEG=DESeq2_DEG[,c(2,6)]
colnames(nrDEG)=c('log2FoldChange','pvalue')
head(nrDEG)
}
## converting counts to integer mode
## log2FoldChange pvalue
## ENSG00000175063.15 5.358142 6.235969e-37
## ENSG00000249669.6 -5.160409 4.724624e-30
## ENSG00000115163.13 4.936797 2.027679e-29
## ENSG00000161888.10 4.081656 6.253802e-29
## ENSG00000088325.14 4.714156 1.185978e-28
## ENSG00000075218.17 4.138282 2.521373e-28
edgeR做差异分析
#BiocManager::install("edgeR")
if(T){
suppressMessages(library(edgeR))
d <- DGEList(counts=exprSet,group=factor(group_list))
keep <- rowSums(cpm(d)>1) >= 2
table(keep)
d <- d[keep, , keep.lib.sizes=FALSE]
d$samples$lib.size <- colSums(d$counts)
d <- calcNormFactors(d)
d$samples
dge=d
design <- model.matrix(~0+factor(group_list))
rownames(design)<-colnames(dge)
colnames(design)<-levels(factor(group_list))
dge=d
dge <- estimateGLMCommonDisp(dge,design)
dge <- estimateGLMTrendedDisp(dge, design)
dge <- estimateGLMTagwiseDisp(dge, design)
fit <- glmFit(dge, design)
# https://www.biostars.org/p/110861/
lrt <- glmLRT(fit, contrast=c(-1,1))
nrDEG=topTags(lrt, n=nrow(dge))
nrDEG=as.data.frame(nrDEG)
head(nrDEG)
edgeR_DEG =nrDEG
nrDEG=edgeR_DEG[,c(1,5)]
colnames(nrDEG)=c('log2FoldChange','pvalue')
}
用limma做差异分析
if(T){
suppressMessages(library(limma))
design <- model.matrix(~0+factor(group_list))
colnames(design)=levels(factor(group_list))
rownames(design)=colnames(exprSet)
design
dge <- DGEList(counts=exprSet)
dge <- calcNormFactors(dge)
logCPM <- cpm(dge, log=TRUE, prior.count=3)
v <- voom(dge,design,plot=TRUE, normalize="quantile")
fit <- lmFit(v, design)
group_list
cont.matrix=makeContrasts(contrasts=c('tumor-normal'),levels = design)
fit2=contrasts.fit(fit,cont.matrix)
fit2=eBayes(fit2)
tempOutput = topTable(fit2, coef='tumor-normal', n=Inf)
DEG_limma_voom = na.omit(tempOutput)
head(DEG_limma_voom)
nrDEG=DEG_limma_voom[,c(1,4)]
colnames(nrDEG)=c('log2FoldChange','pvalue')
}

比较三种方法的结果
nrDEG1=DEG_limma_voom[,c(1,4)]
colnames(nrDEG1)=c('log2FoldChange','pvalue')
nrDEG2=edgeR_DEG[,c(1,5)]
colnames(nrDEG2)=c('log2FoldChange','pvalue')
nrDEG3=DESeq2_DEG[,c(2,6)]
colnames(nrDEG3)=c('log2FoldChange','pvalue')
mi=unique(c(rownames(nrDEG1),rownames(nrDEG1),rownames(nrDEG1)))
lf=data.frame(limma=nrDEG1[mi,1],
edgeR=nrDEG2[mi,1],
DESeq2=nrDEG3[mi,1])
cor(na.omit(lf))
## limma edgeR DESeq2
## limma 1.0000000 0.9052743 0.9366909
## edgeR 0.9052743 1.0000000 0.9539397
## DESeq2 0.9366909 0.9539397 1.0000000