edgeR failure:
# source('http://bioconductor.org/biocLite.R')
# ... stuff
# biocLite('edgeR')
# make: *** [edgeR.so] Error 1
# ERROR: compilation failed for package ???edgeR???
# ... more stuff
DESeq failure:
# biocLite('DESeq')
# ... stuff
# ** testing if installed package can be loaded
# * DONE (locfit)
# ERROR: dependency ???XML??? is not available for package ???annotate???
# * removing
# ???/home/quadra/R/x86_64-pc-linux-gnu-library/2.15/annotate???
# ERROR: dependency ???annotate??? is not available for package
# ???genefilter???
# * removing
# ???/home/quadra/R/x86_64-pc-linux-gnu-library/2.15/genefilter???
# ERROR: dependency ???annotate??? is not available for package
# ???geneplotter???
# * removing
# ???/home/quadra/R/x86_64-pc-linux-gnu-library/2.15/geneplotter???
# ERROR: dependencies ???genefilter???, ???geneplotter??? are not
# available for package ???DESeq???
# * removing ???/home/quadra/R/x86_64-pc-linux-gnu-library/2.15/DESeq???
# ... more stuff
dat = read.table("bottomly_count_table.tsv", header = T, row.names = 1)
dim(dat)
## [1] 36536 21
colnames(dat)
## [1] "SRX033480" "SRX033488" "SRX033481" "SRX033489" "SRX033482"
## [6] "SRX033490" "SRX033483" "SRX033476" "SRX033478" "SRX033479"
## [11] "SRX033472" "SRX033473" "SRX033474" "SRX033475" "SRX033491"
## [16] "SRX033484" "SRX033492" "SRX033485" "SRX033493" "SRX033486"
## [21] "SRX033494"
des = read.table("bottomly_phenodata.tsv", header = T, row.names = 1)
dim(des)
## [1] 21 4
colnames(des)
## [1] "num.tech.reps" "strain" "experiment.number"
## [4] "lane.number"
all(rownames(des) == colnames(dat))
## [1] TRUE
library(limma)
source("./edgeR/R/classes.R") # my favourite jerry rig
source("./edgeR/R/DGEList.R")
group <- factor(c(rep("1", 10), rep("2", 11)))
dge.glm <- DGEList(counts = dat, group = group)
str(dge.glm)
## Formal class 'DGEList' [package ".GlobalEnv"] with 1 slots
## ..@ .Data:List of 2
## .. ..$ : int [1:36536, 1:21] 369 0 0 0 0 0 21 15 517 0 ...
## .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. ..$ : chr [1:36536] "ENSMUSG00000000001" "ENSMUSG00000000003" "ENSMUSG00000000028" "ENSMUSG00000000031" ...
## .. .. .. ..$ : chr [1:21] "SRX033480" "SRX033488" "SRX033481" "SRX033489" ...
## .. ..$ :'data.frame': 21 obs. of 3 variables:
## .. .. ..$ group : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
## .. .. ..$ lib.size : num [1:21] 3040296 6303665 2717092 6545795 3016179 ...
## .. .. ..$ norm.factors: num [1:21] 1 1 1 1 1 1 1 1 1 1 ...
design <- model.matrix(~group)
# design
dyn.load("./edgeR/src/R_one_group.so")
## Error: unable to load shared object '/home/quadra/Documents/stat-540-2014-durno-william/seminar7/./edgeR/src/R_one_group.so':
## /home/quadra/Documents/stat-540-2014-durno-william/seminar7/./edgeR/src/R_one_group.so: undefined symbol: _Z13glm_one_groupRKiS0_RKdPS1_S3_S2_
source("./edgeR/R/expandAsMatrix.R")
source("./edgeR/R/aveLogCPM.R")
source("./edgeR/R/mglmOneGroup.R")
source("./edgeR/R/estimateGLMCommonDisp.R")
dge.glm.com.disp <- estimateGLMCommonDisp(dge.glm, design, verbose = TRUE)
## Error: C symbol name "R_one_group" not in DLL for package "edgeR"
library.dynam()
## Filename
## 1 /usr/lib/R/library/methods/libs/methods.so
## 2 /usr/lib/R/library/grDevices/libs/grDevices.so
## 3 /usr/lib/R/library/stats/libs/stats.so
## 4 /usr/lib/R/library/tools/libs/tools.so
## 5 /home/quadra/R/x86_64-pc-linux-gnu-library/2.15/limma/libs/limma.so
## Dynamic.Lookup
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 TRUE
Uncle! Sorry, this is the best I could do. On to Voom.
# library(limma)
source("./edgeR/R/calcNormFactors.R")
design <- model.matrix(~group)
norm.factor <- calcNormFactors(dat)
dat.voomed <- voom(dat, design, plot = TRUE, lib.size = colSums(dat) * norm.factor)
names(dat.voomed)
## [1] "E" "weights" "design" "lib.size"
fit <- lmFit(dat.voomed, design)
fit <- eBayes(fit)
topTable(fit)
## ID X.Intercept. group2 AveExpr
## ENSMUSG00000025867 ENSMUSG00000025867 12.477 -0.013979 12.471
## ENSMUSG00000022892 ENSMUSG00000022892 12.268 -0.086479 12.223
## ENSMUSG00000037852 ENSMUSG00000037852 12.321 -0.237470 12.194
## ENSMUSG00000042700 ENSMUSG00000042700 10.716 -0.048029 10.692
## ENSMUSG00000029461 ENSMUSG00000029461 10.300 0.020715 10.311
## ENSMUSG00000020658 ENSMUSG00000020658 9.610 -0.019628 9.601
## ENSMUSG00000060261 ENSMUSG00000060261 9.469 -0.015743 9.461
## ENSMUSG00000032549 ENSMUSG00000032549 11.904 0.003545 11.905
## ENSMUSG00000024462 ENSMUSG00000024462 10.227 -0.138929 10.153
## ENSMUSG00000030102 ENSMUSG00000030102 12.085 -0.026149 12.073
## F P.Value adj.P.Val
## ENSMUSG00000025867 154234 2.584e-55 9.442e-51
## ENSMUSG00000022892 138459 1.103e-54 1.568e-50
## ENSMUSG00000037852 134826 1.576e-54 1.568e-50
## ENSMUSG00000042700 133973 1.717e-54 1.568e-50
## ENSMUSG00000029461 121795 6.182e-54 4.424e-50
## ENSMUSG00000020658 119935 7.604e-54 4.424e-50
## ENSMUSG00000060261 117841 9.635e-54 4.424e-50
## ENSMUSG00000032549 117324 1.022e-53 4.424e-50
## ENSMUSG00000024462 116767 1.090e-53 4.424e-50
## ENSMUSG00000030102 112155 1.874e-53 6.417e-50
Here's the proportion of dimensions significantly effected by the 'group2' effect with a q-value \( \leq \) 0.05.
sig = dim(topTable(fit, coef = "group2", number = dim(dat.voomed)[1], p.value = 0.05))[1]
sig
## [1] 875
all = dim(dat.voomed)[1]
all
## [1] 36536
sig/all
## [1] 0.02395
Due to earlier failures, a comparison between methods cannot be made. If I could have, I would have used the following commands to create a venn diagram.
vennCounts
vennDiagram