Seminar 7

W. Evan Durno

Guided analysis

Library issues

edgeR failure:

# source('http://bioconductor.org/biocLite.R')

# ... stuff

# biocLite('edgeR')

# make: *** [edgeR.so] Error 1

# ERROR: compilation failed for package ???edgeR???

# ... more stuff

DESeq failure:

# biocLite('DESeq')

# ... stuff

# ** testing if installed package can be loaded

# * DONE (locfit)

# ERROR: dependency ???XML??? is not available for package ???annotate???

# * removing
# ???/home/quadra/R/x86_64-pc-linux-gnu-library/2.15/annotate???

# ERROR: dependency ???annotate??? is not available for package
# ???genefilter???

# * removing
# ???/home/quadra/R/x86_64-pc-linux-gnu-library/2.15/genefilter???

# ERROR: dependency ???annotate??? is not available for package
# ???geneplotter???

# * removing
# ???/home/quadra/R/x86_64-pc-linux-gnu-library/2.15/geneplotter???

# ERROR: dependencies ???genefilter???, ???geneplotter??? are not
# available for package ???DESeq???

# * removing ???/home/quadra/R/x86_64-pc-linux-gnu-library/2.15/DESeq???

# ... more stuff

Loading data

dat = read.table("bottomly_count_table.tsv", header = T, row.names = 1)
dim(dat)
## [1] 36536    21
colnames(dat)
##  [1] "SRX033480" "SRX033488" "SRX033481" "SRX033489" "SRX033482"
##  [6] "SRX033490" "SRX033483" "SRX033476" "SRX033478" "SRX033479"
## [11] "SRX033472" "SRX033473" "SRX033474" "SRX033475" "SRX033491"
## [16] "SRX033484" "SRX033492" "SRX033485" "SRX033493" "SRX033486"
## [21] "SRX033494"
des = read.table("bottomly_phenodata.tsv", header = T, row.names = 1)
dim(des)
## [1] 21  4
colnames(des)
## [1] "num.tech.reps"     "strain"            "experiment.number"
## [4] "lane.number"
all(rownames(des) == colnames(dat))
## [1] TRUE

GLM edgeR

library(limma)
source("./edgeR/R/classes.R")  # my favourite jerry rig
source("./edgeR/R/DGEList.R")
group <- factor(c(rep("1", 10), rep("2", 11)))
dge.glm <- DGEList(counts = dat, group = group)
str(dge.glm)
## Formal class 'DGEList' [package ".GlobalEnv"] with 1 slots
##   ..@ .Data:List of 2
##   .. ..$ : int [1:36536, 1:21] 369 0 0 0 0 0 21 15 517 0 ...
##   .. .. ..- attr(*, "dimnames")=List of 2
##   .. .. .. ..$ : chr [1:36536] "ENSMUSG00000000001" "ENSMUSG00000000003" "ENSMUSG00000000028" "ENSMUSG00000000031" ...
##   .. .. .. ..$ : chr [1:21] "SRX033480" "SRX033488" "SRX033481" "SRX033489" ...
##   .. ..$ :'data.frame':  21 obs. of  3 variables:
##   .. .. ..$ group       : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
##   .. .. ..$ lib.size    : num [1:21] 3040296 6303665 2717092 6545795 3016179 ...
##   .. .. ..$ norm.factors: num [1:21] 1 1 1 1 1 1 1 1 1 1 ...
design <- model.matrix(~group)
# design
dyn.load("./edgeR/src/R_one_group.so")
## Error: unable to load shared object '/home/quadra/Documents/stat-540-2014-durno-william/seminar7/./edgeR/src/R_one_group.so':
##   /home/quadra/Documents/stat-540-2014-durno-william/seminar7/./edgeR/src/R_one_group.so: undefined symbol: _Z13glm_one_groupRKiS0_RKdPS1_S3_S2_
source("./edgeR/R/expandAsMatrix.R")
source("./edgeR/R/aveLogCPM.R")
source("./edgeR/R/mglmOneGroup.R")
source("./edgeR/R/estimateGLMCommonDisp.R")
dge.glm.com.disp <- estimateGLMCommonDisp(dge.glm, design, verbose = TRUE)
## Error: C symbol name "R_one_group" not in DLL for package "edgeR"
library.dynam()
##                                                              Filename
## 1                          /usr/lib/R/library/methods/libs/methods.so
## 2                      /usr/lib/R/library/grDevices/libs/grDevices.so
## 3                              /usr/lib/R/library/stats/libs/stats.so
## 4                              /usr/lib/R/library/tools/libs/tools.so
## 5 /home/quadra/R/x86_64-pc-linux-gnu-library/2.15/limma/libs/limma.so
##   Dynamic.Lookup
## 1          FALSE
## 2          FALSE
## 3          FALSE
## 4          FALSE
## 5           TRUE

Uncle! Sorry, this is the best I could do. On to Voom.

Voom & limma

# library(limma)
source("./edgeR/R/calcNormFactors.R")
design <- model.matrix(~group)
norm.factor <- calcNormFactors(dat)
dat.voomed <- voom(dat, design, plot = TRUE, lib.size = colSums(dat) * norm.factor)

plot of chunk unnamed-chunk-5

names(dat.voomed)
## [1] "E"        "weights"  "design"   "lib.size"
fit <- lmFit(dat.voomed, design)
fit <- eBayes(fit)
topTable(fit)
##                                    ID X.Intercept.    group2 AveExpr
## ENSMUSG00000025867 ENSMUSG00000025867       12.477 -0.013979  12.471
## ENSMUSG00000022892 ENSMUSG00000022892       12.268 -0.086479  12.223
## ENSMUSG00000037852 ENSMUSG00000037852       12.321 -0.237470  12.194
## ENSMUSG00000042700 ENSMUSG00000042700       10.716 -0.048029  10.692
## ENSMUSG00000029461 ENSMUSG00000029461       10.300  0.020715  10.311
## ENSMUSG00000020658 ENSMUSG00000020658        9.610 -0.019628   9.601
## ENSMUSG00000060261 ENSMUSG00000060261        9.469 -0.015743   9.461
## ENSMUSG00000032549 ENSMUSG00000032549       11.904  0.003545  11.905
## ENSMUSG00000024462 ENSMUSG00000024462       10.227 -0.138929  10.153
## ENSMUSG00000030102 ENSMUSG00000030102       12.085 -0.026149  12.073
##                         F   P.Value adj.P.Val
## ENSMUSG00000025867 154234 2.584e-55 9.442e-51
## ENSMUSG00000022892 138459 1.103e-54 1.568e-50
## ENSMUSG00000037852 134826 1.576e-54 1.568e-50
## ENSMUSG00000042700 133973 1.717e-54 1.568e-50
## ENSMUSG00000029461 121795 6.182e-54 4.424e-50
## ENSMUSG00000020658 119935 7.604e-54 4.424e-50
## ENSMUSG00000060261 117841 9.635e-54 4.424e-50
## ENSMUSG00000032549 117324 1.022e-53 4.424e-50
## ENSMUSG00000024462 116767 1.090e-53 4.424e-50
## ENSMUSG00000030102 112155 1.874e-53 6.417e-50

Unguided analysis

Here's the proportion of dimensions significantly effected by the 'group2' effect with a q-value \( \leq \) 0.05.

sig = dim(topTable(fit, coef = "group2", number = dim(dat.voomed)[1], p.value = 0.05))[1]
sig
## [1] 875
all = dim(dat.voomed)[1]
all
## [1] 36536
sig/all
## [1] 0.02395

Due to earlier failures, a comparison between methods cannot be made. If I could have, I would have used the following commands to create a venn diagram.

vennCounts
vennDiagram