STAT 540 Seminar 8

Shaun Jackman — Mar 1, 2013, 12:17 PM

# STAT 540 Seminar 8
# Shaun Jackman

# edgeR
library(edgeR)
Loading required package: limma
dat <- read.table("../data/bottomly/bottomly_count_table.txt",
    header = TRUE, row.names = 1)
des <- read.table("../data/bottomly/bottomly_phenodata.txt",
    header = TRUE, row.names = 1)
all(rownames(des) == colnames(dat))
[1] TRUE

# GLM edgeR
table(des$strain)

C57BL/6J   DBA/2J 
      10       11 
group <- factor(c(rep("1", 10), rep(2, 11)))
dge.glm <- DGEList(counts = dat, group = group)
Calculating library sizes from column totals.
design <- model.matrix(~group)
dge.glm.com.disp <- estimateGLMCommonDisp(dge.glm, design, verbose = TRUE)
Disp = 0.03893 , BCV = 0.1973 
dge.glm.trend.disp <- estimateGLMTrendedDisp(dge.glm.com.disp)
Loading required package: splines
dge.glm.tag.disp <- estimateGLMTagwiseDisp(dge.glm.trend.disp, design)
plotBCV(dge.glm.tag.disp)

plot of chunk unnamed-chunk-1

fit <- glmFit(dge.glm.tag.disp, design)
lrt <- glmLRT(fit, coef = 2)
topTags(lrt)
Coefficient:  group2 
                    logFC logCPM    LR    PValue       FDR
ENSMUSG00000020912 -5.248  3.090 407.6 1.230e-90 4.495e-86
ENSMUSG00000050141 -5.491  2.200 309.5 2.743e-69 5.010e-65
ENSMUSG00000035775 -4.599  2.583 302.6 9.121e-68 1.111e-63
ENSMUSG00000024248 -3.164  3.409 271.5 5.301e-61 4.842e-57
ENSMUSG00000054354 -6.623  1.790 257.2 6.990e-58 5.108e-54
ENSMUSG00000050824  3.725  3.301 243.1 8.382e-55 5.104e-51
ENSMUSG00000015484 -1.968  4.273 230.4 4.843e-52 2.528e-48
ENSMUSG00000030532  1.548  5.565 219.6 1.123e-49 5.128e-46
ENSMUSG00000028393  1.803  6.103 214.5 1.444e-48 5.860e-45
ENSMUSG00000023236  1.426  7.064 207.5 4.920e-47 1.798e-43
tt.glm <- topTags(lrt, n = Inf)
nrow(tt.glm$table[tt.glm$table$FDR < 0.01, ])
[1] 566
interestingSamples <- rownames(tt.glm$table[tt.glm$table$FDR < 1e-50, ])
cpm(dge.glm.tag.disp)[interestingSamples, ]
                   SRX033480 SRX033488 SRX033481 SRX033489 SRX033482
ENSMUSG00000020912   20.3928   12.6910     15.83    14.819   19.2296
ENSMUSG00000050141   14.1434   10.1528     10.67     6.264    9.9464
ENSMUSG00000035775   14.4723   10.6287     19.51    11.305   12.9303
ENSMUSG00000024248   14.8012   26.0166     17.30    23.527   18.5665
ENSMUSG00000054354   10.8542    7.6146     11.41     8.708    8.2886
ENSMUSG00000050824    0.6578    0.6346      1.84     1.528    0.6631
                   SRX033490 SRX033483 SRX033476 SRX033478 SRX033479
ENSMUSG00000020912    17.330   26.4301    13.359   23.6211    14.310
ENSMUSG00000050141     7.890    8.3605     6.583   11.9115    12.402
ENSMUSG00000035775    12.540   12.1363     6.389   13.5266    11.210
ENSMUSG00000024248    25.784   18.8786    24.781   14.3342    14.310
ENSMUSG00000054354     7.608    7.0121     3.291    4.4416     5.963
ENSMUSG00000050824     1.409    0.2697     3.485    0.8076     1.431
                   SRX033472 SRX033473 SRX033474 SRX033475 SRX033491
ENSMUSG00000020912    0.5037    0.4226    1.0805    0.2802    1.0995
ENSMUSG00000050141    0.2518    0.0000    0.0000    0.2802    0.8246
ENSMUSG00000035775    0.7555    0.0000    0.2701    0.2802    0.6872
ENSMUSG00000024248    2.2666    1.2677    1.8908    3.3618    3.1610
ENSMUSG00000054354    0.0000    0.0000    0.0000    0.0000    0.5497
ENSMUSG00000050824   19.6437   24.2975   12.6957   15.6885   19.6531
                   SRX033484 SRX033492 SRX033485 SRX033493 SRX033486
ENSMUSG00000020912    0.0000    0.7027    0.8651    0.1362    0.0000
ENSMUSG00000050141    0.0000    0.1405    0.0000    0.1362    0.2578
ENSMUSG00000035775    0.6784    0.1405    0.5767    0.1362    1.8045
ENSMUSG00000024248    1.5829    1.6864    1.7302    1.7712    2.5779
ENSMUSG00000054354    0.0000    0.0000    0.0000    0.0000    0.0000
ENSMUSG00000050824   19.2209   13.3505   12.1117   14.5780   13.1473
                   SRX033494
ENSMUSG00000020912    0.0000
ENSMUSG00000050141    0.1477
ENSMUSG00000035775    0.5907
ENSMUSG00000024248    3.1012
ENSMUSG00000054354    0.0000
ENSMUSG00000050824   26.4336
summary(de.glm <- decideTestsDGE(lrt, p = 0.05, adjust = "BH"))
   [,1] 
-1   437
0  35698
1    401
tags.glm <- rownames(dge.glm.tag.disp)[as.logical(de.glm)]
plotSmear(lrt, de.tags = tags.glm)
abline(h = c(-2, 2), col = "blue")

plot of chunk unnamed-chunk-1

# Mini exercise:
# Redo the above analysis but first filter the data and remove any gene that
# has: 1. count equal tot zero across all samples 2. count equal to zero in at
# least one sample in each genotype group
do.edgeR <- function(dat) {
    dge.glm <- DGEList(counts = dat, group = group)
    design <- model.matrix(~group)
    dge.glm.com.disp <- estimateGLMCommonDisp(dge.glm, design, verbose = TRUE)
    dge.glm.trend.disp <- estimateGLMTrendedDisp(dge.glm.com.disp)
    dge.glm.tag.disp <- estimateGLMTagwiseDisp(dge.glm.trend.disp, design)
    plotBCV(dge.glm.tag.disp)
    fit <- glmFit(dge.glm.tag.disp, design)
    lrt <- glmLRT(fit, coef = 2)
    cat('Top genes:')
    print(topTags(lrt))
    tt.glm <- topTags(lrt, n = Inf)
    cat('Number of genes with FDR < 0.01:')
    print(sum(tt.glm$table$FDR < 0.01))
    interestingSamples <- rownames(tt.glm$table[tt.glm$table$FDR < 1e-50, ])
    cat('Interesting genes:')
    print(cpm(dge.glm.tag.disp)[interestingSamples, ])
    cat('Interesting genes with BH FDR < 0.05:')
    print(table(de.glm <- decideTestsDGE(lrt, p = 0.05, adjust = "BH")))
    tags.glm <- rownames(dge.glm.tag.disp)[as.logical(de.glm)]
    plotSmear(lrt, de.tags = tags.glm)
    abline(h = c(-2, 2), col = "blue")
    de.glm
}
edgeR.hits <- do.edgeR(subset(dat, apply(dat, 1, function(x) !all(x == 0))))
Calculating library sizes from column totals.
Disp = 0.03893 , BCV = 0.1973 

plot of chunk unnamed-chunk-1

Top genes:Coefficient:  group2 
                    logFC logCPM    LR    PValue       FDR
ENSMUSG00000020912 -5.248  3.091 404.9 4.715e-90 6.569e-86
ENSMUSG00000050141 -5.490  2.199 313.3 4.212e-70 2.934e-66
ENSMUSG00000035775 -4.599  2.583 303.2 6.482e-68 3.010e-64
ENSMUSG00000024248 -3.164  3.409 270.5 8.807e-61 3.068e-57
ENSMUSG00000054354 -6.623  1.789 259.3 2.380e-58 6.631e-55
ENSMUSG00000050824  3.725  3.301 241.4 1.996e-54 4.635e-51
ENSMUSG00000015484 -1.968  4.274 228.1 1.547e-51 3.078e-48
ENSMUSG00000030532  1.548  5.565 220.3 7.756e-50 1.351e-46
ENSMUSG00000028393  1.803  6.103 216.3 5.703e-49 8.828e-46
ENSMUSG00000023236  1.426  7.064 210.8 9.040e-48 1.259e-44
Number of genes with FDR < 0.01:[1] 696
Interesting genes:                   SRX033480 SRX033488 SRX033481 SRX033489 SRX033482
ENSMUSG00000020912   20.3928   12.6910     15.83    14.819   19.2296
ENSMUSG00000050141   14.1434   10.1528     10.67     6.264    9.9464
ENSMUSG00000035775   14.4723   10.6287     19.51    11.305   12.9303
ENSMUSG00000024248   14.8012   26.0166     17.30    23.527   18.5665
ENSMUSG00000054354   10.8542    7.6146     11.41     8.708    8.2886
ENSMUSG00000050824    0.6578    0.6346      1.84     1.528    0.6631
                   SRX033490 SRX033483 SRX033476 SRX033478 SRX033479
ENSMUSG00000020912    17.330   26.4301    13.359   23.6211    14.310
ENSMUSG00000050141     7.890    8.3605     6.583   11.9115    12.402
ENSMUSG00000035775    12.540   12.1363     6.389   13.5266    11.210
ENSMUSG00000024248    25.784   18.8786    24.781   14.3342    14.310
ENSMUSG00000054354     7.608    7.0121     3.291    4.4416     5.963
ENSMUSG00000050824     1.409    0.2697     3.485    0.8076     1.431
                   SRX033472 SRX033473 SRX033474 SRX033475 SRX033491
ENSMUSG00000020912    0.5037    0.4226    1.0805    0.2802    1.0995
ENSMUSG00000050141    0.2518    0.0000    0.0000    0.2802    0.8246
ENSMUSG00000035775    0.7555    0.0000    0.2701    0.2802    0.6872
ENSMUSG00000024248    2.2666    1.2677    1.8908    3.3618    3.1610
ENSMUSG00000054354    0.0000    0.0000    0.0000    0.0000    0.5497
ENSMUSG00000050824   19.6437   24.2975   12.6957   15.6885   19.6531
                   SRX033484 SRX033492 SRX033485 SRX033493 SRX033486
ENSMUSG00000020912    0.0000    0.7027    0.8651    0.1362    0.0000
ENSMUSG00000050141    0.0000    0.1405    0.0000    0.1362    0.2578
ENSMUSG00000035775    0.6784    0.1405    0.5767    0.1362    1.8045
ENSMUSG00000024248    1.5829    1.6864    1.7302    1.7712    2.5779
ENSMUSG00000054354    0.0000    0.0000    0.0000    0.0000    0.0000
ENSMUSG00000050824   19.2209   13.3505   12.1117   14.5780   13.1473
                   SRX033494
ENSMUSG00000020912    0.0000
ENSMUSG00000050141    0.1477
ENSMUSG00000035775    0.5907
ENSMUSG00000024248    3.1012
ENSMUSG00000054354    0.0000
ENSMUSG00000050824   26.4336
Interesting genes with BH FDR < 0.05:
   -1     0     1 
  591 12762   579 

plot of chunk unnamed-chunk-1

edgeR.hits <- do.edgeR(subset(dat, apply(dat, 1, function(x) !any(x == 0))))
Calculating library sizes from column totals.
Disp = 0.03704 , BCV = 0.1925 

plot of chunk unnamed-chunk-1

Top genes:Coefficient:  group2 
                    logFC logCPM    LR    PValue       FDR
ENSMUSG00000024248 -3.164  3.412 274.4 1.216e-61 1.119e-57
ENSMUSG00000050824  3.724  3.304 246.2 1.759e-55 8.090e-52
ENSMUSG00000015484 -1.969  4.276 236.1 2.828e-53 8.668e-50
ENSMUSG00000023236  1.426  7.067 218.0 2.503e-49 5.753e-46
ENSMUSG00000030532  1.547  5.568 214.1 1.762e-48 3.241e-45
ENSMUSG00000028393  1.803  6.106 211.1 7.905e-48 1.212e-44
ENSMUSG00000015852 -2.371  3.068 191.8 1.264e-43 1.661e-40
ENSMUSG00000066800 -2.328  3.467 176.8 2.402e-40 2.761e-37
ENSMUSG00000026697 -3.725  3.097 175.1 5.630e-40 5.753e-37
ENSMUSG00000072572 -2.016  3.661 170.7 5.289e-39 4.864e-36
Number of genes with FDR < 0.01:[1] 533
Interesting genes:                   SRX033480 SRX033488 SRX033481 SRX033489 SRX033482
ENSMUSG00000024248   14.8221   26.0799    17.323     23.57   18.5943
ENSMUSG00000050824    0.6588    0.6361     1.843      1.53    0.6641
                   SRX033490 SRX033483 SRX033476 SRX033478 SRX033479
ENSMUSG00000024248    25.834   18.9053    24.832   14.3890    14.333
ENSMUSG00000050824     1.412    0.2701     3.492    0.8106     1.433
                   SRX033472 SRX033473 SRX033474 SRX033475 SRX033491
ENSMUSG00000024248      2.27      1.27     1.894     3.367     3.166
ENSMUSG00000050824     19.68     24.33    12.715    15.711    19.683
                   SRX033484 SRX033492 SRX033485 SRX033493 SRX033486
ENSMUSG00000024248     1.586     1.689     1.733     1.775     2.582
ENSMUSG00000050824    19.256    13.370    12.132    14.606    13.167
                   SRX033494
ENSMUSG00000024248     3.108
ENSMUSG00000050824    26.490
Interesting genes with BH FDR < 0.05:
  -1    0    1 
 474 8249  473 

plot of chunk unnamed-chunk-1


# DESeq
library(DESeq)
Loading required package: Biobase
Loading required package: BiocGenerics
Attaching package: 'BiocGenerics'
The following object(s) are masked from 'package:stats':

xtabs
The following object(s) are masked from 'package:base':

anyDuplicated, cbind, colnames, duplicated, eval, Filter, Find, get,
intersect, lapply, Map, mapply, mget, order, paste, pmax, pmax.int, pmin,
pmin.int, Position, rbind, Reduce, rep.int, rownames, sapply, setdiff,
table, tapply, union, unique
Welcome to Bioconductor

Vignettes contain introductory material; view with 'browseVignettes()'. To
cite Bioconductor, see 'citation("Biobase")', and for packages
'citation("pkgname")'.
Loading required package: locfit
locfit 1.5-8 2012-04-25
Loading required package: lattice
Attaching package: 'DESeq'
The following object(s) are masked from 'package:limma':

plotMA
deSeqDat <- newCountDataSet(dat, group)
deSeqDat <- estimateSizeFactors(deSeqDat)
deSeqDat <- estimateDispersions(deSeqDat)
plotDispEsts(deSeqDat)

plot of chunk unnamed-chunk-1

deseq.results <- nbinomTest(deSeqDat, levels(group)[1], levels(group)[2])
plotMA(deseq.results)

plot of chunk unnamed-chunk-1


# Voom & limma
library(limma)
dat.voomed <- voom(dat, design)
fit <- eBayes(lmFit(dat.voomed, design))

# Take Home Problem
# Choose a specific threshold for the adjusted p value, find the genes
# identified as differentially expressed using each of edgeR, DESeq and
# voom+limma. Compare the number of genes in these 3 lists, and draw a venn
# digram demonstrating the overlap (if any!).
edgeR.hits <- do.edgeR(dat)
Calculating library sizes from column totals.
Disp = 0.03893 , BCV = 0.1973 

plot of chunk unnamed-chunk-1

Top genes:Coefficient:  group2 
                    logFC logCPM    LR    PValue       FDR
ENSMUSG00000020912 -5.248  3.090 407.6 1.230e-90 4.495e-86
ENSMUSG00000050141 -5.491  2.200 309.5 2.743e-69 5.010e-65
ENSMUSG00000035775 -4.599  2.583 302.6 9.121e-68 1.111e-63
ENSMUSG00000024248 -3.164  3.409 271.5 5.301e-61 4.842e-57
ENSMUSG00000054354 -6.623  1.790 257.2 6.990e-58 5.108e-54
ENSMUSG00000050824  3.725  3.301 243.1 8.382e-55 5.104e-51
ENSMUSG00000015484 -1.968  4.273 230.4 4.843e-52 2.528e-48
ENSMUSG00000030532  1.548  5.565 219.6 1.123e-49 5.128e-46
ENSMUSG00000028393  1.803  6.103 214.5 1.444e-48 5.860e-45
ENSMUSG00000023236  1.426  7.064 207.5 4.920e-47 1.798e-43
Number of genes with FDR < 0.01:[1] 566
Interesting genes:                   SRX033480 SRX033488 SRX033481 SRX033489 SRX033482
ENSMUSG00000020912   20.3928   12.6910     15.83    14.819   19.2296
ENSMUSG00000050141   14.1434   10.1528     10.67     6.264    9.9464
ENSMUSG00000035775   14.4723   10.6287     19.51    11.305   12.9303
ENSMUSG00000024248   14.8012   26.0166     17.30    23.527   18.5665
ENSMUSG00000054354   10.8542    7.6146     11.41     8.708    8.2886
ENSMUSG00000050824    0.6578    0.6346      1.84     1.528    0.6631
                   SRX033490 SRX033483 SRX033476 SRX033478 SRX033479
ENSMUSG00000020912    17.330   26.4301    13.359   23.6211    14.310
ENSMUSG00000050141     7.890    8.3605     6.583   11.9115    12.402
ENSMUSG00000035775    12.540   12.1363     6.389   13.5266    11.210
ENSMUSG00000024248    25.784   18.8786    24.781   14.3342    14.310
ENSMUSG00000054354     7.608    7.0121     3.291    4.4416     5.963
ENSMUSG00000050824     1.409    0.2697     3.485    0.8076     1.431
                   SRX033472 SRX033473 SRX033474 SRX033475 SRX033491
ENSMUSG00000020912    0.5037    0.4226    1.0805    0.2802    1.0995
ENSMUSG00000050141    0.2518    0.0000    0.0000    0.2802    0.8246
ENSMUSG00000035775    0.7555    0.0000    0.2701    0.2802    0.6872
ENSMUSG00000024248    2.2666    1.2677    1.8908    3.3618    3.1610
ENSMUSG00000054354    0.0000    0.0000    0.0000    0.0000    0.5497
ENSMUSG00000050824   19.6437   24.2975   12.6957   15.6885   19.6531
                   SRX033484 SRX033492 SRX033485 SRX033493 SRX033486
ENSMUSG00000020912    0.0000    0.7027    0.8651    0.1362    0.0000
ENSMUSG00000050141    0.0000    0.1405    0.0000    0.1362    0.2578
ENSMUSG00000035775    0.6784    0.1405    0.5767    0.1362    1.8045
ENSMUSG00000024248    1.5829    1.6864    1.7302    1.7712    2.5779
ENSMUSG00000054354    0.0000    0.0000    0.0000    0.0000    0.0000
ENSMUSG00000050824   19.2209   13.3505   12.1117   14.5780   13.1473
                   SRX033494
ENSMUSG00000020912    0.0000
ENSMUSG00000050141    0.1477
ENSMUSG00000035775    0.5907
ENSMUSG00000024248    3.1012
ENSMUSG00000054354    0.0000
ENSMUSG00000050824   26.4336
Interesting genes with BH FDR < 0.05:
   -1     0     1 
  437 35698   401 

plot of chunk unnamed-chunk-1

table(deseq.results$padj < 0.05)

FALSE  TRUE 
13353   579 
voom.hits <- topTable(fit, coef='group2', num=Inf, sort.by='none')
table(voom.hits$adj.P.Val < 0.05)

FALSE  TRUE 
35663   873 
results <- data.frame(
    edgeR = edgeR.hits != 0,
    DESeq = deseq.results$padj < 0.05, 
    voom.limma = voom.hits$adj.P.Val < 0.05)
table(results)
, , voom.limma = FALSE

       DESeq
edgeR   FALSE  TRUE
  FALSE 12943     8
  TRUE     92    16

, , voom.limma = TRUE

       DESeq
edgeR   FALSE  TRUE
  FALSE   141     2
  TRUE    177   553
vennDiagram(results)

plot of chunk unnamed-chunk-1