seminar7.R

Huiting — Mar 30, 2014, 3:26 PM

library(edgeR)
Loading required package: limma
dat <- read.table("bottomly_count_table.tsv", header = TRUE, 
                  row.names = 1)
des <- read.table("bottomly_phenodata.tsv", header = TRUE, 
                  row.names = 1)
str(dat)
'data.frame':   36536 obs. of  21 variables:
 $ SRX033480: int  369 0 0 0 0 0 21 15 517 0 ...
 $ SRX033488: int  744 0 1 0 1 1 46 43 874 0 ...
 $ SRX033481: int  287 0 0 0 1 0 20 12 340 0 ...
 $ SRX033489: int  769 0 1 0 5 1 36 34 813 0 ...
 $ SRX033482: int  348 0 1 0 0 0 12 14 378 0 ...
 $ SRX033490: int  803 0 1 0 4 0 55 32 860 0 ...
 $ SRX033483: int  433 0 0 0 0 0 27 19 528 0 ...
 $ SRX033476: int  469 0 7 0 0 0 44 18 401 0 ...
 $ SRX033478: int  585 0 6 0 0 0 32 44 584 0 ...
 $ SRX033479: int  321 0 1 0 0 0 47 22 401 0 ...
 $ SRX033472: int  301 0 1 0 4 0 40 17 331 0 ...
 $ SRX033473: int  461 0 1 0 1 0 40 24 431 0 ...
 $ SRX033474: int  309 0 1 0 1 0 30 29 341 0 ...
 $ SRX033475: int  374 0 1 0 0 0 27 15 480 0 ...
 $ SRX033491: int  781 0 1 0 1 0 46 34 930 0 ...
 $ SRX033484: int  555 0 2 0 2 0 28 23 585 0 ...
 $ SRX033492: int  820 0 1 0 1 0 40 38 1137 0 ...
 $ SRX033485: int  294 0 1 0 1 0 21 17 490 0 ...
 $ SRX033493: int  758 0 4 0 1 0 52 29 1079 0 ...
 $ SRX033486: int  419 0 1 0 1 0 27 12 565 0 ...
 $ SRX033494: int  857 0 5 0 2 0 45 28 726 0 ...
show(des)
          num.tech.reps   strain experiment.number lane.number
SRX033480             1 C57BL/6J                 6           1
SRX033488             1 C57BL/6J                 7           1
SRX033481             1 C57BL/6J                 6           2
SRX033489             1 C57BL/6J                 7           2
SRX033482             1 C57BL/6J                 6           3
SRX033490             1 C57BL/6J                 7           3
SRX033483             1 C57BL/6J                 6           5
SRX033476             1 C57BL/6J                 4           6
SRX033478             1 C57BL/6J                 4           7
SRX033479             1 C57BL/6J                 4           8
SRX033472             1   DBA/2J                 4           1
SRX033473             1   DBA/2J                 4           2
SRX033474             1   DBA/2J                 4           3
SRX033475             1   DBA/2J                 4           5
SRX033491             1   DBA/2J                 7           5
SRX033484             1   DBA/2J                 6           6
SRX033492             1   DBA/2J                 7           6
SRX033485             1   DBA/2J                 6           7
SRX033493             1   DBA/2J                 7           7
SRX033486             1   DBA/2J                 6           8
SRX033494             1   DBA/2J                 7           8
all(rownames(des) == colnames(dat))
[1] TRUE
with(des, table(strain))
strain
C57BL/6J   DBA/2J 
      10       11 

group <- factor(c(rep("1", 10), rep("2", 11)))
group
 [1] 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
Levels: 1 2

# this produces an object of type DGEList with can be manipulated in a
# similar way to any other list object in R
dge.glm <- DGEList(counts = dat, group = group)
names(dge.glm)
[1] "counts"  "samples"
dge.glm[["samples"]]
          group lib.size norm.factors
SRX033480     1  3040296            1
SRX033488     1  6303665            1
SRX033481     1  2717092            1
SRX033489     1  6545795            1
SRX033482     1  3016179            1
SRX033490     1  7097379            1
SRX033483     1  3707895            1
SRX033476     1  5165144            1
SRX033478     1  4953201            1
SRX033479     1  4192872            1
SRX033472     2  3970729            1
SRX033473     2  4733003            1
SRX033474     2  3702051            1
SRX033475     2  3569483            1
SRX033491     2  7276198            1
SRX033484     2  4422272            1
SRX033492     2  7115851            1
SRX033485     2  3467730            1
SRX033493     2  7339817            1
SRX033486     2  3879114            1
SRX033494     2  6771680            1
nrow(dge.glm[[1]])
[1] 36536
ncol(dge.glm[[1]])
[1] 21
str(dge.glm)
Formal class 'DGEList' [package "edgeR"] with 1 slots
  ..@ .Data:List of 2
  .. ..$ : int [1:36536, 1:21] 369 0 0 0 0 0 21 15 517 0 ...
  .. .. ..- attr(*, "dimnames")=List of 2
  .. .. .. ..$ : chr [1:36536] "ENSMUSG00000000001" "ENSMUSG00000000003" "ENSMUSG00000000028" "ENSMUSG00000000031" ...
  .. .. .. ..$ : chr [1:21] "SRX033480" "SRX033488" "SRX033481" "SRX033489" ...
  .. ..$ :'data.frame': 21 obs. of  3 variables:
  .. .. ..$ group       : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
  .. .. ..$ lib.size    : num [1:21] 3040296 6303665 2717092 6545795 3016179 ...
  .. .. ..$ norm.factors: num [1:21] 1 1 1 1 1 1 1 1 1 1 ...
design <- model.matrix(~group)
design
   (Intercept) group2
1            1      0
2            1      0
3            1      0
4            1      0
5            1      0
6            1      0
7            1      0
8            1      0
9            1      0
10           1      0
11           1      1
12           1      1
13           1      1
14           1      1
15           1      1
16           1      1
17           1      1
18           1      1
19           1      1
20           1      1
21           1      1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
dge.glm.com.disp <- estimateGLMCommonDisp(dge.glm, design, verbose = TRUE)
Disp = 0.03893 , BCV = 0.1973 
dge.glm.trend.disp <- estimateGLMTrendedDisp(dge.glm.com.disp, design)
Loading required package: splines
dge.glm.tag.disp <- estimateGLMTagwiseDisp(dge.glm.trend.disp, design)
# plot the tagwise dispersion against log2-CPM (counts per million)
plotBCV(dge.glm.tag.disp)

plot of chunk unnamed-chunk-1

fit <- glmFit(dge.glm.tag.disp, design)
colnames(coef(fit))
[1] "(Intercept)" "group2"     
lrt <- glmLRT(fit, coef = 2)
topTags(lrt)
Coefficient:  group2 
                    logFC logCPM    LR    PValue       FDR
ENSMUSG00000020912 -5.187  3.155 401.8 2.238e-89 8.177e-85
ENSMUSG00000050141 -5.363  2.319 311.6 9.787e-70 1.788e-65
ENSMUSG00000035775 -4.543  2.674 298.0 9.042e-67 1.101e-62
ENSMUSG00000015484 -1.968  4.307 282.5 2.113e-63 1.930e-59
ENSMUSG00000024248 -3.152  3.463 277.0 3.337e-62 2.438e-58
ENSMUSG00000030532  1.547  5.572 269.4 1.568e-60 9.548e-57
ENSMUSG00000054354 -6.283  1.940 249.3 3.657e-56 1.909e-52
ENSMUSG00000023236  1.426  7.067 245.0 3.133e-55 1.431e-51
ENSMUSG00000050824  3.705  3.365 229.7 7.099e-52 2.882e-48
ENSMUSG00000015852 -2.362  3.138 214.8 1.210e-48 4.049e-45
tt.glm <- topTags(lrt, n = Inf)
class(tt.glm)
[1] "TopTags"
attr(,"package")
[1] "edgeR"
nrow(tt.glm$table[tt.glm$table$FDR < 0.01, ])
[1] 600
interestingSamples <- rownames(tt.glm$table[tt.glm$table$FDR < 1e-50, ])
cpm(dge.glm.tag.disp)[interestingSamples, ]
                   SRX033480 SRX033488 SRX033481 SRX033489 SRX033482
ENSMUSG00000020912     20.39    12.691     15.83    14.819    19.230
ENSMUSG00000050141     14.14    10.153     10.67     6.264     9.946
ENSMUSG00000035775     14.47    10.629     19.51    11.305    12.930
ENSMUSG00000015484     26.97    33.949     32.76    36.206    31.497
ENSMUSG00000024248     14.80    26.017     17.30    23.527    18.567
ENSMUSG00000030532     27.96    24.906     21.35    25.360    18.567
ENSMUSG00000054354     10.85     7.615     11.41     8.708     8.289
ENSMUSG00000023236     65.45    78.684     72.50    72.413    76.587
                   SRX033490 SRX033483 SRX033476 SRX033478 SRX033479
ENSMUSG00000020912    17.330    26.430    13.359    23.621    14.310
ENSMUSG00000050141     7.890     8.361     6.583    11.911    12.402
ENSMUSG00000035775    12.540    12.136     6.389    13.527    11.210
ENSMUSG00000015484    33.393    30.745    34.655    26.851    31.482
ENSMUSG00000024248    25.784    18.879    24.781    14.334    14.310
ENSMUSG00000030532    26.770    26.430    19.167    23.217    20.034
ENSMUSG00000054354     7.608     7.012     3.291     4.442     5.963
ENSMUSG00000023236    80.875    83.336    55.178    57.740    66.303
                   SRX033472 SRX033473 SRX033474 SRX033475 SRX033491
ENSMUSG00000020912    0.5037    0.4226    1.0805    0.2802    1.0995
ENSMUSG00000050141    0.2518    0.0000    0.0000    0.2802    0.8246
ENSMUSG00000035775    0.7555    0.0000    0.2701    0.2802    0.6872
ENSMUSG00000015484    7.0516    4.8595    9.1841    7.8443    8.5209
ENSMUSG00000024248    2.2666    1.2677    1.8908    3.3618    3.1610
ENSMUSG00000030532   69.7605   70.5683   63.2082   63.0343   73.8023
ENSMUSG00000054354    0.0000    0.0000    0.0000    0.0000    0.5497
ENSMUSG00000023236  195.4301  183.6044  176.1186  156.3252  206.9762
                   SRX033484 SRX033492 SRX033485 SRX033493 SRX033486
ENSMUSG00000020912    0.0000    0.7027    0.8651    0.1362    0.0000
ENSMUSG00000050141    0.0000    0.1405    0.0000    0.1362    0.2578
ENSMUSG00000035775    0.6784    0.1405    0.5767    0.1362    1.8045
ENSMUSG00000015484    9.4974    8.4319    8.9396    6.6759   10.3116
ENSMUSG00000024248    1.5829    1.6864    1.7302    1.7712    2.5779
ENSMUSG00000030532   70.0997   70.6873   62.5770   78.6123   59.2919
ENSMUSG00000054354    0.0000    0.0000    0.0000    0.0000    0.0000
ENSMUSG00000023236  177.9628  192.8090  207.0519  201.6399  204.6859
                   SRX033494
ENSMUSG00000020912    0.0000
ENSMUSG00000050141    0.1477
ENSMUSG00000035775    0.5907
ENSMUSG00000015484    8.8604
ENSMUSG00000024248    3.1012
ENSMUSG00000030532   71.3265
ENSMUSG00000054354    0.0000
ENSMUSG00000023236  192.7144
summary(de.glm <- decideTestsDGE(lrt, p = 0.05, adjust = "BH"))
   [,1] 
-1   451
0  35660
1    425
# plotting the tagwise log fold changes against log-cpm
tags.glm <- rownames(dge.glm.tag.disp)[as.logical(de.glm)]
plotSmear(lrt, de.tags = tags.glm)
abline(h = c(-2, 2), col = "blue")






library(DESeq)
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: 'BiocGenerics'

The following objects are masked from 'package:parallel':

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following object is masked from 'package:limma':

    plotMA

The following object is masked from 'package:stats':

    xtabs

The following objects are masked from 'package:base':

    anyDuplicated, append, as.data.frame, as.vector, cbind,
    colnames, duplicated, eval, evalq, Filter, Find, get,
    intersect, is.unsorted, lapply, Map, mapply, match, mget,
    order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
    rbind, Reduce, rep.int, rownames, sapply, setdiff, sort,
    table, tapply, union, unique, unlist

Loading required package: Biobase
Welcome to Bioconductor

    Vignettes contain introductory material; view with
    'browseVignettes()'. To cite Bioconductor, see
    'citation("Biobase")', and for packages 'citation("pkgname")'.

Loading required package: locfit
Warning: package 'locfit' was built under R version 3.0.3
locfit 1.5-9.1   2013-03-22
Loading required package: lattice
Warning: package 'lattice' was built under R version 3.0.3
    Welcome to 'DESeq'. For improved performance, usability and
    functionality, please consider migrating to 'DESeq2'.

plot of chunk unnamed-chunk-1

# reading in the same count table data and grouping information
deSeqDat <- newCountDataSet(dat, group)
head(counts(deSeqDat))
                   SRX033480 SRX033488 SRX033481 SRX033489 SRX033482
ENSMUSG00000000001       369       744       287       769       348
ENSMUSG00000000003         0         0         0         0         0
ENSMUSG00000000028         0         1         0         1         1
ENSMUSG00000000031         0         0         0         0         0
ENSMUSG00000000037         0         1         1         5         0
ENSMUSG00000000049         0         1         0         1         0
                   SRX033490 SRX033483 SRX033476 SRX033478 SRX033479
ENSMUSG00000000001       803       433       469       585       321
ENSMUSG00000000003         0         0         0         0         0
ENSMUSG00000000028         1         0         7         6         1
ENSMUSG00000000031         0         0         0         0         0
ENSMUSG00000000037         4         0         0         0         0
ENSMUSG00000000049         0         0         0         0         0
                   SRX033472 SRX033473 SRX033474 SRX033475 SRX033491
ENSMUSG00000000001       301       461       309       374       781
ENSMUSG00000000003         0         0         0         0         0
ENSMUSG00000000028         1         1         1         1         1
ENSMUSG00000000031         0         0         0         0         0
ENSMUSG00000000037         4         1         1         0         1
ENSMUSG00000000049         0         0         0         0         0
                   SRX033484 SRX033492 SRX033485 SRX033493 SRX033486
ENSMUSG00000000001       555       820       294       758       419
ENSMUSG00000000003         0         0         0         0         0
ENSMUSG00000000028         2         1         1         4         1
ENSMUSG00000000031         0         0         0         0         0
ENSMUSG00000000037         2         1         1         1         1
ENSMUSG00000000049         0         0         0         0         0
                   SRX033494
ENSMUSG00000000001       857
ENSMUSG00000000003         0
ENSMUSG00000000028         5
ENSMUSG00000000031         0
ENSMUSG00000000037         2
ENSMUSG00000000049         0
deSeqDat <- estimateSizeFactors(deSeqDat)
sizeFactors(deSeqDat)
SRX033480 SRX033488 SRX033481 SRX033489 SRX033482 SRX033490 SRX033483 
   0.6439    1.3454    0.5785    1.4295    0.6355    1.5240    0.7933 
SRX033476 SRX033478 SRX033479 SRX033472 SRX033473 SRX033474 SRX033475 
   1.1272    1.0772    0.8984    0.8886    1.0255    0.7987    0.7796 
SRX033491 SRX033484 SRX033492 SRX033485 SRX033493 SRX033486 SRX033494 
   1.6162    0.9882    1.5720    0.7558    1.5922    0.8264    1.4715 
deSeqDat <- estimateDispersions(deSeqDat)
# plotting the estimated dispersions against the mean normalized counts
plotDispEsts(deSeqDat)

plot of chunk unnamed-chunk-1

## this takes a minute or so for JB
results <- nbinomTest(deSeqDat, levels(group)[1], levels(group)[2])
str(results)
'data.frame':   36536 obs. of  8 variables:
 $ id            : chr  "ENSMUSG00000000001" "ENSMUSG00000000003" "ENSMUSG00000000028" "ENSMUSG00000000031" ...
 $ baseMean      : num  489.18 0 1.57 0 1.1 ...
 $ baseMeanA     : num  509.685 0 1.657 0 0.859 ...
 $ baseMeanB     : num  470.53 0 1.49 0 1.32 ...
 $ foldChange    : num  0.923 NaN 0.898 NaN 1.537 ...
 $ log2FoldChange: num  -0.115 NaN -0.156 NaN 0.62 ...
 $ pval          : num  0.498 NA 0.829 NA 0.968 ...
 $ padj          : num  1 NA 1 NA 1 ...
plotMA(results)

plot of chunk unnamed-chunk-1




library(limma)
norm.factor <- calcNormFactors(dat)
dat.voomed <- voom(dat, design, plot = TRUE, 
                   lib.size = colSums(dat) * norm.factor)

plot of chunk unnamed-chunk-1

dat.voomed
An object of class "EList"
$E
                   SRX033480 SRX033488 SRX033481 SRX033489 SRX033482
ENSMUSG00000000001     6.941     6.916     6.720     6.871     6.860
ENSMUSG00000000003    -2.588    -3.625    -2.447    -3.716    -2.585
ENSMUSG00000000028    -2.588    -2.040    -2.447    -2.132    -1.000
ENSMUSG00000000031    -2.588    -3.625    -2.447    -3.716    -2.585
ENSMUSG00000000037    -2.588    -2.040    -0.862    -0.257    -2.585
                   SRX033490 SRX033483 SRX033476 SRX033478 SRX033479
ENSMUSG00000000001    6.8447      6.87    6.4977    6.8899     6.279
ENSMUSG00000000003   -3.8055     -2.89   -3.3773   -3.3036    -3.049
ENSMUSG00000000028   -2.2205     -2.89    0.5296    0.3968    -1.465
ENSMUSG00000000031   -3.8055     -2.89   -3.3773   -3.3036    -3.049
ENSMUSG00000000037   -0.6355     -2.89   -3.3773   -3.3036    -3.049
                   SRX033472 SRX033473 SRX033474 SRX033475 SRX033491
ENSMUSG00000000001    6.2110     6.615     6.397     6.693     6.737
ENSMUSG00000000003   -3.0250    -3.236    -2.877    -2.855    -3.873
ENSMUSG00000000028   -1.4400    -1.651    -1.292    -1.270    -2.288
ENSMUSG00000000031   -3.0250    -3.236    -2.877    -2.855    -3.873
ENSMUSG00000000037    0.1449    -1.651    -1.292    -2.855    -2.288
                   SRX033484 SRX033492 SRX033485 SRX033493 SRX033486
ENSMUSG00000000001    6.9151     6.855     6.375     6.718     6.760
ENSMUSG00000000003   -3.2025    -3.825    -2.827    -3.849    -2.952
ENSMUSG00000000028   -0.8806    -2.240    -1.242    -0.679    -1.367
ENSMUSG00000000031   -3.2025    -3.825    -2.827    -3.849    -2.952
ENSMUSG00000000037   -0.8806    -2.240    -1.242    -2.264    -1.367
                   SRX033494
ENSMUSG00000000001     7.005
ENSMUSG00000000003    -3.739
ENSMUSG00000000028    -0.280
ENSMUSG00000000031    -3.739
ENSMUSG00000000037    -1.417
36531 more rows ...

$weights
       [,1]   [,2]   [,3]   [,4]   [,5]   [,6]   [,7]   [,8]   [,9]  [,10]
[1,] 15.907 19.293 15.443 19.597 15.899 19.891 16.895 18.482 18.240 17.413
[2,]  1.632  1.547  1.632  1.531  1.632  1.517  1.632  1.595  1.612  1.632
[3,]  1.469  1.383  1.488  1.378  1.470  1.374  1.436  1.397  1.402  1.421
[4,]  1.632  1.547  1.632  1.531  1.632  1.517  1.632  1.595  1.612  1.632
[5,]  1.558  1.421  1.585  1.414  1.558  1.407  1.507  1.445  1.453  1.484
      [,11]  [,12]  [,13]  [,14]  [,15]  [,16]  [,17]  [,18]  [,19]  [,20]
[1,] 16.988 17.670 16.503 16.432 19.760 17.564 19.603 16.339 19.682 16.748
[2,]  1.632  1.632  1.632  1.632  1.531  1.632  1.539  1.632  1.535  1.632
[3,]  1.403  1.390  1.415  1.416  1.354  1.392  1.358  1.419  1.356  1.409
[4,]  1.632  1.632  1.632  1.632  1.531  1.632  1.539  1.632  1.535  1.632
[5,]  1.423  1.406  1.437  1.439  1.370  1.409  1.373  1.442  1.371  1.429
      [,21]
[1,] 19.318
[2,]  1.555
[3,]  1.364
[4,]  1.555
[5,]  1.377
36531 more rows ...

$design
  (Intercept) group2
1           1      0
2           1      0
3           1      0
4           1      0
5           1      0
16 more rows ...

$targets
[1] 3006197 6166939 2726341 6572633 3000692
16 more rows ...
fit <- lmFit(dat.voomed, design)
fit <- eBayes(fit)
topTable(fit)
                   X.Intercept.    group2 AveExpr      F   P.Value
ENSMUSG00000025867       12.477 -0.013979  12.471 154234 2.584e-55
ENSMUSG00000022892       12.268 -0.086479  12.223 138459 1.103e-54
ENSMUSG00000037852       12.321 -0.237470  12.194 134826 1.576e-54
ENSMUSG00000042700       10.716 -0.048029  10.692 133973 1.717e-54
ENSMUSG00000029461       10.300  0.020715  10.311 121795 6.182e-54
ENSMUSG00000020658        9.610 -0.019628   9.601 119935 7.604e-54
ENSMUSG00000060261        9.469 -0.015743   9.461 117841 9.635e-54
ENSMUSG00000032549       11.904  0.003545  11.905 117324 1.022e-53
ENSMUSG00000024462       10.227 -0.138929  10.153 116767 1.090e-53
ENSMUSG00000030102       12.085 -0.026149  12.073 112155 1.874e-53
                   adj.P.Val
ENSMUSG00000025867 9.442e-51
ENSMUSG00000022892 1.568e-50
ENSMUSG00000037852 1.568e-50
ENSMUSG00000042700 1.568e-50
ENSMUSG00000029461 4.424e-50
ENSMUSG00000020658 4.424e-50
ENSMUSG00000060261 4.424e-50
ENSMUSG00000032549 4.424e-50
ENSMUSG00000024462 4.424e-50
ENSMUSG00000030102 6.417e-50


## Take Home Problem

## Choose a specific threshold for the adjusted p value, 
## find the genes identified as differentially expressed using each of edgeR, 
## DESeq and voom+limma. Compare the number of genes in these 3 lists, 
## and draw a venn digram demonstrating the overlap (if any!).
cut <- 1e-4
edgeR <- subset(tt.glm$table, FDR < cut)
edgerR <- na.omit(edgeR)
DeSeq <- subset(results, padj < cut)
DeSeq <- na.omit(DeSeq)
hits.voomed <- topTable(fit, coef ="group2",  n = Inf)
voomed <- subset(hits.voomed, adj.P.Val < cut)
voomed<- na.omit(voomed)

library(VennDiagram)
Loading required package: grid

edger.genes <- rownames(edgeR)
deseq.genes <- DeSeq$id
voom.limma.genes <- rownames(voomed)

# Put the things you want to plot in a list. The names in the list will be
# put on the plot.
de.genes <- list(edger = edger.genes, deseq = deseq.genes, voom.limma = voom.limma.genes)

# Start a new plot
plot.new()

# Draw the Venn diagram. Note the argument `filename=NULL` tells it to
# create a plot object instead of outputting to file.
venn.plot <- venn.diagram(de.genes, filename = NULL, fill = c("red", "blue", "green"))

# Draw the plot on the screen.
grid.draw(venn.plot)

plot of chunk unnamed-chunk-1





## Mini Exercise

##Redo the above analysis but first filter the data and remove any gene that has: 
## 1. count equal tot zero across all samples 
## 2. count equal to zero in at least one sample in each genotype group
dat <- read.table("bottomly_count_table.tsv", header = TRUE, 
                  row.names = 1)
des <- read.table("bottomly_phenodata.tsv", header = TRUE, 
                  row.names = 1)

dat <- subset(dat, rowSums(dat) == 0 & (all(dat[, 1:10] != 0)) & (all(dat[, 11:21]!=0)))
all(rownames(des) == colnames(dat))
[1] TRUE
with(des, table(strain))
strain
C57BL/6J   DBA/2J 
      10       11 

group <- factor(c(rep("1", 10), rep("2", 11)))
group
 [1] 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
Levels: 1 2

# this produces an object of type DGEList with can be manipulated in a
# similar way to any other list object in R
dge.glm <- DGEList(counts = dat, group = group)
names(dge.glm)
[1] "counts"  "samples"
dge.glm[["samples"]]
          group lib.size norm.factors
SRX033480     1        0            1
SRX033488     1        0            1
SRX033481     1        0            1
SRX033489     1        0            1
SRX033482     1        0            1
SRX033490     1        0            1
SRX033483     1        0            1
SRX033476     1        0            1
SRX033478     1        0            1
SRX033479     1        0            1
SRX033472     2        0            1
SRX033473     2        0            1
SRX033474     2        0            1
SRX033475     2        0            1
SRX033491     2        0            1
SRX033484     2        0            1
SRX033492     2        0            1
SRX033485     2        0            1
SRX033493     2        0            1
SRX033486     2        0            1
SRX033494     2        0            1
nrow(dge.glm[[1]])
[1] 0
ncol(dge.glm[[1]])
[1] 21
str(dge.glm)
Formal class 'DGEList' [package "edgeR"] with 1 slots
  ..@ .Data:List of 2
  .. ..$ : logi[0 , 1:21] 
  .. .. ..- attr(*, "dimnames")=List of 2
  .. .. .. ..$ : NULL
  .. .. .. ..$ : chr [1:21] "SRX033480" "SRX033488" "SRX033481" "SRX033489" ...
  .. ..$ :'data.frame': 21 obs. of  3 variables:
  .. .. ..$ group       : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
  .. .. ..$ lib.size    : num [1:21] 0 0 0 0 0 0 0 0 0 0 ...
  .. .. ..$ norm.factors: num [1:21] 1 1 1 1 1 1 1 1 1 1 ...
design <- model.matrix(~group)
design
   (Intercept) group2
1            1      0
2            1      0
3            1      0
4            1      0
5            1      0
6            1      0
7            1      0
8            1      0
9            1      0
10           1      0
11           1      1
12           1      1
13           1      1
14           1      1
15           1      1
16           1      1
17           1      1
18           1      1
19           1      1
20           1      1
21           1      1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
dge.glm.com.disp <- estimateGLMCommonDisp(dge.glm, design, verbose = TRUE)
Error: zero or negative dimensions not allowed
dge.glm.trend.disp <- estimateGLMTrendedDisp(dge.glm.com.disp, design)
dge.glm.tag.disp <- estimateGLMTagwiseDisp(dge.glm.trend.disp, design)
# plot the tagwise dispersion against log2-CPM (counts per million)
plotBCV(dge.glm.tag.disp)

plot of chunk unnamed-chunk-1

fit <- glmFit(dge.glm.tag.disp, design)
colnames(coef(fit))
[1] "(Intercept)" "group2"     
lrt <- glmLRT(fit, coef = 2)
topTags(lrt)
Coefficient:  group2 
                    logFC logCPM    LR    PValue       FDR
ENSMUSG00000020912 -5.187  3.155 401.8 2.238e-89 8.177e-85
ENSMUSG00000050141 -5.363  2.319 311.6 9.787e-70 1.788e-65
ENSMUSG00000035775 -4.543  2.674 298.0 9.042e-67 1.101e-62
ENSMUSG00000015484 -1.968  4.307 282.5 2.113e-63 1.930e-59
ENSMUSG00000024248 -3.152  3.463 277.0 3.337e-62 2.438e-58
ENSMUSG00000030532  1.547  5.572 269.4 1.568e-60 9.548e-57
ENSMUSG00000054354 -6.283  1.940 249.3 3.657e-56 1.909e-52
ENSMUSG00000023236  1.426  7.067 245.0 3.133e-55 1.431e-51
ENSMUSG00000050824  3.705  3.365 229.7 7.099e-52 2.882e-48
ENSMUSG00000015852 -2.362  3.138 214.8 1.210e-48 4.049e-45
tt.glm <- topTags(lrt, n = Inf)
class(tt.glm)
[1] "TopTags"
attr(,"package")
[1] "edgeR"
nrow(tt.glm$table[tt.glm$table$FDR < 0.01, ])
[1] 600
interestingSamples <- rownames(tt.glm$table[tt.glm$table$FDR < 1e-50, ])
cpm(dge.glm.tag.disp)[interestingSamples, ]
                   SRX033480 SRX033488 SRX033481 SRX033489 SRX033482
ENSMUSG00000020912     20.39    12.691     15.83    14.819    19.230
ENSMUSG00000050141     14.14    10.153     10.67     6.264     9.946
ENSMUSG00000035775     14.47    10.629     19.51    11.305    12.930
ENSMUSG00000015484     26.97    33.949     32.76    36.206    31.497
ENSMUSG00000024248     14.80    26.017     17.30    23.527    18.567
ENSMUSG00000030532     27.96    24.906     21.35    25.360    18.567
ENSMUSG00000054354     10.85     7.615     11.41     8.708     8.289
ENSMUSG00000023236     65.45    78.684     72.50    72.413    76.587
                   SRX033490 SRX033483 SRX033476 SRX033478 SRX033479
ENSMUSG00000020912    17.330    26.430    13.359    23.621    14.310
ENSMUSG00000050141     7.890     8.361     6.583    11.911    12.402
ENSMUSG00000035775    12.540    12.136     6.389    13.527    11.210
ENSMUSG00000015484    33.393    30.745    34.655    26.851    31.482
ENSMUSG00000024248    25.784    18.879    24.781    14.334    14.310
ENSMUSG00000030532    26.770    26.430    19.167    23.217    20.034
ENSMUSG00000054354     7.608     7.012     3.291     4.442     5.963
ENSMUSG00000023236    80.875    83.336    55.178    57.740    66.303
                   SRX033472 SRX033473 SRX033474 SRX033475 SRX033491
ENSMUSG00000020912    0.5037    0.4226    1.0805    0.2802    1.0995
ENSMUSG00000050141    0.2518    0.0000    0.0000    0.2802    0.8246
ENSMUSG00000035775    0.7555    0.0000    0.2701    0.2802    0.6872
ENSMUSG00000015484    7.0516    4.8595    9.1841    7.8443    8.5209
ENSMUSG00000024248    2.2666    1.2677    1.8908    3.3618    3.1610
ENSMUSG00000030532   69.7605   70.5683   63.2082   63.0343   73.8023
ENSMUSG00000054354    0.0000    0.0000    0.0000    0.0000    0.5497
ENSMUSG00000023236  195.4301  183.6044  176.1186  156.3252  206.9762
                   SRX033484 SRX033492 SRX033485 SRX033493 SRX033486
ENSMUSG00000020912    0.0000    0.7027    0.8651    0.1362    0.0000
ENSMUSG00000050141    0.0000    0.1405    0.0000    0.1362    0.2578
ENSMUSG00000035775    0.6784    0.1405    0.5767    0.1362    1.8045
ENSMUSG00000015484    9.4974    8.4319    8.9396    6.6759   10.3116
ENSMUSG00000024248    1.5829    1.6864    1.7302    1.7712    2.5779
ENSMUSG00000030532   70.0997   70.6873   62.5770   78.6123   59.2919
ENSMUSG00000054354    0.0000    0.0000    0.0000    0.0000    0.0000
ENSMUSG00000023236  177.9628  192.8090  207.0519  201.6399  204.6859
                   SRX033494
ENSMUSG00000020912    0.0000
ENSMUSG00000050141    0.1477
ENSMUSG00000035775    0.5907
ENSMUSG00000015484    8.8604
ENSMUSG00000024248    3.1012
ENSMUSG00000030532   71.3265
ENSMUSG00000054354    0.0000
ENSMUSG00000023236  192.7144
summary(de.glm <- decideTestsDGE(lrt, p = 0.05, adjust = "BH"))
   [,1] 
-1   451
0  35660
1    425
# plotting the tagwise log fold changes against log-cpm
tags.glm <- rownames(dge.glm.tag.disp)[as.logical(de.glm)]
plotSmear(lrt, de.tags = tags.glm)
abline(h = c(-2, 2), col = "blue")

plot of chunk unnamed-chunk-1