library("DEXSeq")

## Loading required package: Biobase
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## 
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## 
## The following object is masked from 'package:stats':
## 
##     xtabs
## 
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, as.data.frame, cbind, colnames, duplicated,
##     eval, Filter, Find, get, intersect, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rep.int, rownames, sapply,
##     setdiff, sort, table, tapply, union, unique, unlist
## 
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

setwd("/home/mazin/teaching/NGS2014/hw.testing/2/diff/dexseq")
dexseq = read.HTSeqCounts(paste(0:3, "txt", sep = "."), data.frame(condition = factor(c(1, 
    1, 2, 2))), "dexseq.chr21.gff")
counts(dexseq)[1:3, ]

##                      0.txt 1.txt 2.txt 3.txt
## ENSG00000141956:E001     0     0     0     0
## ENSG00000141956:E002     6    14    10    12
## ENSG00000141956:E003     8    12     6     5

dexseq = estimateSizeFactors(dexseq)
sizeFactors(dexseq)

##  0.txt  1.txt  2.txt  3.txt 
## 0.7938 1.3444 1.2768 0.8165

dexseq = estimateDispersions(dexseq)

## Warning: Exons with less than 10 counts will not be tested. For more details please see the manual page of 'estimateDispersions', parameter 'minCount'
## Warning: Genes with more than 70 testable exons will be omitted from the analysis. For more details please see the manual page of 'estimateDispersions', parameter 'maxExon'.

dexseq = fitDispersionFunction(dexseq)

dexseq = testForDEU(dexseq)
dexseq = estimatelog2FoldChanges(dexseq)
dexseq.res = DEUresultTable(dexseq)

sum(dexseq.res$padjust < 0.1, na.rm = T)

## [1] 4

dexseq.res[!is.na(dexseq.res$padjust) & dexseq.res$padjust < 0.1, ]

##                               geneID exonID dispersion    pvalue   padjust
## ENSG00000142192:E023 ENSG00000142192   E023    0.01890 1.110e-16 2.567e-13
## ENSG00000157617:E001 ENSG00000157617   E001    0.02395 1.587e-04 9.174e-02
## ENSG00000160200:E035 ENSG00000160200   E035    0.28774 9.909e-05 9.174e-02
## ENSG00000160200:E036 ENSG00000160200   E036    0.23458 1.317e-04 9.174e-02
##                      meanBase log2fold(2/1)
## ENSG00000142192:E023  146.715        0.8973
## ENSG00000157617:E001   99.190        0.3774
## ENSG00000160200:E035   14.128       -2.0830
## ENSG00000160200:E036    6.845       -2.3867

gid = unique(as.character(dexseq.res[!is.na(dexseq.res$padjust) & dexseq.res$padjust < 
    0.1, "geneID"]))
for (g in gid) plotDEXSeq(dexseq, g, 0.1)

plot of chunk unnamed-chunk-1