Introduction

This document demonstrates a strategy to adjust the p-values for a differential translation efficiency analysis using riborex. The dataset involves a time-course experiment studying the effect of radiation on glioblastoma cells. Briefly, two glioblastoma cell lines, U251 (p53 mt) and U343 (p53 wt), were profiled using RNA-Seq and Ribo-Seq at 3 time points: 0h, 1h and 24h post radiation (labelled T0, T1 and T24 in the code).

Utility functions

suppressMessages(library(riborex))
suppressMessages(library(fdrtool))
suppressMessages(library(cowplot))
# Gene annotation table with columns gene_id, gene_name and gene_type.
# Row names are set to gene_id so result tables can be labelled by gene id.
annotations <- read.table(file.path('..', 'annotations', 
                                    'hg38_gene_names_stripped.tsv'), 
                          header = FALSE, 
                          col.names = c('gene_id', 'gene_name', 
                                        'gene_type'),
                          stringsAsFactors = FALSE)
rownames(annotations) <- annotations$gene_id
# Curated histone gene list; only its Approved.Symbol column is used below.
histone.genes.df <- read.table(file.path('..', 'annotations', 
                                         'histone_genes.tsv'), 
                               header = TRUE, 
                               stringsAsFactors = FALSE,
                               sep = '\t') 
# Histone genes are collected two ways: symbols containing 'HIST', and symbols
# listed in the curated table.
histone.genes.1 <- annotations[grep('HIST', annotations$gene_name), ]$gene_id
histone.genes.2 <- annotations[annotations$gene_name %in% 
                                 histone.genes.df$Approved.Symbol, ]$gene_id
# Add RP1-34B20.21 separately since symbol doesn't have HIST in it.
histone.genes <- unique(c(histone.genes.1, histone.genes.2, 'ENSG00000282988'))
# Input/output locations, all relative to the notebook's directory.
readcounts.dir <- file.path('..', 'read_counts', 'byCDS')
results.dir <- file.path('..', 'results', 'translation_efficiency', 
                         'without_histones_edgeRD')
rna.design.file <- file.path('..', 'design_files', 'rna_seq_design.tsv')
ribo.design.file <- file.path('..', 'design_files', 'ribo_seq_design.tsv')
## Suffix of htseq-count output
counts.suffix <- '.CDS.counts.tsv'
# Design tables; count.reads() reads their SampleFile and SampleName columns.
rna.design.info <- read.table(rna.design.file, header = TRUE, 
                              stringsAsFactors = FALSE)
ribo.design.info <- read.table(ribo.design.file, header = TRUE, 
                               stringsAsFactors = FALSE)
# Load htseq-count output for one assay and return the raw count matrix.
#
# Args:
#   design.info:   data frame with SampleFile and SampleName columns.
#   dirname:       sub-directory of `counts.dir` holding this assay's files.
#   suffix:        file-name suffix appended to each SampleFile entry.
#   counts.dir:    root directory of the count files.
#   exclude.genes: gene ids to drop (the histone genes by default).
#
# Returns: the raw (un-normalised) count matrix, genes x samples, after
#   stripping '.<digits>' version tags from the gene ids, removing
#   `exclude.genes` and dropping genes whose total count is <= 1.
#
# The previous version also ran DESeq() on the intercept-only design and then
# returned counts(dds). counts() returns the raw counts, which DESeq() leaves
# untouched, so the fit only cost time and has been removed.
count.reads <- function(design.info, dirname,
                        suffix = counts.suffix,
                        counts.dir = readcounts.dir,
                        exclude.genes = histone.genes){
  sampleTable <- data.frame(sampleName = design.info$SampleName,
                            fileName = paste0(design.info$SampleFile, suffix))
  ddsHTSeq <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
                                         directory = file.path(counts.dir,
                                                               dirname),
                                         design = ~ 1)
  # Strip the version tag from the gene ids.
  rownames(ddsHTSeq) <- gsub('\\.[0-9]+', '', rownames(ddsHTSeq))
  # Remove histones
  ddsHTSeq <- ddsHTSeq[!(rownames(ddsHTSeq) %in% exclude.genes), ]
  ddsHTSeq <- ddsHTSeq[rowSums(counts(ddsHTSeq)) > 1, ]
  counts(ddsHTSeq)
}
# Keep the significant rows of a DESeq2-style result table.
#
# Args:
#   df:    object coercible with as.data.frame() that has a `padj` column and
#          gene ids as row names.
#   alpha: adjusted p-value cutoff; rows with padj < alpha are kept.
#   annot: annotation data frame with gene ids as row names and a `gene_name`
#          column (defaults to the global `annotations`).
#
# Returns: a data frame sorted by increasing padj, with a `gene_name` column
#   added, restricted to rows with non-missing padj below `alpha`.
filter_results <- function(df, alpha = 0.05, annot = annotations){
  df <- as.data.frame(df)
  if (!('padj' %in% names(df))) {
    stop("filter_results: input has no 'padj' column", call. = FALSE)
  }
  df <- df[order(df$padj), , drop = FALSE]
  # match() is exact. Indexing a data frame by row name partially matches, so
  # an unannotated id that is a prefix of another would get the wrong symbol.
  df$gene_name <- annot$gene_name[match(rownames(df), rownames(annot))]
  df[!is.na(df$padj) & df$padj < alpha, , drop = FALSE]
}
# Keep the significant rows of an edgeR-style result.
#
# Args:
#   df:    object whose `$table` element is coercible with as.data.frame(),
#          has an `FDR` column and gene ids as row names.
#   alpha: FDR cutoff; rows with FDR < alpha are kept.
#   annot: annotation data frame with gene ids as row names and a `gene_name`
#          column (defaults to the global `annotations`).
#
# Returns: a data frame sorted by increasing FDR, with a `gene_name` column
#   added, restricted to rows with non-missing FDR below `alpha`.
filter_results.edgeR <- function(df, alpha = 0.05, annot = annotations){
  tab <- as.data.frame(df$table)
  if (!('FDR' %in% names(tab))) {
    stop("filter_results.edgeR: table has no 'FDR' column", call. = FALSE)
  }
  tab <- tab[order(tab$FDR), , drop = FALSE]
  # match() is exact. Indexing a data frame by row name partially matches, so
  # an unannotated id that is a prefix of another would get the wrong symbol.
  tab$gene_name <- annot$gene_name[match(rownames(tab), rownames(annot))]
  tab[!is.na(tab$FDR) & tab$FDR < alpha, , drop = FALSE]
}
# Re-derive adjusted p-values from the test statistics using an empirically
# estimated null distribution (fdrtool), then Benjamini-Hochberg.
#
# Args:
#   results: DESeq2-style result table with `pvalue`, `padj` and `stat`
#            columns.
#
# Returns: `results` without the rows whose original `padj` or `pvalue` is
#   NA, with `padj` replaced by the BH adjustment of the fdrtool p-values.
#   The `pvalue` column is left as it was.
#
# Side effects: draws a histogram of the original p-values, fdrtool's own
#   diagnostic plot, and a histogram of the fdrtool p-values.
doPvalueAdjustment <- function(results){
  missing.cols <- setdiff(c('pvalue', 'padj', 'stat'), names(results))
  if (length(missing.cols) > 0) {
    stop('doPvalueAdjustment: missing column(s): ',
         paste(missing.cols, collapse = ', '), call. = FALSE)
  }
  hist(results$pvalue, main = 'DESeq2 unadjusted p-values',
       xlab = 'Unadjusted p-values')
  results <- results[!is.na(results$padj), ]
  results <- results[!is.na(results$pvalue), ]
  # Drop the old padj with a logical mask. The former `-which(...)` form would
  # select zero columns if the name were ever absent.
  results <- results[, names(results) != 'padj']
  resultsFDR <- fdrtool(results$stat,
                        statistic = 'normal',
                        plot = TRUE)
  results[, 'padj'] <- p.adjust(resultsFDR$pval,
                                method = 'BH')
  hist(resultsFDR$pval,
       main = 'DESeq2 corrected p-values | Empirical null',
       xlab = 'Corrected p-values')
  results
}
# Bar chart of the share of raw p-values below 0.05 within bins of baseMean.
#
# Args:
#   results: table with `baseMean` and `pvalue` columns.
#
# The bin edges are 0 followed by the 0/15 ... 15/15 quantiles of the
# positive baseMean values; each bar is labelled with its rounded bin
# midpoint. Returns the value of barplot().
plotPValueByNormalizedCount <- function(results){
  base.mean <- results$baseMean
  edges <- c(0, quantile(base.mean[base.mean > 0], 0:15/15))
  midpoints <- (edges[-1] + edges[-length(edges)]) / 2
  count.bin <- cut(base.mean, edges)
  levels(count.bin) <- paste0("~", round(signif(midpoints, 2)))
  share.below <- function(p) {
    mean(p < .05, na.rm = TRUE)
  }
  fractionSig <- tapply(results$pvalue, count.bin, share.below)
  barplot(fractionSig,
          xlab = "mean normalized count",
          ylab = "fraction of raw p values < 0.05")
}
# Run riborex on the samples of a single cell line.
#
# Args:
#   rna.read.counts, ribo.read.counts: count matrices (genes x samples) with
#     '_'-separated sample names as column names. The time point is taken
#     from field 3 of the RNA-Seq names and field 4 of the Ribo-Seq names.
#   cell.line: pattern passed to grepl(); matching columns are kept.
#   contrast:  contrast vector handed to riborex(), e.g. c('time', 'T1', 'T0').
#   engine:    riborex engine name, passed through unchanged.
#   merge.T1:  if TRUE, time levels 'T0' and 'T1' are pooled into 'T0T1'.
#
# Returns: the value of riborex() for the selected samples.
riborex.for.cellline <- function(rna.read.counts, ribo.read.counts, 
                                 cell.line, contrast, engine='DESeq2', 
                                 merge.T1=FALSE){
  # i-th '_'-separated field of every sample name (NA when absent).
  name.field <- function(sample.names, i) {
    vapply(strsplit(sample.names, '_'), `[`, character(1), i)
  }
  # Append `suffix` only where it is not already present. Callers in this
  # notebook pass matrices whose columns were already tagged.
  add.suffix <- function(sample.names, suffix) {
    tagged <- endsWith(sample.names, suffix)
    sample.names[!tagged] <- paste0(sample.names[!tagged], suffix)
    sample.names
  }

  rna.keep <- grepl(cell.line, colnames(rna.read.counts))
  ribo.keep <- grepl(cell.line, colnames(ribo.read.counts))
  if (!any(rna.keep) || !any(ribo.keep)) {
    stop("riborex.for.cellline: no samples match cell line '", cell.line, "'",
         call. = FALSE)
  }
  # drop = FALSE keeps a matrix even if only one sample matches.
  rna.read.counts <- rna.read.counts[, rna.keep, drop = FALSE]
  ribo.read.counts <- ribo.read.counts[, ribo.keep, drop = FALSE]

  rna.conditions.time <- as.factor(name.field(colnames(rna.read.counts), 3))
  ribo.conditions.time <- as.factor(name.field(colnames(ribo.read.counts), 4))

  if (merge.T1){
    early <- c('T0', 'T1')
    levels(rna.conditions.time)[levels(rna.conditions.time) %in% early] <- 'T0T1'
    levels(ribo.conditions.time)[levels(ribo.conditions.time) %in% early] <- 'T0T1'
  }
  rna.conditions <- data.frame('time' = rna.conditions.time)
  ribo.conditions <- data.frame('time' = ribo.conditions.time)

  # riborex is given the same genes, in the same order, for both assays.
  common.genes <- intersect(rownames(rna.read.counts), 
                            rownames(ribo.read.counts)) 
  rna.read.counts <- rna.read.counts[common.genes, , drop = FALSE]
  ribo.read.counts <- ribo.read.counts[common.genes, , drop = FALSE]
  colnames(rna.read.counts) <- add.suffix(colnames(rna.read.counts), '_RNA')
  colnames(ribo.read.counts) <- add.suffix(colnames(ribo.read.counts), '_Ribo')

  riborex(rna.read.counts, 
          ribo.read.counts, 
          rna.conditions, 
          ribo.conditions, 
          contrast = contrast, 
          engine = engine)
}

Read data

rna.read.counts.all <- count.reads(rna.design.info, 'rna_seq')
the design is ~ 1 (just an intercept). is this intended?estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
-- replacing outliers and refitting for 19 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)
estimating dispersions
fitting model and testing
ribo.read.counts.all <- count.reads(ribo.design.info, 'ribo_seq')
the design is ~ 1 (just an intercept). is this intended?estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
-- replacing outliers and refitting for 26 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)
estimating dispersions
fitting model and testing
# Extract the i-th '_'-separated field from each sample name. vapply() pins
# the result to one string per sample; a name with too few fields gives NA.
sample.name.field <- function(sample.names, i) {
  vapply(strsplit(sample.names, '_'), `[`, character(1), i)
}
# The time point is field 3 of the RNA-Seq names and field 4 of the Ribo-Seq
# names; the cell line is field 1 in both.
rna.conditions.time.all <- as.factor(
  sample.name.field(colnames(rna.read.counts.all), 3))
ribo.conditions.time.all <- as.factor(
  sample.name.field(colnames(ribo.read.counts.all), 4))
rna.conditions.cell.all <- as.factor(
  sample.name.field(colnames(rna.read.counts.all), 1))
ribo.conditions.cell.all <- as.factor(
  sample.name.field(colnames(ribo.read.counts.all), 1))
rna.conditions.all <- data.frame('cell.type' = rna.conditions.cell.all, 
                                 'time' = rna.conditions.time.all)
ribo.conditions.all <- data.frame('cell.type' = ribo.conditions.cell.all, 
                                  'time' = ribo.conditions.time.all)
# Restrict both assays to the genes they share, in the same order.
common.genes <- intersect(rownames(rna.read.counts.all), 
                          rownames(ribo.read.counts.all)) 
rna.read.counts.all <- rna.read.counts.all[common.genes,]
ribo.read.counts.all <- ribo.read.counts.all[common.genes,]
# Tag the sample names with their assay.
colnames(rna.read.counts.all) <- paste(colnames(rna.read.counts.all),
                                       'RNA', sep='_')
colnames(ribo.read.counts.all) <- paste(colnames(ribo.read.counts.all),
                                        'Ribo', sep='_')
# Contrasts in the form c(factor, numerator level, denominator level).
contrast.T1vsT0 <- c('time', 'T1', 'T0')
contrast.T24vsT0 <- c('time', 'T24', 'T0')
contrast.T24vsT1 <- c('time', 'T24', 'T1')

Translation Efficiency: T1 vs T0

We performed an exploratory analysis, which indicated that the T1 and T0 samples are very similar at both the RNA-Seq and Ribo-Seq levels.

res.T1vsT0.edgeR <- riborex(rna.read.counts.all, 
                            ribo.read.counts.all, 
                            rna.conditions.all, 
                            ribo.conditions.all, 
                            contrast = contrast.T1vsT0,
                            engine = 'edgeR')
edgeR mode selected
combining design matrix
applying edgeR to modified design matrix
res.T1vsT0.sig.edgeR <- filter_results.edgeR(res.T1vsT0.edgeR)
res.T1vsT0.edgeRD <- riborex(rna.read.counts.all, 
                             ribo.read.counts.all, 
                             rna.conditions.all, 
                             ribo.conditions.all, 
                             contrast = contrast.T1vsT0,
                             engine = 'edgeRD')
edgeRD mode selected
combining design matrix
applying edgeR to modified design matrix
res.T1vsT0.sig.edgeRD <- filter_results.edgeR(res.T1vsT0.edgeRD)
res.T1vsT0.DESeq2 <- riborex(rna.read.counts.all, 
                             ribo.read.counts.all, 
                             rna.conditions.all, 
                             ribo.conditions.all, 
                             contrast = contrast.T1vsT0,
                             engine = 'DESeq2')
DESeq2 mode selected
combining design matrix
applying DESeq2 to modified design matrix
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
res.T1vsT0.sig.DESeq2 <- filter_results(res.T1vsT0.DESeq2)
#pdf('All_T1vsT0_DESeq2Adjust.pdf')
res.T1vsT0.DESeq2Adj <- doPvalueAdjustment(res.T1vsT0.DESeq2)
Step 1... determine cutoff point
Step 2... estimate parameters of null distribution and eta0
Step 3... compute p-values and estimate empirical PDF/CDF
Step 4... compute q-values and local fdr
Step 5... prepare for plotting

NA

#dev.off()
res.T1vsT0.sig.DESeq2Adj <- filter_results(res.T1vsT0.DESeq2Adj)
# Build a `breaks` callback: given axis limits, it returns pretty(limits, n).
number_ticks <- function(n) {
  function(limits) {
    pretty(limits, n)
  }
}
# Number of significant genes reported by each engine for this contrast,
# drawn as a labelled bar chart and saved to PDF.
counts <- data.frame(
  'tool' = c('edgeR', 'edgeRD', 'DESeq2', 'DESeq2 corrected'),
  'DEgenes' = c(nrow(res.T1vsT0.sig.edgeR),
                nrow(res.T1vsT0.sig.edgeRD),
                nrow(res.T1vsT0.sig.DESeq2),
                nrow(res.T1vsT0.sig.DESeq2Adj)))
ggplot(counts, aes(tool, DEgenes, fill = tool)) +
  geom_col(position = 'dodge', width = 0.3) +
  geom_text(aes(label = DEgenes),
            position = position_dodge(width = 0.9), vjust = -0.25) +
  ggtitle('All T1vsT0') +
  scale_y_continuous(breaks = number_ticks(12)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave('comparison_All_T1vsT0.pdf')
Saving 7.29 x 4.5 in image

All T24 vs T1

res.T24vsT1.edgeR <- riborex(rna.read.counts.all, 
                             ribo.read.counts.all, 
                             rna.conditions.all, 
                             ribo.conditions.all, 
                             contrast = contrast.T24vsT1,
                             engine = 'edgeR')
edgeR mode selected
combining design matrix
applying edgeR to modified design matrix
res.T24vsT1.sig.edgeR <- filter_results.edgeR(res.T24vsT1.edgeR)
res.T24vsT1.edgeRD <- riborex(rna.read.counts.all, 
                              ribo.read.counts.all, 
                              rna.conditions.all, 
                              ribo.conditions.all, 
                              contrast = contrast.T24vsT1,
                              engine = 'edgeRD')
edgeRD mode selected
combining design matrix
applying edgeR to modified design matrix
res.T24vsT1.sig.edgeRD <- filter_results.edgeR(res.T24vsT1.edgeRD)
res.T24vsT1.DESeq2 <- riborex(rna.read.counts.all, 
                              ribo.read.counts.all, 
                              rna.conditions.all, 
                              ribo.conditions.all, 
                              contrast = contrast.T24vsT1,
                              engine = 'DESeq2')
DESeq2 mode selected
combining design matrix
applying DESeq2 to modified design matrix
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
res.T24vsT1.sig.DESeq2 <- filter_results(res.T24vsT1.DESeq2)
#pdf('All_T24vsT1_DESeq2Adjust.pdf')
res.T24vsT1.DESeq2Adj <- doPvalueAdjustment(res.T24vsT1.DESeq2)
Step 1... determine cutoff point
Step 2... estimate parameters of null distribution and eta0
Step 3... compute p-values and estimate empirical PDF/CDF
Step 4... compute q-values and local fdr
Step 5... prepare for plotting

NA

res.T24vsT1.sig.DESeq2Adj <- filter_results(res.T24vsT1.DESeq2Adj)
#dev.off()
# Number of significant genes reported by each engine for this contrast,
# drawn as a labelled bar chart and saved to PDF.
counts <- data.frame(
  'tool' = c('edgeR', 'edgeRD', 'DESeq2', 'DESeq2 corrected'),
  'DEgenes' = c(nrow(res.T24vsT1.sig.edgeR),
                nrow(res.T24vsT1.sig.edgeRD),
                nrow(res.T24vsT1.sig.DESeq2),
                nrow(res.T24vsT1.sig.DESeq2Adj)))
ggplot(counts, aes(tool, DEgenes, fill = tool)) +
  geom_col(position = 'dodge', width = 0.3) +
  geom_text(aes(label = DEgenes),
            position = position_dodge(width = 0.9), vjust = -0.25) +
  ggtitle('All T24vsT1') +
  scale_y_continuous(breaks = number_ticks(12)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave('comparison_All_T24vsT1.pdf')
Saving 7.29 x 4.5 in image

U251: T1 vs T0

U251.res.T1vsT0.edgeR <- riborex.for.cellline(rna.read.counts.all, 
                                        ribo.read.counts.all, 
                                        'U251', 
                                        contrast.T1vsT0,
                                        engine = 'edgeR')
edgeR mode selected
combining design matrix
applying edgeR to modified design matrix
U251.res.T1vsT0.sig.edgeR <- filter_results.edgeR(U251.res.T1vsT0.edgeR)
U251.res.T1vsT0.edgeRD <- riborex.for.cellline(rna.read.counts.all, 
                                        ribo.read.counts.all, 
                                        'U251', 
                                        contrast.T1vsT0,
                                        engine = 'edgeRD')
edgeRD mode selected
combining design matrix
applying edgeR to modified design matrix
U251.res.T1vsT0.sig.edgeRD <- filter_results.edgeR(U251.res.T1vsT0.edgeRD)
U251.res.T1vsT0.DESeq2 <- riborex.for.cellline(rna.read.counts.all, 
                                        ribo.read.counts.all, 
                                        'U251', 
                                        contrast.T1vsT0,
                                        engine = 'DESeq2')
DESeq2 mode selected
combining design matrix
applying DESeq2 to modified design matrix
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
U251.res.T1vsT0.sig.DESeq2 <- filter_results(U251.res.T1vsT0.DESeq2)
#pdf('U251_T1vsT0_DESeq2Adjust.pdf')
U251.res.T1vsT0.DESeq2Adj <- doPvalueAdjustment(U251.res.T1vsT0.DESeq2)
Step 1... determine cutoff point
Step 2... estimate parameters of null distribution and eta0
Step 3... compute p-values and estimate empirical PDF/CDF
Step 4... compute q-values and local fdr
Step 5... prepare for plotting

NA

#dev.off()
U251.res.T1vsT0.sig.DESeq2Adj <- filter_results(U251.res.T1vsT0.DESeq2Adj)
# Number of significant genes reported by each engine for this contrast,
# drawn as a labelled bar chart and saved to PDF.
counts <- data.frame(
  'tool' = c('edgeR', 'edgeRD', 'DESeq2', 'DESeq2 corrected'),
  'DEgenes' = c(nrow(U251.res.T1vsT0.sig.edgeR),
                nrow(U251.res.T1vsT0.sig.edgeRD),
                nrow(U251.res.T1vsT0.sig.DESeq2),
                nrow(U251.res.T1vsT0.sig.DESeq2Adj)))
ggplot(counts, aes(tool, DEgenes, fill = tool)) +
  geom_col(position = 'dodge', width = 0.3) +
  geom_text(aes(label = DEgenes),
            position = position_dodge(width = 0.9), vjust = -0.25) +
  ggtitle('U251 T1vsT0') +
  scale_y_continuous(breaks = number_ticks(12)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave('comparison_U251_T1vsT0.pdf')
Saving 7.29 x 4.5 in image

U251.res.T24vsT1.edgeR <- riborex.for.cellline(rna.read.counts.all, 
                                               ribo.read.counts.all, 
                                               'U251', 
                                               contrast.T24vsT1,
                                               engine='edgeR')
edgeR mode selected
combining design matrix
applying edgeR to modified design matrix
U251.res.T24vsT1.sig.edgeR <- filter_results.edgeR(U251.res.T24vsT1.edgeR)
U251.res.T24vsT1.edgeRD <- riborex.for.cellline(rna.read.counts.all, 
                                         ribo.read.counts.all, 
                                         'U251', 
                                         contrast.T24vsT1,
                                         engine='edgeRD')
edgeRD mode selected
combining design matrix
applying edgeR to modified design matrix
U251.res.T24vsT1.sig.edgeRD <- filter_results.edgeR(U251.res.T24vsT1.edgeRD)
U251.res.T24vsT1.DESeq2 <- riborex.for.cellline(rna.read.counts.all, 
                                         ribo.read.counts.all, 
                                         'U251', 
                                         contrast.T24vsT1,
                                         engine='DESeq2')
DESeq2 mode selected
combining design matrix
applying DESeq2 to modified design matrix
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
U251.res.T24vsT1.sig.DESeq2 <- filter_results(U251.res.T24vsT1.DESeq2)
#pdf('U251_T24vsT1_DESeq2Adjust.pdf')
U251.res.T24vsT1.DESeq2Adj <- doPvalueAdjustment(U251.res.T24vsT1.DESeq2)
Step 1... determine cutoff point
Step 2... estimate parameters of null distribution and eta0
Step 3... compute p-values and estimate empirical PDF/CDF
Step 4... compute q-values and local fdr
Step 5... prepare for plotting

NA

#dev.off()
U251.res.T24vsT1.sig.DESeq2Adj <- filter_results(U251.res.T24vsT1.DESeq2Adj)
# Number of significant genes reported by each engine for this contrast,
# drawn as a labelled bar chart and saved to PDF.
counts <- data.frame(
  'tool' = c('edgeR', 'edgeRD', 'DESeq2', 'DESeq2 corrected'),
  'DEgenes' = c(nrow(U251.res.T24vsT1.sig.edgeR),
                nrow(U251.res.T24vsT1.sig.edgeRD),
                nrow(U251.res.T24vsT1.sig.DESeq2),
                nrow(U251.res.T24vsT1.sig.DESeq2Adj)))
ggplot(counts, aes(tool, DEgenes, fill = tool)) +
  geom_col(position = 'dodge', width = 0.3) +
  geom_text(aes(label = DEgenes),
            position = position_dodge(width = 0.9), vjust = -0.25) +
  ggtitle('U251 T24vsT1') +
  scale_y_continuous(breaks = number_ticks(12)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave('comparison_U251_T24vsT1.pdf')
Saving 7.29 x 4.5 in image

U343: T1 vs T0

U343.res.T1vsT0.edgeR <- riborex.for.cellline(rna.read.counts.all, 
                                              ribo.read.counts.all, 
                                              'U343', 
                                              contrast.T1vsT0,
                                              engine = 'edgeR')
edgeR mode selected
combining design matrix
applying edgeR to modified design matrix
U343.res.T1vsT0.sig.edgeR <- filter_results.edgeR(U343.res.T1vsT0.edgeR)
U343.res.T1vsT0.edgeRD <- riborex.for.cellline(rna.read.counts.all, 
                                               ribo.read.counts.all, 
                                               'U343', 
                                               contrast.T1vsT0,
                                               engine = 'edgeRD')
edgeRD mode selected
combining design matrix
applying edgeR to modified design matrix
U343.res.T1vsT0.sig.edgeRD <- filter_results.edgeR(U343.res.T1vsT0.edgeRD)
U343.res.T1vsT0.DESeq2 <- riborex.for.cellline(rna.read.counts.all, 
                                               ribo.read.counts.all, 
                                               'U343', 
                                               contrast.T1vsT0,
                                               engine = 'DESeq2')
DESeq2 mode selected
combining design matrix
applying DESeq2 to modified design matrix
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
U343.res.T1vsT0.sig.DESeq2 <- filter_results(U343.res.T1vsT0.DESeq2)
#pdf('U343_T1vsT0_DESeq2Adjust.pdf')
U343.res.T1vsT0.DESeq2Adj <- doPvalueAdjustment(U343.res.T1vsT0.DESeq2)
Step 1... determine cutoff point
Step 2... estimate parameters of null distribution and eta0
Step 3... compute p-values and estimate empirical PDF/CDF
Step 4... compute q-values and local fdr
Step 5... prepare for plotting

NA

#dev.off()
U343.res.T1vsT0.sig.DESeq2Adj <- filter_results(U343.res.T1vsT0.DESeq2Adj)
# Number of significant genes reported by each engine for this contrast,
# drawn as a labelled bar chart and saved to PDF.
counts <- data.frame(
  'tool' = c('edgeR', 'edgeRD', 'DESeq2', 'DESeq2 corrected'),
  'DEgenes' = c(nrow(U343.res.T1vsT0.sig.edgeR),
                nrow(U343.res.T1vsT0.sig.edgeRD),
                nrow(U343.res.T1vsT0.sig.DESeq2),
                nrow(U343.res.T1vsT0.sig.DESeq2Adj)))
ggplot(counts, aes(tool, DEgenes, fill = tool)) +
  geom_col(position = 'dodge', width = 0.3) +
  geom_text(aes(label = DEgenes),
            position = position_dodge(width = 0.9), vjust = -0.25) +
  ggtitle('U343 T1vsT0') +
  scale_y_continuous(breaks = number_ticks(12)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave('comparison_U343_T1vsT0.pdf')
Saving 7.29 x 4.5 in image

U343.res.T24vsT1.edgeR <- riborex.for.cellline(rna.read.counts.all, 
                                               ribo.read.counts.all, 
                                               'U343', 
                                               contrast.T24vsT1,
                                               engine='edgeR')
edgeR mode selected
combining design matrix
applying edgeR to modified design matrix
U343.res.T24vsT1.sig.edgeR <- filter_results.edgeR(U343.res.T24vsT1.edgeR)
U343.res.T24vsT1.edgeRD <- riborex.for.cellline(rna.read.counts.all, 
                                                ribo.read.counts.all, 
                                                'U343', 
                                                contrast.T24vsT1,
                                                engine='edgeRD')
edgeRD mode selected
combining design matrix
applying edgeR to modified design matrix
U343.res.T24vsT1.sig.edgeRD <- filter_results.edgeR(U343.res.T24vsT1.edgeRD)
U343.res.T24vsT1.DESeq2 <- riborex.for.cellline(rna.read.counts.all, 
                                                ribo.read.counts.all, 
                                                'U343', 
                                                contrast.T24vsT1,
                                                engine='DESeq2')
DESeq2 mode selected
combining design matrix
applying DESeq2 to modified design matrix
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
U343.res.T24vsT1.sig.DESeq2 <- filter_results(U343.res.T24vsT1.DESeq2)
#pdf('U343_T24vsT1_DESeq2Adjust.pdf')
U343.res.T24vsT1.DESeq2Adj <- doPvalueAdjustment(U343.res.T24vsT1.DESeq2)
Step 1... determine cutoff point
Step 2... estimate parameters of null distribution and eta0
Step 3... compute p-values and estimate empirical PDF/CDF
Step 4... compute q-values and local fdr
Step 5... prepare for plotting

NA

#dev.off()
U343.res.T24vsT1.sig.DESeq2Adj <- filter_results(U343.res.T24vsT1.DESeq2Adj)
# Number of significant genes reported by each engine for this contrast,
# drawn as a labelled bar chart and saved to PDF.
counts <- data.frame(
  'tool' = c('edgeR', 'edgeRD', 'DESeq2', 'DESeq2 corrected'),
  'DEgenes' = c(nrow(U343.res.T24vsT1.sig.edgeR),
                nrow(U343.res.T24vsT1.sig.edgeRD),
                nrow(U343.res.T24vsT1.sig.DESeq2),
                nrow(U343.res.T24vsT1.sig.DESeq2Adj)))
ggplot(counts, aes(tool, DEgenes, fill = tool)) +
  geom_col(position = 'dodge', width = 0.3) +
  geom_text(aes(label = DEgenes),
            position = position_dodge(width = 0.9), vjust = -0.25) +
  ggtitle('U343 T24vsT1') +
  scale_y_continuous(breaks = number_ticks(12)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave('comparison_U343_T24vsT1.pdf')
Saving 7.29 x 4.5 in image

---
title: "Accounting for overdispersion in Riborex"
subtitle: When the theoretical distribution is too theoretical.
output:
  html_notebook: default
  html_document: default
  pdf_document: default
---

# Introduction

This document demonstrates a strategy to adjust the p-values for a differential translation efficiency analysis using riborex. The dataset involves a time-course experiment studying the effect of radiation on glioblastoma cells. Briefly, two glioblastoma cell lines, U251 (p53 mt) and U343 (p53 wt), were profiled using RNA-Seq and Ribo-Seq at 3 time points: 0h, 1h and 24h post radiation (labelled T0, T1 and T24 in the code).

# Utility functions
```{r}
suppressMessages(library(riborex))
suppressMessages(library(fdrtool))
suppressMessages(library(cowplot))

# Gene annotation table with columns gene_id, gene_name and gene_type.
# Row names are set to gene_id so result tables can be labelled by gene id.
annotations <- read.table(file.path('..', 'annotations', 
                                    'hg38_gene_names_stripped.tsv'), 
                          header = FALSE, 
                          col.names = c('gene_id', 'gene_name', 
                                        'gene_type'),
                          stringsAsFactors = FALSE)
rownames(annotations) <- annotations$gene_id


# Curated histone gene list; only its Approved.Symbol column is used below.
histone.genes.df <- read.table(file.path('..', 'annotations', 
                                         'histone_genes.tsv'), 
                               header = TRUE, 
                               stringsAsFactors = FALSE,
                               sep = '\t') 
# Histone genes are collected two ways: symbols containing 'HIST', and symbols
# listed in the curated table.
histone.genes.1 <- annotations[grep('HIST', annotations$gene_name), ]$gene_id
histone.genes.2 <- annotations[annotations$gene_name %in% 
                                 histone.genes.df$Approved.Symbol, ]$gene_id
# Add RP1-34B20.21 separately since symbol doesn't have HIST in it.
histone.genes <- unique(c(histone.genes.1, histone.genes.2, 'ENSG00000282988'))


# Input/output locations, all relative to the notebook's directory.
readcounts.dir <- file.path('..', 'read_counts', 'byCDS')
results.dir <- file.path('..', 'results', 'translation_efficiency', 
                         'without_histones_edgeRD')
rna.design.file <- file.path('..', 'design_files', 'rna_seq_design.tsv')
ribo.design.file <- file.path('..', 'design_files', 'ribo_seq_design.tsv')

## Suffix of htseq-count output
counts.suffix <- '.CDS.counts.tsv'

# Design tables; count.reads() reads their SampleFile and SampleName columns.
rna.design.info <- read.table(rna.design.file, header = TRUE, 
                              stringsAsFactors = FALSE)
ribo.design.info <- read.table(ribo.design.file, header = TRUE, 
                               stringsAsFactors = FALSE)


# Load htseq-count output for one assay and return the raw count matrix.
#
# Args:
#   design.info:   data frame with SampleFile and SampleName columns.
#   dirname:       sub-directory of `counts.dir` holding this assay's files.
#   suffix:        file-name suffix appended to each SampleFile entry.
#   counts.dir:    root directory of the count files.
#   exclude.genes: gene ids to drop (the histone genes by default).
#
# Returns: the raw (un-normalised) count matrix, genes x samples, after
#   stripping '.<digits>' version tags from the gene ids, removing
#   `exclude.genes` and dropping genes whose total count is <= 1.
#
# The previous version also ran DESeq() on the intercept-only design and then
# returned counts(dds). counts() returns the raw counts, which DESeq() leaves
# untouched, so the fit only cost time and has been removed.
count.reads <- function(design.info, dirname,
                        suffix = counts.suffix,
                        counts.dir = readcounts.dir,
                        exclude.genes = histone.genes){
  sampleTable <- data.frame(sampleName = design.info$SampleName,
                            fileName = paste0(design.info$SampleFile, suffix))
  ddsHTSeq <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
                                         directory = file.path(counts.dir,
                                                               dirname),
                                         design = ~ 1)
  # Strip the version tag from the gene ids.
  rownames(ddsHTSeq) <- gsub('\\.[0-9]+', '', rownames(ddsHTSeq))
  # Remove histones
  ddsHTSeq <- ddsHTSeq[!(rownames(ddsHTSeq) %in% exclude.genes), ]
  ddsHTSeq <- ddsHTSeq[rowSums(counts(ddsHTSeq)) > 1, ]
  counts(ddsHTSeq)
}

# Keep the significant rows of a DESeq2-style result table.
#
# Args:
#   df:    object coercible with as.data.frame() that has a `padj` column and
#          gene ids as row names.
#   alpha: adjusted p-value cutoff; rows with padj < alpha are kept.
#   annot: annotation data frame with gene ids as row names and a `gene_name`
#          column (defaults to the global `annotations`).
#
# Returns: a data frame sorted by increasing padj, with a `gene_name` column
#   added, restricted to rows with non-missing padj below `alpha`.
filter_results <- function(df, alpha = 0.05, annot = annotations){
  df <- as.data.frame(df)
  if (!('padj' %in% names(df))) {
    stop("filter_results: input has no 'padj' column", call. = FALSE)
  }
  df <- df[order(df$padj), , drop = FALSE]
  # match() is exact. Indexing a data frame by row name partially matches, so
  # an unannotated id that is a prefix of another would get the wrong symbol.
  df$gene_name <- annot$gene_name[match(rownames(df), rownames(annot))]
  df[!is.na(df$padj) & df$padj < alpha, , drop = FALSE]
}

# Keep the significant rows of an edgeR-style result.
#
# Args:
#   df:    object whose `$table` element is coercible with as.data.frame(),
#          has an `FDR` column and gene ids as row names.
#   alpha: FDR cutoff; rows with FDR < alpha are kept.
#   annot: annotation data frame with gene ids as row names and a `gene_name`
#          column (defaults to the global `annotations`).
#
# Returns: a data frame sorted by increasing FDR, with a `gene_name` column
#   added, restricted to rows with non-missing FDR below `alpha`.
filter_results.edgeR <- function(df, alpha = 0.05, annot = annotations){
  tab <- as.data.frame(df$table)
  if (!('FDR' %in% names(tab))) {
    stop("filter_results.edgeR: table has no 'FDR' column", call. = FALSE)
  }
  tab <- tab[order(tab$FDR), , drop = FALSE]
  # match() is exact. Indexing a data frame by row name partially matches, so
  # an unannotated id that is a prefix of another would get the wrong symbol.
  tab$gene_name <- annot$gene_name[match(rownames(tab), rownames(annot))]
  tab[!is.na(tab$FDR) & tab$FDR < alpha, , drop = FALSE]
}

# Re-derive adjusted p-values from the test statistics using an empirically
# estimated null distribution (fdrtool), then Benjamini-Hochberg.
#
# Args:
#   results: DESeq2-style result table with `pvalue`, `padj` and `stat`
#            columns.
#
# Returns: `results` without the rows whose original `padj` or `pvalue` is
#   NA, with `padj` replaced by the BH adjustment of the fdrtool p-values.
#   The `pvalue` column is left as it was.
#
# Side effects: draws a histogram of the original p-values, fdrtool's own
#   diagnostic plot, and a histogram of the fdrtool p-values.
doPvalueAdjustment <- function(results){
  missing.cols <- setdiff(c('pvalue', 'padj', 'stat'), names(results))
  if (length(missing.cols) > 0) {
    stop('doPvalueAdjustment: missing column(s): ',
         paste(missing.cols, collapse = ', '), call. = FALSE)
  }
  hist(results$pvalue, main = 'DESeq2 unadjusted p-values',
       xlab = 'Unadjusted p-values')
  results <- results[!is.na(results$padj), ]
  results <- results[!is.na(results$pvalue), ]
  # Drop the old padj with a logical mask. The former `-which(...)` form would
  # select zero columns if the name were ever absent.
  results <- results[, names(results) != 'padj']
  resultsFDR <- fdrtool(results$stat,
                        statistic = 'normal',
                        plot = TRUE)
  results[, 'padj'] <- p.adjust(resultsFDR$pval,
                                method = 'BH')
  hist(resultsFDR$pval,
       main = 'DESeq2 corrected p-values | Empirical null',
       xlab = 'Corrected p-values')
  results
}



# Bar chart of the share of raw p-values below 0.05 within bins of baseMean.
#
# Args:
#   results: table with `baseMean` and `pvalue` columns.
#
# The bin edges are 0 followed by the 0/15 ... 15/15 quantiles of the
# positive baseMean values; each bar is labelled with its rounded bin
# midpoint. Returns the value of barplot().
plotPValueByNormalizedCount <- function(results){
  base.mean <- results$baseMean
  edges <- c(0, quantile(base.mean[base.mean > 0], 0:15/15))
  midpoints <- (edges[-1] + edges[-length(edges)]) / 2
  count.bin <- cut(base.mean, edges)
  levels(count.bin) <- paste0("~", round(signif(midpoints, 2)))
  share.below <- function(p) {
    mean(p < .05, na.rm = TRUE)
  }
  fractionSig <- tapply(results$pvalue, count.bin, share.below)
  barplot(fractionSig,
          xlab = "mean normalized count",
          ylab = "fraction of raw p values < 0.05")
}


# Run riborex on the samples of a single cell line.
#
# Args:
#   rna.read.counts, ribo.read.counts: count matrices (genes x samples) with
#     '_'-separated sample names as column names. The time point is taken
#     from field 3 of the RNA-Seq names and field 4 of the Ribo-Seq names.
#   cell.line: pattern passed to grepl(); matching columns are kept.
#   contrast:  contrast vector handed to riborex(), e.g. c('time', 'T1', 'T0').
#   engine:    riborex engine name, passed through unchanged.
#   merge.T1:  if TRUE, time levels 'T0' and 'T1' are pooled into 'T0T1'.
#
# Returns: the value of riborex() for the selected samples.
riborex.for.cellline <- function(rna.read.counts, ribo.read.counts, 
                                 cell.line, contrast, engine='DESeq2', 
                                 merge.T1=FALSE){
  # i-th '_'-separated field of every sample name (NA when absent).
  name.field <- function(sample.names, i) {
    vapply(strsplit(sample.names, '_'), `[`, character(1), i)
  }
  # Append `suffix` only where it is not already present. Callers in this
  # notebook pass matrices whose columns were already tagged.
  add.suffix <- function(sample.names, suffix) {
    tagged <- endsWith(sample.names, suffix)
    sample.names[!tagged] <- paste0(sample.names[!tagged], suffix)
    sample.names
  }

  rna.keep <- grepl(cell.line, colnames(rna.read.counts))
  ribo.keep <- grepl(cell.line, colnames(ribo.read.counts))
  if (!any(rna.keep) || !any(ribo.keep)) {
    stop("riborex.for.cellline: no samples match cell line '", cell.line, "'",
         call. = FALSE)
  }
  # drop = FALSE keeps a matrix even if only one sample matches.
  rna.read.counts <- rna.read.counts[, rna.keep, drop = FALSE]
  ribo.read.counts <- ribo.read.counts[, ribo.keep, drop = FALSE]

  rna.conditions.time <- as.factor(name.field(colnames(rna.read.counts), 3))
  ribo.conditions.time <- as.factor(name.field(colnames(ribo.read.counts), 4))

  if (merge.T1){
    early <- c('T0', 'T1')
    levels(rna.conditions.time)[levels(rna.conditions.time) %in% early] <- 'T0T1'
    levels(ribo.conditions.time)[levels(ribo.conditions.time) %in% early] <- 'T0T1'
  }
  rna.conditions <- data.frame('time' = rna.conditions.time)
  ribo.conditions <- data.frame('time' = ribo.conditions.time)

  # riborex is given the same genes, in the same order, for both assays.
  common.genes <- intersect(rownames(rna.read.counts), 
                            rownames(ribo.read.counts)) 
  rna.read.counts <- rna.read.counts[common.genes, , drop = FALSE]
  ribo.read.counts <- ribo.read.counts[common.genes, , drop = FALSE]
  colnames(rna.read.counts) <- add.suffix(colnames(rna.read.counts), '_RNA')
  colnames(ribo.read.counts) <- add.suffix(colnames(ribo.read.counts), '_Ribo')

  riborex(rna.read.counts, 
          ribo.read.counts, 
          rna.conditions, 
          ribo.conditions, 
          contrast = contrast, 
          engine = engine)
}

```

# Read data

```{r}
# Raw count matrices (genes x samples) for each assay.
rna.read.counts.all <- count.reads(rna.design.info, 'rna_seq')
ribo.read.counts.all <- count.reads(ribo.design.info, 'ribo_seq')

# Extract the i-th '_'-separated field from each sample name. vapply() pins
# the result to one string per sample; a name with too few fields gives NA.
sample.name.field <- function(sample.names, i) {
  vapply(strsplit(sample.names, '_'), `[`, character(1), i)
}

# The time point is field 3 of the RNA-Seq names and field 4 of the Ribo-Seq
# names; the cell line is field 1 in both.
rna.conditions.time.all <- as.factor(
  sample.name.field(colnames(rna.read.counts.all), 3))
ribo.conditions.time.all <- as.factor(
  sample.name.field(colnames(ribo.read.counts.all), 4))
rna.conditions.cell.all <- as.factor(
  sample.name.field(colnames(rna.read.counts.all), 1))
ribo.conditions.cell.all <- as.factor(
  sample.name.field(colnames(ribo.read.counts.all), 1))

rna.conditions.all <- data.frame('cell.type' = rna.conditions.cell.all, 
                                 'time' = rna.conditions.time.all)
ribo.conditions.all <- data.frame('cell.type' = ribo.conditions.cell.all, 
                                  'time' = ribo.conditions.time.all)

# Restrict both assays to the genes they share, in the same order.
common.genes <- intersect(rownames(rna.read.counts.all), 
                          rownames(ribo.read.counts.all)) 

rna.read.counts.all <- rna.read.counts.all[common.genes,]
ribo.read.counts.all <- ribo.read.counts.all[common.genes,]

# Tag the sample names with their assay.
colnames(rna.read.counts.all) <- paste(colnames(rna.read.counts.all),
                                       'RNA', sep='_')
colnames(ribo.read.counts.all) <- paste(colnames(ribo.read.counts.all),
                                        'Ribo', sep='_')

# Contrasts in the form c(factor, numerator level, denominator level).
contrast.T1vsT0 <- c('time', 'T1', 'T0')
contrast.T24vsT0 <- c('time', 'T24', 'T0')
contrast.T24vsT1 <- c('time', 'T24', 'T1')
```

# Translation Efficiency: T1 vs T0
We performed an exploratory analysis, which indicated that the T1 and T0 samples are very similar at both the RNA-Seq and Ribo-Seq levels.
```{r}
# T1 vs T0 on all samples (both cell lines), once per riborex engine.
# Each fit is followed by a filter that keeps the rows below the 0.05 cutoff.

# Engine 'edgeR'.
res.T1vsT0.edgeR <- riborex(rna.read.counts.all, 
                            ribo.read.counts.all, 
                            rna.conditions.all, 
                            ribo.conditions.all, 
                            contrast = contrast.T1vsT0,
                            engine = 'edgeR')
res.T1vsT0.sig.edgeR <- filter_results.edgeR(res.T1vsT0.edgeR)
# Engine 'edgeRD'.
res.T1vsT0.edgeRD <- riborex(rna.read.counts.all, 
                             ribo.read.counts.all, 
                             rna.conditions.all, 
                             ribo.conditions.all, 
                             contrast = contrast.T1vsT0,
                             engine = 'edgeRD')
res.T1vsT0.sig.edgeRD <- filter_results.edgeR(res.T1vsT0.edgeRD)
# Engine 'DESeq2'.
res.T1vsT0.DESeq2 <- riborex(rna.read.counts.all, 
                             ribo.read.counts.all, 
                             rna.conditions.all, 
                             ribo.conditions.all, 
                             contrast = contrast.T1vsT0,
                             engine = 'DESeq2')
res.T1vsT0.sig.DESeq2 <- filter_results(res.T1vsT0.DESeq2)
# DESeq2 result with padj recomputed by doPvalueAdjustment(). Uncomment the
# pdf()/dev.off() pair to send its diagnostic plots to a file.
#pdf('All_T1vsT0_DESeq2Adjust.pdf')
res.T1vsT0.DESeq2Adj <- doPvalueAdjustment(res.T1vsT0.DESeq2)
#dev.off()
res.T1vsT0.sig.DESeq2Adj <- filter_results(res.T1vsT0.DESeq2Adj)
```

```{r}
# Build a break-generating function for a continuous ggplot2 scale.
#
# n: desired number of axis intervals, forwarded to pretty().
# Returns a function of the axis limits that yields pretty(limits, n).
number_ticks <- function(n) {
  function(limits) {
    pretty(limits, n)
  }
}

# One row per method: how many genes survived filtering for All T1 vs T0.
counts <- data.frame(
  tool = c('edgeR', 'edgeRD', 'DESeq2', 'DESeq2 corrected'),
  DEgenes = c(
    nrow(res.T1vsT0.sig.edgeR),
    nrow(res.T1vsT0.sig.edgeRD),
    nrow(res.T1vsT0.sig.DESeq2),
    nrow(res.T1vsT0.sig.DESeq2Adj)
  )
)

# Bar chart of the counts, each bar labelled with its value just above it.
ggplot(counts, aes(x = tool, y = DEgenes, fill = tool)) +
  geom_col(position = 'dodge', width = 0.3) +
  geom_text(
    aes(label = DEgenes),
    position = position_dodge(width = 0.9),
    vjust = -0.25
  ) +
  scale_y_continuous(breaks = number_ticks(12)) +
  ggtitle('All T1vsT0') +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# No plot argument is given, so ggsave() writes the most recent plot.
ggsave('comparison_All_T1vsT0.pdf')
```
# All T24 vs T1 

```{r}
# T24 vs T1 across all samples: run riborex once per engine on identical
# inputs, then keep the filtered ("sig") subset of each result.

# Engine: edgeR
res.T24vsT1.edgeR <- riborex(
  rna.read.counts.all, ribo.read.counts.all,
  rna.conditions.all, ribo.conditions.all,
  contrast = contrast.T24vsT1, engine = 'edgeR'
)
res.T24vsT1.sig.edgeR <- filter_results.edgeR(res.T24vsT1.edgeR)

# Engine: edgeRD
res.T24vsT1.edgeRD <- riborex(
  rna.read.counts.all, ribo.read.counts.all,
  rna.conditions.all, ribo.conditions.all,
  contrast = contrast.T24vsT1, engine = 'edgeRD'
)
res.T24vsT1.sig.edgeRD <- filter_results.edgeR(res.T24vsT1.edgeRD)

# Engine: DESeq2
res.T24vsT1.DESeq2 <- riborex(
  rna.read.counts.all, ribo.read.counts.all,
  rna.conditions.all, ribo.conditions.all,
  contrast = contrast.T24vsT1, engine = 'DESeq2'
)
res.T24vsT1.sig.DESeq2 <- filter_results(res.T24vsT1.DESeq2)

# DESeq2 result after the p-value adjustment step (doPvalueAdjustment is
# defined elsewhere in this document). The commented pdf()/dev.off() pair
# presumably captures its diagnostic plots in a file when enabled.
# pdf('All_T24vsT1_DESeq2Adjust.pdf')
res.T24vsT1.DESeq2Adj <- doPvalueAdjustment(res.T24vsT1.DESeq2)
res.T24vsT1.sig.DESeq2Adj <- filter_results(res.T24vsT1.DESeq2Adj)
# dev.off()
```

```{r}
# One row per method: how many genes survived filtering for All T24 vs T1.
counts <- data.frame(
  tool = c('edgeR', 'edgeRD', 'DESeq2', 'DESeq2 corrected'),
  DEgenes = c(
    nrow(res.T24vsT1.sig.edgeR),
    nrow(res.T24vsT1.sig.edgeRD),
    nrow(res.T24vsT1.sig.DESeq2),
    nrow(res.T24vsT1.sig.DESeq2Adj)
  )
)

# Bar chart of the counts, each bar labelled with its value just above it.
ggplot(counts, aes(x = tool, y = DEgenes, fill = tool)) +
  geom_col(position = 'dodge', width = 0.3) +
  geom_text(
    aes(label = DEgenes),
    position = position_dodge(width = 0.9),
    vjust = -0.25
  ) +
  scale_y_continuous(breaks = number_ticks(12)) +
  ggtitle('All T24vsT1') +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# No plot argument is given, so ggsave() writes the most recent plot.
ggsave('comparison_All_T24vsT1.pdf')
```

# U251: T1 vs T0

```{r}
# U251 only, T1 vs T0: run the per-cell-line riborex wrapper once per engine,
# then keep the filtered ("sig") subset of each result.

# Engine: edgeR
U251.res.T1vsT0.edgeR <- riborex.for.cellline(
  rna.read.counts.all, ribo.read.counts.all,
  'U251', contrast.T1vsT0,
  engine = 'edgeR'
)
U251.res.T1vsT0.sig.edgeR <- filter_results.edgeR(U251.res.T1vsT0.edgeR)

# Engine: edgeRD
U251.res.T1vsT0.edgeRD <- riborex.for.cellline(
  rna.read.counts.all, ribo.read.counts.all,
  'U251', contrast.T1vsT0,
  engine = 'edgeRD'
)
U251.res.T1vsT0.sig.edgeRD <- filter_results.edgeR(U251.res.T1vsT0.edgeRD)

# Engine: DESeq2
U251.res.T1vsT0.DESeq2 <- riborex.for.cellline(
  rna.read.counts.all, ribo.read.counts.all,
  'U251', contrast.T1vsT0,
  engine = 'DESeq2'
)
U251.res.T1vsT0.sig.DESeq2 <- filter_results(U251.res.T1vsT0.DESeq2)

# DESeq2 result after the p-value adjustment step (doPvalueAdjustment is
# defined elsewhere in this document). The commented pdf()/dev.off() pair
# presumably captures its diagnostic plots in a file when enabled.
# pdf('U251_T1vsT0_DESeq2Adjust.pdf')
U251.res.T1vsT0.DESeq2Adj <- doPvalueAdjustment(U251.res.T1vsT0.DESeq2)
# dev.off()
U251.res.T1vsT0.sig.DESeq2Adj <- filter_results(U251.res.T1vsT0.DESeq2Adj)
```

```{r}
# One row per method: how many genes survived filtering for U251 T1 vs T0.
counts <- data.frame(
  tool = c('edgeR', 'edgeRD', 'DESeq2', 'DESeq2 corrected'),
  DEgenes = c(
    nrow(U251.res.T1vsT0.sig.edgeR),
    nrow(U251.res.T1vsT0.sig.edgeRD),
    nrow(U251.res.T1vsT0.sig.DESeq2),
    nrow(U251.res.T1vsT0.sig.DESeq2Adj)
  )
)

# Bar chart of the counts, each bar labelled with its value just above it.
ggplot(counts, aes(x = tool, y = DEgenes, fill = tool)) +
  geom_col(position = 'dodge', width = 0.3) +
  geom_text(
    aes(label = DEgenes),
    position = position_dodge(width = 0.9),
    vjust = -0.25
  ) +
  scale_y_continuous(breaks = number_ticks(12)) +
  ggtitle('U251 T1vsT0') +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# No plot argument is given, so ggsave() writes the most recent plot.
ggsave('comparison_U251_T1vsT0.pdf')
```

```{r}
# U251 only, T24 vs T1: run the per-cell-line riborex wrapper once per engine,
# then keep the filtered ("sig") subset of each result.

# Engine: edgeR
U251.res.T24vsT1.edgeR <- riborex.for.cellline(
  rna.read.counts.all, ribo.read.counts.all,
  'U251', contrast.T24vsT1,
  engine = 'edgeR'
)
U251.res.T24vsT1.sig.edgeR <- filter_results.edgeR(U251.res.T24vsT1.edgeR)

# Engine: edgeRD
U251.res.T24vsT1.edgeRD <- riborex.for.cellline(
  rna.read.counts.all, ribo.read.counts.all,
  'U251', contrast.T24vsT1,
  engine = 'edgeRD'
)
U251.res.T24vsT1.sig.edgeRD <- filter_results.edgeR(U251.res.T24vsT1.edgeRD)

# Engine: DESeq2
U251.res.T24vsT1.DESeq2 <- riborex.for.cellline(
  rna.read.counts.all, ribo.read.counts.all,
  'U251', contrast.T24vsT1,
  engine = 'DESeq2'
)
U251.res.T24vsT1.sig.DESeq2 <- filter_results(U251.res.T24vsT1.DESeq2)

# DESeq2 result after the p-value adjustment step (doPvalueAdjustment is
# defined elsewhere in this document). The commented pdf()/dev.off() pair
# presumably captures its diagnostic plots in a file when enabled.
# pdf('U251_T24vsT1_DESeq2Adjust.pdf')
U251.res.T24vsT1.DESeq2Adj <- doPvalueAdjustment(U251.res.T24vsT1.DESeq2)
# dev.off()
U251.res.T24vsT1.sig.DESeq2Adj <- filter_results(U251.res.T24vsT1.DESeq2Adj)
```

```{r}
# One row per method: how many genes survived filtering for U251 T24 vs T1.
counts <- data.frame(
  tool = c('edgeR', 'edgeRD', 'DESeq2', 'DESeq2 corrected'),
  DEgenes = c(
    nrow(U251.res.T24vsT1.sig.edgeR),
    nrow(U251.res.T24vsT1.sig.edgeRD),
    nrow(U251.res.T24vsT1.sig.DESeq2),
    nrow(U251.res.T24vsT1.sig.DESeq2Adj)
  )
)

# Bar chart of the counts, each bar labelled with its value just above it.
ggplot(counts, aes(x = tool, y = DEgenes, fill = tool)) +
  geom_col(position = 'dodge', width = 0.3) +
  geom_text(
    aes(label = DEgenes),
    position = position_dodge(width = 0.9),
    vjust = -0.25
  ) +
  scale_y_continuous(breaks = number_ticks(12)) +
  ggtitle('U251 T24vsT1') +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# No plot argument is given, so ggsave() writes the most recent plot.
ggsave('comparison_U251_T24vsT1.pdf')
```

# U343: T1 vs T0

```{r}
# U343 only, T1 vs T0: run the per-cell-line riborex wrapper once per engine,
# then keep the filtered ("sig") subset of each result.

# Engine: edgeR
U343.res.T1vsT0.edgeR <- riborex.for.cellline(
  rna.read.counts.all, ribo.read.counts.all,
  'U343', contrast.T1vsT0,
  engine = 'edgeR'
)
U343.res.T1vsT0.sig.edgeR <- filter_results.edgeR(U343.res.T1vsT0.edgeR)

# Engine: edgeRD
U343.res.T1vsT0.edgeRD <- riborex.for.cellline(
  rna.read.counts.all, ribo.read.counts.all,
  'U343', contrast.T1vsT0,
  engine = 'edgeRD'
)
U343.res.T1vsT0.sig.edgeRD <- filter_results.edgeR(U343.res.T1vsT0.edgeRD)

# Engine: DESeq2
U343.res.T1vsT0.DESeq2 <- riborex.for.cellline(
  rna.read.counts.all, ribo.read.counts.all,
  'U343', contrast.T1vsT0,
  engine = 'DESeq2'
)
U343.res.T1vsT0.sig.DESeq2 <- filter_results(U343.res.T1vsT0.DESeq2)

# DESeq2 result after the p-value adjustment step (doPvalueAdjustment is
# defined elsewhere in this document). The commented pdf()/dev.off() pair
# presumably captures its diagnostic plots in a file when enabled.
# pdf('U343_T1vsT0_DESeq2Adjust.pdf')
U343.res.T1vsT0.DESeq2Adj <- doPvalueAdjustment(U343.res.T1vsT0.DESeq2)
# dev.off()
U343.res.T1vsT0.sig.DESeq2Adj <- filter_results(U343.res.T1vsT0.DESeq2Adj)
```

```{r}
# One row per method: how many genes survived filtering for U343 T1 vs T0.
counts <- data.frame(
  tool = c('edgeR', 'edgeRD', 'DESeq2', 'DESeq2 corrected'),
  DEgenes = c(
    nrow(U343.res.T1vsT0.sig.edgeR),
    nrow(U343.res.T1vsT0.sig.edgeRD),
    nrow(U343.res.T1vsT0.sig.DESeq2),
    nrow(U343.res.T1vsT0.sig.DESeq2Adj)
  )
)

# Bar chart of the counts, each bar labelled with its value just above it.
ggplot(counts, aes(x = tool, y = DEgenes, fill = tool)) +
  geom_col(position = 'dodge', width = 0.3) +
  geom_text(
    aes(label = DEgenes),
    position = position_dodge(width = 0.9),
    vjust = -0.25
  ) +
  scale_y_continuous(breaks = number_ticks(12)) +
  ggtitle('U343 T1vsT0') +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# No plot argument is given, so ggsave() writes the most recent plot.
ggsave('comparison_U343_T1vsT0.pdf')
```

```{r}
# U343 only, T24 vs T1: run the per-cell-line riborex wrapper once per engine,
# then keep the filtered ("sig") subset of each result.

# Engine: edgeR
U343.res.T24vsT1.edgeR <- riborex.for.cellline(
  rna.read.counts.all, ribo.read.counts.all,
  'U343', contrast.T24vsT1,
  engine = 'edgeR'
)
U343.res.T24vsT1.sig.edgeR <- filter_results.edgeR(U343.res.T24vsT1.edgeR)

# Engine: edgeRD
U343.res.T24vsT1.edgeRD <- riborex.for.cellline(
  rna.read.counts.all, ribo.read.counts.all,
  'U343', contrast.T24vsT1,
  engine = 'edgeRD'
)
U343.res.T24vsT1.sig.edgeRD <- filter_results.edgeR(U343.res.T24vsT1.edgeRD)

# Engine: DESeq2
U343.res.T24vsT1.DESeq2 <- riborex.for.cellline(
  rna.read.counts.all, ribo.read.counts.all,
  'U343', contrast.T24vsT1,
  engine = 'DESeq2'
)
U343.res.T24vsT1.sig.DESeq2 <- filter_results(U343.res.T24vsT1.DESeq2)

# DESeq2 result after the p-value adjustment step (doPvalueAdjustment is
# defined elsewhere in this document). The commented pdf()/dev.off() pair
# presumably captures its diagnostic plots in a file when enabled.
# pdf('U343_T24vsT1_DESeq2Adjust.pdf')
U343.res.T24vsT1.DESeq2Adj <- doPvalueAdjustment(U343.res.T24vsT1.DESeq2)
# dev.off()
U343.res.T24vsT1.sig.DESeq2Adj <- filter_results(U343.res.T24vsT1.DESeq2Adj)
```


```{r}
# One row per method: how many genes survived filtering for U343 T24 vs T1.
counts <- data.frame(
  tool = c('edgeR', 'edgeRD', 'DESeq2', 'DESeq2 corrected'),
  DEgenes = c(
    nrow(U343.res.T24vsT1.sig.edgeR),
    nrow(U343.res.T24vsT1.sig.edgeRD),
    nrow(U343.res.T24vsT1.sig.DESeq2),
    nrow(U343.res.T24vsT1.sig.DESeq2Adj)
  )
)

# Bar chart of the counts, each bar labelled with its value just above it.
ggplot(counts, aes(x = tool, y = DEgenes, fill = tool)) +
  geom_col(position = 'dodge', width = 0.3) +
  geom_text(
    aes(label = DEgenes),
    position = position_dodge(width = 0.9),
    vjust = -0.25
  ) +
  scale_y_continuous(breaks = number_ticks(12)) +
  ggtitle('U343 T24vsT1') +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# No plot argument is given, so ggsave() writes the most recent plot.
ggsave('comparison_U343_T24vsT1.pdf')
```