eXpress vs. sailfish

We ran eXpress and sailfish on one replicate of one sample from Flaveria bidentis.

TPMs

Here we check how well the TPM (transcripts per million) estimates from the two tools correlate.

# Load the eXpress and sailfish results for replicate 1_1, join them on
# transcript ID, and print the correlation between the two tools' TPMs.
library(ggplot2)

base <- "/data2/rnaseq/flaveria/assemblies/fb_sf/"

# Both result files are tab-separated with a header row. `base` already ends
# in "/", so the file names are appended by plain concatenation.
ex <- read.delim(paste0(base, "1_1/fb_ex_1_1.xprs"), header = TRUE)
sf <- read.delim(
  paste0(base, "1_1/quant_bias_corrected_clean.sf"),
  header = TRUE
)

# merge() keeps only the transcripts that appear in both tables (all = FALSE
# is its default).
both <- merge(ex, sf, by.x = "target_id", by.y = "Transcript")

# `tpm` is taken from the eXpress table and `TPM` from the sailfish table;
# rename both after the tool that produced them.
tpms <- both[, c("target_id", "tpm", "TPM")]
names(tpms) <- c("target_id", "eXpress", "sailfish")

# Treat TPM estimates below 0.01 as zero. which() skips NA entries, so any
# missing values are left as they are.
tpms$eXpress[which(tpms$eXpress < 0.01)] <- 0
tpms$sailfish[which(tpms$sailfish < 0.01)] <- 0

print(cor(tpms[, c("eXpress", "sailfish")]))

##          eXpress sailfish
## eXpress   1.0000   0.6657
## sailfish  0.6657   1.0000

Plot the per-transcript TPM estimates from the two tools against one another, on a log scale

# Scatter plot of natural-log TPMs, one point per transcript. Values that were
# set to 0 are -Inf on the log scale.
ggplot(data = tpms, mapping = aes(x = log(eXpress), y = log(sailfish))) +
  geom_point(size = 0.1)

plot of chunk unnamed-chunk-2

Plot the log TPM distributions

# Reshape to long format (one row per transcript/tool pair) so that both
# tools' TPM distributions can be drawn as density curves on one set of axes.
library(reshape2)
# Spell out `id.vars`; the shorter `id` only worked via partial matching.
tpms_melt <- melt(tpms, id.vars = "target_id")
# log(0) is -Inf, so zero TPMs are non-finite; stat_density drops those rows
# and warns about it.
ggplot(data = tpms_melt, aes(x = log(value), colour = variable)) +
  geom_density()

## Warning: Removed 129682 rows containing non-finite values (stat_density).
## Warning: Removed 105249 rows containing non-finite values (stat_density).

plot of chunk unnamed-chunk-3

Counts

Correlation

# Take each tool's per-transcript read-count estimate from the merged table
# and name the columns after the tool that produced them.
counts <- setNames(
  both[, c("target_id", "eff_counts", "EstimatedNumReads")],
  c("target_id", "eXpress", "sailfish")
)
# Estimates of less than one read are treated as zero for both tools.
counts$eXpress <- replace(counts$eXpress, which(counts$eXpress < 1), 0)
counts$sailfish <- replace(counts$sailfish, which(counts$sailfish < 1), 0)
library(DESeq)

## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## 
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## 
## The following object is masked from 'package:stats':
## 
##     xtabs
## 
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, as.vector, cbind,
##     colnames, duplicated, eval, evalq, Filter, Find, get,
##     intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rep.int, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unlist
## 
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Loading required package: locfit
## locfit 1.5-9.1    2013-03-22
## Loading required package: lattice
##     Welcome to 'DESeq'. For improved performance, usability and
##     functionality, please consider migrating to 'DESeq2'.

# Put the two tools' counts on a common scale with DESeq size factors, then
# print their correlation. Each tool's column is passed to DESeq as if it were
# a separate sample, labelled with conditions "a" and "b".
# Counts are rounded before being handed to newCountDataSet(); the argument
# is spelled `conditions` in full rather than relying on partial matching.
cds <- newCountDataSet(round(counts[, 2:3]), conditions = c("a", "b"))
cds <- estimateSizeFactors(cds)
# `counts` names both the data frame and the DESeq accessor; R looks for a
# function when resolving a call, so counts(cds, ...) reaches the accessor.
# `normalized` is spelled in full (the original `normalize` was a partial
# match).
counts[, 2:3] <- counts(cds, normalized = TRUE)
print(cor(counts[, 2:3]))

##          eXpress sailfish
## eXpress   1.0000   0.8622
## sailfish  0.8622   1.0000

Plot the per-transcript normalised counts from the two tools against one another, on a log scale

# Scatter plot of natural-log counts, one point per transcript. Zero counts
# are -Inf on the log scale.
ggplot(data = counts, mapping = aes(x = log(eXpress), y = log(sailfish))) +
  geom_point(size = 0.1)

plot of chunk unnamed-chunk-5

Plot the log count distributions

# Reshape to long format (one row per transcript/tool pair) so that both
# tools' count distributions can be drawn as density curves on one set of axes.
library(reshape2)
# Spell out `id.vars`; the shorter `id` only worked via partial matching.
counts_melt <- melt(counts, id.vars = "target_id")
# Counts are rounded to whole reads before taking logs. Values that round to
# 0 give -Inf, which stat_density drops with a warning.
ggplot(data = counts_melt, aes(x = log(round(value)), colour = variable)) +
  geom_density()

## Warning: Removed 161827 rows containing non-finite values (stat_density).
## Warning: Removed 204848 rows containing non-finite values (stat_density).

plot of chunk unnamed-chunk-6