We ran eXpress and sailfish on one replicate of one sample from Flaveria bidentis.
Here we check how well the TPM (transcripts per million) estimates from the two tools correlate.
# Directory holding the eXpress and sailfish output for this assembly.
base <- "/data2/rnaseq/flaveria/assemblies/fb_sf/"

# Both result files are read as tab-separated tables with a header row.
# `header = TRUE` is spelled out in full: the original `head = T` relied on
# partial argument matching and on `T`, which can be reassigned.
ex <- read.csv(paste0(base, "1_1/fb_ex_1_1.xprs"), sep = "\t", header = TRUE)
sf <- read.csv(
  paste0(base, "1_1/quant_bias_corrected_clean.sf"),
  sep = "\t", header = TRUE
)

# Join the two tables on transcript id (`target_id` in the eXpress table,
# `Transcript` in the sailfish table). merge() defaults to an inner join, so
# only transcripts reported by both tools are kept.
both <- merge(ex, sf, by.x = "target_id", by.y = "Transcript")
library(ggplot2)
# Pull the transcript id and each tool's TPM column out of the merged table
# and label the two TPM columns by the tool that produced them.
tpms <- setNames(
  both[, c("target_id", "tpm", "TPM")],
  c("target_id", "eXpress", "sailfish")
)

# Treat any TPM below 0.01 as zero for both tools.
tpms$eXpress <- replace(tpms$eXpress, which(tpms$eXpress < 0.01), 0)
tpms$sailfish <- replace(tpms$sailfish, which(tpms$sailfish < 0.01), 0)

# Correlation matrix of the two TPM columns.
print(cor(tpms[, c("eXpress", "sailfish")]))
## eXpress sailfish
## eXpress 1.0000 0.6657
## sailfish 0.6657 1.0000
Plot the individual points against one another
# Scatter plot of log TPM, eXpress on x against sailfish on y, one small
# point per transcript. Zero TPMs become log(0) = -Inf.
ggplot(data = tpms, mapping = aes(x = log(eXpress), y = log(sailfish))) +
  geom_point(size = 0.1)
Plot the log TPM distributions
library(reshape2)
# Reshape to long form, keeping `target_id` as the identifier; the plot below
# reads the tool name from `variable` and the TPM from `value`.
# `id.vars` is spelled out in full rather than relying on partial matching
# of `id`.
tpms_melt <- melt(tpms, id.vars = "target_id")

# Overlaid density curves of log TPM, one per tool. Zero TPMs give
# log(0) = -Inf and are dropped by stat_density with a warning.
ggplot(data = tpms_melt, aes(x = log(value), colour = variable)) +
  geom_density()
## Warning: Removed 129682 rows containing non-finite values (stat_density).
## Warning: Removed 105249 rows containing non-finite values (stat_density).
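Correlation of estimated read counts (eXpress `eff_counts` vs. sailfish `EstimatedNumReads`, after DESeq size-factor normalisation)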
# Pull the transcript id and each tool's read-count column out of the merged
# table and label the two count columns by the tool that produced them.
counts <- setNames(
  both[, c("target_id", "eff_counts", "EstimatedNumReads")],
  c("target_id", "eXpress", "sailfish")
)

# Treat any estimate of less than one read as zero for both tools.
counts$eXpress <- replace(counts$eXpress, which(counts$eXpress < 1), 0)
counts$sailfish <- replace(counts$sailfish, which(counts$sailfish < 1), 0)
library(DESeq)
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
##
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
##
## The following object is masked from 'package:stats':
##
## xtabs
##
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, as.vector, cbind,
## colnames, duplicated, eval, evalq, Filter, Find, get,
## intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rep.int, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unlist
##
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Loading required package: locfit
## locfit 1.5-9.1 2013-03-22
## Loading required package: lattice
## Welcome to 'DESeq'. For improved performance, usability and
## functionality, please consider migrating to 'DESeq2'.
# Build a DESeq CountDataSet from the two count columns, rounded to whole
# numbers, with the two tools labelled as conditions "a" and "b".
cds <- newCountDataSet(round(counts[, 2:3]), condition = c("a", "b"))

# Estimate one size factor per column so the two tools' counts can be put on
# a common scale.
cds <- estimateSizeFactors(cds)

# Overwrite the raw counts with the size-factor-normalised ones.
# The accessor's argument is `normalized`; the original `normalize = TRUE`
# only worked through partial argument matching.
# NOTE: `counts` is both the data frame defined earlier in this script and the
# DESeq accessor function; R resolves the call to the function.
counts[, 2:3] <- counts(cds, normalized = TRUE)

# Correlation matrix of the normalised counts.
print(cor(counts[, 2:3]))
## eXpress sailfish
## eXpress 1.0000 0.8622
## sailfish 0.8622 1.0000
Plot the individual points against one another
# Scatter plot of log normalised count, eXpress on x against sailfish on y,
# one small point per transcript. Zero counts become log(0) = -Inf.
ggplot(data = counts, mapping = aes(x = log(eXpress), y = log(sailfish))) +
  geom_point(size = 0.1)
Plot the log count distributions
library(reshape2)
# Reshape to long form, keeping `target_id` as the identifier; the plot below
# reads the tool name from `variable` and the count from `value`.
# `id.vars` is spelled out in full rather than relying on partial matching
# of `id`.
counts_melt <- melt(counts, id.vars = "target_id")

# Overlaid density curves of log rounded count, one per tool. Counts that
# round to zero give log(0) = -Inf and are dropped by stat_density with a
# warning.
ggplot(data = counts_melt, aes(x = log(round(value)), colour = variable)) +
  geom_density()
## Warning: Removed 161827 rows containing non-finite values (stat_density).
## Warning: Removed 204848 rows containing non-finite values (stat_density).