Comparison of eXpress and Salmon expression quantification

library(ggplot2)
library(reshape2)

# Work from the directory that holds the sorghum quantification files; the
# relative file names passed to read.table() in this script resolve against it.
setwd("/home/chris/documents/sorghum/express-salmon-compare")

# Reference ("real") TPM per transcript: a headerless, tab-separated table
# with two columns, read as a character id and a numeric TPM value.
real_tpm <- read.table(
  "sorghum.tpm",
  sep = "\t",
  as.is = TRUE,
  header = FALSE,
  colClasses = c("character", "numeric"),
  col.names = c("id", "realtpm")
)

SORGHUM

# eXpress results: tab-separated with a header row. The table must contain a
# `target_id` column, which is the merge key used below.
express <- read.table("sorghum-express.csv", sep = "\t", as.is = TRUE,
                      header = TRUE)
# sapply(express, class)

# Salmon results: tab-separated without a header row, so the five columns are
# named here.
salmon <- read.table("sorghum-salmon.csv", sep = "\t", as.is = TRUE,
                     header = FALSE)
names(salmon) <- c("id", "salmon.length", "salmon.tpm", "salmon.fpkm",
                   "salmon.reads")
# sapply(salmon, class)

# Full outer join: a transcript reported by only one tool is kept, with NA in
# the other tool's columns.
data <- merge(x = express, y = salmon, by.x = "target_id", by.y = "id",
              all = TRUE)
# sapply(data, class)

# Keep the transcript id (column 1) plus the two columns that are renamed
# "express" and "salmon" below.
# NOTE(review): positions 15 and 17 presumably pick the eXpress TPM and
# `salmon.tpm`; that only holds if the eXpress table has exactly 15 columns
# with TPM last -- confirm against the eXpress header.
data <- data[, c(1, 15, 17)]

# Attach the reference TPM, again keeping ids that are missing on either side.
data <- merge(x = data, y = real_tpm, by.x = "target_id", by.y = "id",
              all = TRUE)
names(data) <- c("id", "express", "salmon", "real")

# Long format (one row per id/source pair) so each source is drawn as its own
# density curve.
mdata <- melt(data, id.vars = "id")

# Values that are non-finite after log() (zero or NA TPM) or fall outside the
# x limits are dropped by ggplot2 with a warning.
ggplot(mdata, aes(x = log(value), colour = variable)) +
  geom_density() +
  xlim(-4, 10) +
  ggtitle("sorghum tpm density plot")
## Warning: Removed 14169 rows containing non-finite values (stat_density).
## Warning: Removed 13473 rows containing non-finite values (stat_density).
## Warning: Removed 14707 rows containing non-finite values (stat_density).

Figure (chunk unnamed-chunk-4): density of log TPM for eXpress, Salmon and the real values (sorghum)

# Scatter of log eXpress estimates (x) against log real TPM (y). Both axes
# are limited to [-1, 10]; points are mostly transparent so dense regions
# show up darker.
ggplot(data = data, mapping = aes(x = log(express), y = log(real))) +
  geom_point(alpha = 0.1) +
  scale_x_continuous(limits = c(-1, 10)) +
  scale_y_continuous(limits = c(-1, 10)) +
  labs(title = "express vs real tpm")
## Warning: Removed 2332 rows containing missing values (geom_point).

Figure (chunk unnamed-chunk-5): scatter plot of log eXpress TPM against log real TPM (sorghum)

# Scatter of log Salmon estimates (x) against log real TPM (y). Both axes
# are limited to [-1, 10]; points are mostly transparent so dense regions
# show up darker.
ggplot(data = data, mapping = aes(x = log(salmon), y = log(real))) +
  geom_point(alpha = 0.1) +
  scale_x_continuous(limits = c(-1, 10)) +
  scale_y_continuous(limits = c(-1, 10)) +
  labs(title = "salmon vs real tpm")
## Warning: Removed 1464 rows containing missing values (geom_point).

Figure (chunk unnamed-chunk-6): scatter plot of log Salmon TPM against log real TPM (sorghum)

# Squared deviation of each tool's estimate from the real TPM, one row per
# transcript.
error <- data.frame(
  salmon_error = (data$real - data$salmon)^2,
  express_error = (data$real - data$express)^2
)

# Square root of the summed squared error per tool. The sum is not divided by
# the number of transcripts, so this is a Euclidean distance, not an RMSE.
# colSums() is called without na.rm, so an NA in any row makes that tool's
# total NA.
sqrt(colSums(error))
##  salmon_error express_error 
##          5354         49817

RICE