library(ggplot2)
library(reshape2)
# ---- Load and combine the TPM tables ----
# Reads the real TPM values plus the express and salmon result tables, joins
# them on transcript id, and produces:
#   data  - wide table with columns id, express, salmon, real
#   mdata - long (melted) form of `data`, one row per (id, variable) pair

# NOTE(review): hard-coded absolute path; the script only runs on a machine
# with this directory layout. Kept because the relative file names below (and
# possibly code elsewhere) depend on the working directory.
setwd("/home/chris/documents/sorghum/express-salmon-compare")

# Real TPM values: tab-separated, no header row, two columns (id, value).
real_tpm <- read.table(
  "sorghum.tpm",
  sep = "\t", as.is = TRUE, header = FALSE,
  colClasses = c("character", "numeric")
)
names(real_tpm) <- c("id", "realtpm")

# express results: tab-separated with a header row; joined on `target_id`.
express <- read.table(
  "sorghum-express.csv",
  sep = "\t", as.is = TRUE, header = TRUE
)
# sapply(express, class)

# salmon results: tab-separated, no header row, so columns are named here.
salmon <- read.table(
  "sorghum-salmon.csv",
  sep = "\t", as.is = TRUE, header = FALSE
)
names(salmon) <- c(
  "id", "salmon.length", "salmon.tpm", "salmon.fpkm", "salmon.reads"
)
# sapply(salmon, class)

# Full outer join: ids present in only one table are kept, with NA filling
# the columns of the other table.
data <- merge(x = express, y = salmon, by.x = "target_id", by.y = "id",
              all = TRUE)
# sapply(data, class)

# Keep the id plus the two columns later labelled `express` and `salmon`.
# NOTE(review): selection is positional. Column 17 is `salmon.tpm` only when
# the express table has exactly 15 columns, and column 15 is then the last
# express column (presumably its TPM) -- TODO confirm against the input file.
data <- data[, c(1, 15, 17)]

# Second full outer join, adding the real TPM column.
data <- merge(x = data, y = real_tpm, by.x = "target_id", by.y = "id",
              all = TRUE)
names(data) <- c("id", "express", "salmon", "real")

# Long format for plotting: columns id, variable (express/salmon/real), value.
mdata <- melt(data, id.vars = "id")
# Density of natural-log TPM, one curve per source (`variable` in mdata).
# Zero values become -Inf under log() and, like anything outside the x limits
# of [-4, 10], are dropped by ggplot2 with a warning.
ggplot(data = mdata, mapping = aes(x = log(value), colour = variable)) +
  geom_density() +
  scale_x_continuous(limits = c(-4, 10)) +
  labs(title = "sorghum tpm density plot")
## Warning: Removed 14169 rows containing non-finite values (stat_density).
## Warning: Removed 13473 rows containing non-finite values (stat_density).
## Warning: Removed 14707 rows containing non-finite values (stat_density).
# Scatter of natural-log express values against natural-log real values.
# Both axes are limited to [-1, 10]; points outside that window (including
# -Inf from log(0)) are dropped by ggplot2 with a warning.
ggplot(data = data, mapping = aes(x = log(express), y = log(real))) +
  geom_point(alpha = 0.1) +
  scale_x_continuous(limits = c(-1, 10)) +
  scale_y_continuous(limits = c(-1, 10)) +
  labs(title = "express vs real tpm")
## Warning: Removed 2332 rows containing missing values (geom_point).
# Scatter of natural-log salmon values against natural-log real values.
# Both axes are limited to [-1, 10]; points outside that window (including
# -Inf from log(0)) are dropped by ggplot2 with a warning.
ggplot(data = data, mapping = aes(x = log(salmon), y = log(real))) +
  geom_point(alpha = 0.1) +
  scale_x_continuous(limits = c(-1, 10)) +
  scale_y_continuous(limits = c(-1, 10)) +
  labs(title = "salmon vs real tpm")
## Warning: Removed 1464 rows containing missing values (geom_point).
# Per-transcript squared difference between the real value and each estimate.
error <- data.frame(
  salmon_error = (data$real - data$salmon)^2,
  express_error = (data$real - data$express)^2
)

# Column totals of the squared differences, stored as a one-column data frame
# whose row names are the column names of `error`.
df <- data.frame(colSums(x = error))

# Square root of each total (root of the summed squared error; it is not
# divided by the number of rows). Any NA in a column makes its total NA,
# because colSums() is called without na.rm.
sqrt(setNames(df[[1]], rownames(df)))
## salmon_error express_error
## 5354 49817