Comparison of bowtie2, bwa, snap and salmon-bwa, and Salmon expression quantification

library(ggplot2)
library(reshape2)

SORGHUM

setwd("/home/chris/hydrogen/scripts/aligner_compare/sorghum")

real_tpm <- read.table("reads.tpm", sep="\t", as.is=T, header=F, colClasses=c("character", "numeric"))
names(real_tpm) <- c("id", "realtpm")

bowtie2 <- read.table("bowtie2/quant.sf", sep="\t", header=F, colClasses=c("character","numeric","numeric","numeric"))
bwa <- read.table("bwa/quant.sf", sep="\t", header=F, colClasses=c("character","numeric","numeric","numeric"))
snap <- read.table("snap/quant.sf", sep="\t", header=F, colClasses=c("character","numeric","numeric","numeric"))
salmonbwa <- read.table("salmon-bwa/quant.sf", sep="\t", header=F, colClasses=c("character","numeric","numeric","numeric"))

names(bowtie2) <- c("id", "bowtie2.length", "bowtie2.tpm", "bowtie2.reads")
names(bwa) <- c("id", "bwa.length", "bwa.tpm", "bwa.reads")
names(snap) <- c("id", "snap.length", "snap.tpm", "snap.reads")
names(salmonbwa) <- c("id", "salmonbwa.length", "salmonbwa.tpm", "salmonbwa.reads")

data <- merge(x=real_tpm, y=bowtie2, by.x="id", by.y="id", all=T)
data <- merge(x=data, y=bwa, by.x="id", by.y="id", all=T)
data <- merge(x=data, y=snap, by.x="id", by.y="id", all=T)
data <- merge(x=data, y=salmonbwa, by.x="id", by.y="id", all=T)
data <- data[,c(1,2,4,7,10,13)]
names(data) <- c("id", "real", "bowtie2", "bwa", "snap", "salmonbwa")
mdata <- melt(data, id="id")
ggplot(mdata, aes(x=log(value),colour=variable)) +
  geom_density() +
  xlim(-4,10) +
  ggtitle("sorghum tpm density plot")

plot of chunk unnamed-chunk-4

ggplot(data, aes(x=log(real), y=log(bowtie2))) +
  geom_point(alpha=0.1) +
  xlim(-1,10) +
  ylim(-1,10) +
  ggtitle("bowtie2 vs real tpm")

plot of chunk unnamed-chunk-5

ggplot(data, aes(x=log(real), y=log(bwa))) +
  geom_point(alpha=0.1) +
  xlim(-1,10) +
  ylim(-1,10) +
  ggtitle("bwa vs real tpm")

plot of chunk unnamed-chunk-6

ggplot(data, aes(x=log(real), y=log(snap))) +
  geom_point(alpha=0.1) +
  xlim(-1,10) +
  ylim(-1,10) +
  ggtitle("snap vs real tpm")

plot of chunk unnamed-chunk-7

ggplot(data, aes(x=log(real), y=log(salmonbwa))) +
  geom_point(alpha=0.1) +
  xlim(-1,10) +
  ylim(-1,10) +
  ggtitle("salmon-bwa vs real tpm")

plot of chunk unnamed-chunk-8

error <- data.frame((data$real - data$bwa)^2)
error$bowtie2_error <- (data$real - data$bowtie2)^2
error$snap_error <- (data$real - data$snap)^2
error$salmonbwa_error <- (data$real - data$salmonbwa)^2
names(error) <- c("bwa_error", "bowtie2_error", "snap_error", "salmon-bwa_error")

df <- data.frame(colSums(x=error))
apply(df,1,sqrt)
##        bwa_error    bowtie2_error       snap_error salmon-bwa_error 
##      14112.11786       5068.97560       3896.95416         95.25991

RICE

setwd("/home/chris/hydrogen/scripts/aligner_compare/rice")

real_tpm <- read.table("reads.tpm", sep="\t", as.is=T, header=F, colClasses=c("character", "numeric"))
names(real_tpm) <- c("id", "realtpm")

bowtie2 <- read.table("bowtie2/quant.sf", sep="\t", header=F, colClasses=c("character","numeric","numeric","numeric"))
bwa <- read.table("bwa/quant.sf", sep="\t", header=F, colClasses=c("character","numeric","numeric","numeric"))
snap <- read.table("snap/quant.sf", sep="\t", header=F, colClasses=c("character","numeric","numeric","numeric"))
salmonbwa <- read.table("salmon-bwa/quant.sf", sep="\t", header=F, colClasses=c("character","numeric","numeric","numeric"))

names(bowtie2) <- c("id", "bowtie2.length", "bowtie2.tpm", "bowtie2.reads")
names(bwa) <- c("id", "bwa.length", "bwa.tpm", "bwa.reads")
names(snap) <- c("id", "snap.length", "snap.tpm", "snap.reads")
names(salmonbwa) <- c("id", "salmonbwa.length", "salmonbwa.tpm", "salmonbwa.reads")

data <- merge(x=real_tpm, y=bowtie2, by.x="id", by.y="id", all=T)
data <- merge(x=data, y=bwa, by.x="id", by.y="id", all=T)
data <- merge(x=data, y=snap, by.x="id", by.y="id", all=T)
data <- merge(x=data, y=salmonbwa, by.x="id", by.y="id", all=T)
data <- data[,c(1,2,4,7,10,13)]
names(data) <- c("id", "real", "bowtie2", "bwa", "snap", "salmonbwa")
mdata <- melt(data, id="id")
ggplot(mdata, aes(x=log(value),colour=variable)) +
  geom_density() +
  xlim(-4,10) +
  ggtitle("rice tpm density plot")

plot of chunk unnamed-chunk-12

ggplot(data, aes(x=log(real), y=log(bowtie2))) +
  geom_point(alpha=0.1) +
  xlim(-1,10) +
  ylim(-1,10) +
  ggtitle("bowtie2 vs real tpm")

plot of chunk unnamed-chunk-13

ggplot(data, aes(x=log(real), y=log(bwa))) +
  geom_point(alpha=0.1) +
  xlim(-1,10) +
  ylim(-1,10) +
  ggtitle("bwa vs real tpm")

plot of chunk unnamed-chunk-14

ggplot(data, aes(x=log(real), y=log(snap))) +
  geom_point(alpha=0.1) +
  xlim(-1,10) +
  ylim(-1,10) +
  ggtitle("snap vs real tpm")

plot of chunk unnamed-chunk-15

ggplot(data, aes(x=log(real), y=log(salmonbwa))) +
  geom_point(alpha=0.1) +
  xlim(-1,10) +
  ylim(-1,10) +
  ggtitle("salmon-bwa vs real tpm")

plot of chunk unnamed-chunk-16

error <- data.frame((data$real - data$bwa)^2)
error$bowtie2_error <- (data$real - data$bowtie2)^2
error$snap_error <- (data$real - data$snap)^2
error$salmonbwa_error <- (data$real - data$salmonbwa)^2
names(error) <- c("bwa_error", "bowtie2_error", "snap_error", "salmon-bwa_error")

df <- data.frame(colSums(x=error))
apply(df,1,sqrt)
##        bwa_error    bowtie2_error       snap_error salmon-bwa_error 
##        17153.957         6491.273         9493.125         1567.724

MOUSE

setwd("/home/chris/hydrogen/scripts/aligner_compare/mouse")

real_tpm <- read.table("reads.tpm", sep="\t", as.is=T, header=F, colClasses=c("character", "numeric"))
names(real_tpm) <- c("id", "realtpm")

bowtie2 <- read.table("bowtie2/quant.sf", sep="\t", header=F, colClasses=c("character","numeric","numeric","numeric"))
bwa <- read.table("bwa/quant.sf", sep="\t", header=F, colClasses=c("character","numeric","numeric","numeric"))
snap <- read.table("snap/quant.sf", sep="\t", header=F, colClasses=c("character","numeric","numeric","numeric"))
salmonbwa <- read.table("salmon-bwa/quant.sf", sep="\t", header=F, colClasses=c("character","numeric","numeric","numeric"))

names(bowtie2) <- c("id", "bowtie2.length", "bowtie2.tpm", "bowtie2.reads")
names(bwa) <- c("id", "bwa.length", "bwa.tpm", "bwa.reads")
names(snap) <- c("id", "snap.length", "snap.tpm", "snap.reads")
names(salmonbwa) <- c("id", "salmonbwa.length", "salmonbwa.tpm", "salmonbwa.reads")

data <- merge(x=real_tpm, y=bowtie2, by.x="id", by.y="id", all=T)
data <- merge(x=data, y=bwa, by.x="id", by.y="id", all=T)
data <- merge(x=data, y=snap, by.x="id", by.y="id", all=T)
data <- merge(x=data, y=salmonbwa, by.x="id", by.y="id", all=T)
data <- data[,c(1,2,4,7,10,13)]
names(data) <- c("id", "real", "bowtie2", "bwa", "snap", "salmonbwa")
mdata <- melt(data, id="id")
ggplot(mdata, aes(x=log(value),colour=variable)) +
  geom_density() +
  xlim(-4,10) +
  ggtitle("mouse tpm density plot")

plot of chunk unnamed-chunk-20

ggplot(data, aes(x=log(real), y=log(bowtie2))) +
  geom_point(alpha=0.1) +
  xlim(-1,10) +
  ylim(-1,10) +
  ggtitle("bowtie2 vs real tpm")

plot of chunk unnamed-chunk-21

ggplot(data, aes(x=log(real), y=log(bwa))) +
  geom_point(alpha=0.1) +
  xlim(-1,10) +
  ylim(-1,10) +
  ggtitle("bwa vs real tpm")

plot of chunk unnamed-chunk-22

ggplot(data, aes(x=log(real), y=log(snap))) +
  geom_point(alpha=0.1) +
  xlim(-1,10) +
  ylim(-1,10) +
  ggtitle("snap vs real tpm")

plot of chunk unnamed-chunk-23

ggplot(data, aes(x=log(real), y=log(salmonbwa))) +
  geom_point(alpha=0.1) +
  xlim(-1,10) +
  ylim(-1,10) +
  ggtitle("salmon-bwa vs real tpm")

plot of chunk unnamed-chunk-24

error <- data.frame((data$real - data$bwa)^2)
error$bowtie2_error <- (data$real - data$bowtie2)^2
error$snap_error <- (data$real - data$snap)^2
error$salmonbwa_error <- (data$real - data$salmonbwa)^2
names(error) <- c("bwa_error", "bowtie2_error", "snap_error", "salmon-bwa_error")

df <- data.frame(colSums(x=error))
apply(df,1,sqrt)
##        bwa_error    bowtie2_error       snap_error salmon-bwa_error 
##        18108.409         7208.840        13313.957         3041.594

YEAST

setwd("/home/chris/hydrogen/scripts/aligner_compare/yeast")

real_tpm <- read.table("reads.tpm", sep="\t", as.is=T, header=F, colClasses=c("character", "numeric"))
names(real_tpm) <- c("id", "realtpm")

bowtie2 <- read.table("bowtie2/quant.sf", sep="\t", header=F, colClasses=c("character","numeric","numeric","numeric"))
bwa <- read.table("bwa/quant.sf", sep="\t", header=F, colClasses=c("character","numeric","numeric","numeric"))
snap <- read.table("snap/quant.sf", sep="\t", header=F, colClasses=c("character","numeric","numeric","numeric"))
salmonbwa <- read.table("salmon-bwa/quant.sf", sep="\t", header=F, colClasses=c("character","numeric","numeric","numeric"))

names(bowtie2) <- c("id", "bowtie2.length", "bowtie2.tpm", "bowtie2.reads")
names(bwa) <- c("id", "bwa.length", "bwa.tpm", "bwa.reads")
names(snap) <- c("id", "snap.length", "snap.tpm", "snap.reads")
names(salmonbwa) <- c("id", "salmonbwa.length", "salmonbwa.tpm", "salmonbwa.reads")

data <- merge(x=real_tpm, y=bowtie2, by.x="id", by.y="id", all=T)
data <- merge(x=data, y=bwa, by.x="id", by.y="id", all=T)
data <- merge(x=data, y=snap, by.x="id", by.y="id", all=T)
data <- merge(x=data, y=salmonbwa, by.x="id", by.y="id", all=T)
data <- data[,c(1,2,4,7,10,13)]
names(data) <- c("id", "real", "bowtie2", "bwa", "snap", "salmonbwa")
mdata <- melt(data, id="id")
ggplot(mdata, aes(x=log(value),colour=variable)) +
  geom_density() +
  xlim(-4,10) +
  ggtitle("sorghum tpm density plot")

plot of chunk unnamed-chunk-28

ggplot(data, aes(x=log(real), y=log(bowtie2))) +
  geom_point(alpha=0.1) +
  xlim(-1,10) +
  ylim(-1,10) +
  ggtitle("bowtie2 vs real tpm")

plot of chunk unnamed-chunk-29

ggplot(data, aes(x=log(real), y=log(bwa))) +
  geom_point(alpha=0.1) +
  xlim(-1,10) +
  ylim(-1,10) +
  ggtitle("bwa vs real tpm")

plot of chunk unnamed-chunk-30

ggplot(data, aes(x=log(real), y=log(snap))) +
  geom_point(alpha=0.1) +
  xlim(-1,10) +
  ylim(-1,10) +
  ggtitle("snap vs real tpm")

plot of chunk unnamed-chunk-31

ggplot(data, aes(x=log(real), y=log(salmonbwa))) +
  geom_point(alpha=0.1) +
  xlim(-1,10) +
  ylim(-1,10) +
  ggtitle("salmon-bwa vs real tpm")

plot of chunk unnamed-chunk-32

error <- data.frame((data$real - data$bwa)^2)
error$bowtie2_error <- (data$real - data$bowtie2)^2
error$snap_error <- (data$real - data$snap)^2
error$salmonbwa_error <- (data$real - data$salmonbwa)^2
names(error) <- c("bwa_error", "bowtie2_error", "snap_error", "salmon-bwa_error")

df <- data.frame(colSums(x=error))
apply(df,1,sqrt)
##        bwa_error    bowtie2_error       snap_error salmon-bwa_error 
##         22661.60         27646.82         21136.79         16155.67
#library(markdown)
#setwd("/home/chris/hydrogen/scripts/aligner_compare/")
#result <- rpubsUpload("Aligner Comparison", "compare.html")
#if (!is.null(result$continueUrl)) 
#    browseURL(result$continueUrl) else stop(result$error)