library(ggplot2)
#library(gplots)
library(reshape2)
# Load the per-contig CSV exported for each assembler, tag every row with the
# assembler it came from, and stack the two tables into `both`.
#
# NOTE(review): setwd() ties the script to one machine's directory layout; it
# is kept because the relative file names below are resolved against it.
setwd("~/documents/")
trinity <- read.table(
  "trinity-Saccharomyces_cerevisiae.R64-1-1.75.pep.all.fa_Trinity.fasta_contigs.csv",
  header = TRUE, sep = ",", as.is = TRUE
)
oases <- read.table(
  "oases-Saccharomyces_cerevisiae.R64-1-1.75.pep.all.fa_Oases.fasta_contigs.csv",
  header = TRUE, sep = ",", as.is = TRUE
)
trinity$assembler <- "trinity"
oases$assembler <- "oases"

# rbind() requires both tables to share the same column names.
both <- rbind(trinity, oases)

# Force these two metrics to numeric. If a column was read as character
# (as.is = TRUE keeps text as character), entries that do not parse as numbers
# become NA and as.numeric() emits a coercion warning.
both$edit_distance_per_base <- as.numeric(both$edit_distance_per_base)
both$prop_gc <- as.numeric(both$prop_gc)

# Per-assembler density curve of the `length` column.
# The x scale is limited to [0, 15000]; rows outside that range are dropped
# before the density is estimated, which is why ggplot2 reports removed rows.
ggplot(
  data = both,
  mapping = aes(x = length, colour = assembler, group = assembler)
) +
  geom_density() +
  labs(title = "length distribution") +
  scale_x_continuous(limits = c(0, 15000))
## Warning: Removed 74 rows containing non-finite values (stat_density).
## Warning: Removed 11 rows containing non-finite values (stat_density).

## plot of chunk distributions

# Per-assembler density curve of the `prop_gc` column (no axis limits applied).
ggplot(
  data = both,
  mapping = aes(x = prop_gc, colour = assembler, group = assembler)
) +
  geom_density() +
  labs(title = "GC content distribution")

## plot of chunk distributions

# Per-assembler density curve of the `linguistic_complexity_6` column.
ggplot(
  data = both,
  mapping = aes(
    x = linguistic_complexity_6,
    colour = assembler,
    group = assembler
  )
) +
  geom_density() +
  labs(title = "linguistic complexity distribution")

## plot of chunk distributions

# Per-assembler density curve of the `orf_length` column.
# The x scale is limited to [0, 2500]; rows outside that range are dropped
# before the density is estimated, which is why ggplot2 reports removed rows.
ggplot(
  data = both,
  mapping = aes(x = orf_length, colour = assembler, group = assembler)
) +
  geom_density() +
  labs(title = "Orf Length distribution") +
  scale_x_continuous(limits = c(0, 2500))
## Warning: Removed 54 rows containing non-finite values (stat_density).
## Warning: Removed 11 rows containing non-finite values (stat_density).

## plot of chunk distributions

# Per-assembler density curve of the `edit_distance_per_base` column.
# NOTE(review): the removed-rows warning here is presumably caused by NA values
# left by the as.numeric() coercion of this column -- confirm against the data.
ggplot(
  data = both,
  mapping = aes(
    x = edit_distance_per_base,
    colour = assembler,
    group = assembler
  )
) +
  geom_density() +
  labs(title = "Edit distance distribution")
## Warning: Removed 47 rows containing non-finite values (stat_density).

## plot of chunk distributions